Compare commits

...

10 Commits
master ... r1.5

Author SHA1 Message Date
  lujiale 9bce7edf7f !2089 upgrade ascend 1012 3 years ago
  yanghaoran 3b517344f0 upgrade ascend 1012 3 years ago
  lujiale 854631a7b5 !2088 upgrade ascend 1009 3 years ago
  yanghaoran 051ef7a385 upgrade ascend 1009 3 years ago
  lujiale d86bc5beb4 !2087 upgrade ascend 0928 3 years ago
  yanghaoran 7f0b65e699 upgrade ascend 0928 3 years ago
  lujiale ecf9e8eec7 !2086 upgrade ascend 0925 3 years ago
  yanghaoran 5e3b7eb856 upgrade ascend 0925 3 years ago
  lujiale 15116d1c0c !2085 upgarde ascend 0916 3 years ago
  yanghaoran c28af94028 upgarde ascend 0916 3 years ago
64 changed files with 2454 additions and 859 deletions
1. ge/common/model/ge_model.cc (+4, -5)
2. ge/common/model/ge_model.h (+7, -5)
3. inc/external/acl/OWNERS (+1, -0)
4. inc/external/acl/acl_mdl.h (+3, -3)
5. inc/external/acl/acl_rt.h (+38, -1)
6. inc/external/acl/acl_tdt_queue.h (+426, -0)
7. inc/external/acl/error_codes/rt_error_codes.h (+1, -0)
8. inc/external/acl/ops/acl_dvpp.h (+1, -1)
9. inc/external/ge/ge_api_error_codes.h (+13, -7)
10. inc/external/ge/ge_api_types.h (+1, -0)
11. inc/framework/common/debug/ge_log.h (+53, -35)
12. inc/framework/common/debug/log.h (+70, -70)
13. inc/framework/common/ge_inner_error_codes.h (+21, -21)
14. inc/framework/common/ge_types.h (+13, -4)
15. inc/framework/common/op/ge_op_utils.h (+1, -1)
16. inc/framework/common/string_util.h (+2, -2)
17. inc/framework/common/types.h (+2, -0)
18. inc/framework/common/util.h (+95, -109)
19. inc/framework/engine/dnnengine.h (+20, -3)
20. inc/framework/generator/ge_generator.h (+10, -3)
21. inc/framework/omg/omg_inner_types.h (+1, -0)
22. inc/framework/omg/version.h (+1, -1)
23. metadef (+1, -1)
24. third_party/fwkacllib/inc/external/runtime/rt_error_codes.h (+107, -109)
25. third_party/fwkacllib/inc/ops/array_ops.h (+4, -4)
26. third_party/fwkacllib/inc/ops/cluster.h (+58, -0)
27. third_party/fwkacllib/inc/ops/data_flow_ops.h (+5, -0)
28. third_party/fwkacllib/inc/ops/elewise_calculation_ops.h (+27, -27)
29. third_party/fwkacllib/inc/ops/linalg_ops.h (+4, -4)
30. third_party/fwkacllib/inc/ops/math_ops.h (+53, -2)
31. third_party/fwkacllib/inc/ops/matrix_calculation_ops.h (+72, -8)
32. third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h (+23, -0)
33. third_party/fwkacllib/inc/ops/nn_calculation_ops.h (+111, -138)
34. third_party/fwkacllib/inc/ops/nn_detect_ops.h (+172, -2)
35. third_party/fwkacllib/inc/ops/nn_training_ops.h (+14, -1)
36. third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h (+42, -0)
37. third_party/fwkacllib/inc/ops/pad_ops.h (+32, -0)
38. third_party/fwkacllib/inc/ops/random_ops.h (+19, -1)
39. third_party/fwkacllib/inc/ops/reduce_ops.h (+9, -2)
40. third_party/fwkacllib/inc/ops/rnn.h (+50, -6)
41. third_party/fwkacllib/inc/ops/selection_ops.h (+34, -0)
42. third_party/fwkacllib/inc/ops/transformation_ops.h (+8, -2)
43. third_party/fwkacllib/inc/ops/vector_search.h (+82, -1)
44. third_party/fwkacllib/inc/runtime/base.h (+2, -14)
45. third_party/fwkacllib/inc/runtime/config.h (+31, -28)
46. third_party/fwkacllib/inc/runtime/context.h (+3, -3)
47. third_party/fwkacllib/inc/runtime/dev.h (+29, -10)
48. third_party/fwkacllib/inc/runtime/dvfsprofile.h (+3, -3)
49. third_party/fwkacllib/inc/runtime/event.h (+9, -9)
50. third_party/fwkacllib/inc/runtime/kernel.h (+29, -26)
51. third_party/fwkacllib/inc/runtime/mem.h (+75, -44)
52. third_party/fwkacllib/inc/runtime/rt.h (+3, -3)
53. third_party/fwkacllib/inc/runtime/rt_ffts.h (+6, -4)
54. third_party/fwkacllib/inc/runtime/rt_ffts_plus.h (+7, -3)
55. third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h (+7, -7)
56. third_party/fwkacllib/inc/runtime/rt_mem_queue.h (+416, -0)
57. third_party/fwkacllib/inc/runtime/rt_model.h (+12, -12)
58. third_party/fwkacllib/inc/runtime/rt_stars.h (+4, -4)
59. third_party/fwkacllib/inc/runtime/rt_stars_define.h (+3, -3)
60. third_party/fwkacllib/inc/runtime/stream.h (+15, -15)
61. third_party/fwkacllib/inc/toolchain/plog.h (+59, -59)
62. third_party/fwkacllib/inc/toolchain/prof_acl_api.h (+28, -28)
63. third_party/fwkacllib/inc/toolchain/prof_callback.h (+1, -1)
64. third_party/fwkacllib/inc/toolchain/prof_reporter.h (+1, -4)

ge/common/model/ge_model.cc (+4, -5)

@@ -34,7 +34,6 @@ void GeModel::Init() {
}

GeModel::GeModel() {
attrs_.InitDefault();
Init();
}

@@ -78,12 +77,12 @@ void GeModel::SetPlatformVersion(const std::string &platform_version) { this->pl

void GeModel::SetPlatformType(uint8_t platform_type) { this->platform_type_ = platform_type; }

void GeModel::SetAttr(const ProtoAttrMapHelper &attrs) { attrs_ = attrs; }
void GeModel::SetAttr(const ProtoAttrMap &attrs) { attrs_ = attrs; }

ProtoAttrMapHelper GeModel::MutableAttrMap() { return attrs_; }
ProtoAttrMap &GeModel::MutableAttrMap() { return attrs_; }

ConstProtoAttrMapHelper GeModel::GetAttrMap() const {
return ConstProtoAttrMapHelper(attrs_.GetProtoOwner(), attrs_.GetProtoMsg());
ConstProtoAttrMap &GeModel::GetAttrMap() const {
return attrs_;
}

Status GeModel::GetSessionId(uint32_t model_id, uint64_t &session_id) const {


ge/common/model/ge_model.h (+7, -5)

@@ -17,10 +17,12 @@
#ifndef GE_MODEL_GE_MODEL_H_
#define GE_MODEL_GE_MODEL_H_

#include <securec.h>
#include <map>
#include <memory>
#include <string>

#include "securec.h"
#include "runtime/rt.h"
#include "common/tbe_kernel_store.h"
#include "common/cust_aicpu_kernel_store.h"
#include "framework/common/debug/log.h"
@@ -60,9 +62,9 @@ class GeModel : public AttrHolder {
void SetPlatformVersion(const std::string &platform_version);
void SetPlatformType(uint8_t platform_type);

void SetAttr(const ProtoAttrMapHelper &attrs);
void SetAttr(const ProtoAttrMap &attrs);

ProtoAttrMapHelper MutableAttrMap() override;
ProtoAttrMap &MutableAttrMap() override;

using AttrHolder::SetAttr;
using AttrHolder::GetAllAttrs;
@@ -77,12 +79,12 @@ class GeModel : public AttrHolder {
}

protected:
ConstProtoAttrMapHelper GetAttrMap() const override;
ConstProtoAttrMap &GetAttrMap() const override;

private:
void Init();

ProtoAttrMapHelper attrs_; /*lint !e148*/
ProtoAttrMap attrs_; /*lint !e148*/

Graph graph_;
std::shared_ptr<domi::ModelTaskDef> task_; /*lint !e148*/


inc/external/acl/OWNERS (+1, -0)

@@ -5,5 +5,6 @@ approvers:
reviewers:
- justin_zhao
- zhangyongfeng88
- w00267184
options:
no_parent_owners: true

inc/external/acl/acl_mdl.h (+3, -3)

@@ -869,7 +869,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet,
*
* @see aclmdlCreateAIPP
*/
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0,
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t cscSwitch, int16_t cscMatrixR0C0,
int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0,
int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0,
int16_t cscMatrixR2C1, int16_t cscMatrixR2C2,
@@ -1106,7 +1106,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a
*
* @param modelId [IN] model id
* @param index [IN] index of tensor
* @param aippinfo [OUT] Pointer for static aipp info
* @param aippInfo [OUT] Pointer for static aipp info
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval ACL_ERROR_MODEL_AIPP_NOT_EXIST The tensor of index is not configured with aipp
@@ -1115,7 +1115,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, a
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem |
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName
*/
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo);
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippInfo);

/**
* @ingroup AscendCL


inc/external/acl/acl_rt.h (+38, -1)

@@ -541,7 +541,7 @@ ACL_FUNC_VISIBILITY aclError aclrtSynchronizeEvent(aclrtEvent event);
*
* @see aclrtCreateEvent | aclrtRecordEvent | aclrtSynchronizeStream
*/
ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, aclrtEvent end);
ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent startEvent, aclrtEvent endEvent);

/**
* @ingroup AscendCL
@@ -733,6 +733,43 @@ ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const v

/**
* @ingroup AscendCL
* @brief synchronous memory replication of two-dimensional matrix between host and device
*
* @param dst [IN] destination address pointer
* @param dpitch [IN] pitch of destination memory
* @param src [IN] source address pointer
* @param spitch [IN] pitch of source memory
* @param width [IN] width of matrix transfer
* @param height [IN] height of matrix transfer
* @param kind [IN] memcpy type
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpy2d(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width,
size_t height, aclrtMemcpyKind kind);

/**
* @ingroup AscendCL
* @brief asynchronous memory replication of two-dimensional matrix between host and device
*
* @param dst [IN] destination address pointer
* @param dpitch [IN] pitch of destination memory
* @param src [IN] source address pointer
* @param spitch [IN] pitch of source memory
* @param width [IN] width of matrix transfer
* @param height [IN] height of matrix transfer
* @param kind [IN] memcpy type
* @param stream [IN] asynchronized task stream
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError aclrtMemcpy2dAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width,
size_t height, aclrtMemcpyKind kind, aclrtStream stream);

/**
* @ingroup AscendCL
* @brief Asynchronous initialize memory
* and set contents of memory to specified value async
*

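A minimal usage sketch for the two new 2D copy interfaces declared above. The buffers, pitches and stream are assumed to come from elsewhere (for example aclrtMalloc and aclrtCreateStream, which are not part of this change), and ACL_MEMCPY_HOST_TO_DEVICE is the usual aclrtMemcpyKind value from acl_rt.h:

#include "acl/acl_rt.h"  // assumed install path of the header above

// Copies a width x height block of bytes between pitched buffers.
aclError CopyTileHostToDevice(void *devDst, size_t dpitch, const void *hostSrc, size_t spitch,
                              size_t width, size_t height, aclrtStream stream) {
  // Synchronous 2D copy, host -> device.
  aclError ret = aclrtMemcpy2d(devDst, dpitch, hostSrc, spitch, width, height,
                               ACL_MEMCPY_HOST_TO_DEVICE);
  if (ret != ACL_SUCCESS) {
    return ret;
  }
  // Asynchronous variant on a caller-provided stream; the caller must synchronize
  // the stream before reading devDst.
  return aclrtMemcpy2dAsync(devDst, dpitch, hostSrc, spitch, width, height,
                            ACL_MEMCPY_HOST_TO_DEVICE, stream);
}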

inc/external/acl/acl_tdt_queue.h (+426, -0)

@@ -0,0 +1,426 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_
#define INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_

#include "acl/acl_base.h"

#ifdef __cplusplus
extern "C" {
#endif

#define ACL_TDT_QUEUE_PERMISSION_MANAGE 1
#define ACL_TDT_QUEUE_PERMISSION_DEQUEUE 2
#define ACL_TDT_QUEUE_PERMISSION_ENQUEUE 4

typedef void *acltdtBuf;
typedef struct tagMemQueueAttr acltdtQueueAttr;
typedef struct acltdtQueueRouteList acltdtQueueRouteList;
typedef struct acltdtQueueRouteQueryInfo acltdtQueueRouteQueryInfo;
typedef struct acltdtQueueRoute acltdtQueueRoute;

typedef enum { ACL_TDT_QUEUE_NAME_PTR = 0, ACL_TDT_QUEUE_DEPTH_UINT32 } acltdtQueueAttrType;

typedef enum {
ACL_TDT_QUEUE_ROUTE_SRC_UINT32 = 0,
ACL_TDT_QUEUE_ROUTE_DST_UINT32,
ACL_TDT_QUEUE_ROUTE_STATUS_INT32
} acltdtQueueRouteParamType;

typedef enum {
ACL_TDT_QUEUE_ROUTE_QUERY_SRC = 0,
ACL_TDT_QUEUE_ROUTE_QUERY_DST,
ACL_TDT_QUEUE_ROUTE_QUERY_SRC_AND_DST
} acltdtQueueRouteQueryMode;

typedef enum {
ACL_TDT_QUEUE_ROUTE_QUERY_MODE_ENUM = 0,
ACL_TDT_QUEUE_ROUTE_QUERY_SRC_ID_UINT32,
ACL_TDT_QUEUE_ROUTE_QUERY_DST_ID_UINT32
} acltdtQueueRouteQueryInfoParamType;

/**
* @ingroup AscendCL
* @brief create queue
*
* @param attr [IN] pointer to the queue attr
* @param qid [OUT] pointer to the qid
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtDestroyQueue
*/
ACL_FUNC_VISIBILITY aclError acltdtCreateQueue(const acltdtQueueAttr *attr, uint32_t *qid);

/**
* @ingroup AscendCL
* @brief destroy queue
*
* @param qid [IN] qid which to be destroyed
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateQueue
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyQueue(uint32_t qid);

/**
* @ingroup AscendCL
* @brief enqueue function
*
* @param qid [IN] qid
* @param buf [IN] acltdtBuf
* @param timeout [IN] timeout
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtDequeue
*/
ACL_FUNC_VISIBILITY aclError acltdtEnqueue(uint32_t qid, acltdtBuf buf, int32_t timeout);

/**
* @ingroup AscendCL
* @brief dequeue function
*
* @param qid [IN] qid
* @param buf [OUT] pointer to the acltdtBuf
* @param timeout [IN] timeout
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtEnqueue
*/
ACL_FUNC_VISIBILITY aclError acltdtDequeue(uint32_t qid, acltdtBuf *buf, int32_t timeout);

/**
* @ingroup AscendCL
* @brief grant queue to other process
*
* @param qid [IN] qid
* @param pid [IN] pid of dst process
* @param permission [IN] permission of queue
* @param timeout [IN] timeout
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see ACL_TDT_QUEUE_PERMISSION_MANAGE | ACL_TDT_QUEUE_PERMISSION_DEQUEUE | ACL_TDT_QUEUE_PERMISSION_ENQUEUE
*/
ACL_FUNC_VISIBILITY aclError acltdtGrantQueue(uint32_t qid, int32_t pid, uint32_t permission, int32_t timeout);

/**
* @ingroup AscendCL
* @brief attach queue in current process
*
* @param qid [IN] qid
* @param timeout [IN] timeout
* @param permission [OUT] permission of queue
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtGrantQueue
*/
ACL_FUNC_VISIBILITY aclError acltdtAttachQueue(uint32_t qid, int32_t timeout, uint32_t *permission);

/**
* @ingroup AscendCL
* @brief bind queue routes
*
* @param qRouteList [IN|OUT] pointer to the route list
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acltdtBindQueueRoutes(acltdtQueueRouteList *qRouteList);

/**
* @ingroup AscendCL
* @brief unbind queue routes
*
* @param qRouteList [IN|OUT] pointer to the route list
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acltdtUnbindQueueRoutes(acltdtQueueRouteList *qRouteList);

/**
* @ingroup AscendCL
* @brief query queue routes according to query mode
*
* @param queryInfo [IN] pointer to the queue route query info
* @param qRouteList [IN|OUT] pointer to the route list
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*/
ACL_FUNC_VISIBILITY aclError acltdtQueryQueueRoutes(const acltdtQueueRouteQueryInfo *queryInfo,
acltdtQueueRouteList *qRouteList);

/**
* @ingroup AscendCL
* @brief alloc acltdtBuf
*
* @param size [IN] alloc buf size
* @param buf [OUT] pointer to the acltdtBuf
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtFreeBuf
*/
ACL_FUNC_VISIBILITY aclError acltdtAllocBuf(size_t size, acltdtBuf *buf);

/**
* @ingroup AscendCL
* @brief free acltdtBuf
*
* @param buf [IN] pointer to the acltdtBuf
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtAllocBuf
*/
ACL_FUNC_VISIBILITY aclError acltdtFreeBuf(acltdtBuf buf);

/**
* @ingroup AscendCL
* @brief get data buf address
*
* @param buf [IN] acltdtBuf
* @param dataPtr [OUT] pointer to the data ptr which is acquired from acltdtBuf
* @param size [OUT] pointer to the size
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtAllocBuf
*/
ACL_FUNC_VISIBILITY aclError acltdtGetBufData(const acltdtBuf buf, void **dataPtr, size_t *size);

/**
* @ingroup AscendCL
* @brief Create the queue attr
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtDestroyQueueAttr
*/
ACL_FUNC_VISIBILITY acltdtQueueAttr *acltdtCreateQueueAttr();

/**
* @ingroup AscendCL
* @brief Destroy the queue attr
*
* @param attr [IN] pointer to the queue attr
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateQueueAttr
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueAttr(const acltdtQueueAttr *attr);

/**
* @ingroup AscendCL
* @brief Set parameter for queue attr
*
* @param attr [IN|OUT] pointer to the queue attr
* @param type [IN] parameter type
* @param len [IN] parameter length
* @param param [IN] pointer to parameter value
*
* @retval ACL_SUCCESS for success, other for failure
*
* @see acltdtCreateQueueAttr
*/
ACL_FUNC_VISIBILITY aclError acltdtSetQueueAttr(acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len,
const void *param);

/**
* @ingroup AscendCL
*
* @brief Get parameter for queue attr.
*
* @param attr [IN] pointer to the queue attr
* @param type [IN] parameter type
* @param len [IN] parameter length
* @param paramRetSize [OUT] pointer to parameter real length
* @param param [OUT] pointer to parameter value
*
* @retval ACL_SUCCESS for success, other for failure
*
* @see acltdtCreateQueueAttr
*/
ACL_FUNC_VISIBILITY aclError acltdtGetQueueAttr(const acltdtQueueAttr *attr, acltdtQueueAttrType type, size_t len,
size_t *paramRetSize, void *param);

/**
* @ingroup AscendCL
* @brief Create the queue route
*
* @param srcId [IN] src id of queue route
* @param dstId [IN] dst id of queue route
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtDestroyQueueRoute
*/
ACL_FUNC_VISIBILITY acltdtQueueRoute *acltdtCreateQueueRoute(uint32_t srcId, uint32_t dstId);

/**
* @ingroup AscendCL
* @brief Destroy the queue attr
*
* @param route [IN] pointer to the queue route
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateQueueRoute
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRoute(const acltdtQueueRoute *route);

/**
* @ingroup AscendCL
*
* @brief Get parameter for queue route.
*
* @param route [IN] pointer to the queue route
* @param type [IN] parameter type
* @param len [IN] parameter length
* @param paramRetSize [OUT] pointer to parameter real length
* @param param [OUT] pointer to parameter value
*
* @retval ACL_SUCCESS for success, other for failure
*
* @see acltdtCreateQueueRoute
*/
ACL_FUNC_VISIBILITY aclError acltdtGetQueueRouteParam(const acltdtQueueRoute *route, acltdtQueueRouteParamType type,
size_t len, size_t *paramRetSize, void *param);

/**
* @ingroup AscendCL
* @brief Create the queue route list
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtDestroyQueueRouteList
*/
ACL_FUNC_VISIBILITY acltdtQueueRouteList *acltdtCreateQueueRouteList();

/**
* @ingroup AscendCL
* @brief Destroy the queue route list
*
* @param routeList [IN] pointer to the queue route list
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateQueueRouteList
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteList(const acltdtQueueRouteList *routeList);

/**
* @ingroup AscendCL
* @brief add queue route to the route list
*
* @param routeList [IN|OUT] pointer to the queue route list
* @param route [IN] pointer to the queue route
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute
*
*/
ACL_FUNC_VISIBILITY aclError acltdtAddQueueRoute(acltdtQueueRouteList *routeList, const acltdtQueueRoute *route);

/**
* @ingroup AscendCL
* @brief get queue route from route list
*
* @param routeList [IN] pointer to the queue route list
* @param index [IN] index of queue route in route list
* @param route [IN|OUT] pointer to the queue route
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateQueueRouteList | acltdtCreateQueueRoute
*
*/
ACL_FUNC_VISIBILITY aclError acltdtGetQueueRoute(const acltdtQueueRouteList *routeList, size_t index,
acltdtQueueRoute *route);

/**
* @ingroup AscendCL
* @brief Create the queue route query info
*
* @retval null for failed
* @retval OtherValues success
*
* @see acltdtDestroyQueueRouteQueryInfo
*/
ACL_FUNC_VISIBILITY acltdtQueueRouteQueryInfo *acltdtCreateQueueRouteQueryInfo();

/**
* @ingroup AscendCL
* @brief Destroy the queue route query info
*
* @param info [IN] pointer to the queue route info
*
* @retval ACL_SUCCESS The function is successfully executed.
* @retval OtherValues Failure
*
* @see acltdtCreateQueueRouteQueryInfo
*
*/
ACL_FUNC_VISIBILITY aclError acltdtDestroyQueueRouteQueryInfo(const acltdtQueueRouteQueryInfo *info);

/**
* @ingroup AscendCL
* @brief Set parameter for queue route info
*
* @param attr [IN|OUT] pointer to the queue route info
* @param type [IN] parameter type
* @param len [IN] parameter length
* @param param [IN] pointer to parameter value
*
* @retval ACL_SUCCESS for success, other for failure
*
* @see acltdtCreateQueueRouteQueryInfo
*/
ACL_FUNC_VISIBILITY aclError acltdtSetQueueRouteQueryInfo(acltdtQueueRouteQueryInfo *param,
acltdtQueueRouteQueryInfoParamType type, size_t len,
const void *value);

#ifdef __cplusplus
}
#endif

#endif // INC_EXTERNAL_ACL_ACL_TDT_QUEUE_H_
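
A hedged end-to-end sketch of the queue API introduced by this header: create a queue, push one buffer, pop it back. The 256-byte payload and 3000 ms timeouts are illustrative, and real code would normally set a queue name and depth through acltdtSetQueueAttr before creation:

#include <cstdint>
#include <cstring>
#include "acl/acl_tdt_queue.h"

bool RoundTripOneBuffer() {
  acltdtQueueAttr *attr = acltdtCreateQueueAttr();
  if (attr == nullptr) {
    return false;
  }
  uint32_t qid = 0U;
  bool ok = (acltdtCreateQueue(attr, &qid) == ACL_SUCCESS);
  (void)acltdtDestroyQueueAttr(attr);
  if (!ok) {
    return false;
  }

  acltdtBuf buf = nullptr;
  ok = (acltdtAllocBuf(256U, &buf) == ACL_SUCCESS);
  if (ok) {
    void *data = nullptr;
    size_t size = 0U;
    ok = (acltdtGetBufData(buf, &data, &size) == ACL_SUCCESS);
    if (ok) {
      (void)std::memset(data, 0, size);                     // fill the payload
      ok = (acltdtEnqueue(qid, buf, 3000) == ACL_SUCCESS);  // 3 s timeout
    }
  }

  acltdtBuf out = nullptr;
  if (ok) {
    ok = (acltdtDequeue(qid, &out, 3000) == ACL_SUCCESS);
  }
  if (out != nullptr) {
    (void)acltdtFreeBuf(out);  // buffer ownership across enqueue/dequeue is not covered here
  }
  (void)acltdtDestroyQueue(qid);
  return ok;
}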

inc/external/acl/error_codes/rt_error_codes.h (+1, -0)

@@ -56,6 +56,7 @@ static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event res
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error


inc/external/acl/ops/acl_dvpp.h (+1, -1)

@@ -125,7 +125,7 @@ enum acldvppPixelFormat {
enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL };

// Supported Channel Mode
enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 };
enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4, DVPP_CHNMODE_PNGD = 8 };

// Supported Border Type
enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 };


inc/external/ge/ge_api_error_codes.h (+13, -7)

@@ -72,17 +72,23 @@ class GE_FUNC_VISIBILITY StatusFactory {

class GE_FUNC_VISIBILITY ErrorNoRegisterar {
public:
ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }
ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); }
ErrorNoRegisterar(uint32_t err, const std::string &desc) {
StatusFactory::Instance()->RegisterErrorNo(err, desc);
}
ErrorNoRegisterar(uint32_t err, const char *desc) {
StatusFactory::Instance()->RegisterErrorNo(err, desc);
}
~ErrorNoRegisterar() {}
};

// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \
constexpr ge::Status name = \
((0xFF & (static_cast<uint8_t>(runtime))) << 30) | ((0xFF & (static_cast<uint8_t>(type))) << 28) | \
((0xFF & (static_cast<uint8_t>(level))) << 25) | ((0xFF & (static_cast<uint8_t>(sysid))) << 17) | \
((0xFF & (static_cast<uint8_t>(modid))) << 12) | (0x0FFF & (static_cast<uint16_t>(value))); \
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \
constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17) | \
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12) | \
(static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value))); \
const ErrorNoRegisterar g_##name##_errorno(name, desc);

#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc);
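
As a reading aid, a small decoder for the field layout that the reworked GE_ERRORNO packs; the field widths come from the comment above and the shift amounts from the macro itself. The helper is illustrative and not part of GE:

#include <cstdint>

// | runtime(2) | type(2) | level(3) | sysid(8) | modid(5) | value(12) |
//   bits 31-30   29-28     27-25      24-17      16-12      11-0
struct GeStatusFields {
  uint32_t runtime;
  uint32_t type;
  uint32_t level;
  uint32_t sysid;
  uint32_t modid;
  uint32_t value;
};

inline GeStatusFields DecodeGeStatus(const uint32_t status) {
  return GeStatusFields{(status >> 30U) & 0x3U,  (status >> 28U) & 0x3U,  (status >> 25U) & 0x7U,
                        (status >> 17U) & 0xFFU, (status >> 12U) & 0x1FU, status & 0x0FFFU};
}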


inc/external/ge/ge_api_types.h (+1, -0)

@@ -67,6 +67,7 @@ const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOp
const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput";
const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode";
const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange";
const char *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr";

// Option key: memory init
const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize";
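
A hedged sketch of how the new option key would be supplied; GE options travel as string key/value maps, and both the "1" value and the ge:: qualification are assumptions rather than something shown in this diff:

#include <map>
#include <string>
#include "ge/ge_api_types.h"  // assumed install path of the header above

std::map<std::string, std::string> BuildOptions() {
  std::map<std::string, std::string> options;
  options[ge::OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR] = "1";  // key added by this change; "1" is an assumed enable value
  return options;  // handed to GE initialization or session creation elsewhere
}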


inc/framework/common/debug/ge_log.h (+53, -35)

@@ -33,7 +33,7 @@
extern "C" {
#endif

#define GE_MODULE_NAME static_cast<int>(GE)
#define GE_MODULE_NAME static_cast<int32_t>(GE)

// trace status of log
enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP };
@@ -51,43 +51,61 @@ class GE_FUNC_VISIBILITY GeLog {
};

inline bool IsLogEnable(int module_name, int log_level) {
int32_t enable = CheckLogLevel(module_name, log_level);
const int32_t enable = CheckLogLevel(module_name, log_level);
// 1:enable, 0:disable
return (enable == 1);
}

#define GELOGE(ERROR_CODE, fmt, ...) \
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
##__VA_ARGS__)
#define GELOGW(fmt, ...) \
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \
dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__)
#define GELOGI(fmt, ...) \
if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) \
dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__)
#define GELOGD(fmt, ...) \
if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) \
dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__)

#define GEEVENT(fmt, ...) dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__)

#define GELOGT(VALUE, fmt, ...) \
do { \
TraceStatus stat = VALUE; \
const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
int idx = static_cast<int>(stat); \
char *k = const_cast<char *>("status"); \
char *v = const_cast<char *>(TraceStatStr[idx]); \
KeyValue kv = {k, v}; \
DlogWithKV(static_cast<int>(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, \
##__VA_ARGS__); \
} while (0)

#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
##__VA_ARGS__)
#define GELOGE(ERROR_CODE, fmt, ...) \
do { \
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
##__VA_ARGS__); \
} while (false)

#define GELOGW(fmt, ...) \
do { \
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) { \
dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} \
} while (false)

#define GELOGI(fmt, ...) \
do { \
if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) { \
dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} \
} while (false)

#define GELOGD(fmt, ...) \
do { \
if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { \
dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} \
} while (false)

#define GEEVENT(fmt, ...) \
do { \
dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} while (false)

#define GELOGT(VALUE, fmt, ...) \
do { \
TraceStatus stat = VALUE; \
const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \
const int32_t idx = static_cast<int32_t>(stat); \
char *k = const_cast<char *>("status"); \
char *v = const_cast<char *>(TraceStatStr[idx]); \
KeyValue kv = {k, v}; \
DlogWithKV(GE_MODULE_NAME, DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), &__FUNCTION__[0], ##__VA_ARGS__); \
} while (false)

#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \
do { \
dlog_error(MOD_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \
##__VA_ARGS__); \
} while (false)

// print memory when it is greater than 1KB.
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \
@@ -95,7 +113,7 @@ inline bool IsLogEnable(int module_name, int log_level) {
if ((SIZE) > 1024) { \
GELOGI("MallocMemory, func=%s, size=%zu, purpose=%s", (#FUNC), static_cast<size_t>(SIZE), (PURPOSE)); \
} \
} while (0);
} while (false)
#ifdef __cplusplus
}
#endif
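
A brief usage sketch of the reworked logging macros. Wrapping each body in do { ... } while (false) makes every call expand to a single complete statement, so the macros are now safe inside unbraced if/else. The function below is illustrative; ge::FAILED and ge::SUCCESS are the status codes used elsewhere in this diff, and the includes are abbreviated:

#include "framework/common/debug/ge_log.h"

ge::Status ValidateSize(const size_t size) {
  if (size == 0U) {
    GELOGE(ge::FAILED, "size must be positive, got %zu", size);
    return ge::FAILED;
  }
  if (size > 1024U) {
    GELOGW("size %zu is larger than expected", size);
  }
  GELOGI("size %zu accepted", size);
  return ge::SUCCESS;
}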


inc/framework/common/debug/log.h (+70, -70)

@@ -52,82 +52,82 @@
GELOGW(__VA_ARGS__); \
}

#define GE_LOGE_IF(condition, ...) \
if ((condition)) { \
DOMI_LOGE(__VA_ARGS__); \
#define GE_LOGE_IF(condition, ...) \
if ((condition)) { \
GELOGE(ge::FAILED, __VA_ARGS__); \
}

// If expr is not SUCCESS, print the log and return the same value
#define GE_CHK_STATUS_RET(expr, ...) \
do { \
const ge::Status _status = (expr); \
if (_status != ge::SUCCESS) { \
DOMI_LOGE(__VA_ARGS__); \
return _status; \
} \
} while (0);
#define GE_CHK_STATUS_RET(expr, ...) \
do { \
const ge::Status _chk_status = (expr); \
if (_chk_status != ge::SUCCESS) { \
GELOGE(ge::FAILED, __VA_ARGS__); \
return _chk_status; \
} \
} while (false)

// If expr is not SUCCESS, print the log and do not execute return
#define GE_CHK_STATUS(expr, ...) \
do { \
const ge::Status _status = (expr); \
if (_status != ge::SUCCESS) { \
DOMI_LOGE(__VA_ARGS__); \
} \
} while (0);
#define GE_CHK_STATUS(expr, ...) \
do { \
const ge::Status _chk_status = (expr); \
if (_chk_status != ge::SUCCESS) { \
GELOGE(ge::FAILED, __VA_ARGS__); \
} \
} while (false)

// If expr is not SUCCESS, return the same value
#define GE_CHK_STATUS_RET_NOLOG(expr) \
do { \
const ge::Status _status = (expr); \
if (_status != ge::SUCCESS) { \
return _status; \
} \
} while (0);
#define GE_CHK_STATUS_RET_NOLOG(expr) \
do { \
const ge::Status _chk_status = (expr); \
if (_chk_status != ge::SUCCESS) { \
return _chk_status; \
} \
} while (false)

// If expr is not GRAPH_SUCCESS, print the log and return FAILED
#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \
do { \
if ((expr) != ge::GRAPH_SUCCESS) { \
REPORT_CALL_ERROR("E19999", "Operator graph failed"); \
DOMI_LOGE(__VA_ARGS__); \
GELOGE(ge::FAILED, __VA_ARGS__); \
return FAILED; \
} \
} while (0);
} while (false)

// If expr is not SUCCESS, print the log and execute a custom statement
#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \
do { \
const ge::Status _status = (expr); \
GE_CHK_BOOL_EXEC(_status == SUCCESS, exec_expr, __VA_ARGS__); \
} while (0);
#define GE_CHK_STATUS_EXEC(expr, exec_expr, ...) \
do { \
const ge::Status _chk_status = (expr); \
GE_CHK_BOOL_EXEC(_chk_status == SUCCESS, exec_expr, __VA_ARGS__); \
} while (false)

// If expr is not true, print the log and return the specified status
#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \
do { \
bool b = (expr); \
const bool b = (expr); \
if (!b) { \
REPORT_INNER_ERROR("E19999", __VA_ARGS__); \
GELOGE(_status, __VA_ARGS__); \
return _status; \
} \
} while (0);
} while (false)

// If expr is not true, print the log and return the specified status
#define GE_CHK_BOOL_RET_STATUS_NOLOG(expr, _status, ...) \
do { \
bool b = (expr); \
const bool b = (expr); \
if (!b) { \
return _status; \
} \
} while (0);
} while (false)

// If expr is not true, print the log and execute a custom statement
#define GE_CHK_BOOL_EXEC(expr, exec_expr, ...) \
{ \
bool b = (expr); \
const bool b = (expr); \
if (!b) { \
DOMI_LOGE(__VA_ARGS__); \
GELOGE(ge::FAILED, __VA_ARGS__); \
exec_expr; \
} \
}
@@ -135,7 +135,7 @@
// If expr is not true, print the log and execute a custom statement
#define GE_CHK_BOOL_EXEC_WARN(expr, exec_expr, ...) \
{ \
bool b = (expr); \
const bool b = (expr); \
if (!b) { \
GELOGW(__VA_ARGS__); \
exec_expr; \
@@ -144,7 +144,7 @@
// If expr is not true, print the log and execute a custom statement
#define GE_CHK_BOOL_EXEC_INFO(expr, exec_expr, ...) \
{ \
bool b = (expr); \
const bool b = (expr); \
if (!b) { \
GELOGI(__VA_ARGS__); \
exec_expr; \
@@ -154,7 +154,7 @@
// If expr is not true, print the log and execute a custom statement
#define GE_CHK_BOOL_TRUE_EXEC_INFO(expr, exec_expr, ...) \
{ \
bool b = (expr); \
const bool b = (expr); \
if (b) { \
GELOGI(__VA_ARGS__); \
exec_expr; \
@@ -164,16 +164,16 @@
// If expr is true, print logs and execute custom statements
#define GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(expr, exec_expr, ...) \
{ \
bool b = (expr); \
const bool b = (expr); \
if (b) { \
DOMI_LOGE(__VA_ARGS__); \
GELOGE(ge::FAILED, __VA_ARGS__); \
exec_expr; \
} \
}
// If expr is true, print the Information log and execute a custom statement
#define GE_CHK_TRUE_EXEC_INFO(expr, exec_expr, ...) \
{ \
bool b = (expr); \
const bool b = (expr); \
if (b) { \
GELOGI(__VA_ARGS__); \
exec_expr; \
@@ -183,9 +183,9 @@
// If expr is not SUCCESS, print the log and execute the expression + return
#define GE_CHK_BOOL_TRUE_RET_VOID(expr, exec_expr, ...) \
{ \
bool b = (expr); \
const bool b = (expr); \
if (b) { \
DOMI_LOGE(__VA_ARGS__); \
GELOGE(ge::FAILED, __VA_ARGS__); \
exec_expr; \
return; \
} \
@@ -194,10 +194,10 @@
// If expr is not SUCCESS, print the log and execute the expression + return _status
#define GE_CHK_BOOL_TRUE_EXEC_RET_STATUS(expr, _status, exec_expr, ...) \
{ \
bool b = (expr); \
const bool b = (expr); \
if (b) { \
REPORT_INNER_ERROR("E19999", __VA_ARGS__); \
DOMI_LOGE(__VA_ARGS__); \
GELOGE(ge::FAILED, __VA_ARGS__); \
exec_expr; \
return _status; \
} \
@@ -206,7 +206,7 @@
// If expr is not true, execute a custom statement
#define GE_CHK_BOOL_EXEC_NOLOG(expr, exec_expr) \
{ \
bool b = (expr); \
const bool b = (expr); \
if (!b) { \
exec_expr; \
} \
@@ -214,34 +214,34 @@

// -----------------runtime related macro definitions-------------------------------
// If expr is not RT_ERROR_NONE, print the log
#define GE_CHK_RT(expr) \
do { \
rtError_t _rt_ret = (expr); \
if (_rt_ret != RT_ERROR_NONE) { \
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \
} \
} while (0);
#define GE_CHK_RT(expr) \
do { \
const rtError_t _rt_ret = (expr); \
if (_rt_ret != RT_ERROR_NONE) { \
GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \
} \
} while (false)

// If expr is not RT_ERROR_NONE, print the log and execute the exec_expr expression
#define GE_CHK_RT_EXEC(expr, exec_expr) \
{ \
rtError_t _rt_ret = (expr); \
if (_rt_ret != RT_ERROR_NONE) { \
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \
exec_expr; \
} \
}
#define GE_CHK_RT_EXEC(expr, exec_expr) \
do { \
const rtError_t _rt_ret = (expr); \
if (_rt_ret != RT_ERROR_NONE) { \
GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \
exec_expr; \
} \
} while (false)

// If expr is not RT_ERROR_NONE, print the log and return
#define GE_CHK_RT_RET(expr) \
do { \
rtError_t _rt_ret = (expr); \
const rtError_t _rt_ret = (expr); \
if (_rt_ret != RT_ERROR_NONE) { \
REPORT_CALL_ERROR("E19999", "Call %s fail, ret: 0x%X", #expr, _rt_ret); \
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \
GELOGE(ge::FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \
return RT_ERROR_TO_GE_STATUS(_rt_ret); \
} \
} while (0);
} while (false)

// If expr is true, execute exec_expr without printing logs
#define GE_IF_BOOL_EXEC(expr, exec_expr) \
@@ -256,7 +256,7 @@
try { \
exec_expr0; \
} catch (const std::bad_alloc &) { \
DOMI_LOGE("Make shared failed"); \
GELOGE(ge::FAILED, "Make shared failed"); \
exec_expr1; \
}

@@ -274,13 +274,13 @@

#define GE_CHK_LOG_AND_ERRORMSG(expr, _status, errormsg) \
do { \
bool b = (expr); \
const bool b = (expr); \
if (!b) { \
GELOGE(_status, "%s", errormsg); \
ErrorManager::GetInstance().ATCReportErrMessage("E19021", {"reason"}, {errormsg}); \
return _status; \
} \
} while (0)
} while (false)

template <typename T>
GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) {

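A small composition example for the status-checking macros above, with stand-in callees. Note the local status variable is now _chk_status, which avoids shadowing a _status the caller may already have in scope, and the trailing ';' after while(0) is gone, so each macro forms one statement and takes the caller's semicolon:

#include "framework/common/debug/log.h"
#include "runtime/rt.h"

ge::Status LoadModel() { return ge::SUCCESS; }     // stand-in for real work
rtError_t BindStream() { return RT_ERROR_NONE; }   // stand-in for a runtime call

ge::Status Prepare() {
  GE_CHK_STATUS_RET(LoadModel(), "load model failed");  // logs and returns the callee's status on failure
  GE_CHK_RT_RET(BindStream());                          // logs, reports, and returns RT_ERROR_TO_GE_STATUS(...)
  return ge::SUCCESS;
}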

inc/framework/common/ge_inner_error_codes.h (+21, -21)

@@ -61,29 +61,29 @@ enum ErrorLevel {
CRITICAL_LEVEL = 0b100,
};

// Each module defines error codes using the following macros
// Each module defines error codes using the following macros, name can not be modified to (name)
#define GE_ERRORNO_COMMON(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, value, desc)
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, (value), (desc))
#define GE_ERRORNO_CLIENT(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, value, desc)
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, (value), (desc))
#define GE_ERRORNO_INIT(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, value, desc)
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, (value), (desc))
#define GE_ERRORNO_SESSION(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, value, desc)
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, (value), (desc))
#define GE_ERRORNO_GRAPH(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, value, desc)
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, (value), (desc))
#define GE_ERRORNO_ENGINE(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, value, desc)
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, (value), (desc))
#define GE_ERRORNO_OPS(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, value, desc)
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, (value), (desc))
#define GE_ERRORNO_PLUGIN(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, value, desc)
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, (value), (desc))
#define GE_ERRORNO_RUNTIME(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, value, desc)
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, (value), (desc))
#define GE_ERRORNO_EXECUTOR(name, value, desc) \
GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, value, desc)
GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, (value), (desc))
#define GE_ERRORNO_GENERATOR(name, value, desc) \
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, value, desc)
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, (value), (desc))

// Get error code description
#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value)
@@ -125,13 +125,13 @@ GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized.
GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized."); // 1343229963

// Init module error code definition
GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048
GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049
GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050
GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051
GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052
GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048
GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049
GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050
GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051
GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052
GE_ERRORNO_INIT(GE_PROF_MODE_CONFLICT, 5,
"Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053
"Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053

// Session module error code definition
GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session."); // 1343238144
@@ -216,8 +216,8 @@ GE_ERRORNO_ENGINE(GE_ENG_FINALIZE_FAILED, 1, "Engine finalize failed.");
GE_ERRORNO_ENGINE(GE_ENG_MEMTYPE_ERROR, 2, "Memory type HBM is necessary when engine is in device"); // 1343246338

// Optimize errocode
GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303
GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304
GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303
GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304

// Ops module error code definition
GE_ERRORNO_OPS(GE_OPS_KERNEL_STORE_INIT_FAILED, 0, "Failed to initialize OpsKernelInfoStore."); // 1343250432
@@ -313,7 +313,7 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph ma
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed.");
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed.");

#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast<Status>(RT_ERROR)
#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast<const Status>(RT_ERROR)
} // namespace ge

#endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_

inc/framework/common/ge_types.h (+13, -4)

@@ -279,10 +279,19 @@ struct TaskDescInfo {
};

struct OpDescInfo {
std::string op_name;
std::string op_type;
uint32_t task_id;
uint32_t stream_id;
std::string op_name = "";
std::string op_type = "";
uint32_t task_id = 0;
uint32_t stream_id = 0;
uint32_t imply_type = 0;
uint32_t block_dim = 0;
std::string op_file_path = "";
std::string dev_func = "";
std::string tvm_magic = "";
uint32_t tiling_key = 0;
std::string tiling_data = "";
std::string node_info = "";
std::vector<int64_t> workspace_bytes;
std::vector<Format> input_format;
std::vector<std::vector<int64_t>> input_shape;
std::vector<DataType> input_data_type;


inc/framework/common/op/ge_op_utils.h (+1, -1)

@@ -95,7 +95,7 @@ class GE_FUNC_VISIBILITY OpUtils {
/// @param [out] aipp_params aipp parameters
/// @return enum of tagCCAippInputFormat
///
static Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params);
static Status ConvertAippParams(const NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params);
static Status TransferDim(const std::vector<int64_t> &dim, std::vector<int64_t> &dim_vector);
template <typename T>
static void SliceData(const std::vector<char *> &input, int64_t chunk_size, std::vector<char *> &output,


inc/framework/common/string_util.h (+2, -2)

@@ -78,8 +78,8 @@ class GE_FUNC_VISIBILITY StringUtils {
/// @param [in] delim separator
/// @return string array after segmentation
///
static std::vector<std::string> Split(const std::string &str, char delim) {
std::vector<std::string> elems;
static std::vector<std::string, std::allocator<std::string>> Split(const std::string &str, char delim) {
std::vector<std::string, std::allocator<std::string>> elems;

if (str.empty()) {
elems.emplace_back("");

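For context, Split keeps its behaviour; only the allocator is now spelled out in the return type. A trivial use, the same call version.h makes further down:

#include <string>
#include <vector>
#include "framework/common/string_util.h"

namespace ge {
std::vector<std::string> SplitVersion(const std::string &ver) {
  return StringUtils::Split(ver, '.');  // "1.11.z" -> {"1", "11", "z"}
}
}  // namespace ge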

inc/framework/common/types.h (+2, -0)

@@ -339,6 +339,8 @@ REGISTER_OPTYPE_DECLARE(PLACEHOLDER, "PlaceHolder");
REGISTER_OPTYPE_DECLARE(END, "End");
REGISTER_OPTYPE_DECLARE(BASICLSTMCELL, "BasicLSTMCell");
REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext");
REGISTER_OPTYPE_DECLARE(ITERATOR, "Iterator");
REGISTER_OPTYPE_DECLARE(ITERATORV2, "IteratorV2");
REGISTER_OPTYPE_DECLARE(INITDATA, "InitData");
REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape")
REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity");


inc/framework/common/util.h (+95, -109)

@@ -18,8 +18,8 @@
#define INC_FRAMEWORK_COMMON_UTIL_H_

#include <google/protobuf/text_format.h>
#include <limits.h>
#include <math.h>
#include <climits>
#include <cmath>
#include <sstream>
#include <string>
#include <vector>
@@ -30,17 +30,17 @@
#include "framework/common/ge_inner_error_codes.h"
#include "mmpa/mmpa_api.h"

#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \
do { \
if (size <= 0) { \
DOMI_LOGE("param[%s] is not a positive number", #size); \
return PARAM_INVALID; \
} \
} while (0)
#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \
do { \
if (size <= 0) { \
GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \
return PARAM_INVALID; \
} \
} while (false)

#define CHECK_FALSE_EXEC(expr, exec_expr, ...) \
{ \
bool b = (expr); \
const bool b = (expr); \
if (!b) { \
exec_expr; \
} \
@@ -59,131 +59,133 @@
});

// For propagating errors when calling a function.
#define GE_RETURN_IF_ERROR(expr) \
do { \
const ::ge::Status _status = (expr); \
if (_status) return _status; \
} while (0)
#define GE_RETURN_IF_ERROR(expr) \
do { \
const ge::Status _chk_status = (expr); \
if (_chk_status != ge::SUCCESS) { \
return _chk_status; \
} \
} while (false)

#define GE_RETURN_WITH_LOG_IF_ERROR(expr, ...) \
do { \
const ::ge::Status _status = (expr); \
if (_status) { \
DOMI_LOGE(__VA_ARGS__); \
return _status; \
const ge::Status _chk_status = (expr); \
if (_chk_status != ge::SUCCESS) { \
GELOGE(ge::FAILED, __VA_ARGS__); \
return _chk_status; \
} \
} while (0)
} while (false)

// check whether the parameter is true. If it is, return FAILED and record the error log
#define GE_RETURN_WITH_LOG_IF_TRUE(condition, ...) \
do { \
if (condition) { \
DOMI_LOGE(__VA_ARGS__); \
GELOGE(ge::FAILED, __VA_ARGS__); \
return ge::FAILED; \
} \
} while (0)
} while (false)

// Check if the parameter is false. If yes, return FAILED and record the error log
#define GE_RETURN_WITH_LOG_IF_FALSE(condition, ...) \
do { \
bool _condition = (condition); \
const bool _condition = (condition); \
if (!_condition) { \
DOMI_LOGE(__VA_ARGS__); \
GELOGE(ge::FAILED, __VA_ARGS__); \
return ge::FAILED; \
} \
} while (0)
} while (false)

// Checks whether the parameter is true. If so, returns PARAM_INVALID and records the error log
#define GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(condition, ...) \
do { \
if (condition) { \
DOMI_LOGE(__VA_ARGS__); \
GELOGE(ge::FAILED, __VA_ARGS__); \
return ge::PARAM_INVALID; \
} \
} while (0)
} while (false)

// Check if the parameter is false. If yes, return PARAM_INVALID and record the error log
#define GE_RT_PARAM_INVALID_WITH_LOG_IF_FALSE(condition, ...) \
do { \
bool _condition = (condition); \
const bool _condition = (condition); \
if (!_condition) { \
DOMI_LOGE(__VA_ARGS__); \
GELOGE(ge::FAILED, __VA_ARGS__); \
return ge::PARAM_INVALID; \
} \
} while (0)
} while (false)

// Check if the parameter is null. If yes, return PARAM_INVALID and record the error
#define GE_CHECK_NOTNULL(val) \
do { \
if (val == nullptr) { \
REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \
DOMI_LOGE("[Check][Param:%s]null is invalid.", #val); \
GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \
return ge::PARAM_INVALID; \
} \
} while (0)
} while (false)

// Check if the parameter is null. If yes, just return and record the error
#define GE_CHECK_NOTNULL_JUST_RETURN(val) \
do { \
if (val == nullptr) { \
DOMI_LOGE("param[%s] must not be null.", #val); \
return; \
} \
} while (0)
#define GE_CHECK_NOTNULL_JUST_RETURN(val) \
do { \
if (val == nullptr) { \
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \
return; \
} \
} while (false)

// Check whether the parameter is null. If so, execute the exec_expr expression and record the error log
#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \
do { \
if (val == nullptr) { \
DOMI_LOGE("param[%s] must not be null.", #val); \
exec_expr; \
} \
} while (0)
#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \
do { \
if (val == nullptr) { \
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \
exec_expr; \
} \
} while (false)

// Check whether the parameter is null. If yes, return directly and record the error log
#define GE_RT_VOID_CHECK_NOTNULL(val) \
do { \
if (val == nullptr) { \
DOMI_LOGE("param[%s] must not be null.", #val); \
return; \
} \
} while (0)
#define GE_RT_VOID_CHECK_NOTNULL(val) \
do { \
if (val == nullptr) { \
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \
return; \
} \
} while (false)

// Check if the parameter is null. If yes, return false and record the error log
#define GE_RT_FALSE_CHECK_NOTNULL(val) \
do { \
if (val == nullptr) { \
DOMI_LOGE("param[%s] must not be null.", #val); \
return false; \
} \
} while (0)
#define GE_RT_FALSE_CHECK_NOTNULL(val) \
do { \
if (val == nullptr) { \
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \
return false; \
} \
} while (false)

// Check if the parameter is out of bounds
#define GE_CHECK_SIZE(size) \
do { \
if (size == 0) { \
DOMI_LOGE("param[%s] is out of range", #size); \
return ge::PARAM_INVALID; \
} \
} while (0)
#define GE_CHECK_SIZE(size) \
do { \
if (size == 0) { \
GELOGE(ge::FAILED, "param[%s] is out of range", #size); \
return ge::PARAM_INVALID; \
} \
} while (false)

// Check if the value on the left is greater than or equal to the value on the right
#define GE_CHECK_GE(lhs, rhs) \
do { \
if (lhs < rhs) { \
DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \
return ge::PARAM_INVALID; \
} \
} while (0)
#define GE_CHECK_GE(lhs, rhs) \
do { \
if (lhs < rhs) { \
GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \
return ge::PARAM_INVALID; \
} \
} while (false)

// Check if the value on the left is less than or equal to the value on the right
#define GE_CHECK_LE(lhs, rhs) \
do { \
if (lhs > rhs) { \
DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \
return ge::PARAM_INVALID; \
} \
} while (0)
#define GE_CHECK_LE(lhs, rhs) \
do { \
if (lhs > rhs) { \
GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \
return ge::PARAM_INVALID; \
} \
} while (false)

#define GE_DELETE_NEW_SINGLE(var) \
do { \
@@ -191,7 +193,7 @@
delete var; \
var = nullptr; \
} \
} while (0)
} while (false)

#define GE_DELETE_NEW_ARRAY(var) \
do { \
@@ -199,18 +201,18 @@
delete[] var; \
var = nullptr; \
} \
} while (0)
} while (false)

#define GE_FREE_RT_LOG(addr) \
do { \
if (addr != nullptr) { \
rtError_t error = rtFree(addr); \
const rtError_t error = rtFree(addr); \
if (error != RT_ERROR_NONE) { \
GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \
} \
addr = nullptr; \
} \
} while (0)
} while (false)

/**
* @ingroup domi_common
@@ -228,12 +230,6 @@ using google::protobuf::Message;

///
/// @ingroup domi_common
/// @brief Maximum file path length
///
const int32_t DOMI_MAX_PATH_LEN = 256;

///
/// @ingroup domi_common
/// @brief Reads the proto structure from an array.
/// @param [in] data proto data to be read
/// @param [in] size proto data size
@@ -253,8 +249,6 @@ GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int size, Message *
///
GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message);

GE_FUNC_VISIBILITY bool ReadProtoFromMem(const char *data, int size, google::protobuf::Message *message);

///
/// @ingroup: domi_common
/// @brief: get length of file
@@ -306,10 +300,10 @@ GE_FUNC_VISIBILITY std::string ToString(std::vector<T> &v) {
ss << x;
ss << ", ";
}
std::string strRet =
ss.str().substr(0, ss.str().length() - 2); // Delete the two extra characters at the end of the line.
strRet += "]";
return strRet;
// Delete the two extra characters at the end of the line.
std::string str = ss.str().substr(0u, ss.str().length() - 2u);
str += "]";
return str;
}

///
@@ -326,10 +320,10 @@ GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField<T>
ss << x;
ss << ", ";
}
std::string strRet =
ss.str().substr(0, ss.str().length() - 2); // Delete the two extra characters at the end of the line.
strRet += "]";
return strRet;
// Delete the two extra characters at the end of the line.
std::string str = ss.str().substr(0u, ss.str().length() - 2u);
str += "]";
return str;
}

///
@@ -394,14 +388,6 @@ GE_FUNC_VISIBILITY bool ValidateStr(const std::string &filePath, const std::stri

///
/// @ingroup domi_common
/// @brief Check whether the file is normal file.
/// @param [in] file_path file path
/// @param [out] result
///
GE_FUNC_VISIBILITY bool IsValidFile(const char *file_path);

///
/// @ingroup domi_common
/// @brief Check path invalid
/// @param [in] path, path to be checked
/// @param [in] length, length of path

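A short sketch combining two of the helpers touched in util.h, the null-check macro and the vector ToString formatter. The function is illustrative; ToString takes a non-const reference, hence the local copy:

#include <cstdint>
#include <vector>
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"

namespace ge {
Status DumpDims(const std::vector<int64_t> *dims) {
  GE_CHECK_NOTNULL(dims);  // reports, logs and returns ge::PARAM_INVALID when dims is null
  std::vector<int64_t> copy = *dims;
  GELOGI("dims = %s", ToString(copy).c_str());  // e.g. "[1, 3, 224, 224]"
  return SUCCESS;
}
}  // namespace ge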

inc/framework/engine/dnnengine.h (+20, -3)

@@ -43,14 +43,31 @@ struct DNNEngineAttribute {
// If engine input format must be specific, set this attribute, else set FORMAT_RESERVED
Format engine_input_format;
Format engine_output_format;
bool atomic_engine_flag;
};

class GE_FUNC_VISIBILITY DNNEngine {
public:
DNNEngine() = default;
explicit DNNEngine(const DNNEngineAttribute &attrs) {
engine_attribute_ = attrs;
}
virtual ~DNNEngine() = default;
virtual Status Initialize(const std::map<std::string, std::string> &options) = 0;
virtual Status Finalize() = 0;
virtual void GetAttributes(DNNEngineAttribute &attr) const = 0;
Status Initialize(const std::map<std::string, std::string> &options) {
return SUCCESS;
}
Status Finalize() {
return SUCCESS;
}
void GetAttributes(DNNEngineAttribute &attr) const {
attr = engine_attribute_;
}
bool IsAtomic() const {
return engine_attribute_.atomic_engine_flag;
}

protected:
DNNEngineAttribute engine_attribute_;
};
} // namespace ge
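
With Initialize, Finalize and GetAttributes no longer pure virtual, DNNEngine can be used directly as a concrete class; a hedged sketch, with the attribute values supplied by the caller:

#include "framework/engine/dnnengine.h"

bool IsEngineAtomic(const ge::DNNEngineAttribute &attrs) {
  ge::DNNEngine engine(attrs);   // the new explicit constructor stores the attributes
  ge::DNNEngineAttribute copy;
  engine.GetAttributes(copy);    // copies engine_attribute_ back out
  return engine.IsAtomic();      // reads the new atomic_engine_flag
}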



inc/framework/generator/ge_generator.h (+10, -3)

@@ -34,13 +34,16 @@ namespace ge {
class GeRootModel;
class GE_FUNC_VISIBILITY GeGenerator {
public:
using InOutTensorRef = std::pair<const vector<ge::GeTensor> &, const vector<ge::GeTensor> &>;
static GeGenerator &GetInstance() {
static GeGenerator Instance;
return Instance;
}
GeGenerator() = default;

~GeGenerator() { (void)Finalize(); }
~GeGenerator() {
(void)Finalize();
}

GeGenerator(const GeGenerator &) = delete;

@@ -94,8 +97,8 @@ class GE_FUNC_VISIBILITY GeGenerator {
/// @param [in] graph_name: graph name.
/// @param [out] graph: graph of single op.
/// @return SUCCESS or FAILED
Status BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
std::string graph_name, Graph &graph);
Status BuildSingleOpGraph(OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name,
Graph &graph, std::vector<std::pair<std::string, std::string>> &inputs_name_type);

private:
Status GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
@@ -110,6 +113,10 @@ class GE_FUNC_VISIBILITY GeGenerator {

using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>;
Status SetModelNameForDump(const GeRootModelPtr &ge_root_model);
Status CreateGeneralizedBuildAttrs(const GeRootModelPtr &ge_root_model, const std::vector<GeTensor> &inputs,
const std::vector<GeTensor> &outputs,
const std::vector<std::pair<std::string, std::string>> &inputs_name_type,
std::vector<ge::NamedAttrs> &generalized_build_attrs);

class Impl;



inc/framework/omg/omg_inner_types.h (+1, -0)

@@ -128,6 +128,7 @@ struct OmgContext {
bool fuzz_compile_flag = false;
std::string atc_cmdline;
bool user_attr_index_valid = false;
bool is_online_model = false;
};
} // namespace ge



+ 1
- 1
inc/framework/omg/version.h View File

@@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY PlatformVersionManager {
~PlatformVersionManager() = delete;
static Status GetPlatformVersion(std::string &ver) {
ver = "1.11.z";
std::vector<std::string> version_splits = StringUtils::Split(ver, '.');
const std::vector<std::string> version_splits = StringUtils::Split(ver, '.');
GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;);

GELOGI("Read current platform version: %s.", ver.c_str());


+ 1
- 1
metadef

@@ -1 +1 @@
Subproject commit 60df4b39a6f639c21dd7deb220b93345451938f5
Subproject commit f011a4c7ad36e1ec80990e659abefc78b0aa7543

+ 107
- 109
third_party/fwkacllib/inc/external/runtime/rt_error_codes.h View File

@@ -1,109 +1,107 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
#define __INC_EXTERNEL_RT_ERROR_CODES_H__

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

static const int32_t ACL_RT_SUCCESS = 0; // success

static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout

static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource

static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal

static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect

#ifdef __cplusplus
}
#endif

#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
/**
* @file rt_error_codes.h
*
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
#define __INC_EXTERNEL_RT_ERROR_CODES_H__
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
static const int32_t ACL_RT_SUCCESS = 0; // success
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect
#ifdef __cplusplus
}
#endif
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
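The rewritten header keeps the existing grouping of codes (1070xx parameter/usage errors, 2070xx missing features or resources, 5070xx runtime/device failures, 5078xx-5079xx driver/aicpu/hdc internals) and appends the die-mode codes 507038-507041. A hedged helper that classifies a return code by those ranges, using only the constants defined above (include path is an assumption):

```cpp
#include <cstdint>
#include "external/runtime/rt_error_codes.h"   // assumed include path

const char *RtErrorFamily(int32_t code) {
  if (code == ACL_RT_SUCCESS) {
    return "success";
  }
  if (code >= ACL_ERROR_RT_PARAM_INVALID && code <= ACL_ERROR_RT_WAIT_TIMEOUT) {
    return "1070xx: invalid parameter or usage error";
  }
  if (code >= ACL_ERROR_RT_FEATURE_NOT_SUPPORT && code <= ACL_ERROR_RT_NO_CDQ_RESOURCE) {
    return "2070xx: feature or resource unavailable";
  }
  if (code >= ACL_ERROR_RT_INTERNAL_ERROR && code <= ACL_ERROR_RT_DIE_MODE_NOT_SET) {
    return "5070xx: runtime or device execution error";
  }
  return "507899+: driver, aicpu or hdc internal error";
}
```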

+ 4
- 4
third_party/fwkacllib/inc/ops/array_ops.h View File

@@ -1258,21 +1258,21 @@ REG_OP(ExpandD)
* Three inputs, including:
* @li bucket_list: A 1-D tensor of type int32 with the value of ivf_counts and ivf_offset index. \n
* @li ivf_counts: A 1-D tensor of type int32 with the value of ivf counts. \n
* @li ivf_offset: A 1-D tensor of type int32 with the value of ivf offset. \n
* @li ivf_offset: A 1-D tensor of type int32 or int64 with the value of ivf offset. \n

* @par Attributes:
* total_limit: A int64 type maximum value of the sum of ivf_counts corresponding to bucket_list. \n

* @par Outputs:
* @li buckets_limit: A 1-D tensor of type int32 with the sum <= total_limit. \n
* @li buckets_offset: A 1-D tensor of type int32 with the value of ivf_offset corresponding to bucket_list. \n
* @li buckets_offset: A 1-D tensor of type int32 or int64 with the value of ivf_offset corresponding to bucket_list. \n
*/
REG_OP(CalcBucketsLimitAndOffset)
.INPUT(bucket_list, TensorType({DT_INT32}))
.INPUT(ivf_counts, TensorType({DT_INT32}))
.INPUT(ivf_offset, TensorType({DT_INT32}))
.INPUT(ivf_offset, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(buckets_limit, TensorType({DT_INT32}))
.OUTPUT(buckets_offset, TensorType({DT_INT32}))
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64}))
.REQUIRED_ATTR(total_limit, Int)
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset)



+ 58
- 0
third_party/fwkacllib/inc/ops/cluster.h View File

@@ -0,0 +1,58 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*!
* \file cluster.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_
#define OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {
/**
* @brief Perform k-means clustering on a data matrix. \n

* @par Inputs:
* Three required inputs and one optional input, including: \n
* @li x: A 2D tensor of data type float32. \n
* @li y: A 2D tensor of data type float32. \n
* @li sum_square_x: An optional 2D tensor of data type float32. \n
* @li sum_square_y: A 2D tensor of data type float32. \n

* @par Attributes:
* use_actual_distance: Indicates whether to calculate the complete distance. \n

* @par Outputs:
* @li segment_sum: A tensor of data type float32. \n
* @li segment_count: A tensor of data type float32. \n
* @li k_mean_total_sum: A tensor of data type float32. \n
*/
REG_OP(KMeansCentroids)
.INPUT(x, TensorType({DT_FLOAT}))
.INPUT(y, TensorType({DT_FLOAT}))
.INPUT(sum_square_y, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(sum_square_x, TensorType({DT_FLOAT}))
.OUTPUT(segment_sum, TensorType({DT_FLOAT}))
.OUTPUT(segment_count, TensorType({DT_FLOAT}))
.OUTPUT(kmean_total_sum, TensorType({DT_FLOAT}))
.ATTR(use_actual_distance, Bool, false)
.OP_END_FACTORY_REG(KMeansCentroids)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_CLUSTER_H_
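The optional sum_square inputs above point at the usual k-means trick of expanding the squared Euclidean distance so that only the cross term needs a fresh matrix multiply per iteration. A minimal scalar sketch of that expansion (an illustration of the math, not the operator's actual kernel):

```cpp
#include <cstddef>
#include <vector>

// ||x - y||^2 = ||x||^2 - 2 * <x, y> + ||y||^2; sum_square_x and sum_square_y
// play the role of the cached squared norms.
float SquaredDistance(const std::vector<float> &x, const std::vector<float> &y,
                      float sum_square_x, float sum_square_y) {
  float dot = 0.0f;
  for (std::size_t i = 0; i < x.size(); ++i) {
    dot += x[i] * y[i];
  }
  return sum_square_x - 2.0f * dot + sum_square_y;
}
```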

+ 5
- 0
third_party/fwkacllib/inc/ops/data_flow_ops.h View File

@@ -2336,12 +2336,14 @@ REG_OP(CacheAllIndexToLocal)

/**
*@brief LRUCacheV2, aicore LRUCache.

*@par Inputs:
*index_list: exchange index list
*data: host data
*cache: gm cache
*tag: cache's tag
*is_last_call: if is last call write all cache to data

*@par Outputs:
*data: output data
*cache: gm cache
@@ -2349,8 +2351,11 @@ REG_OP(CacheAllIndexToLocal)
*index_offset_list: index_offset_list
*not_in_cache_index_list: output not in cache's index_list
*not_in_cache_number: scalar

*@par Attributes:
*pre_route_count: types of all outputs

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(LRUCacheV2)


+ 27
- 27
third_party/fwkacllib/inc/ops/elewise_calculation_ops.h View File

@@ -3391,57 +3391,57 @@ REG_OP(TensorRedirect)
.OP_END_FACTORY_REG(TensorRedirect)

/**
* @brief Performs the element-wise division of tensor x2 by tensor x3,
* multiply the result by the scalar value and add it to tensor x1
* @brief Performs the element-wise division of tensor x1 by tensor x2,
* multiply the result by the scalar value and add it to tensor input_data.

* @par Inputs:
* Four inputs, including:
* @li input_data: A mutable input Tensor. Must be one of the following types:
* float16, float32.
* @li x1: A mutable input Tensor of the same type as x1.
* @li x2: A mutable input Tensor of the same type as x1.
* float16, float32, double, int64.
* @li x1: A mutable input Tensor of the same type as input_data.
* @li x2: A mutable input Tensor of the same type as input_data.
* @li value: A mutable input Tensor. Must be one of the following types:
* float16, float32, int32. \n
* float16, float32, double, int64, int32. \n


* @par Outputs:
* y: A mutable Tensor. Has the same type as "x1". \n
* y: A mutable Tensor. Has the same type as input_data. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Addcdiv.
* Compatible with the Pytorch operator Addcdiv(version-1.5.0).
*/
REG_OP(Addcdiv)
.INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64}))
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64}))
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64}))
.INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT32, DT_DOUBLE, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64}))
.OP_END_FACTORY_REG(Addcdiv)

/**
* @brief Performs the element-wise multiplication of tensor x2 by tensor x3,
* @brief Performs the element-wise multiplication of tensor x1 by tensor x2,
* multiply the result by the scalar value and add it to tensor input_data


* @par Inputs:
* Four inputs, including:
* @li input_data: A mutable input Tensor. Must be one of the following types:
* float16, float32, int8, int32, uint8.
* @li x1: A mutable input Tensor of the same type as x1.
* @li x2: A mutable input Tensor of the same type as x1.
* @li value: A tensor which includes only one element of the same type as x1. \n
* float16, float32, double, int64, int8, int32, uint8.
* @li x1: A mutable input Tensor of the same type as input_data.
* @li x2: A mutable input Tensor of the same type as input_data.
* @li value: A tensor which includes only one element of the same type as input_data. \n

* @par Outputs:
* y: A mutable output Tensor. Has the same type as "x1". \n
* y: A mutable output Tensor. Has the same type as input_data. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator Addcmul.
*/
REG_OP(Addcmul)
.INPUT(input_data, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
.INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
.INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
.INPUT(value, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
.OUTPUT(y, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8 }))
.INPUT(input_data, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64}))
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64}))
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64}))
.INPUT(value, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, DT_DOUBLE, DT_INT64}))
.OP_END_FACTORY_REG(Addcmul)
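Both hunks above only widen the accepted dtypes; the documented PyTorch-compatible behavior is a scaled add of an element-wise quotient or product. A hedged per-element reference (plain host code, not the device kernel):

```cpp
#include <cstddef>
#include <vector>

// Addcdiv: y[i] = input_data[i] + value * (x1[i] / x2[i])
// Addcmul: y[i] = input_data[i] + value * (x1[i] * x2[i])
std::vector<float> AddcdivRef(const std::vector<float> &input_data,
                              const std::vector<float> &x1,
                              const std::vector<float> &x2, float value) {
  std::vector<float> y(input_data.size());
  for (std::size_t i = 0; i < y.size(); ++i) {
    y[i] = input_data[i] + value * (x1[i] / x2[i]);
  }
  return y;
}
```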

/**
@@ -3508,8 +3508,8 @@ REG_OP(StrideAdd)
* Compatible with the Pytorch equal operator. \n
*/
REG_OP(TensorEqual)
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8}))
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8}))
.INPUT(input_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_INT8, DT_UINT8}))
.OUTPUT(output_z, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(TensorEqual)



+ 4
- 4
third_party/fwkacllib/inc/ops/linalg_ops.h View File

@@ -410,10 +410,10 @@ form square matrices. \n
*/

REG_OP(Svd)
.INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT }))
.OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT }))
.OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT }))
.OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT }))
.INPUT(x, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 }))
.OUTPUT(sigma, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 }))
.OUTPUT(u, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 }))
.OUTPUT(v, TensorType({ DT_DOUBLE, DT_FLOAT, DT_COMPLEX64, DT_COMPLEX128 }))
.ATTR(compute_uv, Bool, true)
.ATTR(full_matrices, Bool, false)
.OP_END_FACTORY_REG(Svd)


+ 53
- 2
third_party/fwkacllib/inc/ops/math_ops.h View File

@@ -480,7 +480,7 @@ REG_OP(HistogramFixedWidth)
.INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.INPUT(nbins, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_INT32}))
.ATTR(dtype, String, "int32")
.ATTR(dtype, Int, 3)
.OP_END_FACTORY_REG(HistogramFixedWidth)

/**
@@ -511,7 +511,7 @@ REG_OP(HistogramFixedWidthD)
.INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.OUTPUT(y, TensorType({DT_INT32}))
.REQUIRED_ATTR(nbins, Int)
.ATTR(dtype, String, "int32")
.ATTR(dtype, Int, 3)
.OP_END_FACTORY_REG(HistogramFixedWidthD)

/**
@@ -939,6 +939,57 @@ REG_OP(LpNorm)
.OP_END_FACTORY_REG(LpNorm)

/**
* @brief Computes LpNormReduce.

* @par Inputs:
* x: An ND tensor of type float16, float32. \n
*
* @par Attributes:
* @li p: Int, "inf" or "-inf", default value is 2.
* @li axes: ListInt, {} means all axes will be computed.
* @li keepdim: Bool, default is false.
* @li epsilon: Float, default is 1e-12. \n

* @par Outputs:
* y: An ND tensor of type float16, float32. The shape of y depends
* on axes and keepdim. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator LpNormReduce.
*/
REG_OP(LpNormReduce)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(p, Int, 2)
.ATTR(axes, ListInt, {})
.ATTR(keepdim, Bool, false)
.ATTR(epsilon, Float, 1e-12)
.OP_END_FACTORY_REG(LpNormReduce)

/**
* @brief Computes LpNormUpdate.

* @par Inputs:
* x: An ND tensor of type float16, float32. \n
*
* @par Attributes:
* @li p: Int, "inf" or "-inf", default value is 2.
* @li epsilon: Float, default is 1e-12. \n

* @par Outputs:
* y: An ND tensor of type float16, float32. \n

* @par Third-party framework compatibility
* Compatible with the Pytorch operator LpNormUpdate.
*/
REG_OP(LpNormUpdate)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(p, Int, 2)
.ATTR(epsilon, Float, 1e-12)
.OP_END_FACTORY_REG(LpNormUpdate)
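Taken together, the two new ops split an Lp norm into a reduction stage and a finishing stage. A hedged scalar sketch of that split, assuming the conventional definition with epsilon guarding the final root:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// LpNormReduce ~ sum_i |x_i|^p over the chosen axes;
// LpNormUpdate ~ max(reduced, epsilon)^(1/p).
float LpNormRef(const std::vector<float> &x, int p, float epsilon) {
  float reduced = 0.0f;
  for (std::size_t i = 0; i < x.size(); ++i) {
    reduced += std::pow(std::fabs(x[i]), static_cast<float>(p));   // reduce stage
  }
  return std::pow(std::max(reduced, epsilon), 1.0f / static_cast<float>(p));  // update stage
}
```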

/**
* @brief get complex.

* @par Inputs:


+ 72
- 8
third_party/fwkacllib/inc/ops/matrix_calculation_ops.h View File

@@ -49,10 +49,10 @@ namespace ge {
* Compatible with the TensorFlow operator BatchMatmul.
*/
REG_OP(MatMul)
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.ATTR(transpose_x1, Bool, false)
.ATTR(transpose_x2, Bool, false)
.OP_END_FACTORY_REG(MatMul)
@@ -88,10 +88,10 @@ REG_OP(MatMul)
* Compatible with the TensorFlow operator BatchMatmul.
*/
REG_OP(MatMulV2)
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8 DT_INT4}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16}))
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16}))
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4}))
.ATTR(transpose_x1, Bool, false)
.ATTR(transpose_x2, Bool, false)
@@ -532,6 +532,36 @@ REG_OP(ScatterAdd)
.OP_END_FACTORY_REG(ScatterAdd)

/**
*@brief Adds sparse "updates" to a variable reference . \n

*@par Inputs:
* Three inputs, including:
*@li var: An ND Tensor .
*Must be one of the following types: float16, float32, int32, int8, uint8

*@li indices: An ND Tensor of type int32 or int64

*@li updates: An ND Tensor .
*Must be one of the following types: float16, float32, int32, int8, uint8

*@par Attributes:
* axis: A required int. The axis along which to index. \n

*@par Outputs:
*var: A Tensor. Has the same type and format as input "var" . \n

*@par Third-party framework compatibility
* Compatible with the pytorch operator ScatterAdd.
*/
REG_OP(ScatterAddWithAxis)
.INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.INPUT(indices, TensorType::IndexNumberType())
.INPUT(updates, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.OUTPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8}))
.REQUIRED_ATTR(axis, Int)
.OP_END_FACTORY_REG(ScatterAddWithAxis)
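A hedged sketch of the indexing the new op documents (PyTorch-style scatter_add along a single axis), shown here for the 1-D, axis == 0 case only:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// var[indices[i]] += updates[i]; higher ranks index along the chosen axis instead.
void ScatterAddWithAxisRef(std::vector<float> &var,
                           const std::vector<int32_t> &indices,
                           const std::vector<float> &updates) {
  for (std::size_t i = 0; i < indices.size(); ++i) {
    var[static_cast<std::size_t>(indices[i])] += updates[i];
  }
}
```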

/**
*@brief Divides a variable reference by sparse updates . \n

*@par Inputs:
@@ -1067,6 +1097,40 @@ REG_OP(MatrixSetDiagV2)
.OP_END_FACTORY_REG(MatrixSetDiagV2)

/**
*@brief Returns a batched matrix tensor with new batched diagonal values . \n

*@par Inputs:
* Three inputs, including:
*@li input: "Rank `r+1`, where `r >= 1`. \n

*@li diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. \n

*@li k:
*Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main \n
*diagonal, and negative value means subdiagonals. `k` can be a single integer \n
*(for a single diagonal) or a pair of integers specifying the low and high ends \n
*of a matrix band. `k[0]` must not be larger than `k[1]`. \n

*@par Attributes:
*@li align: An optional string. Defaults to RIGHT_LEFT. It is a string specifying \n
*how superdiagonals and subdiagonals should be aligned, respectively. \n
*other optional: LEFT_RIGHT, LEFT_LEFT, and RIGHT_RIGHT.\n

*@par Outputs:
*output: Rank `r+1`, with `output.shape = input.shape` . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator ScatterUpdate.
*/
REG_OP(MatrixSetDiagV3)
.INPUT(input, TensorType::BasicType())
.INPUT(diagonal, TensorType::BasicType())
.INPUT(k, TensorType({DT_INT32}))
.OUTPUT(output, TensorType::BasicType())
.ATTR(align, String, "RIGHT_LEFT")
.OP_END_FACTORY_REG(MatrixSetDiagV3)

/**
*@brief Returns a batched diagonal tensor with given batched diagonal values . \n

*@par Inputs:


+ 23
- 0
third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h View File

@@ -142,6 +142,29 @@ REG_OP(BatchNorm)
.OP_END_FACTORY_REG(BatchNorm)

/**
*@brief part of SyncBatchNormBackward . \n

*@par Inputs:
* Four inputs, including:
*@li sum_dy: A Tensor. Must be one of the following types: float16, float32 .
*@li sum_dy_dx_pad: A Tensor. Must be one of the following types: float16, float32 .
*@li mean: A Tensor. Must be one of the following types: float16, float32 .
*@li invert_std: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Outputs:
*@li sum_dy_xmu: A Tensor. Has the same type and format as input "sum_dy"
*@li y: A Tensor. Has the same type and format as input "sum_dy" . \n
*/
REG_OP(SyncBatchNormBackwardReduce)
.INPUT(sum_dy, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(sum_dy_dx_pad, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(invert_std, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(sum_dy_xmu, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(SyncBatchNormBackwardReduce)
/**
*@brief Performs batch normalization . \n

*@par Inputs:


+ 111
- 138
third_party/fwkacllib/inc/ops/nn_calculation_ops.h View File

@@ -369,16 +369,14 @@ REG_OP(BiasAddGrad)
*\n
* The following are the supported data types and data formats:\n
*\n
| Tensor | out_bckprop | filter | y\n
------------|-------------|---------|--------\n
| Data Type | float16 | float16 | float16\n
| |-------------|---------|--------\n
| | float32 | float32 | float32\n
| |-------------|---------|--------\n
| | float64 | float64 | float64\n
------------|-------------|---------|--------\n
| Format | NCHW | NCHW | NCHW\n
| | NHWC | HWCN | NHWC\n
*\n
| Tensor | out_bckprop | filter | y |\n
|-----------|-------------|---------|--------|\n
| Data Type | float16 | float16 | float16|\n
| | float32 | float32 | float32|\n
| | float64 | float64 | float64|\n
| Format | NCHW | NCHW | NCHW |\n
| | NHWC | HWCN | NHWC |\n
*\n
* For float32 and float64 type, the actual calculation on the chip is based on
* float16.
@@ -400,30 +398,25 @@ REG_OP(BiasAddGrad)
*\n
* The following value range restrictions must be met:\n
*\n
| Name | Field | Scope\n
-------------------|----------|--------------\n
| input_size | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Filter | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| out_backprop | H*strideH| [1, 200000]\n
| | W*strideW| [1, 4096]\n
-------------------|----------|--------------\n
| y(fmap) | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Stride | H | [1, 63]\n
| | W | [1, 63]\n
-------------------|----------|--------------\n
| Padding | Top | [0, 255]\n
| | Bottom | [0, 255]\n
| | Left | [0, 255]\n
| | Right | [0, 255]\n
-------------------|----------|--------------\n
| Dilation | H | [1, 255]\n
| | W | [1, 255]\n
*\n
| Name | Field | Scope |\n
|------------------|----------|--------------|\n
| input_size | H | [1, 200000] |\n
| | W | [1, 4096] |\n
| Filter | H | [1, 255] |\n
| | W | [1, 255] |\n
| out_backprop | H*strideH| [1, 200000] |\n
| | W*strideW| [1, 4096] |\n
| y(fmap) | H | [1, 200000] |\n
| | W | [1, 4096] |\n
| Stride | H | [1, 63] |\n
| | W | [1, 63] |\n
| Padding | Top | [0, 255] |\n
| | Bottom | [0, 255] |\n
| | Left | [0, 255] |\n
| | Right | [0, 255] |\n
| Dilation | H | [1, 255] |\n
| | W | [1, 255] |\n
*\n

* In Ascend910, fmap or out_backprop's H and W not support 1 when
@@ -495,9 +488,9 @@ REG_OP(Conv2DBackpropInput)
* Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DBackpropInput instead.
*/
REG_OP(Conv2DBackpropInputD)
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
.INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32}))
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8, DT_BF16}))
.INPUT(out_backprop, TensorType({DT_FLOAT16, DT_INT8, DT_BF16}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT32, DT_BF16}))
.REQUIRED_ATTR(input_size, ListInt)
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
@@ -523,13 +516,12 @@ REG_OP(Conv2DBackpropInputD)
*\n
* The following are the supported data types and data formats:\n
*\n
| Tensor | x | filter | bias | y\n
------------|---------|---------|---------|--------\n
| Data Type | float16 | float16 | float16 | float16\n
| |---------|---------|---------|--------\n
| | int8 | int8 | int32 | int32\n
------------|---------|---------|---------|--------\n
| Format | NCHW | NCHW | ND | NCHW\n
*\n
| Tensor | x | filter | bias | y |\n
|-----------|---------|---------|---------|--------|\n
| Data Type | float16 | float16 | float16 | float16|\n
| | int8 | int8 | int32 | int32 |\n
| Format | NCHW | NCHW | ND | NCHW |\n
*\n
* For int8, a dequant or requant operator must be followed.
*\n
@@ -553,29 +545,24 @@ REG_OP(Conv2DBackpropInputD)
*\n
* The following value range restrictions must be met:\n
*\n
| Name | Field | Scope\n
-------------------|----------|--------------\n
| x (out_backprop) | H*strideH| [1, 200000]\n
| | W*strideW| [1, 4096]\n
-------------------|----------|--------------\n
| Filter | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| y (fmap) | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Stride | H | [1, 63]\n
| | W | [1, 63]\n
-------------------|----------|--------------\n
| Padding | Top | [0, 255]\n
| | Bottom | [0, 255]\n
| | Left | [0, 255]\n
| | Right | [0, 255]\n
-------------------|----------|--------------\n
| Dilation | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| Offset_x | | [-128, 127]\n
*\n
| Name | Field | Scope |\n
|------------------|----------|--------------|\n
| x (out_backprop) | H*strideH| [1, 200000] |\n
| | W*strideW| [1, 4096] |\n
| Filter | H | [1, 255] |\n
| | W | [1, 255] |\n
| y (fmap) | H | [1, 200000] |\n
| | W | [1, 4096] |\n
| Stride | H | [1, 63] |\n
| | W | [1, 63] |\n
| Padding | Top | [0, 255] |\n
| | Bottom | [0, 255] |\n
| | Left | [0, 255] |\n
| | Right | [0, 255] |\n
| Dilation | H | [1, 255] |\n
| | W | [1, 255] |\n
| Offset_x | | [-128, 127] |\n
*\n
* In Ascend910, fmap or out_backprop's H and W not support 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
@@ -631,16 +618,14 @@ REG_OP(Deconvolution)
*\n
* The following are the supported data types and data formats:\n
*\n
| Tensor | x | out_backprop | y\n
------------|---------|--------------|---------\n
| Data Type | float16 | float16 | float16\n
| |---------|--------------|---------\n
| | float32 | float32 | float32\n
| |---------|--------------|---------\n
| | float64 | float64 | float64\n
|-----------|---------|--------------|---------\n
| Format | NCHW | NCHW | NCHW\n
| | NHWC | NHWC | HWCN\n
*\n
| Tensor | x | out_backprop | y |\n
|-----------|---------|--------------|---------|\n
| Data Type | float16 | float16 | float16 |\n
| | float32 | float32 | float32 |\n
| | float64 | float64 | float64 |\n
| Format | NCHW | NCHW | NCHW |\n
| | NHWC | NHWC | HWCN |\n
*\n
* For float32 and float64 type of x and outbackprop, the actual calculation on the chip
* is based on float16.
@@ -662,30 +647,25 @@ REG_OP(Deconvolution)
*\n
* The following value range restrictions must be met:\n
*\n
| Name | Field | Scope\n
-------------------|----------|--------------\n
| x(fmap) | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Filter Size | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| out_backprop | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| y | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Stride | H | [1, 63]\n
| | W | [1, 63]\n
-------------------|----------|--------------\n
| Padding | Top | [0, 255]\n
| | Bottom | [0, 255]\n
| | Left | [0, 255]\n
| | Right | [0, 255]\n
-------------------|----------|--------------\n
| Dilation | H | [1, 255]\n
| | W | [1, 255]\n
*\n
| Name | Field | Scope |\n
|------------------|----------|--------------|\n
| x(fmap) | H | [1, 200000] |\n
| | W | [1, 4096] |\n
| Filter Size | H | [1, 255] |\n
| | W | [1, 255] |\n
| out_backprop | H | [1, 200000] |\n
| | W | [1, 4096] |\n
| y | H | [1, 200000] |\n
| | W | [1, 4096] |\n
| Stride | H | [1, 63] |\n
| | W | [1, 63] |\n
| Padding | Top | [0, 255] |\n
| | Bottom | [0, 255] |\n
| | Left | [0, 255] |\n
| | Right | [0, 255] |\n
| Dilation | H | [1, 255] |\n
| | W | [1, 255] |\n
*\n
*@par Outputs:
* y: A Tensor. Has the same type as x, has the same format as filter_size.
@@ -853,11 +833,11 @@ REG_OP(Conv2DBackpropFilterD)
*@li Compatible with the Caffe operator 2D "Convolution".
*/
REG_OP(Conv2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16}))
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_BF16}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_BF16}))
.REQUIRED_ATTR(strides, ListInt)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(dilations, ListInt, {1, 1, 1, 1})
@@ -1441,14 +1421,13 @@ REG_OP(Conv3DTransposeD)
*\n
* The following are the supported data types and data formats:\n
*\n
| Tensor | x | filter | bias | y\n
------------|---------|---------|---------|--------\n
| Data Type | float16 | float16 | float16 | float16\n
| |---------|---------|---------|--------\n
| | int8 | int8 | int32 | int32\n
------------|---------|---------|---------|--------\n
| Format | NCHW | NCHW | ND | NCHW\n
| | NHWC | HWCN | | NHWC\n
*\n
| Tensor | x | filter | bias | y |\n
|-----------|---------|---------|---------|--------|\n
| Data Type | float16 | float16 | float16 | float16|\n
| | int8 | int8 | int32 | int32 |\n
| Format | NCHW | NCHW | ND | NCHW |\n
| | NHWC | HWCN | | NHWC |\n
*\n
* For int8, a dequant or requant operator must be followed.
*\n
@@ -1476,32 +1455,26 @@ REG_OP(Conv3DTransposeD)
*\n
* The following value range restrictions must be met:\n
*\n
| Name | Field | Scope\n
-------------------|----------|--------------\n
| input_size | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| x (out_backprop) | H*strideH| [1, 200000]\n
| | W*strideW| [1, 4096]\n
-------------------|----------|--------------\n
| filter | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| y (fmap) | H | [1, 200000]\n
| | W | [1, 4096]\n
-------------------|----------|--------------\n
| Stride | H | [1, 63]\n
| | W | [1, 63]\n
-------------------|----------|--------------\n
| Padding | Top | [0, 255]\n
| | Bottom | [0, 255]\n
| | Left | [0, 255]\n
| | Right | [0, 255]\n
-------------------|----------|--------------\n
| Dilation | H | [1, 255]\n
| | W | [1, 255]\n
-------------------|----------|--------------\n
| Offset_x | | [-128, 127]\n
*\n
| Name | Field | Scope |\n
|------------------|----------|--------------|\n
| input_size | H | [1, 200000] |\n
| | W | [1, 4096] |\n
| x (out_backprop) | H*strideH| [1, 200000] |\n
| | W*strideW| [1, 4096] |\n
| filter | H | [1, 255] |\n
| | W | [1, 255] |\n
| y (fmap) | H | [1, 200000] |\n
| | W | [1, 4096] |\n
| Stride | H | [1, 63] |\n
| | W | [1, 63] |\n
| Padding | Top | [0, 255] |\n
| | Bottom | [0, 255] |\n
| | Left | [0, 255] |\n
| | Right | [0, 255] |\n
| Dilation | H | [1, 255] |\n
| | W | [1, 255] |\n
| Offset_x | | [-128, 127] |\n
*\n
* In Ascend910, fmap or out_backprop's H and W not support 1 when
* fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1
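The constraint above compares the padded feature-map extent against the effective (dilated) kernel extent. A hedged helper for that arithmetic and the standard output-size formula the range tables assume (illustrative only, no rounding or padding-mode corner cases):

```cpp
// Effective kernel extent under dilation.
int EffectiveKernelExtent(int filter, int dilation) {
  return (filter - 1) * dilation + 1;
}

// Standard convolution output size: (padded input - dilated kernel) / stride + 1.
int ConvOutputDim(int fmap, int pad_before, int pad_after, int filter,
                  int dilation, int stride) {
  return (fmap + pad_before + pad_after - EffectiveKernelExtent(filter, dilation)) / stride + 1;
}
```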


+ 172
- 2
third_party/fwkacllib/inc/ops/nn_detect_ops.h View File

@@ -135,7 +135,8 @@ REG_OP(CheckValid)
* the value "4" refers to "x0", "x1", "y0", and "y1" . \n

*@par Attributes:
*mode: Computation mode, a character string with the value range of [iou, iof] . \n
*@li mode: Computation mode, a character string with the value range of [iou, iof]
*@li eps: An optional float, prevent division by 0, default value is 1.0 . \n

*@par Outputs:
*overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying
@@ -150,6 +151,7 @@ REG_OP(Iou)
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(mode, String, "iou")
.ATTR(eps, Float, 1.0)
.OP_END_FACTORY_REG(Iou)
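The new eps attribute guards the denominator in both supported modes. A hedged sketch of the per-pair computation (iou = intersection over union, iof = intersection over the ground-truth/foreground area; exact placement of eps is an assumption):

```cpp
#include <algorithm>

struct Box { float x0, y0, x1, y1; };   // (x0, y0, x1, y1) as described above

float Overlap(const Box &b, const Box &g, bool iof_mode, float eps) {
  const float iw = std::max(0.0f, std::min(b.x1, g.x1) - std::max(b.x0, g.x0));
  const float ih = std::max(0.0f, std::min(b.y1, g.y1) - std::max(b.y0, g.y0));
  const float inter = iw * ih;
  const float area_b = (b.x1 - b.x0) * (b.y1 - b.y0);
  const float area_g = (g.x1 - g.x0) * (g.y1 - g.y0);
  const float denom = iof_mode ? area_g : (area_b + area_g - inter);
  return inter / (denom + eps);   // eps prevents division by zero
}
```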

/**
@@ -205,7 +207,8 @@ the value "5" indicates the indexes of images where the ROIs are located, "x0",
*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image.
*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical
sampling frequency of each output. If this attribute is set to "0", the sampling frequency is
equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n
equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" .
*@li roi_end_mode: An optional attribute of type int, specifying the align mode .\n

*@par Outputs:
*xdiff: Gradient added to input "features". Has the same 5HD shape as input "features".
@@ -220,6 +223,7 @@ REG_OP(ROIAlignGrad)
.REQUIRED_ATTR(pooled_height, Int)
.REQUIRED_ATTR(spatial_scale, Float)
.ATTR(sample_num, Int, 2)
.ATTR(roi_end_mode, Int, 1)
.OP_END_FACTORY_REG(ROIAlignGrad)

/**
@@ -578,6 +582,172 @@ REG_OP(Yolo)
.ATTR(background, Bool, false)
.ATTR(softmaxtree, Bool, false)
.OP_END_FACTORY_REG(Yolo)
/**
*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n

*@par Inputs:
*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W),
where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged
as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n

*@par Attributes:
*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3.
*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h).
*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024].
*@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3".Defaults to "V3"
*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false".
*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false".
*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n

*@par Outputs:
*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2],
* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box.
*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2],
* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence.
*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2],
* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n

*@attention Constraints:
*@li This operator applies to YOLO v2,v3 and v5 networks.
*@li The succeeding layer of the Yolo operator must be operator Yolov5DetectionOutput.
*@par Third-party framework compatibility
* It is a custom operator.
*/
REG_OP(YoloPreDetection)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(boxes, Int, 3)
.ATTR(coords, Int, 4)
.ATTR(classes, Int, 80)
.ATTR(yolo_version, String, "V5")
.ATTR(softmax, Bool, false)
.ATTR(background, Bool, false)
.ATTR(softmaxtree, Bool, false)
.OP_END_FACTORY_REG(YoloPreDetection)

/**
*@brief Performs YOLO V5 detection . \n

*@par Inputs:
*Ten inputs, including:
*@li Operator Yolov5DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n
There are three Yolo operators at Yolov5DetectionOutput's preceding layer on Yolo v5. For details, see the description of operator Yolo.
*@li img_info: A float16 or float32, describing the image information including the required image height and width \n
* and the actual image height and width.

*@par Attributes:
*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
*@li coords: Specifies the number of coordinate parameters. Must be 4.
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].

*@li post_nms_topn: An optional int32. This attribute is reserved.
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0].

*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n

*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".

*@par Outputs:
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2),
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024

*@attention Constraints:\n
*@li This operator applies only to the YOLO v5 network.
*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators.

*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator. It has no corresponding operator in Caffe.
*/
REG_OP(YoloV5DetectionOutput)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(biases, ListFloat)
.ATTR(boxes, Int, 3)
.ATTR(coords, Int, 4)
.ATTR(classes, Int, 80)
.ATTR(relative, Bool, true)
.ATTR(obj_threshold, Float, 0.5)
.ATTR(post_nms_topn, Int, 512)
.ATTR(score_threshold, Float, 0.5)
.ATTR(iou_threshold, Float, 0.45)
.ATTR(pre_nms_topn, Int, 512)
.ATTR(N, Int, 10)
.ATTR(resize_origin_img_to_net, Bool, false)
.ATTR(out_box_dim, Int, 3)
.ATTR(alpha, Float, 2.0)
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(box_out_num, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(YoloV5DetectionOutput)

/**
*@brief Performs YOLO V5 detection.

*@par Inputs:
*16 Input, including:
*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v5) are used as the inputs of operator Yolov5DetectionOutput.
* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
*@li imginfo: A float16, describing the image information including the required image height and width
* and the actual image height and width.
*@li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
* [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]] consisting of height groups of [0, 1, 2...(width-1)]
* is formed for the three Yolo outputs, respectively. It's a dynamic input. \n

*@li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively. \n
*@par Attributes:
*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
*@li coords: Specifies the number of coordinate parameters. Must be 4.
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
*@li relative: An optional bool. Defaults to and must be "true".
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
*@li post_nms_topn: An optional int32. This attribute is reserved.
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
*
*@par Outputs:
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2),
* describing the information of each output box.
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
*
*@attention Constraints:
*@li This operator applies only to the YOLO v5 network.
*@li The preceding layer of operator Yolov5DetectionOutput must be three Yolo operators.
*@see Yolo()
*@par Third-party framework compatibility
* It is a custom operator.
*/
REG_OP(YoloV5DetectionOutputD)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
.DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(biases, ListFloat)
.ATTR(boxes, Int, 3)
.ATTR(coords, Int, 4)
.ATTR(classes, Int, 80)
.ATTR(relative, Bool, true)
.ATTR(obj_threshold, Float, 0.5)
.ATTR(post_nms_topn, Int, 512)
.ATTR(score_threshold, Float, 0.5)
.ATTR(iou_threshold, Float, 0.45)
.ATTR(pre_nms_topn, Int, 512)
.ATTR(N, Int, 10)
.ATTR(resize_origin_img_to_net, Bool, false)
.ATTR(out_box_dim, Int, 3)
.ATTR(alpha, Float, 2.0)
.OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(box_out_num, TensorType({DT_INT32}))
.OP_END_FACTORY_REG(YoloV5DetectionOutputD)

/**
*@brief Performs YOLO V2 detection . \n


+ 14
- 1
third_party/fwkacllib/inc/ops/nn_training_ops.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2021. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -2645,6 +2645,19 @@ REG_OP(SparseApplyAdadeltaD)
REG_OP(AtomicAddrClean)
.ATTR(automic_add_mem_size, ListInt, {})
.OP_END_FACTORY_REG(AtomicAddrClean)

/**
*@brief Clean memory of workspace list . \n

*@par Attributes:
* @li workspace_size: sizes of workspaces . \n

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DynamicAtomicAddrClean)
.ATTR(automic_add_mem_size, ListInt, {})
.OP_END_FACTORY_REG(DynamicAtomicAddrClean)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_

+ 42
- 0
third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h View File

@@ -59,6 +59,25 @@ REG_OP(HardSwish)
.OP_END_FACTORY_REG(HardSwish)

/**
*@brief Computes the gradient for the hard_swish of "x" . \n

* @par Inputs:
*Two inputs, including:
* @li grad: A Tensor. Must be one of the following types: float16, float32
* @li x: A Tensor of the same type as "grad" . \n

*@par Outputs:
*y: A Tensor. Has the same type as "grad".
* @par Third-party framework compatibility
* Compatible with the Torch operator HardSwishGrad.
*/
REG_OP(HardSwishGrad)
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(HardSwishGrad)
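For orientation, a hedged scalar sketch of the gradient the new op computes, using the usual hard-swish definition x * relu6(x + 3) / 6 (the actual kernel and boundary handling may differ):

```cpp
// d/dx hard_swish(x) is 0 for x <= -3, 1 for x >= 3, and (2x + 3) / 6 in between;
// the op scales this local derivative by the incoming gradient.
float HardSwishGradRef(float grad, float x) {
  float local;
  if (x <= -3.0f) {
    local = 0.0f;
  } else if (x >= 3.0f) {
    local = 1.0f;
  } else {
    local = (2.0f * x + 3.0f) / 6.0f;
  }
  return grad * local;
}
```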

/**
*@brief Computes the for the Swish of "x" . \n

*@par Inputs:
@@ -81,6 +100,29 @@ REG_OP(Swish)
.OP_END_FACTORY_REG(Swish)

/**
*@brief Computes the gradient for the Swish of "x" . \n

*@par Inputs:
*Three inputs, including:
* @li grad: A Tensor. Must be one of the following types: float16, float32
* @li x: A Tensor of the same type as "grad".
* @li y: A Tensor of the same type as "grad" . \n
* @par Attributes:
* scale: An optional scalar. The data type is float. \n
*@par Outputs:
*grad_x: A Tensor. Has the same type as "grad".
*@par Third-party framework compatibility
*Compatible with the Torch operator SwishGrad
*/
REG_OP(SwishGrad)
.INPUT(grad, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(grad_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(scale, Float, 1.0)
.OP_END_FACTORY_REG(SwishGrad)

/**
*@brief Computes the gradient for the gelu of "x" . \n

*@par Inputs:


+ 32
- 0
third_party/fwkacllib/inc/ops/pad_ops.h View File

@@ -274,6 +274,38 @@ REG_OP(PadV3)
.ATTR(mode, String, "constant")
.ATTR(paddings_contiguous, Bool, true)
.OP_END_FACTORY_REG(PadV3)
/**
*@brief Cal the grad of Pads.

*@par Inputs:
*Two inputs, including:
* @li x: A Tensor. Must be one of the following types: float16, float32, double, int32,
* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
* complex128, uint32, uint64.
* @li paddings: A Tensor of type int32 or int64.

*@par Attributes:
* @li mode: An optional string. Defaults to "reflect". Indicates the padding mode;
* supports "reflect" and "edge".
* @li paddings_contiguous: An optional bool value, Defaults to true.
* If true, paddings is arranged as [[begin0, end0], [begin1, end1], ...]
* If false, paddings is arranged as [[begin0, begin1], ..., [end0, end1], ...]

*@par Outputs:
*y: A Tensor of the same type as "x".

*@par Third-party framework compatibility:
* Compatible with ONNX operator PadGrad.
*/

REG_OP(PadV3Grad)
.INPUT(x, TensorType::BasicType())
.INPUT(paddings, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::BasicType())
.ATTR(mode, String, "reflect")
.ATTR(paddings_contiguous, Bool, true)
.OP_END_FACTORY_REG(PadV3Grad)
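
To make the paddings_contiguous attribute above concrete, the same per-dimension padding of a 2-D tensor (dim0 padded by (1, 2), dim1 by (3, 4)) is laid out differently under the two settings; a small illustrative sketch:

#include <cstdint>
#include <vector>

// paddings_contiguous == true : [[begin0, end0], [begin1, end1]]
const std::vector<int32_t> kPaddingsContiguous = {1, 2, 3, 4};
// paddings_contiguous == false: [[begin0, begin1], [end0, end1]]
const std::vector<int32_t> kPaddingsSplit = {1, 3, 2, 4};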

/**
*@brief Pads a tensor.


+ 19
- 1
third_party/fwkacllib/inc/ops/random_ops.h View File

@@ -685,6 +685,24 @@ REG_OP(Uniform)
.ATTR(from, Float, 0.0)
.ATTR(to, Float, 1.0)
.OP_END_FACTORY_REG(Uniform)
} // namespace ge

/**
*@brief Outputs a tensor of 0s and 1s, used for LSTM and similar sequence ops. \n
*@par Attributes:
* @li time_step: A required attribute of type int, the number of time steps.
* @li batch_size: A required attribute of type int, the batch size.

*@par Outputs:
*y: A Tensor. Has the type float16 or float, 2-D, [time_step,batch_size]. \n

*@par Third-party framework compatibility:
* Compatible with the Caffe operator ContinuationIndicator.
*/

REG_OP(ContinuationIndicator)
.REQUIRED_ATTR(time_step, Int)
.REQUIRED_ATTR(batch_size, Int)
.OUTPUT(y, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(ContinuationIndicator)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_
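
A sketch of the output ContinuationIndicator is expected to produce, assuming it follows the Caffe operator it is declared compatible with: 0 for the first time step of every sequence and 1 for all later steps.

#include <cstdint>
#include <vector>

// For time_step = 3, batch_size = 2 the [time_step, batch_size] output would be
//   t = 0: 0 0
//   t = 1: 1 1
//   t = 2: 1 1
std::vector<float> MakeContinuationIndicator(int64_t time_step, int64_t batch_size) {
  std::vector<float> y(time_step * batch_size, 1.0f);
  for (int64_t b = 0; b < batch_size; ++b) {
    y[b] = 0.0f;  // zero out the first time step for every batch entry
  }
  return y;
}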

+ 9
- 2
third_party/fwkacllib/inc/ops/reduce_ops.h View File

@@ -1275,7 +1275,7 @@ REG_OP(ReduceStd)


* @par Attributes:
* Three Attributes, including:
* Five Attributes, including:
* @li dim: An optional listint, Defaults to "None". \n
* @li unbiased: An optional bool. Defaults to "True".
* If "True", Use Bessel Correction.
@@ -1283,9 +1283,14 @@ REG_OP(ReduceStd)
* @li keepdim: An optional bool. Defaults to "False".
* If "True", Keep the original tensor dimension.
* If "False", Do not keep the original tensor dimension. \n
* @li invert: An optional bool, Defaults to "False".
* If "True", the output is inverse of variance.
* If "False", the output is variance.
* @li epsilon: An optional float, Defaults to 0.001.
* Prevent division by 0.

* @par Outputs:
* @li y: A Tensor. It's the std of X. Has the same type as "x".
* @li y: A Tensor. It's the variance of X or the reciprocal of the variance of X. Has the same type as "x".

* @par Third-party framework compatibility
* Compatible with the Pytorch operator ReduceStdWithMean.
@@ -1297,6 +1302,8 @@ REG_OP(ReduceStdWithMean)
.ATTR(dim, ListInt, {})
.ATTR(unbiased, Bool, true)
.ATTR(keepdim, Bool, false)
.ATTR(invert, Bool, false)
.ATTR(epsilon, Float, 0.001)
.OP_END_FACTORY_REG(ReduceStdWithMean)
} //namespace ge
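
A hedged scalar reading of the new invert/epsilon attributes: the header does not spell out exactly where epsilon enters, so this sketch assumes it only guards the division when invert is true.

// variance -> output selection for ReduceStdWithMean (assumed semantics).
inline float ApplyInvert(float variance, bool invert, float epsilon = 0.001f) {
  return invert ? 1.0f / (variance + epsilon) : variance;
}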



+ 50
- 6
third_party/fwkacllib/inc/ops/rnn.h View File

@@ -822,7 +822,7 @@ REG_OP(DynamicGRU)
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li bias_input:Must be one of the following types: float16, float32. The format must be ND.
*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:Must be one of the following types: int32. The format must be ND.
*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND.
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.

*@par Attributes:
@@ -852,7 +852,7 @@ REG_OP(DynamicGRUV2)
.INPUT(weight_hidden, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(bias_input, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -880,7 +880,7 @@ REG_OP(DynamicGRUV2)
*@li x_weight_input:Must be one of the following types: float32. The format must be FRACTAL_NZ.
*@li weight_hidden:Must be one of the following types: float16. The format must be FRACTAL_Z.
*@li bias_hidden:Must be one of the following types: float16, float32. The format must be ND.
*@li seq_length:Must be one of the following types: int32. The format must be ND.
*@li seq_length:Must be one of the following types: float16 in FRACTAL_NZ and int32 in ND.
*@li init_h:Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.

*@par Attributes:
@@ -913,7 +913,7 @@ REG_OP(DynamicGRUV2Hidden)
.INPUT(x_weight_input, TensorType({DT_FLOAT32}))
.INPUT(weight_hidden, TensorType({DT_FLOAT16}))
.OPTIONAL_INPUT(bias_hidden, TensorType({DT_FLOAT16, DT_FLOAT}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32}))
.OPTIONAL_INPUT(seq_length, TensorType({DT_INT32, DT_FLOAT16}))
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -1050,6 +1050,50 @@ REG_OP(GRUV2HiddenGradCell)
.OP_END_FACTORY_REG(GRUV2HiddenGradCell)

/**
*@brief: DynamicGRUCellGrad calculation.
*@par Inputs:
*ten inputs: \n
*@li dh_pre_t:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dy:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dh:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li update:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li reset:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li hidden_new:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li init_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li t_state:A 1D Tensor. Must be one of the following types: int32. The format must be ND.

*@par Attributes:
*gate_order:A string identifying the gate order in weight and bias. Defaults to "zrh". "rzh" is another option.

*@par Outputs:
*three outputs: \n
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dgate_h:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.
*@li dnt_x:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ.

*@par Restrictions:
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(DynamicGRUCellGrad)
.INPUT(dh_pre_t, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(h, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(dh, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(update, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(reset, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(new, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(hidden_new, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(t_state, TensorType({DT_INT32}))
.OUTPUT(dh_prev, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dgate_h, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(dnt_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(gate_order, String, "zrh")
.OP_END_FACTORY_REG(DynamicGRUCellGrad)

/**
* @brief Calculates the reversed outputs of the function "embedding". \n

* @par Inputs:
@@ -1137,8 +1181,8 @@ REG_OP(CommonLSTM)
*
* @par Inputs:
* @li seq_length: A 1D Tensor. Must be one of the following types: int32. Record the current length of each batch. [batch_size].
* @li b: A 1D Tensor. Must be one of the following types: fp16/fp32. Record the hidden_size. [4 * hidden_size].
* @li x: A 3D Tensor. Must be one of the following types: fp16/fp32. Record the num_step/batch_size/input_size. [num_step, batch_size, input_size].
* @li hidden_size: A required attribute of type int32, specifying the hidden size. \n
*
* @par Outputs:
* seq_mask: A 3D Tensor. Must be one of the following types: fp16/fp32, with the shape of [num_step, batch_size, hidden_size]. Has the same type as "x" . \n
@@ -1148,8 +1192,8 @@ REG_OP(CommonLSTM)
*/
REG_OP(RnnGenMaskV2)
.INPUT(seq_length, TensorType({DT_INT32}))
.INPUT(b, TensorType({{DT_FLOAT16, DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(hidden_size, Int)
.OUTPUT(seq_mask, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(RnnGenMaskV2)



+ 34
- 0
third_party/fwkacllib/inc/ops/selection_ops.h View File

@@ -2408,6 +2408,40 @@ REG_OP(TopKPQDistanceMerge)
.OUTPUT(topk_index, TensorType({DT_INT32}))
.REQUIRED_ATTR(k, Int)
.OP_END_FACTORY_REG(TopKPQDistanceMerge)

/**
*@brief Extracts a strided slice of a tensor. Roughly speaking, this op
extracts a slice of size (end-begin)/stride from the given input tensor.
Starting at the location specified by begin, the slice continues by
adding stride to the index until the index in every dimension is not less than end.

*@par Inputs:
*Five inputs, including:
* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8,
* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16,
* complex128, float16, uint32, uint64.
* @li begin: A Tensor of type int32 or int64, for the index of the first value to select . \n

* @li end: A Tensor of type int32 or int64, for the index of the last value to select . \n

* @li strides: A Tensor of type int32 or int64, for the increment . \n

* @li axes: A Tensor of type int32 or int64, for the axes that begin, end and strides apply to . \n

*@par Outputs:
*y: A Tensor. Has the same type as "x" . \n

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(StridedSliceV3)
.INPUT(x, TensorType::BasicType())
.INPUT(begin, TensorType::IndexNumberType())
.INPUT(end, TensorType::IndexNumberType())
.OPTIONAL_INPUT(axes, TensorType::IndexNumberType())
.OPTIONAL_INPUT(strides, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(StridedSliceV3)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_
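
A worked 1-D example of the slicing rule described for StridedSliceV3 above (the slice starts at begin and keeps adding stride until the index is no longer less than end); negative strides and the axes input are left out of this sketch:

#include <cstdint>
#include <vector>

std::vector<int32_t> StridedSlice1D(const std::vector<int32_t> &x,
                                    int64_t begin, int64_t end, int64_t stride) {
  std::vector<int32_t> y;
  for (int64_t i = begin; i < end; i += stride) {
    y.push_back(x[i]);
  }
  return y;
}
// StridedSlice1D({0, 1, 2, 3, 4, 5}, 1, 5, 2) yields {1, 3}.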

+ 8
- 2
third_party/fwkacllib/inc/ops/transformation_ops.h View File

@@ -368,8 +368,9 @@ REG_OP(SpaceToDepth)
* complex128, uint32, uint64

*@par Attributes:
*Two attributes, including:
*Three attributes, including:
* @li block_size: An int >= 2, specifying the size of the spatial block.
* @li mode: An optional string, specifying the mode. Defaults to "DCR".
* @li data_format: An optional string, specifying the data format. Defaults to "NHWC" . \n

*@par Outputs:
@@ -382,6 +383,7 @@ REG_OP(DepthToSpace)
.INPUT(x, TensorType::BasicType())
.OUTPUT(y, TensorType::BasicType())
.REQUIRED_ATTR(block_size, Int)
.ATTR(mode, String, "DCR")
.ATTR(data_format, String, "NHWC")
.OP_END_FACTORY_REG(DepthToSpace)

@@ -845,7 +847,11 @@ with the same setting for this option. Default: False \n
selected indices from the boxes tensor, where M <= max_output_size. \n

*@attention Constraints:
*Input theta must be float16 or float, output_size must be int32 type . \n
*Input theta must be float16 or float, output_size must be int32 type .
The current AI Core implementation of the AffineGrid operator adopts
BatchMatMul's FP16 fusion operator scheme, so accuracy decreases when
the theta range exceeds [-10, 10]. If the model requires high AffineGrid
accuracy, it is recommended to use AICPU. \n

*@par Third-party framework compatibility
*Compatible with Pytorch affine_grid operator.


+ 82
- 1
third_party/fwkacllib/inc/ops/vector_search.h View File

@@ -34,7 +34,7 @@ namespace ge {
* @li bucket_list: A Tensor. Must be one of the following types: int32, int64.
*
* @par Outputs:
* @li adc_tables: A Tensor. Must be one of the following types: float16, float32.
* adc_tables: A Tensor. Must be one of the following types: float16, float32.
*/
REG_OP(GenADC)
.INPUT(query, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -43,6 +43,87 @@ REG_OP(GenADC)
.INPUT(bucket_list, TensorType({DT_INT32, DT_INT64}))
.OUTPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT}))
.OP_END_FACTORY_REG(GenADC)

/**
* @brief Finds values and indices of the "k" largest or least elements for the last dimension. \n
*
* @par Inputs:
* Dynamic inputs, including:
* @li actual_count: A Tensor of type int32, the actual number of pq_distance.
* @li pq_distance: A Tensor, Will be updated after calculation. Must be one of the following types: float32, float16.
* @li grouped_extreme_distance: A Tensor, the extremum in each group. Must be one of the following types: float32, float16.
* @li pq_index: A Tensor of type int32, index corresponding to pq_distance.
* @li pq_ivf: A Tensor of type int32, the bucket number corresponding to pq_distance.
*
* @par Attributes:
* @li order: A string, indicates the sorting method of topk_pq_distance. \n
* @li k: Int, k maximum or minimum values. \n
* @li group_size: Int, the group size of the extremum. \n
*
* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(TopKPQDistance)
.DYNAMIC_INPUT(actual_count, TensorType({DT_INT32}))
.DYNAMIC_INPUT(pq_distance, TensorType({DT_FLOAT16, DT_FLOAT}))
.DYNAMIC_INPUT(grouped_extreme_distance, TensorType({DT_FLOAT16, DT_FLOAT}))
.DYNAMIC_INPUT(pq_ivf, TensorType({DT_INT32}))
.DYNAMIC_INPUT(pq_index, TensorType({DT_INT32}))
.OUTPUT(topk_distance, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(topk_ivf, TensorType({DT_INT32}))
.OUTPUT(topk_index, TensorType({DT_INT32}))
.ATTR(order, String, "ASC")
.ATTR(k, Int, 0)
.ATTR(group_size, Int, 0)
.OP_END_FACTORY_REG(TopKPQDistance)

/**
* @brief Calculate PQ distance. \n
*
* @par Inputs:
* Six inputs, including:
* @li ivf: A Tensor, dtype is uint8.
* @li bucket_list: A Tensor, dtype is int32.
* @li bucket_base_distance: A Tensor, dtype is float16.
* @li bucket_limits: A Tensor, dtype is int32.
* @li bucket_offsets: A Tensor, dtype is int32.
* @li adc_tables: A Tensor. dtype is float16. \n
*
* @par Outputs:
* Five outputs, including:
* @li actual_count: A Tensor, dtype is int32, the first element means the length of processed ivf.
* @li pq_distance: A Tensor, dtype is float16.
* @li grouped_extreme_distance: A Tensor, dtype is float16.
* @li pq_ivf: A Tensor, dtype is int32.
* @li pq_index: A Tensor, dtype is int32. \n
*
* @par Attributes:
* Five attributes, including:
* @li group_size: A Scalar, indicates the group size when compute grouped_extreme_distance.
* @li total_limit: A Scalar, indicates the total length of the outputs.
* @li extreme_mode: A Scalar, indicates the type of extremum, 0 means minimum, and 1 means maximum.
* @li split_count: A Scalar.
* @li split_index: A Scalar. \n
*
*/
REG_OP(ScanPQCodes)
.INPUT(ivf, TensorType({DT_UINT8}))
.INPUT(bucket_list, TensorType({DT_INT32, DT_INT64}))
.INPUT(bucket_base_distance, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(bucket_limits, TensorType({DT_INT32}))
.INPUT(bucket_offsets, TensorType({DT_INT64}))
.INPUT(adc_tables, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(actual_count, TensorType({DT_INT32}))
.OUTPUT(pq_distance, TensorType({DT_FLOAT16}))
.OUTPUT(grouped_extreme_distance, TensorType({DT_FLOAT16}))
.OUTPUT(pq_ivf, TensorType({DT_INT32}))
.OUTPUT(pq_index, TensorType({DT_INT32}))
.REQUIRED_ATTR(total_limit, Int)
.ATTR(group_size, Int, 64)
.ATTR(extreme_mode, Int, 0)
.ATTR(split_count, Int, 1)
.ATTR(split_index, Int, 0)
.OP_END_FACTORY_REG(ScanPQCodes)
} // namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_

+ 2
- 14
third_party/fwkacllib/inc/runtime/base.h View File

@@ -42,7 +42,7 @@ static const int32_t RT_ERROR_NONE = 0; // success
*/
typedef enum tagRtDeviceMode {
RT_DEVICE_MODE_SINGLE_DIE = 0,
RT_DEVICE_MODE_MULTI_DIE = 1,
RT_DEVICE_MODE_MULTI_DIE,
RT_DEVICE_MODE_RESERVED
} rtDeviceMode;

@@ -178,7 +178,7 @@ RTS_API rtError_t rtProfilerInit(const char *profDir, const char *address, const
* @ingroup profiling_base
* @brief config rts profiler.
*/
RTS_API rtError_t rtProfilerConfig(uint16_t type);
RTS_API rtError_t rtProfilerConfig(uint16_t profConfig);

/**
* @ingroup profiling_base
@@ -251,18 +251,6 @@ RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t moduleId, rtProfCtrlHandle

/**
* @ingroup dvrt_base
* @brief Returns the last error from a runtime call.
*/
RTS_API rtError_t rtGetLastError();

/**
* @ingroup dvrt_base
* @brief Returns the last error from a runtime call.
*/
RTS_API rtError_t rtPeekAtLastError();

/**
* @ingroup dvrt_base
* @brief register callback for error code
* @param [out] NA
* @return RT_ERROR_NONE for ok


+ 31
- 28
third_party/fwkacllib/inc/runtime/config.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_CONFIG_H__
#define __CCE_RUNTIME_CONFIG_H__
#ifndef CCE_RUNTIME_CONFIG_H
#define CCE_RUNTIME_CONFIG_H

#include "base.h"

@@ -23,28 +23,28 @@
extern "C" {
#endif

#define PLAT_COMBINE(arch, chip, ver) ((arch << 16) | (chip << 8) | (ver))
#define PLAT_GET_ARCH(type) ((type >> 16) & 0xffff)
#define PLAT_GET_CHIP(type) ((type >> 8) & 0xff)
#define PLAT_GET_VER(type) (type & 0xff)
#define PLAT_COMBINE(arch, chip, ver) (((arch) << 16U) | ((chip) << 8U) | (ver))
#define PLAT_GET_ARCH(type) (((type) >> 16U) & 0xffffU)
#define PLAT_GET_CHIP(type) (((type) >> 8U) & 0xffU)
#define PLAT_GET_VER(type) ((type) & 0xffU)

typedef enum tagRtArchType {
ARCH_BEGIN = 0,
ARCH_V100 = ARCH_BEGIN,
ARCH_V200,
ARCH_END,
ARCH_V200 = 1,
ARCH_END = 2,
} rtArchType_t;

typedef enum tagRtChipType {
CHIP_BEGIN = 0,
CHIP_MINI = CHIP_BEGIN,
CHIP_CLOUD,
CHIP_MDC,
CHIP_LHISI,
CHIP_DC,
CHIP_CLOUD_V2,
CHIP_NO_DEVICE,
CHIP_END,
CHIP_CLOUD = 1,
CHIP_MDC = 2,
CHIP_LHISI = 3,
CHIP_DC = 4,
CHIP_CLOUD_V2 = 5,
CHIP_NO_DEVICE = 6,
CHIP_END = 7,
} rtChipType_t;

typedef enum tagRtAicpuScheType {
@@ -59,29 +59,32 @@ typedef enum tagRtDeviceCapabilityType {
RT_SCHEDULE_HARDWARE, // HWTS Schedule
RT_AICPU_BLOCKING_OP_NOT_SUPPORT,
RT_AICPU_BLOCKING_OP_SUPPORT, // 1910/1980/1951 ts support AICPU blocking operation
RT_MODE_NO_FFTS, // no ffts
RT_MODE_FFTS, // 1981 get ffts work mode, ffts
RT_MODE_FFTS_PLUS, // 1981 get ffts work mode, ffts plus
} rtDeviceCapabilityType;

typedef enum tagRtVersion {
VER_BEGIN = 0,
VER_NA = VER_BEGIN,
VER_ES,
VER_CS,
VER_SD3403,
VER_END,
VER_ES = 1,
VER_CS = 2,
VER_SD3403 = 3,
VER_END = 4,
} rtVersion_t;

/* match rtChipType_t */
typedef enum tagRtPlatformType {
PLATFORM_BEGIN = 0,
PLATFORM_MINI_V1 = PLATFORM_BEGIN,
PLATFORM_CLOUD_V1,
PLATFORM_MINI_V2,
PLATFORM_LHISI_ES,
PLATFORM_LHISI_CS,
PLATFORM_DC,
PLATFORM_CLOUD_V2,
PLATFORM_LHISI_SD3403,
PLATFORM_END,
PLATFORM_CLOUD_V1 = 1,
PLATFORM_MINI_V2 = 2,
PLATFORM_LHISI_ES = 3,
PLATFORM_LHISI_CS = 4,
PLATFORM_DC = 5,
PLATFORM_CLOUD_V2 = 6,
PLATFORM_LHISI_SD3403 = 7,
PLATFORM_END = 8,
} rtPlatformType_t;

typedef enum tagRtCubeFracMKNFp16 {
@@ -240,4 +243,4 @@ RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout);
}
#endif

#endif // __CCE_RUNTIME_STREAM_H__
#endif // CCE_RUNTIME_CONFIG_H
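
The renumbered enums and parenthesised PLAT_* macros above compose into a single platform code; a small sketch (assumes runtime/config.h is included):

#include <cstdint>

// arch = ARCH_V200 (1), chip = CHIP_CLOUD (1), ver = VER_CS (2)
const uint32_t kPlat = PLAT_COMBINE(ARCH_V200, CHIP_CLOUD, VER_CS);  // 0x00010102
const uint32_t kArch = PLAT_GET_ARCH(kPlat);  // 1 == ARCH_V200
const uint32_t kChip = PLAT_GET_CHIP(kPlat);  // 1 == CHIP_CLOUD
const uint32_t kVer  = PLAT_GET_VER(kPlat);   // 2 == VER_CS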

+ 3
- 3
third_party/fwkacllib/inc/runtime/context.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_CONTEXT_H__
#define __CCE_RUNTIME_CONTEXT_H__
#ifndef CCE_RUNTIME_CONTEXT_H
#define CCE_RUNTIME_CONTEXT_H

#include "base.h"

@@ -173,4 +173,4 @@ RTS_API rtError_t rtSetCtxINFMode(bool mode);
#endif


#endif // __CCE_RUNTIME_CONTEXT_H__
#endif // CCE_RUNTIME_CONTEXT_H

+ 29
- 10
third_party/fwkacllib/inc/runtime/dev.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_DEVICE_H__
#define __CCE_RUNTIME_DEVICE_H__
#ifndef CCE_RUNTIME_DEVICE_H
#define CCE_RUNTIME_DEVICE_H

#include "base.h"

@@ -23,8 +23,8 @@
extern "C" {
#endif

#define RT_CAPABILITY_SUPPORT (0x1)
#define RT_CAPABILITY_NOT_SUPPORT (0x0)
#define RT_CAPABILITY_SUPPORT (0x1U)
#define RT_CAPABILITY_NOT_SUPPORT (0x0U)

typedef struct tagRTDeviceInfo {
uint8_t env_type; // 0: FPGA 1: EMU 2: ESL
@@ -45,27 +45,28 @@ typedef struct tagRTDeviceInfo {

typedef enum tagRtRunMode {
RT_RUN_MODE_OFFLINE = 0,
RT_RUN_MODE_ONLINE = 1,
RT_RUN_MODE_AICPU_SCHED = 2,
RT_RUN_MODE_ONLINE,
RT_RUN_MODE_AICPU_SCHED,
RT_RUN_MODE_RESERVED
} rtRunMode;

typedef enum tagRtAicpuDeployType {
AICPU_DEPLOY_CROSS_OS = 0x0,
AICPU_DEPLOY_CROSS_PROCESS = 0x1,
AICPU_DEPLOY_CROSS_THREAD = 0x2,
AICPU_DEPLOY_CROSS_PROCESS,
AICPU_DEPLOY_CROSS_THREAD,
AICPU_DEPLOY_RESERVED
} rtAicpuDeployType_t;

typedef enum tagRtFeatureType {
FEATURE_TYPE_MEMCPY = 0,
FEATURE_TYPE_MEMORY = 1,
FEATURE_TYPE_MEMORY,
FEATURE_TYPE_RSV
} rtFeatureType_t;

typedef enum tagRtDeviceFeatureType {
FEATURE_TYPE_SCHE,
FEATURE_TYPE_BLOCKING_OPERATOR,
FEATURE_TYPE_FFTS_MODE,
FEATURE_TYPE_END,
} rtDeviceFeatureType_t;

@@ -90,6 +91,15 @@ typedef enum tagRtDeviceModuleType {
RT_MODULE_TYPE_VECTOR_CORE, /**< VECTOR CORE info*/
} rtDeviceModuleType_t;

// used for rtGetDevMsg callback function
typedef void (*rtGetMsgCallback)(const char *msg, uint32_t len);

typedef enum tagGetDevMsgType {
RT_GET_DEV_ERROR_MSG = 0,
RT_GET_DEV_RUNNING_STREAM_SNAPSHOT_MSG,
RT_GET_DEV_MSG_RESERVE
} rtGetDevMsgType_t;

/**
* @ingroup dvrt_dev
* @brief get total device number.
@@ -408,8 +418,17 @@ RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device);
*/
RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device);

/**
* @ingroup dvrt_dev
* @brief get device message
* @param [in] rtGetDevMsgType_t getMsgType:msg type
* @param [in] GetMsgCallback callback:acl callback function
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtGetDevMsg(rtGetDevMsgType_t getMsgType, rtGetMsgCallback callback);
#if defined(__cplusplus)
}
#endif

#endif // __CCE_RUNTIME_DEVICE_H__
#endif // CCE_RUNTIME_DEVICE_H
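
A minimal usage sketch for the new rtGetDevMsg interface, using only what is declared above; the callback body and function names are illustrative:

#include <cstdint>
#include <cstdio>

static void PrintDevMsg(const char *msg, uint32_t len) {
  std::printf("%.*s\n", static_cast<int>(len), msg);  // honour len; msg may not be NUL-terminated
}

void DumpDeviceErrorMsg() {
  if (rtGetDevMsg(RT_GET_DEV_ERROR_MSG, PrintDevMsg) != RT_ERROR_NONE) {
    // handle the error according to the caller's policy
  }
}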

+ 3
- 3
third_party/fwkacllib/inc/runtime/dvfsprofile.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_DVFSPROFILE_H__
#define __CCE_RUNTIME_DVFSPROFILE_H__
#ifndef CCE_RUNTIME_DVFSPROFILE_H
#define CCE_RUNTIME_DVFSPROFILE_H

#include "base.h"

@@ -60,4 +60,4 @@ RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode);
}
#endif

#endif // __CCE_RUNTIME_PROFILE_H__
#endif // CCE_RUNTIME_DVFSPROFILE_H

+ 9
- 9
third_party/fwkacllib/inc/runtime/event.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_EVENT_H__
#define __CCE_RUNTIME_EVENT_H__
#ifndef CCE_RUNTIME_EVENT_H
#define CCE_RUNTIME_EVENT_H

#include "base.h"

@@ -33,8 +33,8 @@ typedef enum rtEventWaitStatus {
* @ingroup event_flags
* @brief event op bit flags
*/
#define RT_EVENT_DEFAULT (0x0E)
#define RT_EVENT_WITH_FLAG (0x0B)
#define RT_EVENT_DEFAULT (0x0EU)
#define RT_EVENT_WITH_FLAG (0x0BU)

#define RT_EVENT_DDSYNC_NS 0x01U
#define RT_EVENT_STREAM_MARK 0x02U
@@ -200,14 +200,14 @@ RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream);
/**
* @ingroup dvrt_event
* @brief Wait for a notify with time out
* @param [in] notify_ notify to be wait
* @param [in] stream_ input stream
* @param [in] notify notify to be wait
* @param [in] stream input stream
* @param [in] timeOut input timeOut
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
* @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx
*/
RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify_, rtStream_t stream_, uint32_t timeOut);
RTS_API rtError_t rtNotifyWaitWithTimeOut(rtNotify_t notify, rtStream_t stream, uint32_t timeOut);

/**
* @ingroup dvrt_event
@@ -270,10 +270,10 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs
* @return RT_ERROR_INVALID_VALUE for error input
* @return RT_ERROR_DRV_ERR for driver error
*/
RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num);
RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int32_t num);

#if defined(__cplusplus)
}
#endif

#endif // __CCE_RUNTIME_EVENT_H__
#endif // CCE_RUNTIME_EVENT_H

+ 29
- 26
third_party/fwkacllib/inc/runtime/kernel.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_KERNEL_H__
#define __CCE_RUNTIME_KERNEL_H__
#ifndef CCE_RUNTIME_KERNEL_H
#define CCE_RUNTIME_KERNEL_H

#include "base.h"
#include "stream.h"
@@ -131,7 +131,10 @@ typedef struct tagRtArgsWithTiling {
uint32_t argsSizeWithoutTiling; // input + output + tiling addr size
uint16_t tilingAddrOffset; // tiling addr offset
uint16_t tilingDataOffset; // tiling data offset
uint16_t reserved[2];
uint16_t hostInputAddrOffset; // index of host_memory input in inputs_addrs list
uint16_t hostInputDataOffset; // host_mem input data offset
bool hasHostMemInput; // has host_memory input data in args or not: true or false
uint8_t reserved[7];
} rtArgsWithTiling_t;

/**
@@ -141,7 +144,7 @@ typedef struct tagRtArgsWithTiling {
typedef enum tagRtDumpKind {
RT_DATA_DUMP_KIND_INVALID = -1,
RT_DATA_DUMP_KIND_DUMP = 0,
RT_DATA_DUMP_KIND_RESERVED
RT_DATA_DUMP_KIND_RESERVED = 1,
} rtDumpKind_t;

/**
@@ -160,72 +163,72 @@ typedef void (*rtCallback_t)(void *fnData);
* @ingroup rt_kernel
* @brief magic number of plain binary for aicore
*/
#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50
#define RT_DEV_BINARY_MAGIC_PLAIN 0xabceed50U

/**
* @ingroup rt_kernel
* @brief magic number of plain binary for aicpu
*/
#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51
#define RT_DEV_BINARY_MAGIC_PLAIN_AICPU 0xabceed51U

/**
* @ingroup rt_kernel
* @brief magic number of plain binary for aivector
*/
#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52
#define RT_DEV_BINARY_MAGIC_PLAIN_AIVEC 0xabceed52U

/**
* @ingroup rt_kernel
* @brief magic number of elf binary for aicore
*/
#define RT_DEV_BINARY_MAGIC_ELF 0x43554245
#define RT_DEV_BINARY_MAGIC_ELF 0x43554245U

/**
* @ingroup rt_kernel
* @brief magic number of elf binary for aicpu
*/
#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243
#define RT_DEV_BINARY_MAGIC_ELF_AICPU 0x41415243U

/**
* @ingroup rt_kernel
* @brief magic number of elf binary for aivector
*/
#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246
#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246U

/**
* @ingroup rt_kernel
* @brief magic number of elf binary for aicube
*/
#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343
#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343U

/**
* @ingroup rt_kernel_flags
* @brief kernel op bit flags
*/
#define RT_KERNEL_DEFAULT (0x00)
#define RT_KERNEL_CONVERT (0x01)
#define RT_KERNEL_DUMPFLAG (0x02)
#define RT_FUSION_KERNEL_DUMPFLAG (0x04)
#define RT_KERNEL_CUSTOM_AICPU (0x08)
#define RT_KERNEL_DEFAULT (0x00U)
#define RT_KERNEL_CONVERT (0x01U)
#define RT_KERNEL_DUMPFLAG (0x02U)
#define RT_FUSION_KERNEL_DUMPFLAG (0x04U)
#define RT_KERNEL_CUSTOM_AICPU (0x08U)

// STARS topic scheduler sqe : topic_type
#define RT_KERNEL_DEVICE_FIRST (0x10)
#define RT_KERNEL_HOST_ONLY (0x20)
#define RT_KERNEL_HOST_FIRST (0x40)
#define RT_KERNEL_DEVICE_FIRST (0x10U)
#define RT_KERNEL_HOST_ONLY (0x20U)
#define RT_KERNEL_HOST_FIRST (0x40U)

/**
* @ingroup rt_kernel
* @brief kernel mode
**/
#define RT_DEFAULT_KERNEL_MODE (0x00)
#define RT_NORMAL_KERNEL_MODE (0x01)
#define RT_ALL_KERNEL_MODE (0x02)
#define RT_DEFAULT_KERNEL_MODE (0x00U)
#define RT_NORMAL_KERNEL_MODE (0x01U)
#define RT_ALL_KERNEL_MODE (0x02U)

/**
* @ingroup rt_kernel
* @brief kernel L1 Fusion Dump bit flags
*/
#define RT_DDR_ADDR (0x0)
#define RT_DDR_ADDR (0x0U)

/**
* @ingroup rt_kernel
@@ -672,7 +675,7 @@ RTS_API rtError_t rtStopMDCProfiler(void *addr);
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockDim,
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream_);
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream);

/**
* @ingroup rt_kernel
@@ -688,11 +691,11 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *handle, const void *devFunc, uint32_t blockDim,
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream_, const void* kernelInfo);
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stream, const void* kernelInfo);

#if defined(__cplusplus)
}
#endif

#endif // __CCE_RUNTIME_KERNEL_H__
#endif // CCE_RUNTIME_KERNEL_H
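
A small sketch of how a caller might fill the new host-memory-input fields of rtArgsWithTiling_t; only the field names come from the header, the helper and offset values are illustrative:

#include <cstdint>

void MarkHostMemInput(rtArgsWithTiling_t *argsInfo, uint16_t addrOffset, uint16_t dataOffset) {
  argsInfo->hasHostMemInput = true;            // args carry host_memory input data
  argsInfo->hostInputAddrOffset = addrOffset;  // index of the host_memory input in the input addr list
  argsInfo->hostInputDataOffset = dataOffset;  // offset of the host_memory input data within args
}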


+ 75
- 44
third_party/fwkacllib/inc/runtime/mem.h View File

@@ -14,12 +14,10 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_MEM_H__
#define __CCE_RUNTIME_MEM_H__
#ifndef CCE_RUNTIME_MEM_H
#define CCE_RUNTIME_MEM_H

/*lint -e7*/
#include <stddef.h>
/*lint +e7*/
#include "base.h"
#include "config.h"
#include "stream.h"
@@ -32,43 +30,43 @@ extern "C" {
* @ingroup dvrt_mem
* @brief memory type
*/
#define RT_MEMORY_DEFAULT ((uint32_t)0x0) // default memory on device
#define RT_MEMORY_HBM ((uint32_t)0x2) // HBM memory on device
#define RT_MEMORY_RDMA_HBM ((uint32_t)0x3) // RDMA-HBM memory on device
#define RT_MEMORY_DDR ((uint32_t)0x4) // DDR memory on device
#define RT_MEMORY_SPM ((uint32_t)0x8) // shared physical memory on device
#define RT_MEMORY_P2P_HBM ((uint32_t)0x10) // HBM memory on other 4P device
#define RT_MEMORY_P2P_DDR ((uint32_t)0x11) // DDR memory on other device
#define RT_MEMORY_DDR_NC ((uint32_t)0x20) // DDR memory of non-cache
#define RT_MEMORY_TS_4G ((uint32_t)0x40)
#define RT_MEMORY_TS ((uint32_t)0x80)
#define RT_MEMORY_RESERVED ((uint32_t)0x100)
#define RT_MEMORY_DEFAULT (0x0U) // default memory on device
#define RT_MEMORY_HBM (0x2U) // HBM memory on device
#define RT_MEMORY_RDMA_HBM (0x3U) // RDMA-HBM memory on device
#define RT_MEMORY_DDR (0x4U) // DDR memory on device
#define RT_MEMORY_SPM (0x8U) // shared physical memory on device
#define RT_MEMORY_P2P_HBM (0x10U) // HBM memory on other 4P device
#define RT_MEMORY_P2P_DDR (0x11U) // DDR memory on other device
#define RT_MEMORY_DDR_NC (0x20U) // DDR memory of non-cache
#define RT_MEMORY_TS_4G (0x40U)
#define RT_MEMORY_TS (0x80U)
#define RT_MEMORY_RESERVED (0x100U)

#define RT_MEMORY_L1 ((uint32_t)0x1<<16)
#define RT_MEMORY_L2 ((uint32_t)0x1<<17)
#define RT_MEMORY_L1 (0x1U << 16U)
#define RT_MEMORY_L2 (0x1U << 17U)

/**
* @ingroup dvrt_mem
* @brief memory info type
*/
#define RT_MEM_INFO_TYPE_DDR_SIZE ((uint32_t)0x1)
#define RT_MEM_INFO_TYPE_HBM_SIZE ((uint32_t)0x2)
#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE ((uint32_t)0x3)
#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE ((uint32_t)0x4)
#define RT_MEM_INFO_TYPE_DDR_SIZE (0x1U)
#define RT_MEM_INFO_TYPE_HBM_SIZE (0x2U)
#define RT_MEM_INFO_TYPE_DDR_P2P_SIZE (0x3U)
#define RT_MEM_INFO_TYPE_HBM_P2P_SIZE (0x4U)

/**
* @ingroup dvrt_mem
* @brief memory Policy
*/
#define RT_MEMORY_POLICY_NONE ((uint32_t)0x0) // Malloc mem prior hage page, then default page
#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST ((uint32_t)0x1 << 10) // Malloc mem prior hage page, then default page
#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY ((uint32_t)0x1 << 11) // Malloc mem only use hage page
#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY ((uint32_t)0x1 << 12) // Malloc mem only use default page
#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P ((uint32_t)0x1 << 13) // Malloc mem prior hage page, then default page, use for p2p
#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P ((uint32_t)0x1 << 14) // Malloc mem only use hage page, use for p2p
#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P ((uint32_t)0x1 << 15) // Malloc mem only use default page, use for p2p
#define RT_MEMORY_POLICY_NONE (0x0U) // Malloc mem prior huge page, then default page
#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST (0x1U << 10U) // Malloc mem prior huge page, then default page
#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY (0x1U << 11U) // Malloc mem only use huge page
#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY (0x1U << 12U) // Malloc mem only use default page
#define RT_MEMORY_POLICY_HUGE_PAGE_FIRST_P2P (0x1U << 13U) // Malloc mem prior huge page, then default page, for p2p
#define RT_MEMORY_POLICY_HUGE_PAGE_ONLY_P2P (0x1U << 14U) // Malloc mem only use huge page, use for p2p
#define RT_MEMORY_POLICY_DEFAULT_PAGE_ONLY_P2P (0x1U << 15U) // Malloc mem only use default page, use for p2p

#define MEM_ALLOC_TYPE_BIT ((uint32_t)0x3FF) // mem type bit in <0, 9>
#define MEM_ALLOC_TYPE_BIT (0x3FFU) // mem type bit in <0, 9>

/**
* @ingroup dvrt_mem
@@ -80,10 +78,10 @@ typedef uint32_t rtMemType_t;
* @ingroup dvrt_mem
* @brief memory advise type
*/
#define RT_MEMORY_ADVISE_EXE (0x02)
#define RT_MEMORY_ADVISE_THP (0x04)
#define RT_MEMORY_ADVISE_PLE (0x08)
#define RT_MEMORY_ADVISE_PIN (0x16)
#define RT_MEMORY_ADVISE_EXE (0x02U)
#define RT_MEMORY_ADVISE_THP (0x04U)
#define RT_MEMORY_ADVISE_PLE (0x08U)
#define RT_MEMORY_ADVISE_PIN (0x16U)

/**
* @ingroup dvrt_mem
@@ -119,7 +117,7 @@ typedef enum tagRtRecudeKind {
RT_MEMCPY_SDMA_AUTOMATIC_MAX = 11,
RT_MEMCPY_SDMA_AUTOMATIC_MIN = 12,
RT_MEMCPY_SDMA_AUTOMATIC_EQUAL = 13,
RT_RECUDE_KIND_END
RT_RECUDE_KIND_END = 14,
} rtRecudeKind_t;

typedef enum tagRtDataType {
@@ -134,7 +132,7 @@ typedef enum tagRtDataType {
RT_DATA_TYPE_UINT8 = 8, // uint8
RT_DATA_TYPE_UINT16= 9, // uint16
RT_DATA_TYPE_UINT32= 10,// uint32
RT_DATA_TYPE_END
RT_DATA_TYPE_END = 11,
} rtDataType_t;

/**
@@ -197,7 +195,7 @@ typedef struct rtMallocHostSharedMemoryIn {
} rtMallocHostSharedMemoryIn;

typedef struct rtMallocHostSharedMemoryOut {
int fd;
int32_t fd;
void *ptr;
void *devPtr;
} rtMallocHostSharedMemoryOut;
@@ -205,7 +203,7 @@ typedef struct rtMallocHostSharedMemoryOut {
typedef struct rtFreeHostSharedMemoryIn {
const char *name;
const uint64_t size;
int fd;
int32_t fd;
void *ptr;
void *devPtr;
} rtFreeHostSharedMemoryIn;
@@ -384,6 +382,39 @@ RTS_API rtError_t rtReduceAsync(void *dst, uint64_t destMax, const void *src, ui

/**
* @ingroup dvrt_mem
* @brief synchronized memcpy2D
* @param [in] dst destination address pointer
* @param [in] dstPitch pitch of destination memory
* @param [in] src source address pointer
* @param [in] srcPitch pitch of source memory
* @param [in] width width of matrix transfer
* @param [in] height height of matrix transfer
* @param [in] kind memcpy type
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtMemcpy2d(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
uint64_t height, rtMemcpyKind_t kind);

/**
* @ingroup dvrt_mem
* @brief asynchronized memcpy2D
* @param [in] dst destination address pointer
* @param [in] dstPitch pitch of destination memory
* @param [in] src source address pointer
* @param [in] srcPitch pitch of source memory
* @param [in] width width of matrix transfer
* @param [in] height height of matrix transfer
* @param [in] kind memcpy type
* @param [in] stream asynchronized task stream
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtMemcpy2dAsync(void *dst, uint64_t dstPitch, const void *src, uint64_t srcPitch, uint64_t width,
uint64_t height, rtMemcpyKind_t kind, rtStream_t stream);

/**
* @ingroup dvrt_mem
* @brief query memory size
* @param [in] aiCoreMemorySize
* @return RT_ERROR_NONE for ok, errno for failed
@@ -429,22 +460,22 @@ RTS_API rtError_t rtMemsetAsync(void *ptr, uint64_t destMax, uint32_t value, uin
/**
* @ingroup dvrt_mem
* @brief get current device memory total and free
* @param [out] free
* @param [out] total
* @param [out] freeSize
* @param [out] totalSize
* @return RT_ERROR_NONE for ok, errno for failed
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtMemGetInfo(size_t *free, size_t *total);
RTS_API rtError_t rtMemGetInfo(size_t *freeSize, size_t *totalSize);

/**
* @ingroup dvrt_mem
* @brief get current device memory total and free
* @param [in] memInfoType
* @param [out] free
* @param [out] total
* @param [out] freeSize
* @param [out] totalSize
* @return RT_ERROR_NONE for ok, errno for failed
*/
RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total);
RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *freeSize, size_t *totalSize);

/**
* @ingroup dvrt_mem
@@ -551,4 +582,4 @@ RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t str
}
#endif

#endif // __CCE_RUNTIME_MEM_H__
#endif // CCE_RUNTIME_MEM_H
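
Two things touched above can be exercised with nothing beyond this header: memory type and allocation policy are OR-ed into one rtMemType_t value (types live in bits 0-9, policies from bit 10 up), and the renamed rtMemGetInfo out-parameters are plain size_t. A hedged sketch:

#include <cstddef>

// HBM, preferring huge pages and falling back to default pages.
const rtMemType_t kHbmHugePageFirst = RT_MEMORY_HBM | RT_MEMORY_POLICY_HUGE_PAGE_FIRST;

void QueryDeviceMemory() {
  size_t freeSize = 0U;
  size_t totalSize = 0U;
  if (rtMemGetInfo(&freeSize, &totalSize) == RT_ERROR_NONE) {
    // freeSize / totalSize describe the current device's memory.
  }
}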

+ 3
- 3
third_party/fwkacllib/inc/runtime/rt.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_RT_H__
#define __CCE_RUNTIME_RT_H__
#ifndef CCE_RUNTIME_RT_H
#define CCE_RUNTIME_RT_H

#include "base.h"
#include "config.h"
@@ -32,4 +32,4 @@
#include "rt_ffts_plus.h"
#include "rt_dfx.h"

#endif // __CCE_RUNTIME_RT_H__
#endif // CCE_RUNTIME_RT_H

+ 6
- 4
third_party/fwkacllib/inc/runtime/rt_ffts.h View File

@@ -3,8 +3,8 @@
* Description: ffts interface
*/

#ifndef __CCE_RUNTIME_FFTS_H
#define __CCE_RUNTIME_FFTS_H
#ifndef CCE_RUNTIME_RT_FFTS_H
#define CCE_RUNTIME_RT_FFTS_H

#include "base.h"

@@ -33,7 +33,7 @@ typedef enum tagFftsSubTaskType {
RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6,
RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7,
RT_FFTS_SUB_TASK_TYPE_SDMA = 8,
RT_FFTS_SUB_TASK_TYPE_RESERVED,
RT_FFTS_SUB_TASK_TYPE_RESERVED = 9,
} rtFftsSubTaskType_t;

typedef struct tagManualThreadDmuInfo {
@@ -178,7 +178,9 @@ typedef struct tagFftsTaskInfo {

RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream);

RTS_API rtError_t rtFftsTaskLaunchWithFlag(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream, uint32_t flag);

#if defined(__cplusplus)
}
#endif
#endif // __CCE_RUNTIME_FFTS_H
#endif // CCE_RUNTIME_RT_FFTS_H

+ 7
- 3
third_party/fwkacllib/inc/runtime/rt_ffts_plus.h View File

@@ -3,8 +3,8 @@
* Description: ffts plus interface
*/

#ifndef __CCE_RUNTIME_FFTS_PLUS_H
#define __CCE_RUNTIME_FFTS_PLUS_H
#ifndef CCE_RUNTIME_RT_FFTS_PLUS_H
#define CCE_RUNTIME_RT_FFTS_PLUS_H

#include "base.h"
#include "rt_ffts_plus_define.h"
@@ -26,9 +26,13 @@ typedef struct tagFftsPlusTaskInfo {
#pragma pack(pop)

RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *handle, const void *devFunc, void **addr, uint32_t *prefetchCnt);

RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream);

RTS_API rtError_t rtFftsPlusTaskLaunchWithFlag(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stream,
uint32_t flag);

#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
#endif // __CCE_RUNTIME_FFTS_H
#endif // CCE_RUNTIME_RT_FFTS_PLUS_H

+ 7
- 7
third_party/fwkacllib/inc/runtime/rt_ffts_plus_define.h View File

@@ -3,8 +3,8 @@
* Description: the definition of ffts plus
*/

#ifndef __CCE_RUNTIME_FFTS_PLUS_DEFINE_H
#define __CCE_RUNTIME_FFTS_PLUS_DEFINE_H
#ifndef CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H
#define CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H

#include "base.h"

@@ -30,7 +30,7 @@ typedef enum tagFftsPlusHwType {
RT_HW_CTX_TYPE_WRITEBACK_DATA = 11,
RT_HW_CTX_TYPE_AICPU = 12,
RT_HW_CTX_TYPE_LOAD = 13,
RT_HW_CTX_TYPE_MAX,
RT_HW_CTX_TYPE_MAX = 14,
} rtFftsPlusHwType_t;

// hardware context type
@@ -40,7 +40,7 @@ typedef enum tagFftsPlusSoftType {
RT_SOFT_CTX_TYPE_AT_START = 3,
RT_SOFT_CTX_TYPE_AT_END = 4,
RT_SOFT_CTX_TYPE_LABEL = 5,
RT_SOFT_CTX_TYPE_MAX,
RT_SOFT_CTX_TYPE_MAX = 6,
} rtFftsPlusSoftType_t;

typedef enum tagFftsPlusContextType {
@@ -71,7 +71,7 @@ typedef enum tagFftsPlusCondType {
RT_COND_TYPE_GREATER_OR_EQUAL = 3,
RT_COND_TYPE_LESS = 4,
RT_COND_TYPE_LESS_OR_EQUAL = 5,
RT_COND_TYPE_MAX,
RT_COND_TYPE_MAX = 6,
} rtFftsPlusCondType_t;

// the definition of ffts plus context
@@ -505,7 +505,7 @@ typedef struct tagFftsPlusAtStartCtx {
uint16_t threadIdInit;
uint16_t threadWindowSize;
// 80-127
uint16_t res9[12];
uint32_t res9[12];
} rtFftsPlusAtStartCtx_t;

// at end context
@@ -712,4 +712,4 @@ typedef struct tagFftsPlusCondSwitchCtx {
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
#endif // __CCE_RUNTIME_FFTS_PLUS_DEFINE_H
#endif // CCE_RUNTIME_RT_FFTS_PLUS_DEFINE_H

+ 416
- 0
third_party/fwkacllib/inc/runtime/rt_mem_queue.h View File

@@ -0,0 +1,416 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
* Description: mbuf and queue interface
*/

#ifndef CCE_RUNTIME_RT_MEM_QUEUE_H
#define CCE_RUNTIME_RT_MEM_QUEUE_H

#include "base.h"

#if defined(__cplusplus)
extern "C" {
#endif

#define RT_MQ_MAX_NAME_LEN 128 // same as driver's
#define RT_MQ_DEPTH_MIN 2U
#define RT_MQ_MODE_PUSH 1
#define RT_MQ_MODE_PULL 2
#define RT_MQ_MODE_DEFAULT RT_MQ_MODE_PUSH

typedef struct tagMemQueueAttr {
char name[RT_MQ_MAX_NAME_LEN];
uint32_t depth;
uint32_t workMode;
uint32_t flowCtrlDropTime;
bool flowCtrlFlag;
bool overWriteFlag;
} rtMemQueueAttr_t;

typedef struct tagMemQueueShareAttr {
uint32_t manage : 1;
uint32_t read : 1;
uint32_t write : 1;
uint32_t rsv : 29;
} rtMemQueueShareAttr_t;

typedef struct tagMemQueueBuffInfo {
void *addr;
size_t len;
} rtMemQueueBuffInfo;

typedef struct tagMemQueueBuff {
void *contextAddr;
size_t contextLen;
rtMemQueueBuffInfo *buffInfo;
uint32_t buffCount;
} rtMemQueueBuff_t;


typedef enum tagMemQueueQueryCmd {
RT_MQ_QUERY_QUE_ATTR_OF_CUR_PROC = 0, // input is qid(4bytes), output is rtMemQueueShareAttr_t
RT_MQ_QUERY_QUES_OF_CUR_PROC = 1,
RT_MQ_QUERY_CMD_MAX = 2
} rtMemQueueQueryCmd_t;

#define RT_MQ_EVENT_QS_MSG 27 // same as driver's

#define RT_MQ_SCHED_PRIORITY_LEVEL0 0 // same as driver's
#define RT_MQ_SCHED_PRIORITY_LEVEL1 1
#define RT_MQ_SCHED_PRIORITY_LEVEL2 2
#define RT_MQ_SCHED_PRIORITY_LEVEL3 3
#define RT_MQ_SCHED_PRIORITY_LEVEL4 4
#define RT_MQ_SCHED_PRIORITY_LEVEL5 5
#define RT_MQ_SCHED_PRIORITY_LEVEL6 6
#define RT_MQ_SCHED_PRIORITY_LEVEL7 7

/* Events can be released between different systems. This parameter specifies the destination type of events
to be released. The destination type is defined based on the CPU type of the destination system. */
#define RT_MQ_DST_ENGINE_ACPU_DEVICE 0 // device AICPU, same as driver's
#define RT_MQ_DST_ENGINE_ACPU_HOST 1 // Host AICPU
#define RT_MQ_DST_ENGINE_CCPU_DEVICE 2 // device CtrlCPU
#define RT_MQ_DST_ENGINE_CCPU_HOST 3 // Host CtrlCPU
#define RT_MQ_DST_ENGINE_DCPU_DEVICE 4 // device DataCPU
#define RT_MQ_DST_ENGINE_TS_CPU 5 // device TS CPU
#define RT_MQ_DST_ENGINE_DVPP_CPU 6 // device DVPP CPU

#define RT_MQ_SCHED_EVENT_QS_MSG 25 // same as driver's EVENT_QS_MSG

/* When the destination engine is AICPU, select a policy.
ONLY: The command is executed only on the local AICPU.
FIRST: The local AICPU is preferentially executed. If the local AICPU is busy, the remote AICPU can be used. */
#define RT_SCHEDULE_POLICY_ONLY 0 // same as driver's schedule_policy
#define RT_SCHEDULE_POLICY_FIRST 1 // same as driver's schedule_policy


typedef struct tagEschedEventSummary {
int32_t pid; // dst PID
uint32_t grpId;
int32_t eventId; // only RT_MQ_SCHED_EVENT_QS_MSG is supported
uint32_t subeventId;
uint32_t msgLen;
char *msg;
uint32_t dstEngine; // dst system cpu type
int32_t policy; // RT_SCHEDULE_POLICY_ONLY or RT_SCHEDULE_POLICY_FIRST
} rtEschedEventSummary_t;

typedef struct tagEschedEventReply {
char *buf;
uint32_t bufLen;
uint32_t replyLen; // output, ack msg len, same with msgLen in halEschedAckEvent
} rtEschedEventReply_t;

#define RT_DEV_PROCESS_CP1 0
#define RT_DEV_PROCESS_CP2 1
#define RT_DEV_PROCESS_DEV_ONLY 2
#define RT_DEV_PROCESS_QS 3
#define RT_DEV_PROCESS_SIGN_LENGTH 49

typedef struct tagBindHostpidInfo {
int32_t hostPid;
uint32_t vfid;
uint32_t chipId;
int32_t mode; // online:0, offline:1
int32_t cpType; // type of custom-process, see RT_DEV_PROCESS_XXX
uint32_t len; // length of sign
char sign[RT_DEV_PROCESS_SIGN_LENGTH]; // sign of hostpid
} rtBindHostpidInfo_t;

#define RT_MEM_BUFF_MAX_CFG_NUM 64

typedef struct {
uint32_t cfgId; // cfg id, start from 0
uint32_t totalSize; // one zone total size
uint32_t blkSize; // blk size, 2^n (0, 2M]
uint32_t maxBufSize; // max size can alloc from zone
uint32_t pageType; // page type, small page / huge page
int32_t elasticEnable; // elastic enable
int32_t elasticRate;
int32_t elasticRateMax;
int32_t elasticHighLevel;
int32_t elasticLowLevel;
} rtMemZoneCfg_t;

typedef struct {
rtMemZoneCfg_t cfg[RT_MEM_BUFF_MAX_CFG_NUM];
}rtMemBuffCfg_t;

typedef void *rtMbufPtr_t;

/**
* @ingroup rt_mem_queue
* @brief init queue schedule
* @param [in] device the logical device id
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueInitQS(int32_t device);

/**
* @ingroup rt_mem_queue
* @brief create mbuf queue
* @param [in] device the logical device id
* @param [in] rtMemQueueAttr attribute of queue
* @param [out] qid queue id
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueCreate(int32_t device, const rtMemQueueAttr_t *queueAttr, uint32_t *qid);

/**
* @ingroup rt_mem_queue
* @brief destroy mbuf queue
* @param [in] device the logical device id
* @param [in] qid queue id
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueDestroy(int32_t device, uint32_t qid);

/**
* @ingroup rt_mem_queue
* @brief init mbuf queue
* @param [in] device the logical device id
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueInit(int32_t device);

/**
* @ingroup rt_mem_queue
* @brief enqueue mbuf
* @param [in] device the logical device id
* @param [in] qid queue id
* @param [in] mbuf enqueue mbuf
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueEnQueue(int32_t device, uint32_t qid, void *mbuf);


/**
* @ingroup rt_mem_queue
* @brief dequeue mbuf
* @param [in] device the logical device id
* @param [in] qid queue id
* @param [out] mbuf dequeue mbuf
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueDeQueue(int32_t device, uint32_t qid, void **mbuf);

/**
* @ingroup rt_mem_queue
* @brief peek mbuf in queue
* @param [in] device the logical device id
* @param [in] qid queue id
* @param [out] bufLen length of mbuf in queue
* @param [in] timeout peek timeout (ms), -1: wait all the time until peeking success
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueuePeek(int32_t device, uint32_t qid, size_t *bufLen, int32_t timeout);

/**
* @ingroup rt_mem_queue
* @brief enqueue buff
* @param [in] device the logical device id
* @param [in] qid queue id
* @param [in] inBuf enqueue buff
* @param [in] timeout enqueue timeout (ms), -1: wait all the time until enqueue success
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueEnQueueBuff(int32_t device, uint32_t qid, rtMemQueueBuff_t *inBuf, int32_t timeout);

/**
* @ingroup rt_mem_queue
* @brief dequeue buff
* @param [in] device the logical device id
* @param [in] qid queue id
* @param [out] outBuf dequeue buff
* @param [in] timeout dequeue timeout (ms), -1: wait all the time until dequeue success
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueDeQueueBuff(int32_t device, uint32_t qid, rtMemQueueBuff_t *outBuf, int32_t timeout);


/**
* @ingroup rt_mem_queue
* @brief query queue status
* @param [in] device: the logical device id
* @param [in] cmd: query cmd
* @param [in] inBuff: input buff
* @param [in] inLen: the length of input
* @param [in|out] outBuff: output buff
* @param [in|out] outLen: the length of output
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueQuery(int32_t device, rtMemQueueQueryCmd_t cmd, const void *inBuff, uint32_t inLen,
void *outBuff, uint32_t *outLen);

/**
* @ingroup rt_mem_queue
* @brief grant queue
* @param [in] device: logic devid
* @param [in] qid: queue id
* @param [in] pid: pid
* @param [in] attr: queue share attr
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueGrant(int32_t device, uint32_t qid, int32_t pid, rtMemQueueShareAttr_t *attr);

/**
* @ingroup rt_mem_queue
* @brief attach queue
* @param [in] device: logic devid
* @param [in] qid: queue id
* @param [in] timeOut: timeOut
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMemQueueAttach(int32_t device, uint32_t qid, int32_t timeOut);

/**
* @ingroup rt_mem_queue
* @brief Commit the event to a specific process
* @param [in] device: logic devid
* @param [in] event: event summary info
* @param [out] ack: event reply info
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtEschedSubmitEventSync(int32_t device, rtEschedEventSummary_t *event,
rtEschedEventReply_t *ack);

/**
* @ingroup rt_mem_queue
* @brief query device process id
* @param [in] info: see struct rtBindHostpidInfo_t
* @param [out] devPid: device process id
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtQueryDevPid(rtBindHostpidInfo_t *info, int32_t *devPid);

/**
* @ingroup rt_mem_queue
* @brief device buff init
* @param [in] cfg, init cfg
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg);

/**
* @ingroup rt_mem_queue
* @brief alloc buff
* @param [out] mbuf: address of the allocated mbuf
* @param [in] size: The amount of memory space requested
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *mbuf, uint64_t size);

/**
* @ingroup rt_mem_queue
* @brief free buff
* @param [in] mbuf: mbuf to be freed
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufFree(rtMbufPtr_t mbuf);

/**
* @ingroup rt_mem_queue
* @brief get Data addr of Mbuf
* @param [in] mbuf: Mbuf addr
* @param [out] buf: Mbuf data addr
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t mbuf, void **buf);

/**
* @ingroup rt_mem_queue
* @brief get total Buffer size of Mbuf
* @param [in] mbuf: Mbuf addr
* @param [out] totalSize: total buffer size of Mbuf
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t mbuf, uint64_t *totalSize);

/**
* @ingroup rt_mem_queue
* @brief Get the address and length of its user_data from the specified Mbuf
* @param [in] mbuf: Mbuf addr
* @param [out] priv: address of its user_data
* @param [out] size: length of its user_data
* @return RT_ERROR_NONE for ok
*/
RTS_API rtError_t rtMbufGetPrivInfo(rtMbufPtr_t mbuf, void **priv, uint64_t *size);

// mem group
typedef struct {
uint64_t maxMemSize; // max buf size in grp, in KB. = 0 means no limit
} rtMemGrpConfig_t;

typedef struct {
uint32_t admin : 1; // admin permission, can add other proc to grp
uint32_t read : 1; // read only permission
uint32_t write : 1; // read and write permission
uint32_t alloc : 1; // alloc permission (have read and write permission)
uint32_t rsv : 28;
} rtMemGrpShareAttr_t;

#define RT_MEM_GRP_QUERY_GROUPS_OF_PROCESS 1 // query process all grp

typedef struct {
int32_t pid;
} rtMemGrpQueryByProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS

typedef union {
rtMemGrpQueryByProc_t grpQueryByProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS
} rtMemGrpQueryInput_t;

#define RT_MEM_GRP_NAME_LEN 32 // it must be same as driver define BUFF_GRP_NAME_LEN

typedef struct {
char groupName[RT_MEM_GRP_NAME_LEN]; // group name
rtMemGrpShareAttr_t attr; // process in group attribute
} rtMemGrpOfProc_t; // cmd: GRP_QUERY_GROUPS_OF_PROCESS

typedef struct {
rtMemGrpOfProc_t *groupsOfProc; // cmd: GRP_QUERY_GROUPS_OF_PROCESS
size_t maxNum; // max number of result
size_t resultNum; // if the number of results exceeds 'maxNum', only 'maxNum' results are filled in buffer
} rtMemGrpQueryOutput_t;

/**
* @ingroup rt_mem_queue
* @brief create mem group
* @attention null
* @param [in] name, group name
* @param [in] cfg, group cfg
* @return 0 for success, others for fail
*/
RTS_API rtError_t rtMemGrpCreate(const char *name, const rtMemGrpConfig_t *cfg);

/**
* @ingroup rt_mem_queue
* @brief add process to group
* @param [in] name, group name
* @param [in] pid, process id
* @param [in] attr, process permission in group
* @return 0 for success, others for fail
*/
RTS_API rtError_t rtMemGrpAddProc(const char *name, int32_t pid, const rtMemGrpShareAttr_t *attr);

/**
* @ingroup rt_mem_queue
* @brief attach process to check permission in group
* @param [in] name, group name
* @param [in] timeout, time out ms
* @return 0 for success, others for fail
*/
RTS_API rtError_t rtMemGrpAttach(const char *name, int32_t timeout);

/**
* @ingroup rt_mem_queue
* @brief buff group query
* @param [in] cmd, cmd type
* @param [in] input, query input
* @param [in|out] output, query output
* @return 0 for success, others for fail
*/
RTS_API rtError_t rtMemGrpQuery(int32_t cmd, const rtMemGrpQueryInput_t *input, rtMemGrpQueryOutput_t *output);

#if defined(__cplusplus)
}
#endif
#endif // CCE_RUNTIME_RT_MEM_QUEUE_H
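
A condensed sketch of the flow this new header suggests, using only the functions and types declared above (error handling elided; the queue name, depth and buffer size are illustrative, and handover of the mbuf to the queue on enqueue is assumed):

#include <cstdint>
#include <cstring>

void MbufQueueRoundTrip(int32_t device) {
  rtMemQueueInit(device);                       // bring up the queue module on this device

  rtMemQueueAttr_t attr = {};
  std::strncpy(attr.name, "demo_queue", RT_MQ_MAX_NAME_LEN - 1);
  attr.depth = RT_MQ_DEPTH_MIN;
  attr.workMode = RT_MQ_MODE_DEFAULT;

  uint32_t qid = 0U;
  rtMemQueueCreate(device, &attr, &qid);

  rtMbufPtr_t mbuf = nullptr;
  rtMbufAlloc(&mbuf, 256U);                     // 256-byte buffer
  void *data = nullptr;
  rtMbufGetBuffAddr(mbuf, &data);               // fill *data before enqueueing

  rtMemQueueEnQueue(device, qid, mbuf);

  void *out = nullptr;
  rtMemQueueDeQueue(device, qid, &out);         // out is the mbuf handed back

  rtMbufFree(out);
  rtMemQueueDestroy(device, qid);
}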

+ 12
- 12
third_party/fwkacllib/inc/runtime/rt_model.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_MODEL_H__
#define __CCE_RUNTIME_MODEL_H__
#ifndef CCE_RUNTIME_RT_MODEL_H
#define CCE_RUNTIME_RT_MODEL_H

#include "base.h"

@@ -42,7 +42,7 @@ typedef enum tagModelTaskType {
RT_MODEL_TASK_NOTIFY_WAIT,
RT_MODEL_TASK_REDUCE_ASYNC,
RT_MODEL_TASK_RDMA_SEND,
RT_MODEL_TASK_EVENT_RESET = 18,
RT_MODEL_TASK_EVENT_RESET,
RT_MODEL_TASK_MODEL_END_GRAPH,
RT_MODEL_TASK_STREAM_SWITCH_N,
RT_MODEL_TASK_RDMA_DB_SEND,
@@ -66,16 +66,16 @@ typedef enum tagModelQueueFlag {
RT_MODEL_OUTPUT_QUEUE = 1
} rtModelQueueFlag_t;

#define EXECUTOR_NONE ((uint32_t)0x0)
#define EXECUTOR_TS ((uint32_t)0x01)
#define EXECUTOR_AICPU ((uint32_t)0x02)
#define EXECUTOR_NONE (0x0U)
#define EXECUTOR_TS (0x01U)
#define EXECUTOR_AICPU (0x02U)

/*
* @ingroup rt_model
* @brief debug flag for kernel exception dump
*/
#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1 << 0)
#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1 << 1)
#define RT_DEBUG_FLAG_AICORE_OVERFLOW (0x1U << 0U)
#define RT_DEBUG_FLAG_ATOMIC_ADD_OVERFLOW (0x1U << 1U)

/**
* @ingroup
@@ -392,12 +392,12 @@ RTS_API rtError_t rtModelExecute(rtModel_t model, rtStream_t stream, uint32_t fl
* @ingroup rt_model
* @brief get model the last persist task id
* @param [in] model model to execute
* @param [out] taskid last task id of the model
* @param [out] streamid last steam id of the model
* @param [out] taskId last task id of the model
* @param [out] streamId last stream id of the model
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskid, uint32_t *streamid);
RTS_API rtError_t rtModelGetTaskId(rtModel_t model, uint32_t *taskId, uint32_t *streamId);

/**
* @ingroup rt_model
@@ -495,4 +495,4 @@ RTS_API rtError_t rtDebugUnRegister(rtModel_t model);
}
#endif

#endif // __CCE_RUNTIME_MODEL_H__
#endif // CCE_RUNTIME_RT_MODEL_H
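
With the out-parameters renamed above, a call site is simply (a trivial sketch assuming a valid model handle):

#include <cstdint>

void QueryLastTask(rtModel_t model) {
  uint32_t taskId = 0U;
  uint32_t streamId = 0U;
  if (rtModelGetTaskId(model, &taskId, &streamId) == RT_ERROR_NONE) {
    // taskId / streamId now hold the model's last persist task and stream ids.
  }
}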

+ 4
- 4
third_party/fwkacllib/inc/runtime/rt_stars.h View File

@@ -1,10 +1,10 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
* Description:
* Description: the definition of stars
*/

#ifndef __CCE_RUNTIME_STARS_H
#define __CCE_RUNTIME_STARS_H
#ifndef CCE_RUNTIME_RT_STARS_H
#define CCE_RUNTIME_RT_STARS_H

#include "base.h"

@@ -84,4 +84,4 @@ RTS_API rtError_t rtCdqEnQueuePtrMode(const char *queName, uint32_t cdqeIndex, c

}
#endif
#endif // __CCE_RUNTIME_STARS_H
#endif // CCE_RUNTIME_RT_STARS_H

+ 3
- 3
third_party/fwkacllib/inc/runtime/rt_stars_define.h View File

@@ -3,8 +3,8 @@
* Description: the definition of stars
*/

#ifndef __CCE_RUNTIME_STARS_DEFINE__H
#define __CCE_RUNTIME_STARS_DEFINE__H
#ifndef CCE_RUNTIME_RT_STARS_DEFINE_H
#define CCE_RUNTIME_RT_STARS_DEFINE_H

#include "base.h"

@@ -88,4 +88,4 @@ typedef struct tagFftsPlusSqe {
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif
#endif // __CCE_RUNTIME_STARS_DEFINE__H
#endif // CCE_RUNTIME_RT_STARS_DEFINE_H

+ 15
- 15
third_party/fwkacllib/inc/runtime/stream.h View File

@@ -14,8 +14,8 @@
* limitations under the License.
*/

#ifndef __CCE_RUNTIME_STREAM_H__
#define __CCE_RUNTIME_STREAM_H__
#ifndef CCE_RUNTIME_STREAM_H
#define CCE_RUNTIME_STREAM_H

#include "base.h"
#include "event.h"
@@ -28,27 +28,27 @@ extern "C" {
* @ingroup stream_flags
* @brief stream op bit flags
*/
#define RT_STREAM_DEFAULT (0x00)
#define RT_STREAM_PERSISTENT (0x01)
#define RT_STREAM_FORCE_COPY (0x02)
#define RT_STREAM_HUGE (0x04)
#define RT_STREAM_AICPU (0x08)
#define RT_STREAM_FORBIDDEN_DEFAULT (0x10)
#define RT_STREAM_HEAD (0x20)
#define RT_STREAM_PRIMARY_DEFAULT (0x40)
#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80)
#define RT_STREAM_DEFAULT (0x00U)
#define RT_STREAM_PERSISTENT (0x01U)
#define RT_STREAM_FORCE_COPY (0x02U)
#define RT_STREAM_HUGE (0x04U)
#define RT_STREAM_AICPU (0x08U)
#define RT_STREAM_FORBIDDEN_DEFAULT (0x10U)
#define RT_STREAM_HEAD (0x20U)
#define RT_STREAM_PRIMARY_DEFAULT (0x40U)
#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80U)

/**
* @ingroup stream_type
* @brief stream type
*/
#define RT_NORMAL_STREAM (0x00)
#define RT_HUGE_STREAM (0x01)
#define RT_NORMAL_STREAM (0x00U)
#define RT_HUGE_STREAM (0x01U)

/**
* priority level default value when create a stream
*/
#define RT_STREAM_PRIORITY_DEFAULT (0)
#define RT_STREAM_PRIORITY_DEFAULT (0U)

/**
* @ingroup dvrt_stream
@@ -215,4 +215,4 @@ RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream);
}
#endif

#endif // __CCE_RUNTIME_STREAM_H__
#endif // CCE_RUNTIME_STREAM_H
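
With the U-suffixed literals the stream flag macros are unsigned, so combining them for an unsigned flags argument involves no signed intermediate. A hedged sketch follows; rtStreamCreateWithFlags does not appear in this hunk, so its name and signature are assumptions here:

    #include <cstdint>
    #include "runtime/stream.h"   // assumed include path

    static rtError_t CreateHugeStream(rtStream_t *createdStream) {
        // OR of unsigned flag bits stays uint32_t end to end.
        const uint32_t streamFlags = RT_STREAM_HUGE | RT_STREAM_PERSISTENT;
        // Assumed declaration, for illustration only:
        //   RTS_API rtError_t rtStreamCreateWithFlags(rtStream_t *stm, int32_t priority, uint32_t flags);
        return rtStreamCreateWithFlags(createdStream, static_cast<int32_t>(RT_STREAM_PRIORITY_DEFAULT), streamFlags);
    }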

+ 59
- 59
third_party/fwkacllib/inc/toolchain/plog.h View File

@@ -1,59 +1,59 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _PLOG_H_
#define _PLOG_H_
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
#ifndef LINUX
#define LINUX 0
#endif // LINUX
#ifndef WIN
#define WIN 1
#endif
#ifndef OS_TYPE
#define OS_TYPE 0
#endif // OS_TYPE
#if (OS_TYPE == LINUX)
#define DLL_EXPORT __attribute__((visibility("default")))
#else
#define DLL_EXPORT _declspec(dllexport)
#endif
/**
* @ingroup plog
* @brief DlogReportInitialize: init log in service process before all device setting.
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogReportInitialize();
/**
* @ingroup plog
* @brief DlogReportFinalize: release log resource in service process after all device reset.
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogReportFinalize();
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // D_PLOG_H_
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _PLOG_H_
#define _PLOG_H_
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
#ifndef LINUX
#define LINUX 0
#endif // LINUX
#ifndef WIN
#define WIN 1
#endif
#ifndef OS_TYPE
#define OS_TYPE 0
#endif // OS_TYPE
#if (OS_TYPE == LINUX)
#define DLL_EXPORT __attribute__((visibility("default")))
#else
#define DLL_EXPORT _declspec(dllexport)
#endif
/**
* @ingroup plog
* @brief DlogReportInitialize: init log in service process before all device setting.
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogReportInitialize(void);
/**
* @ingroup plog
* @brief DlogReportFinalize: release log resource in service process after all device reset.
* @return: 0: SUCCEED, others: FAILED
*/
DLL_EXPORT int DlogReportFinalize(void);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // D_PLOG_H_
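
The functional change here is the explicit (void) parameter list, which in C declares a true zero-argument prototype. A minimal call sequence for a service process, built only from the two functions this header declares; the surrounding function and the placeholder work are illustrative:

    #include "toolchain/plog.h"   // assumed include path

    int RunService(void) {
        if (DlogReportInitialize() != 0) {   // 0: SUCCEED, others: FAILED
            return -1;
        }
        // ... device setup, service work, device reset (placeholder) ...
        return DlogReportFinalize();         // release log resources after all devices are reset
    }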

+ 28
- 28
third_party/fwkacllib/inc/toolchain/prof_acl_api.h View File

@@ -22,18 +22,7 @@
#define PROF_TASK_TIME 0x00000002
#define PROF_AICORE_METRICS 0x00000004
#define PROF_AICPU_TRACE 0x00000008
#define PROF_MODEL_EXECUTE 0x00000010
#define PROF_RUNTIME_API 0x00000020
#define PROF_RUNTIME_TRACE 0x00000040
#define PROF_SCHEDULE_TIMELINE 0x00000080
#define PROF_SCHEDULE_TRACE 0x00000100
#define PROF_AIVECTORCORE_METRICS 0x00000200
#define PROF_SUBTASK_TIME 0x00000400

#define PROF_TRAINING_TRACE 0x00000800
#define PROF_HCCL_TRACE 0x00001000

#define PROF_TASK_TRACE 0x00001852
#define PROF_L2CACHE 0x00000010

// system profiling switch
#define PROF_CPU 0x00010000
@@ -44,6 +33,19 @@
#define PROF_SYS_AICORE_SAMPLE 0x00200000
#define PROF_AIVECTORCORE_SAMPLE 0x00400000

#define PROF_MODEL_EXECUTE 0x0000001000000
#define PROF_RUNTIME_API 0x0000002000000
#define PROF_RUNTIME_TRACE 0x0000004000000
#define PROF_SCHEDULE_TIMELINE 0x0000008000000
#define PROF_SCHEDULE_TRACE 0x0000010000000
#define PROF_AIVECTORCORE_METRICS 0x0000020000000
#define PROF_SUBTASK_TIME 0x0000040000000

#define PROF_TRAINING_TRACE 0x0000080000000
#define PROF_HCCL_TRACE 0x0000100000000

#define PROF_TASK_TRACE 0x0000185000002

#define PROF_MODEL_LOAD 0x8000000000000000

// DataTypeConfig MASK
@@ -51,16 +53,7 @@
#define PROF_TASK_TIME_MASK 0x00000002
#define PROF_AICORE_METRICS_MASK 0x00000004
#define PROF_AICPU_TRACE_MASK 0x00000008
#define PROF_MODEL_EXECUTE_MASK 0x00000010
#define PROF_RUNTIME_API_MASK 0x00000020
#define PROF_RUNTIME_TRACE_MASK 0x00000040
#define PROF_SCHEDULE_TIMELINE_MASK 0x00000080
#define PROF_SCHEDULE_TRACE_MASK 0x00000100
#define PROF_AIVECTORCORE_METRICS_MASK 0x00000200
#define PROF_SUBTASK_TIME_MASK 0x00000400

#define PROF_TRAINING_TRACE_MASK 0x00000800
#define PROF_HCCL_TRACE_MASK 0x00001000
#define PROF_L2CACHE_MASK 0x00000010

// system profiling mask
#define PROF_CPU_MASK 0x00010000
@@ -71,20 +64,27 @@
#define PROF_SYS_AICORE_SAMPLE_MASK 0x00200000
#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000

#define PROF_MODEL_LOAD_MASK 0x8000000000000000
#define PROF_MODEL_EXECUTE_MASK 0x0000001000000
#define PROF_RUNTIME_API_MASK 0x0000002000000
#define PROF_RUNTIME_TRACE_MASK 0x0000004000000
#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000
#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000
#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000
#define PROF_SUBTASK_TIME_MASK 0x0000040000000

#ifndef OS_TYPE
#define OS_TYPE 0
#endif // OS_TYPE
#define PROF_TRAINING_TRACE_MASK 0x0000080000000
#define PROF_HCCL_TRACE_MASK 0x0000100000000

#define PROF_MODEL_LOAD_MASK 0x8000000000000000

#if (OS_TYPE != LINUX)
#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
#define MSVP_PROF_API __declspec(dllexport)
#else
#define MSVP_PROF_API __attribute__((visibility("default")))
#endif

#include <cstdint>
#include <stddef.h>
#include <cstddef>

namespace Msprofiler {
namespace Api {
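
The execute/trace bits moved up (PROF_HCCL_TRACE now sits at bit 32), so a combined DataTypeConfig no longer fits in 32 bits and should be built as a 64-bit value. A sketch using only macros shown in this hunk; the variable name and anything it would be passed to are illustrative:

    #include <cstdint>
    #include "toolchain/prof_acl_api.h"   // assumed include path

    // PROF_MODEL_EXECUTE is now 0x0000001000000, PROF_HCCL_TRACE 0x0000100000000, and
    // PROF_MODEL_LOAD keeps the top bit, so uint64_t (not uint32_t) is required to hold the OR.
    const uint64_t dataTypeConfig =
        PROF_TASK_TIME | PROF_AICORE_METRICS | PROF_MODEL_EXECUTE | PROF_HCCL_TRACE | PROF_MODEL_LOAD;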


+ 1
- 1
third_party/fwkacllib/inc/toolchain/prof_callback.h View File

@@ -24,7 +24,7 @@
extern "C" {
#endif // __cplusplus

#if (OS_TYPE != LINUX)
#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
#define MSVP_PROF_API __declspec(dllexport)
#else
#define MSVP_PROF_API __attribute__((visibility("default")))


+ 1
- 4
third_party/fwkacllib/inc/toolchain/prof_reporter.h View File

@@ -16,11 +16,8 @@

#ifndef MSPROF_ENGINE_PROF_REPORTER_H_
#define MSPROF_ENGINE_PROF_REPORTER_H_
#ifndef OS_TYPE
#define OS_TYPE 0
#endif // OS_TYPE

#if (OS_TYPE != LINUX)
#if (defined(_WIN32) || defined(_WIN64) || defined(_MSC_VER))
#define MSVP_PROF_API __declspec(dllexport)
#else
#define MSVP_PROF_API __attribute__((visibility("default")))
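
Export selection for the profiling headers now keys off the compiler's own Windows macros instead of a hand-maintained OS_TYPE value, so a translation unit that never defines OS_TYPE still picks the right attribute. A purely hypothetical declaration showing how the macro is applied; the function name is invented for illustration:

    #include "toolchain/prof_reporter.h"   // assumed include path

    // Expands to __declspec(dllexport) under _WIN32/_WIN64/_MSC_VER,
    // and to __attribute__((visibility("default"))) elsewhere.
    MSVP_PROF_API int HypotheticalReportEntry(void);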

