
upgrade Ascend package 23 Dec 21

pull/2102/head
yanghaoran committed 3 years ago
commit 82e6f4774f
10 changed files with 751 additions and 99 deletions
  1. +11 -1    inc/framework/common/profiling_definitions.h
  2. +35 -0    inc/framework/omg/model_tool.h
  3. +1 -1     metadef
  4. +3 -3     third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
  5. +0 -3     third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h
  6. +83 -86   third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h
  7. +68 -0    third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
  8. +86 -0    third_party/fwkacllib/inc/ops/reduce_ops.h
  9. +14 -5    third_party/fwkacllib/inc/toolchain/prof_callback.h
  10. +450 -0  third_party/fwkacllib/inc/toolchain/prof_common.h

+11 -1  inc/framework/common/profiling_definitions.h

@@ -22,6 +22,8 @@
#include <mutex>
#include <unordered_map>
#include "graph/profiler.h"
#include "external/ge/ge_api_types.h"
#include "toolchain/prof_callback.h"
namespace ge {
namespace profiling {
enum {
@@ -46,6 +48,7 @@ enum {
kCopyH2D,
kProfilingIndexEnd
};
constexpr uint64_t kInvalidHashId = 0ULL;

class ProfilingContext {
public:
@@ -100,9 +103,16 @@ class ProfilingContext {
}

int64_t RegisterString(const std::string &str);
int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str);
void UpdateElementHashId(MsprofReporterCallback reporter_callback);
static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str,
uint64_t &hash_id);
size_t GetRegisterStringNum() const {
return strings_to_index_.size();
}

private:
void RegisterString(int64_t index, const std::string &str);
void UpdateHashByStr(const std::string &str, const uint64_t hash);
void Init();

private:
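
The new hash-registration surface (kInvalidHashId, RegisterStringHash, QueryHashId, UpdateElementHashId) lets GE resolve profiling strings to the hash ids used by the msprof reporter. A minimal usage sketch follows; it is not part of this commit, uses only the members visible in this hunk, and assumes the include path, ge::SUCCESS as the success status, and that the caller already holds a ProfilingContext and a MsprofReporterCallback.

#include "framework/common/profiling_definitions.h"

// Hedged sketch: register a kernel name and, when the profiler backend can hash it,
// bind the backend hash id to the same string.
void RegisterKernelName(ge::profiling::ProfilingContext &ctx, MsprofReporterCallback reporter) {
  const int64_t index = ctx.RegisterString("MatMul_fwd");  // plain index registration
  uint64_t hash_id = ge::profiling::kInvalidHashId;
  if ((ge::profiling::ProfilingContext::QueryHashId(reporter, "MatMul_fwd", hash_id) == ge::SUCCESS) &&
      (hash_id != ge::profiling::kInvalidHashId)) {
    (void)ctx.RegisterStringHash(hash_id, "MatMul_fwd");   // reuse the backend hash id
  }
  (void)index;
}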


+35 -0  inc/framework/omg/model_tool.h

@@ -0,0 +1,35 @@
/**
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_OMG_MODEL_TOOL_H_
#define INC_FRAMEWORK_OMG_MODEL_TOOL_H_

#include <memory>
#include <string>

#include "framework/common/debug/ge_log.h"
#include "proto/ge_ir.pb.h"

namespace ge {
class GE_FUNC_VISIBILITY ModelTool {
public:
static Status GetModelInfoFromOm(const char *model_file, ge::proto::ModelDef &model_def, uint32_t &modeldef_size);

static Status GetModelInfoFromPbtxt(const char *model_file, ge::proto::ModelDef &model_def);
};
} // namespace ge

#endif // INC_FRAMEWORK_OMG_MODEL_TOOL_H_
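
ModelTool adds two static parsers for offline model artifacts. A hedged usage sketch follows; the GELOGE/GELOGI macros come from ge_log.h, which the header already includes, while ge::SUCCESS as the success status and ModelDef::graph_size() from the generated ge_ir protobuf are assumptions, not part of this diff.

#include "framework/omg/model_tool.h"

// Illustrative only: parse an .om file and report basic ModelDef information.
ge::Status DumpModelInfo(const char *om_path) {
  ge::proto::ModelDef model_def;
  uint32_t modeldef_size = 0U;
  const ge::Status ret = ge::ModelTool::GetModelInfoFromOm(om_path, model_def, modeldef_size);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "Failed to parse om file %s.", om_path);
    return ret;
  }
  GELOGI("Parsed ModelDef: %d graph(s), serialized ModelDef size %u bytes.",
         model_def.graph_size(), modeldef_size);
  return ge::SUCCESS;
}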

+1 -1  metadef

@@ -1 +1 @@
Subproject commit 2659f49dcb14c0773e10e17ee9896b7be4d8e7be
Subproject commit dc5ac26aac4c49b4e72cd91d4e6d6a57bbe03af4

+3 -3  third_party/fwkacllib/inc/cce/fwk_adpt_struct.h

@@ -145,9 +145,9 @@ struct ResultSummary {

#pragma pack(push, 1)
struct AsyncWait {
uint8_t waitType; // wait type, FWk_ADPT_WAIT_TPYE_EVENT: event wait
uint32_t waitId; // wait id, GE refresh
uint32_t timeOut; // reserved
uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait
uint32_t waitId; // wait id, GE refresh
uint32_t timeOut; // reserved
uint64_t reserved;
};
#pragma pack(pop)
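
Because AsyncWait sits inside #pragma pack(push, 1), it is byte-packed: 1 + 4 + 4 + 8 = 17 bytes with no padding. A compile-time guard such as the following (an illustration, not in the header; qualify AsyncWait with whatever namespace fwk_adpt_struct.h wraps it in) keeps host and device agreeing on that layout:

// Hedged sketch: assert the 1-byte-packed size of AsyncWait.
static_assert(sizeof(AsyncWait) == 17U, "AsyncWait must remain 1-byte packed (17 bytes)");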


+0 -3  third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h

@@ -79,9 +79,6 @@ typedef long LONG;
#define MMPA_THREAD_SCHED_OTHER SCHED_OTHER
#define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN

#define MMPA_PATH_SEPARATOR_STR "/"
#define MMPA_PATH_SEPARATOR_CHAR '/'

#define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER

#define MMPA_MAX_NI 19


+83 -86  third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h

@@ -1,86 +1,83 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MMPA_TYPEDEF_WIN_H
#define MMPA_TYPEDEF_WIN_H
#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif // __cpluscplus
#endif // __cpluscplus
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#define EN_OK 0
#define EN_ERR 1
#define EN_ERROR (-1)
#define EN_INVALID_PARAM (-2)
#define EN_TIMEOUT (-3)
#define HANDLE_INVALID_VALUE (-1)
#define INVALID_SOCKET_HANDLE INVALID_SOCKET
#define MMPA_MEM_MAX_LEN (0x7fffffff)
#define MMPA_PROCESS_ERROR (0x7fffffff)
#define MMPA_ONE_THOUSAND 1000
#define MMPA_COMPUTER_BEGIN_YEAR 1900
#define SUMMER_TIME_OR_NOT (-1)
#define MMPA_ZERO 0
#define MMPA_VALUE_ONE 1
#define MMPA_SOCKET_MAIN_EDITION 2
#define MMPA_SOCKET_SECOND_EDITION 0
#define MMPA_PIPE_BUF_SIZE 1024
#define MMPA_MAX_SCANDIR_COUNT 1024
#define MAX_IOVEC_SIZE 32
#define MMPA_PIPE_COUNT 2
#define MMPA_THREADNAME_SIZE 16
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1)
#define MMPA_MIN_OS_VERSION_SIZE 64
#define MMPA_MAX_NI 19
#define MMPA_MIDDLE_NI 5
#define MMPA_LOW_NI (-5)
#define MMPA_MIN_NI (-20)
#define MMPA_MAX_FILE 128
#define MMPA_PATH_SEPARATOR_STR "\\"
#define MMPA_PATH_SEPARATOR_CHAR '\\'
#define MMPA_MAX_THREAD_PIO 99
#define MMPA_MIDDLE_THREAD_PIO 66
#define MMPA_LOW_THREAD_PIO 33
#define MMPA_MIN_THREAD_PIO 1
#define MMPA_THREAD_SCHED_RR 0
#define MMPA_THREAD_SCHED_FIFO 0
#define MMPA_THREAD_SCHED_OTHER 0
#define MMPA_THREAD_MIN_STACK_SIZE 0
#define MM_MUTEX_INITIALIZER NULL
#ifdef __cplusplus
#if __cplusplus
}
#endif // __cpluscplus
#endif // __cpluscplus
#endif // _MMPA_TYPEDEF_WIN_H_
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MMPA_TYPEDEF_WIN_H
#define MMPA_TYPEDEF_WIN_H

#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif // __cpluscplus
#endif // __cpluscplus

#ifndef FALSE
#define FALSE 0
#endif

#ifndef TRUE
#define TRUE 1
#endif

#define EN_OK 0
#define EN_ERR 1
#define EN_ERROR (-1)
#define EN_INVALID_PARAM (-2)
#define EN_TIMEOUT (-3)

#define HANDLE_INVALID_VALUE (-1)
#define INVALID_SOCKET_HANDLE INVALID_SOCKET
#define MMPA_MEM_MAX_LEN (0x7fffffff)
#define MMPA_PROCESS_ERROR (0x7fffffff)

#define MMPA_ONE_THOUSAND 1000
#define MMPA_COMPUTER_BEGIN_YEAR 1900
#define SUMMER_TIME_OR_NOT (-1)
#define MMPA_ZERO 0
#define MMPA_VALUE_ONE 1
#define MMPA_SOCKET_MAIN_EDITION 2
#define MMPA_SOCKET_SECOND_EDITION 0
#define MMPA_PIPE_BUF_SIZE 1024
#define MMPA_MAX_SCANDIR_COUNT 1024
#define MAX_IOVEC_SIZE 32
#define MMPA_PIPE_COUNT 2
#define MMPA_THREADNAME_SIZE 16
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1)
#define MMPA_MIN_OS_VERSION_SIZE 64

#define MMPA_MAX_NI 19
#define MMPA_MIDDLE_NI 5
#define MMPA_LOW_NI (-5)
#define MMPA_MIN_NI (-20)
#define MMPA_MAX_FILE 128

#define MMPA_MAX_THREAD_PIO 99
#define MMPA_MIDDLE_THREAD_PIO 66
#define MMPA_LOW_THREAD_PIO 33
#define MMPA_MIN_THREAD_PIO 1

#define MMPA_THREAD_SCHED_RR 0
#define MMPA_THREAD_SCHED_FIFO 0
#define MMPA_THREAD_SCHED_OTHER 0
#define MMPA_THREAD_MIN_STACK_SIZE 0

#define MM_MUTEX_INITIALIZER NULL

#ifdef __cplusplus
#if __cplusplus
}
#endif // __cpluscplus
#endif // __cpluscplus
#endif // _MMPA_TYPEDEF_WIN_H_
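
The Windows variant keeps MMPA_PATH_SEPARATOR_STR and MMPA_PATH_SEPARATOR_CHAR as "\\" and '\\', while the duplicate Linux definitions are removed above (presumably centralised elsewhere in mmpa). A small, hedged illustration of how code typically consumes these macros:

#include <string>

// Illustrative only: join a directory and file name with the platform separator.
std::string MmJoinPath(const std::string &dir, const std::string &name) {
  if (!dir.empty() && (dir.back() == MMPA_PATH_SEPARATOR_CHAR)) {
    return dir + name;
  }
  return dir + MMPA_PATH_SEPARATOR_STR + name;
}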

+68 -0  third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h

@@ -143,6 +143,74 @@ REG_OP(BatchNorm)
.OP_END_FACTORY_REG(BatchNorm)

/**
* @brief After the mean and reciprocal of standard deviation (invert_std) are separately calculated on each device,
* the mean and reciprocal of standard deviation (invert_std) data from each device are normalized,
* a total mean and reciprocal of standard deviation (invert_std) are returned, and running_var is updated.

* @par Inputs:
* include:
* @li mean_all: A Tensor. The mean of each device. Must be one of the following types: float16, float32.
* @li invert_std_all: A Tensor. Reciprocal of the variances of each device. Must be one of the following types: float16, float32.
* @li count_all: A Tensor. Number of data for each device. Must be one of the following types: float16, float32.
* @li mean_broadcast: A Tensor. The overall average and broadcast. Must be one of the following types: float16, float32.
* @li count_sum: A Tensor. General statistics. Must be one of the following types: float16, float32.
* @li running_var: A Tensor. Runtime variance. Must be one of the following types: float16, float32. \n

* @par Attributes:
* Two Attributes, including:
* @li momentum: An optional float. Defaults to 0.1. \n
* @li epsilon: An optional float. Defaults to 0.001. \n

* @par Outputs:
* include:
* @li invert_std: A Tensor. The inverse of the total variance.
* @li running_var_update: A Tensor. The moving variance of each device after the update. \n

* @par Third-party framework compatibility
* ReduceMeanWithCount, SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate are together
* compatible with the PyTorch operator BatchNormGatherStatsWithCounts.
*/
REG_OP(SyncBatchNormGatherStatsWithCounts)
.INPUT(mean_all, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(invert_std_all, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(count_all, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(mean_broadcast, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(running_var, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(invert_std, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(running_var_update, TensorType({DT_FLOAT, DT_FLOAT16}))
.ATTR(momentum, Float, 0.1)
.ATTR(epsilon, Float, 0.001)
.OP_END_FACTORY_REG(SyncBatchNormGatherStatsWithCounts)

/**
* @brief Updates running_mean.

* @par Inputs:
* include:
* @li mean: A Tensor. The mean of each device. Must be one of the following types: float16, float32.
* @li running_mean: A Tensor. Runtime Mean. Must be one of the following types: float16, float32. \n

* @par Attributes:
* One Attribute, including:
* @li momentum: An optional float. Defaults to 0.1. \n

* @par Outputs:
* include:
* @li running_mean_update: A Tensor. The moving mean of each device after the update. \n

* @par Third-party framework compatibility
* ReduceMeanWithCount, SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate are together
* compatible with the PyTorch operator BatchNormGatherStatsWithCounts.
*/
REG_OP(SyncBNTrainingUpdate)
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(running_mean, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(running_mean_update, TensorType({DT_FLOAT, DT_FLOAT16}))
.ATTR(momentum, Float, 0.1)
.OP_END_FACTORY_REG(SyncBNTrainingUpdate)
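
A reading of the two registrations above, expressed as a scalar sketch (illustration only, not the CANN kernels; the exact bias correction applied to running_var on device may differ): per-device means and invert_std values are combined into global statistics weighted by count, and the running statistics are folded in with momentum, mirroring PyTorch's batch_norm_gather_stats_with_counts. SyncBNTrainingUpdate performs the analogous running_mean_update = (1 - momentum) * running_mean + momentum * mean step.

#include <cmath>
#include <vector>

struct SyncBnGathered {
  float mean;                // total mean across devices
  float invert_std;          // 1 / sqrt(total_var + epsilon)
  float running_var_update;  // running variance after the momentum update
};

// Hedged sketch of SyncBatchNormGatherStatsWithCounts for a single channel.
SyncBnGathered GatherStatsWithCounts(const std::vector<float> &mean_all,
                                     const std::vector<float> &invert_std_all,
                                     const std::vector<float> &count_all,
                                     float running_var, float momentum, float epsilon) {
  float count_sum = 0.0f;
  float mean = 0.0f;
  for (size_t i = 0U; i < mean_all.size(); ++i) {
    count_sum += count_all[i];
    mean += mean_all[i] * count_all[i];
  }
  mean /= count_sum;

  float var = 0.0f;
  for (size_t i = 0U; i < mean_all.size(); ++i) {
    const float var_i = 1.0f / (invert_std_all[i] * invert_std_all[i]) - epsilon;
    const float delta = mean_all[i] - mean;
    var += (var_i + delta * delta) * count_all[i];
  }
  var /= count_sum;

  const float running_var_update = (1.0f - momentum) * running_var + momentum * var;
  return {mean, 1.0f / std::sqrt(var + epsilon), running_var_update};
}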

/**
*@brief part of SyncBatchNormBackward . \n

*@par Inputs:


+86 -0  third_party/fwkacllib/inc/ops/reduce_ops.h

@@ -516,6 +516,34 @@ REG_OP(ReduceSumD)
.OP_END_FACTORY_REG(ReduceSumD)

/**
*@brief Calculate the total mean based on the mean of each device . \n

*@par Inputs:
* Three inputs, including:
*@li x: A Tensor. Must be one of the following types: float16, float32 .
*@li count: A Tensor. Must be one of the following types: float16, float32 .
*@li count_sum: A Tensor. Must be one of the following types: float16, float32 . \n

*@par Attributes:
*@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce.
*@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n

*@par Outputs:
*y: The reduced tensor. Has the same type and format as input "x" . \n

*@par Third-party framework compatibility
* Compatible with the TensorFlow operator Sum.
*/
REG_OP(ReduceMeanWithCount)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(count, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.REQUIRED_ATTR(axes, ListInt)
.ATTR(keep_dims, Bool, false)
.OP_END_FACTORY_REG(ReduceMeanWithCount)

/**
*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n

*@par Inputs:
@@ -1363,6 +1391,64 @@ REG_OP(ReduceStdV2Update)
.ATTR(unbiased, Bool, true)
.ATTR(keepdim, Bool, false)
.OP_END_FACTORY_REG(ReduceStdV2Update)

/**
*@brief Computes the log of the sum of exponentials of elements across dimensions of a tensor.
* Reduces "x" along the dimensions given in "axes".
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each
* entry in "axes". If "keep_dims" is true, the reduced dimensions
* are retained with length 1.
*
*@par Inputs:
* Two inputs, including:
*@li x: A Tensor. Must be one of the following types:
* float32, float16, int32, int64, uint32, uint64, double
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n
*
*@par Attributes:
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n
*
*@par Outputs:
*y: The reduced tensor. Has the same type and format as input "x" . \n
*
*@par Third-party framework compatibility
* Compatible with the ONNX operator ReduceLogSumExp.
*/
REG_OP(ReduceLogSumExp)
.INPUT(x, TensorType::NumberType())
.INPUT(axes, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::NumberType())
.ATTR(keep_dims, Bool, false)
.OP_END_FACTORY_REG(ReduceLogSumExp)

/**
*@brief Computes the log of the sum of elements across dimensions of a tensor.
* Reduces "x" along the dimensions given in "axes".
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each
* entry in "axes". If "keep_dims" is true, the reduced dimensions
* are retained with length 1.
*
*@par Inputs:
* Two inputs, including:
*@li x: A Tensor. Must be one of the following types:
* float32, float16, int32, int64, uint32, uint64, double
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n
*
*@par Attributes:
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n
*
*@par Outputs:
*y: The reduced tensor. Has the same type and format as input "x" . \n
*
*@par Third-party framework compatibility
* Compatible with the ONNX operator ReduceLogSum.
*/
REG_OP(ReduceLogSum)
.INPUT(x, TensorType::NumberType())
.INPUT(axes, TensorType::IndexNumberType())
.OUTPUT(y, TensorType::NumberType())
.ATTR(keep_dims, Bool, false)
.OP_END_FACTORY_REG(ReduceLogSum)
} //namespace ge

#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_
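
ReduceMeanWithCount reads as the count-weighted companion of the sync-BN ops above: it appears to reduce "x" along "axes" while normalizing by the global count_sum rather than the local element count. For ReduceLogSumExp, the semantics named by the op are log(sum(exp(x))) over the reduced axes; a hedged, numerically stable scalar sketch (illustration only, not the device kernel) is:

#include <algorithm>
#include <cmath>
#include <vector>

// Illustrative only: stable log-sum-exp over one flattened axis.
float LogSumExp(const std::vector<float> &x) {
  const float max_val = *std::max_element(x.begin(), x.end());
  float sum = 0.0f;
  for (const float v : x) {
    sum += std::exp(v - max_val);  // shift by the max so exp() cannot overflow
  }
  return max_val + std::log(sum);  // log(sum_i exp(x_i))
}

ReduceLogSum follows the same pattern without the exponential shift: the log of the (assumed positive) sum of the reduced elements.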

+14 -5  third_party/fwkacllib/inc/toolchain/prof_callback.h

@@ -1,8 +1,17 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
* Description: handle perf data
* Author: xp
* Create: 2019-10-13
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MSPROFILER_PROF_CALLBACK_H_


+450 -0  third_party/fwkacllib/inc/toolchain/prof_common.h

@@ -0,0 +1,450 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
* Description: handle perf data
* Author: Huawei Technologies Co., Ltd.
* Create: 2019-10-13
*/
#ifndef MSPROFILER_PROF_COMMON_H_
#define MSPROFILER_PROF_COMMON_H_

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

#include <stdint.h>

#define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a

enum MsprofDataTag {
MSPROF_ACL_DATA_TAG = 0, //acl data tag, range: 0~19
MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, //ge data tag, range: 20~39
MSPROF_GE_DATA_TAG_FUSION = 21,
MSPROF_GE_DATA_TAG_INFER = 22,
MSPROF_GE_DATA_TAG_TASK = 23,
MSPROF_GE_DATA_TAG_TENSOR = 24,
MSPROF_GE_DATA_TAG_STEP = 25,
MSPROF_GE_DATA_TAG_ID_MAP = 26,
MSPROF_GE_DATA_TAG_HOST_SCH = 27,
MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59
MSPROF_RUNTIME_DATA_TAG_TRACK = 41,
MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79
MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99
MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119
MSPROF_MSPROFTX_DATA_TAG = 120, //msproftx data tag, range: 120~139
MSPROF_DATA_TAG_MAX = 65536, //data tag value type is uint16_t
};

/**
* @brief struct of mixed data
*/
#define MSPROF_MIX_DATA_RESERVE_BYTES 7
#define MSPROF_MIX_DATA_STRING_LEN 120
enum MsprofMixDataType {
MSPROF_MIX_DATA_HASH_ID = 0,
MSPROF_MIX_DATA_STRING,
};
struct MsprofMixData {
uint8_t type; // MsprofMixDataType
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES];
union {
uint64_t hashId;
char dataStr[MSPROF_MIX_DATA_STRING_LEN];
} data;
};
using MixData = struct MsprofMixData;

/**
* @brief profiling command info
*/
#define MSPROF_MAX_DEV_NUM 64
struct MsprofCommandHandle {
uint64_t profSwitch;
uint64_t profSwitchHi;
uint32_t devNums;
uint32_t devIdList[MSPROF_MAX_DEV_NUM];
uint32_t modelId;
uint32_t type;
};

/**
* @brief struct of data reported by acl
*/
#define MSPROF_ACL_DATA_RESERVE_BYTES 32
#define MSPROF_ACL_API_NAME_LEN 64
enum MsprofAclApiType {
MSPROF_ACL_API_TYPE_OP = 1,
MSPROF_ACL_API_TYPE_MODEL,
MSPROF_ACL_API_TYPE_RUNTIME,
MSPROF_ACL_API_TYPE_OTHERS,
};
struct MsprofAclProfData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_ACL_DATA_TAG;
uint32_t apiType; // enum MsprofAclApiType
uint64_t beginTime;
uint64_t endTime;
uint32_t processId;
uint32_t threadId;
char apiName[MSPROF_ACL_API_NAME_LEN];
uint8_t reserve[MSPROF_ACL_DATA_RESERVE_BYTES];
};

/**
* @brief struct of data reported by GE
*/
#define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104
struct MsprofGeProfModelLoadData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD;
uint32_t modelId;
MixData modelName;
uint64_t startTime;
uint64_t endTime;
uint8_t reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8
#define MSPROF_GE_FUSION_OP_NUM 8
struct MsprofGeProfFusionData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION;
uint32_t modelId;
MixData fusionName;
uint64_t inputMemSize;
uint64_t outputMemSize;
uint64_t weightMemSize;
uint64_t workspaceMemSize;
uint64_t totalMemSize;
uint64_t fusionOpNum;
uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM];
uint8_t reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64
struct MsprofGeProfInferData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER;
uint32_t modelId;
MixData modelName;
uint32_t requestId;
uint32_t threadId;
uint64_t inputDataStartTime;
uint64_t inputDataEndTime;
uint64_t inferStartTime;
uint64_t inferEndTime;
uint64_t outputDataStartTime;
uint64_t outputDataEndTime;
uint8_t reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_TASK_DATA_RESERVE_BYTES 16
#define MSPROF_GE_OP_TYPE_LEN 56
enum MsprofGeTaskType {
MSPROF_GE_TASK_TYPE_AI_CORE = 0,
MSPROF_GE_TASK_TYPE_AI_CPU,
MSPROF_GE_TASK_TYPE_AIV,
};
enum MsprofGeShapeType {
MSPROF_GE_SHAPE_TYPE_STATIC = 0,
MSPROF_GE_SHAPE_TYPE_DYNAMIC,
};
struct MsprofGeOpType {
uint8_t type; // MsprofMixDataType
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES];
union {
uint64_t hashId;
char dataStr[MSPROF_GE_OP_TYPE_LEN];
} data;
};
struct MsprofGeProfTaskData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK;
uint32_t taskType; // MsprofGeTaskType
MixData opName;
MsprofGeOpType opType;
uint64_t curIterNum;
uint64_t timeStamp;
uint32_t shapeType; // MsprofGeShapeType
uint32_t blockDims;
uint32_t modelId;
uint32_t streamId;
uint32_t taskId;
uint32_t threadId;
uint8_t reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8
#define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8
#define MSPROF_GE_TENSOR_DATA_NUM 5
enum MsprofGeTensorType {
MSPROF_GE_TENSOR_TYPE_INPUT = 0,
MSPROF_GE_TENSOR_TYPE_OUTPUT,
};
struct MsprofGeTensorData {
uint32_t tensorType; // MsprofGeTensorType
uint32_t format;
uint32_t dataType;
uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN];
};

struct MsprofGeProfTensorData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR;
uint32_t modelId;
uint64_t curIterNum;
uint32_t streamId;
uint32_t taskId;
uint32_t tensorNum;
MsprofGeTensorData tensorData[MSPROF_GE_TENSOR_DATA_NUM];
uint8_t reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27
enum MsprofGeStepTag {
MSPROF_GE_STEP_TAG_BEGIN = 0,
MSPROF_GE_STEP_TAG_END,
};
struct MsprofGeProfStepData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP;
uint32_t modelId;
uint32_t streamId;
uint32_t taskId;
uint64_t timeStamp;
uint64_t curIterNum;
uint32_t threadId;
uint8_t tag; // MsprofGeStepTag
uint8_t reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6
struct MsprofGeProfIdMapData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP;
uint32_t graphId;
uint32_t modelId;
uint32_t sessionId;
uint64_t timeStamp;
uint16_t mode;
uint8_t reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES];
};

#define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24
struct MsprofGeProfHostSchData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH;
uint32_t threadId; // record in start event
uint64_t element;
uint64_t event;
uint64_t startTime; // record in start event
uint64_t endTime; // record in end event
uint8_t reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES];
};

/**
* @brief struct of data reported by RunTime
*/
#define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106
#define MSPROF_RUNTIME_TASK_ID_NUM 10
#define MSPROF_RUNTIME_API_NAME_LEN 64
struct MsprofRuntimeProfApiData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API;
uint32_t threadId;
uint64_t entryTime;
uint64_t exitTime;
uint64_t dataSize;
uint8_t apiName[MSPROF_RUNTIME_API_NAME_LEN];
uint32_t retCode;
uint32_t streamId;
uint32_t taskNum;
uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM];
uint16_t memcpyDirection;
uint8_t reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES];
};

#define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10
#define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32
struct MsprofRuntimeProfTrackData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK;
uint32_t threadId;
uint64_t timeStamp;
char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN];
uint32_t taskId;
uint16_t streamId;
uint8_t reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES];
};

/**
* @brief struct of data reported by AI CPU
*/
#define MSPROF_AICPU_DATA_RESERVE_BYTES 9
struct MsprofAicpuProfData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_AICPU_DATA_TAG;
uint16_t streamId;
uint16_t taskId;
uint64_t runStartTime;
uint64_t runStartTick;
uint64_t computeStartTime;
uint64_t memcpyStartTime;
uint64_t memcpyEndTime;
uint64_t runEndTime;
uint64_t runEndTick;
uint32_t threadId;
uint32_t deviceId;
uint64_t submitTick;
uint64_t scheduleTick;
uint64_t tickBeforeRun;
uint64_t tickAfterRun;
uint32_t kernelType;
uint32_t dispatchTime;
uint32_t totalTime;
uint16_t fftsThreadId;
uint8_t version;
uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES];
};

/**
* @brief struct of data reported by DP
*/
#define MSPROF_DP_DATA_RESERVE_BYTES 16
#define MSPROF_DP_DATA_ACTION_LEN 16
#define MSPROF_DP_DATA_SOURCE_LEN 64
struct MsprofDpProfData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_DP_DATA_TAG;
uint32_t rsv; // Ensure 8-byte alignment
uint64_t timeStamp;
char action[MSPROF_DP_DATA_ACTION_LEN];
char source[MSPROF_DP_DATA_SOURCE_LEN];
uint64_t index;
uint64_t size;
uint8_t reserve[MSPROF_DP_DATA_RESERVE_BYTES];
};

/**
* @brief struct of data reported by HCCL
*/
#pragma pack(4)
struct MsprofHcclProfNotify {
uint32_t taskID;
uint64_t notifyID;
uint32_t stage;
uint32_t remoteRank;
uint32_t transportType;
uint32_t role; // role {0: dst, 1:src}
double durationEstimated;
};

struct MsprofHcclProfReduce {
uint32_t taskID;
uint64_t src;
uint64_t dst;
uint64_t size;
uint32_t op; // {0: sum, 1: mul, 2: max, 3: min}
uint32_t dataType; // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64}
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
uint32_t remoteRank;
uint32_t transportType; // transport type {0: SDMA, 1: RDMA, 2:LOCAL}
uint32_t role; // role {0: dst, 1:src}
double durationEstimated;
};

struct MsprofHcclProfRDMA {
uint32_t taskID;
uint64_t src;
uint64_t dst;
uint64_t size;
uint64_t notifyID;
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
uint32_t remoteRank;
uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL}
uint32_t role; // role {0: dst, 1:src}
uint32_t type; // RDMA type {0: RDMASendNotify, 1:RDMASendPayload}
double durationEstimated;
};

struct MsprofHcclProfMemcpy {
uint32_t taskID;
uint64_t src;
uint64_t dst;
uint64_t size;
uint64_t notifyID;
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
uint32_t remoteRank;
uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL}
uint32_t role; // role {0: dst, 1:src}
double durationEstimated;
};

struct MsprofHcclProfStageStep {
uint32_t rank;
uint32_t rankSize;
};

struct MsprofHcclProfFlag {
uint64_t cclTag;
uint64_t groupName;
uint32_t localRank;
uint32_t workFlowMode;
};

/**
* @name MsprofHcclProfData
* @brief struct of data reported by hccl
*/
struct MsprofHcclProfData {
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
uint16_t dataTag = MSPROF_HCCL_DATA_TAG;
uint32_t planeID;
uint32_t deviceID;
uint32_t streamID;
double ts;
char name[16];
union {
MsprofHcclProfNotify notify;
MsprofHcclProfReduce reduce;
MsprofHcclProfStageStep stageStep;
MsprofHcclProfMemcpy forMemcpy;
MsprofHcclProfRDMA RDMA;
MsprofHcclProfFlag flag;
} args;
};
#pragma pack()

/**
* @name MsprofStampInfo
* @brief struct of data reported by msproftx
*/
struct MsprofStampInfo {
uint16_t magicNumber;
uint16_t dataTag;
uint32_t processId;
uint32_t threadId;
uint32_t category; //marker category
uint32_t eventType;
int32_t payloadType;
union PayloadValue //payload info for marker
{
uint64_t ullValue;
int64_t llValue;
double dValue;
uint32_t uiValue[2];
int32_t iValue[2];
float fValue[2];
} payload;
uint64_t startTime;
uint64_t endTime;
int32_t messageType;
char message[128];
uint8_t reserve0[4];
uint8_t reserve1[72];
};

#ifdef __cplusplus
}
#endif

#endif // MSPROFILER_PROF_COMMON_H_
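
All of the report structs above follow one convention: a 16-bit magic number (0x5a5a), a 16-bit MsprofDataTag, then a fixed-size tag-specific payload padded with reserve bytes. A hedged sketch of producing one such record follows; the include path is assumed from the header's location, and the reporter that would eventually consume the record (MsprofReporterCallback in prof_callback.h) is only named, not invoked, since its signature is outside this diff.

#include <cstdint>
#include "toolchain/prof_common.h"

// Illustrative only: build a step-begin record. magicNumber and dataTag keep their
// in-class defaults (MSPROF_DATA_HEAD_MAGIC_NUM / MSPROF_GE_DATA_TAG_STEP).
MsprofGeProfStepData MakeStepBeginRecord(uint32_t model_id, uint64_t iter_num, uint64_t timestamp) {
  MsprofGeProfStepData record{};
  record.modelId = model_id;
  record.curIterNum = iter_num;
  record.timeStamp = timestamp;
  record.tag = static_cast<uint8_t>(MSPROF_GE_STEP_TAG_BEGIN);
  return record;
}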
