@@ -22,6 +22,8 @@ | |||
#include <mutex> | |||
#include <unordered_map> | |||
#include "graph/profiler.h" | |||
#include "external/ge/ge_api_types.h" | |||
#include "toolchain/prof_callback.h" | |||
namespace ge { | |||
namespace profiling { | |||
enum { | |||
@@ -46,6 +48,7 @@ enum { | |||
kCopyH2D, | |||
kProfilingIndexEnd | |||
}; | |||
// Hash id value 0 is reserved as the "invalid" marker, as the constant's name indicates.
constexpr uint64_t kInvalidHashId = 0ULL; | |||
class ProfilingContext { | |||
public: | |||
@@ -100,9 +103,16 @@ class ProfilingContext { | |||
} | |||
int64_t RegisterString(const std::string &str); | |||
int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str); | |||
void UpdateElementHashId(MsprofReporterCallback reporter_callback); | |||
static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str, | |||
uint64_t &hash_id); | |||
size_t GetRegisterStringNum() const { | |||
return strings_to_index_.size(); | |||
} | |||
private: | |||
void RegisterString(int64_t index, const std::string &str); | |||
void UpdateHashByStr(const std::string &str, const uint64_t hash); | |||
void Init(); | |||
private: | |||
@@ -0,0 +1,35 @@ | |||
/** | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_FRAMEWORK_OMG_MODEL_TOOL_H_ | |||
#define INC_FRAMEWORK_OMG_MODEL_TOOL_H_ | |||
#include <memory> | |||
#include <string> | |||
#include "framework/common/debug/ge_log.h" | |||
#include "proto/ge_ir.pb.h" | |||
namespace ge { | |||
// Static helpers that fill a ge::proto::ModelDef from a model file on disk.
// NOTE(review): only the declarations are visible here; the descriptions below are inferred from the
// names and signatures and should be confirmed against the implementation.
class GE_FUNC_VISIBILITY ModelTool { | |||
public: | |||
// Fills `model_def` from the file at `model_file` (presumably an offline .om model - TODO confirm) and
// reports a size through `modeldef_size`. Returns a Status.
static Status GetModelInfoFromOm(const char *model_file, ge::proto::ModelDef &model_def, uint32_t &modeldef_size); | |||
// Fills `model_def` from the file at `model_file` (presumably a protobuf text file - TODO confirm).
// Returns a Status.
static Status GetModelInfoFromPbtxt(const char *model_file, ge::proto::ModelDef &model_def); | |||
}; | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_OMG_MODEL_TOOL_H_ |
@@ -1 +1 @@ | |||
Subproject commit 2659f49dcb14c0773e10e17ee9896b7be4d8e7be | |||
Subproject commit dc5ac26aac4c49b4e72cd91d4e6d6a57bbe03af4 |
@@ -145,9 +145,9 @@ struct ResultSummary { | |||
#pragma pack(push, 1) | |||
struct AsyncWait { | |||
uint8_t waitType; // wait type, FWk_ADPT_WAIT_TPYE_EVENT: event wait | |||
uint32_t waitId; // wait id, GE refresh | |||
uint32_t timeOut; // reserved | |||
uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait | |||
uint32_t waitId; // wait id, GE refresh | |||
uint32_t timeOut; // reserved | |||
uint64_t reserved; | |||
}; | |||
#pragma pack(pop) | |||
@@ -79,9 +79,6 @@ typedef long LONG; | |||
#define MMPA_THREAD_SCHED_OTHER SCHED_OTHER | |||
#define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN | |||
#define MMPA_PATH_SEPARATOR_STR "/" | |||
#define MMPA_PATH_SEPARATOR_CHAR '/' | |||
#define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER | |||
#define MMPA_MAX_NI 19 | |||
@@ -1,86 +1,83 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef MMPA_TYPEDEF_WIN_H | |||
#define MMPA_TYPEDEF_WIN_H | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
#endif // __cplusplus | |||
#ifndef FALSE | |||
#define FALSE 0 | |||
#endif | |||
#ifndef TRUE | |||
#define TRUE 1 | |||
#endif | |||
#define EN_OK 0 | |||
#define EN_ERR 1 | |||
#define EN_ERROR (-1) | |||
#define EN_INVALID_PARAM (-2) | |||
#define EN_TIMEOUT (-3) | |||
#define HANDLE_INVALID_VALUE (-1) | |||
#define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||
#define MMPA_MEM_MAX_LEN (0x7fffffff) | |||
#define MMPA_PROCESS_ERROR (0x7fffffff) | |||
#define MMPA_ONE_THOUSAND 1000 | |||
#define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||
#define SUMMER_TIME_OR_NOT (-1) | |||
#define MMPA_ZERO 0 | |||
#define MMPA_VALUE_ONE 1 | |||
#define MMPA_SOCKET_MAIN_EDITION 2 | |||
#define MMPA_SOCKET_SECOND_EDITION 0 | |||
#define MMPA_PIPE_BUF_SIZE 1024 | |||
#define MMPA_MAX_SCANDIR_COUNT 1024 | |||
#define MAX_IOVEC_SIZE 32 | |||
#define MMPA_PIPE_COUNT 2 | |||
#define MMPA_THREADNAME_SIZE 16 | |||
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||
#define MMPA_MIN_OS_VERSION_SIZE 64 | |||
#define MMPA_MAX_NI 19 | |||
#define MMPA_MIDDLE_NI 5 | |||
#define MMPA_LOW_NI (-5) | |||
#define MMPA_MIN_NI (-20) | |||
#define MMPA_MAX_FILE 128 | |||
#define MMPA_PATH_SEPARATOR_STR "\\" | |||
#define MMPA_PATH_SEPARATOR_CHAR '\\' | |||
#define MMPA_MAX_THREAD_PIO 99 | |||
#define MMPA_MIDDLE_THREAD_PIO 66 | |||
#define MMPA_LOW_THREAD_PIO 33 | |||
#define MMPA_MIN_THREAD_PIO 1 | |||
#define MMPA_THREAD_SCHED_RR 0 | |||
#define MMPA_THREAD_SCHED_FIFO 0 | |||
#define MMPA_THREAD_SCHED_OTHER 0 | |||
#define MMPA_THREAD_MIN_STACK_SIZE 0 | |||
#define MM_MUTEX_INITIALIZER NULL | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // __cplusplus | |||
#endif // MMPA_TYPEDEF_WIN_H | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef MMPA_TYPEDEF_WIN_H | |||
#define MMPA_TYPEDEF_WIN_H | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
#endif // __cplusplus | |||
#ifndef FALSE | |||
#define FALSE 0 | |||
#endif | |||
#ifndef TRUE | |||
#define TRUE 1 | |||
#endif | |||
#define EN_OK 0 | |||
#define EN_ERR 1 | |||
#define EN_ERROR (-1) | |||
#define EN_INVALID_PARAM (-2) | |||
#define EN_TIMEOUT (-3) | |||
#define HANDLE_INVALID_VALUE (-1) | |||
#define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||
#define MMPA_MEM_MAX_LEN (0x7fffffff) | |||
#define MMPA_PROCESS_ERROR (0x7fffffff) | |||
#define MMPA_ONE_THOUSAND 1000 | |||
#define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||
#define SUMMER_TIME_OR_NOT (-1) | |||
#define MMPA_ZERO 0 | |||
#define MMPA_VALUE_ONE 1 | |||
#define MMPA_SOCKET_MAIN_EDITION 2 | |||
#define MMPA_SOCKET_SECOND_EDITION 0 | |||
#define MMPA_PIPE_BUF_SIZE 1024 | |||
#define MMPA_MAX_SCANDIR_COUNT 1024 | |||
#define MAX_IOVEC_SIZE 32 | |||
#define MMPA_PIPE_COUNT 2 | |||
#define MMPA_THREADNAME_SIZE 16 | |||
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||
#define MMPA_MIN_OS_VERSION_SIZE 64 | |||
#define MMPA_MAX_NI 19 | |||
#define MMPA_MIDDLE_NI 5 | |||
#define MMPA_LOW_NI (-5) | |||
#define MMPA_MIN_NI (-20) | |||
#define MMPA_MAX_FILE 128 | |||
#define MMPA_MAX_THREAD_PIO 99 | |||
#define MMPA_MIDDLE_THREAD_PIO 66 | |||
#define MMPA_LOW_THREAD_PIO 33 | |||
#define MMPA_MIN_THREAD_PIO 1 | |||
#define MMPA_THREAD_SCHED_RR 0 | |||
#define MMPA_THREAD_SCHED_FIFO 0 | |||
#define MMPA_THREAD_SCHED_OTHER 0 | |||
#define MMPA_THREAD_MIN_STACK_SIZE 0 | |||
#define MM_MUTEX_INITIALIZER NULL | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // __cplusplus | |||
#endif // MMPA_TYPEDEF_WIN_H |
@@ -143,6 +143,74 @@ REG_OP(BatchNorm) | |||
.OP_END_FACTORY_REG(BatchNorm) | |||
/** | |||
* @brief After the mean and reciprocal of standard deviation(invert_std) are separately calculated on each device, | |||
* the mean and reciprocal of standard deviation(invert_std) data on each device are normalized, | |||
* a total mean and reciprocal of standard deviation(invert_std) are returned, and running_var are updated. | |||
* @par Inputs: | |||
* include: | |||
* @li mean_all: A Tensor. The mean of each device. Must be one of the following types: float16, float32. | |||
* @li invert_std_all: A Tensor. Reciprocal of the variances of each device. Must be one of the following types: float16, float32. | |||
* @li count_all: A Tensor. Number of data for each device. Must be one of the following types: float16, float32. | |||
* @li mean_broadcast: A Tensor. The overall average and broadcast. Must be one of the following types: float16, float32. | |||
* @li count_sum: A Tensor. General statistics. Must be one of the following types: float16, float32. | |||
* @li running_var: A Tensor. Runtime variance. Must be one of the following types: float16, float32. \n | |||
* @par Attributes: | |||
* Two Attributes, including: | |||
* @li momentum: An optional float. Defaults to 0.1. \n | |||
* @li epsilon: An optional float. Defaults to 0.001. \n | |||
* @par Outputs: | |||
* include: | |||
* @li invert_std: A Tensor. It's inverse of total variance. | |||
* @li running_var_update: A Tensor. It's moving variance of each device after the update. \n | |||
* @par Third-party framework compatibility | |||
* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate | |||
* compatible with the PyTorch operator BatchNormGatherStatsWithCounts. | |||
*/ | |||
REG_OP(SyncBatchNormGatherStatsWithCounts) | |||
.INPUT(mean_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(invert_std_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(mean_broadcast, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(running_var, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(invert_std, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(running_var_update, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.ATTR(momentum, Float, 0.1) | |||
.ATTR(epsilon, Float, 0.001) | |||
.OP_END_FACTORY_REG(SyncBatchNormGatherStatsWithCounts) | |||
/** | |||
* @brief update running_mean. | |||
* @par Inputs: | |||
* include: | |||
* @li mean: A Tensor. The mean of each device. Must be one of the following types: float16, float32. | |||
* @li running_mean: A Tensor. Runtime Mean. Must be one of the following types: float16, float32. \n | |||
* @par Attributes: | |||
* One Attribute, including: | |||
* @li momentum: An optional float. Defaults to 0.1. \n | |||
* @par Outputs: | |||
* include: | |||
* @li running_mean_update: A Tensor. It's moving mean of each device after the update. \n | |||
* @par Third-party framework compatibility | |||
* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate | |||
* compatible with the PyTorch operator BatchNormGatherStatsWithCounts. | |||
*/ | |||
REG_OP(SyncBNTrainingUpdate) | |||
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(running_mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(running_mean_update, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.ATTR(momentum, Float, 0.1) | |||
.OP_END_FACTORY_REG(SyncBNTrainingUpdate) | |||
/** | |||
*@brief part of SyncBatchNormBackward . \n | |||
*@par Inputs: | |||
@@ -516,6 +516,34 @@ REG_OP(ReduceSumD) | |||
.OP_END_FACTORY_REG(ReduceSumD) | |||
/** | |||
*@brief Calculate the total mean based on the mean of each device . \n | |||
*@par Inputs: | |||
* Three inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li count: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li count_sum: A Tensor. Must be one of the following types: float16, float32 . \n | |||
*@par Attributes: | |||
*@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. | |||
*@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
*@par Outputs: | |||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator Sum. | |||
*/ | |||
REG_OP(ReduceMeanWithCount) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.REQUIRED_ATTR(axes, ListInt) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceMeanWithCount) | |||
/** | |||
*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n | |||
*@par Inputs: | |||
@@ -1363,6 +1391,64 @@ REG_OP(ReduceStdV2Update) | |||
.ATTR(unbiased, Bool, true) | |||
.ATTR(keepdim, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceStdV2Update) | |||
/** | |||
*@brief Computes the log of the sum of exponentials of elements across dimensions of a tensor. | |||
* Reduces "x" along the dimensions given in "axes". | |||
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each | |||
* entry in "axes". If "keep_dims" is true, the reduced dimensions | |||
* are retained with length 1. | |||
* | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: | |||
* float32, float16, int32, int64, uint32, uint64, double | |||
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n | |||
* | |||
*@par Attributes: | |||
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
* | |||
*@par Outputs: | |||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||
* | |||
*@par Third-party framework compatibility | |||
* Compatible with the Onnx operator ReduceLogSumExp. | |||
*/ | |||
REG_OP(ReduceLogSumExp) | |||
.INPUT(x, TensorType::NumberType()) | |||
.INPUT(axes, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::NumberType()) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceLogSumExp) | |||
/** | |||
*@brief Computes the log of the sum of elements across dimensions of a tensor. | |||
* Reduces "x" along the dimensions given in "axes". | |||
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each | |||
* entry in "axes". If "keep_dims" is true, the reduced dimensions | |||
* are retained with length 1. | |||
* | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: | |||
* float32, float16, int32, int64, uint32, uint64, double | |||
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n | |||
* | |||
*@par Attributes: | |||
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
* | |||
*@par Outputs: | |||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||
* | |||
*@par Third-party framework compatibility | |||
* Compatible with the Onnx operator ReduceLogSum. | |||
*/ | |||
REG_OP(ReduceLogSum) | |||
.INPUT(x, TensorType::NumberType()) | |||
.INPUT(axes, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::NumberType()) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceLogSum) | |||
} //namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ |
@@ -1,8 +1,17 @@ | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||
* Description: handle perf data | |||
* Author: xp | |||
* Create: 2019-10-13 | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef MSPROFILER_PROF_CALLBACK_H_ | |||
@@ -0,0 +1,450 @@ | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||
* Description: handle perf data | |||
* Author: Huawei Technologies Co., Ltd. | |||
* Create: 2019-10-13 | |||
*/ | |||
#ifndef MSPROFILER_PROF_COMMON_H_ | |||
#define MSPROFILER_PROF_COMMON_H_ | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
#include <stdint.h> | |||
// Magic number used as the default value of the `magicNumber` header field in the record structs of this file.
#define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a | |||
// Values for the `dataTag` header field; each reporting component owns the range noted on its first entry.
enum MsprofDataTag { | |||
MSPROF_ACL_DATA_TAG = 0, //acl data tag, range: 0~19 | |||
MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, //ge data tag, range: 20~39 | |||
MSPROF_GE_DATA_TAG_FUSION = 21, | |||
MSPROF_GE_DATA_TAG_INFER = 22, | |||
MSPROF_GE_DATA_TAG_TASK = 23, | |||
MSPROF_GE_DATA_TAG_TENSOR = 24, | |||
MSPROF_GE_DATA_TAG_STEP = 25, | |||
MSPROF_GE_DATA_TAG_ID_MAP = 26, | |||
MSPROF_GE_DATA_TAG_HOST_SCH = 27, | |||
MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59 | |||
MSPROF_RUNTIME_DATA_TAG_TRACK = 41, | |||
MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79 | |||
MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99 | |||
MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119 | |||
MSPROF_MSPROFTX_DATA_TAG = 120, //msproftx data tag, range: 120~139 | |||
// One past the largest value a uint16_t `dataTag` field can hold (65535), i.e. an exclusive upper bound.
MSPROF_DATA_TAG_MAX = 65536, //data tag value type is uint16_t | |||
}; | |||
/** | |||
* @brief struct of mixed data | |||
*/ | |||
#define MSPROF_MIX_DATA_RESERVE_BYTES 7 | |||
#define MSPROF_MIX_DATA_STRING_LEN 120 | |||
// Kind of payload carried by MsprofMixData; stored in its `type` field.
enum MsprofMixDataType { | |||
MSPROF_MIX_DATA_HASH_ID = 0, | |||
MSPROF_MIX_DATA_STRING, | |||
}; | |||
// Holds either a 64-bit hash id or an inline character buffer of MSPROF_MIX_DATA_STRING_LEN bytes.
// `type` presumably selects which member of `data` is valid - TODO confirm against the writers.
// `type` plus `rsv` occupy 8 bytes (1 + MSPROF_MIX_DATA_RESERVE_BYTES) ahead of the union.
struct MsprofMixData { | |||
uint8_t type; // MsprofMixDataType | |||
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; | |||
union { | |||
uint64_t hashId; | |||
char dataStr[MSPROF_MIX_DATA_STRING_LEN]; | |||
} data; | |||
}; | |||
// Short alias used for name fields in the GE record structs of this file.
using MixData = struct MsprofMixData; | |||
/** | |||
* @brief profiling command info | |||
*/ | |||
// Capacity of MsprofCommandHandle::devIdList.
#define MSPROF_MAX_DEV_NUM 64 | |||
struct MsprofCommandHandle { | |||
uint64_t profSwitch; | |||
uint64_t profSwitchHi; | |||
// NOTE(review): presumably the number of valid entries in devIdList - confirm against callers.
uint32_t devNums; | |||
uint32_t devIdList[MSPROF_MAX_DEV_NUM]; | |||
uint32_t modelId; | |||
uint32_t type; | |||
}; | |||
/** | |||
* @brief struct of data reported by acl | |||
*/ | |||
#define MSPROF_ACL_DATA_RESERVE_BYTES 32 | |||
#define MSPROF_ACL_API_NAME_LEN 64 | |||
// Values for MsprofAclProfData::apiType. Numbering starts at 1, so 0 is not a named type.
enum MsprofAclApiType { | |||
MSPROF_ACL_API_TYPE_OP = 1, | |||
MSPROF_ACL_API_TYPE_MODEL, | |||
MSPROF_ACL_API_TYPE_RUNTIME, | |||
MSPROF_ACL_API_TYPE_OTHERS, | |||
}; | |||
// ACL record. The header fields magicNumber/dataTag default to the magic number and the ACL tag.
struct MsprofAclProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_ACL_DATA_TAG; | |||
uint32_t apiType; // enum MsprofAclApiType | |||
uint64_t beginTime; | |||
uint64_t endTime; | |||
uint32_t processId; | |||
uint32_t threadId; | |||
char apiName[MSPROF_ACL_API_NAME_LEN]; | |||
uint8_t reserve[MSPROF_ACL_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by GE | |||
*/ | |||
#define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104 | |||
// GE model-load record. The header fields default to the magic number and the MODEL_LOAD tag.
struct MsprofGeProfModelLoadData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD; | |||
uint32_t modelId; | |||
MixData modelName; | |||
uint64_t startTime; | |||
uint64_t endTime; | |||
uint8_t reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8 | |||
// Capacity of MsprofGeProfFusionData::fusionOp.
#define MSPROF_GE_FUSION_OP_NUM 8 | |||
// GE fusion record. The header fields default to the magic number and the FUSION tag.
struct MsprofGeProfFusionData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION; | |||
uint32_t modelId; | |||
MixData fusionName; | |||
uint64_t inputMemSize; | |||
uint64_t outputMemSize; | |||
uint64_t weightMemSize; | |||
uint64_t workspaceMemSize; | |||
uint64_t totalMemSize; | |||
// NOTE(review): presumably the number of valid entries in fusionOp - confirm against the writer.
uint64_t fusionOpNum; | |||
uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM]; | |||
uint8_t reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64 | |||
// GE inference record. The header fields default to the magic number and the INFER tag.
// Carries start/end timestamp pairs for the input-data, infer and output-data phases.
struct MsprofGeProfInferData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER; | |||
uint32_t modelId; | |||
MixData modelName; | |||
uint32_t requestId; | |||
uint32_t threadId; | |||
uint64_t inputDataStartTime; | |||
uint64_t inputDataEndTime; | |||
uint64_t inferStartTime; | |||
uint64_t inferEndTime; | |||
uint64_t outputDataStartTime; | |||
uint64_t outputDataEndTime; | |||
uint8_t reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_TASK_DATA_RESERVE_BYTES 16 | |||
#define MSPROF_GE_OP_TYPE_LEN 56 | |||
// Values for MsprofGeProfTaskData::taskType.
enum MsprofGeTaskType { | |||
MSPROF_GE_TASK_TYPE_AI_CORE = 0, | |||
MSPROF_GE_TASK_TYPE_AI_CPU, | |||
MSPROF_GE_TASK_TYPE_AIV, | |||
}; | |||
// Values for MsprofGeProfTaskData::shapeType.
enum MsprofGeShapeType { | |||
MSPROF_GE_SHAPE_TYPE_STATIC = 0, | |||
MSPROF_GE_SHAPE_TYPE_DYNAMIC, | |||
}; | |||
// Same shape as MsprofMixData, but with a shorter string buffer (MSPROF_GE_OP_TYPE_LEN bytes).
struct MsprofGeOpType { | |||
uint8_t type; // MsprofMixDataType | |||
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; | |||
union { | |||
uint64_t hashId; | |||
char dataStr[MSPROF_GE_OP_TYPE_LEN]; | |||
} data; | |||
}; | |||
// GE task record. The header fields default to the magic number and the TASK tag.
struct MsprofGeProfTaskData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK; | |||
uint32_t taskType; // MsprofGeTaskType | |||
MixData opName; | |||
MsprofGeOpType opType; | |||
uint64_t curIterNum; | |||
uint64_t timeStamp; | |||
uint32_t shapeType; // MsprofGeShapeType | |||
uint32_t blockDims; | |||
uint32_t modelId; | |||
uint32_t streamId; | |||
uint32_t taskId; | |||
uint32_t threadId; | |||
uint8_t reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8 | |||
// Capacity of MsprofGeTensorData::shape.
#define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8 | |||
// Capacity of MsprofGeProfTensorData::tensorData.
#define MSPROF_GE_TENSOR_DATA_NUM 5 | |||
// Values for MsprofGeTensorData::tensorType.
enum MsprofGeTensorType { | |||
MSPROF_GE_TENSOR_TYPE_INPUT = 0, | |||
MSPROF_GE_TENSOR_TYPE_OUTPUT, | |||
}; | |||
// Description of one tensor: input/output kind, format, data type and up to
// MSPROF_GE_TENSOR_DATA_SHAPE_LEN shape entries.
struct MsprofGeTensorData { | |||
uint32_t tensorType; // MsprofGeTensorType | |||
uint32_t format; | |||
uint32_t dataType; | |||
uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN]; | |||
}; | |||
// GE tensor record. The header fields default to the magic number and the TENSOR tag.
struct MsprofGeProfTensorData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR; | |||
uint32_t modelId; | |||
uint64_t curIterNum; | |||
uint32_t streamId; | |||
uint32_t taskId; | |||
// NOTE(review): presumably the number of valid entries in tensorData - confirm against the writer.
uint32_t tensorNum; | |||
MsprofGeTensorData tensorData[MSPROF_GE_TENSOR_DATA_NUM]; | |||
uint8_t reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27 | |||
// Values for MsprofGeProfStepData::tag.
enum MsprofGeStepTag { | |||
MSPROF_GE_STEP_TAG_BEGIN = 0, | |||
MSPROF_GE_STEP_TAG_END, | |||
}; | |||
// GE step record. The header fields default to the magic number and the STEP tag.
struct MsprofGeProfStepData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP; | |||
uint32_t modelId; | |||
uint32_t streamId; | |||
uint32_t taskId; | |||
uint64_t timeStamp; | |||
uint64_t curIterNum; | |||
uint32_t threadId; | |||
uint8_t tag; // MsprofGeStepTag | |||
uint8_t reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6 | |||
// GE id-map record. The header fields default to the magic number and the ID_MAP tag.
// Carries a graph id, model id and session id together with a timestamp and a mode value.
struct MsprofGeProfIdMapData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP; | |||
uint32_t graphId; | |||
uint32_t modelId; | |||
uint32_t sessionId; | |||
uint64_t timeStamp; | |||
uint16_t mode; | |||
uint8_t reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24 | |||
// GE host-scheduling record. The header fields default to the magic number and the HOST_SCH tag.
// Per the field comments, one record combines values captured at a start event and at an end event.
struct MsprofGeProfHostSchData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH; | |||
uint32_t threadId; // record in start event | |||
uint64_t element; | |||
uint64_t event; | |||
uint64_t startTime; // record in start event | |||
uint64_t endTime; // record in end event | |||
uint8_t reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by RunTime | |||
*/ | |||
#define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106 | |||
// Capacity of MsprofRuntimeProfApiData::taskId.
#define MSPROF_RUNTIME_TASK_ID_NUM 10 | |||
#define MSPROF_RUNTIME_API_NAME_LEN 64 | |||
// Runtime API record. The header fields default to the magic number and the runtime API tag.
struct MsprofRuntimeProfApiData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API; | |||
uint32_t threadId; | |||
uint64_t entryTime; | |||
uint64_t exitTime; | |||
uint64_t dataSize; | |||
uint8_t apiName[MSPROF_RUNTIME_API_NAME_LEN]; | |||
uint32_t retCode; | |||
uint32_t streamId; | |||
// NOTE(review): presumably the number of valid entries in taskId - confirm against the writer.
uint32_t taskNum; | |||
uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM]; | |||
uint16_t memcpyDirection; | |||
uint8_t reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10 | |||
#define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32 | |||
// Runtime track record. The header fields default to the magic number and the runtime TRACK tag.
// Note that streamId is 16 bits wide here, unlike the 32-bit streamId in MsprofRuntimeProfApiData.
struct MsprofRuntimeProfTrackData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK; | |||
uint32_t threadId; | |||
uint64_t timeStamp; | |||
char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN]; | |||
uint32_t taskId; | |||
uint16_t streamId; | |||
uint8_t reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by AICPU | |||
*/ | |||
#define MSPROF_AICPU_DATA_RESERVE_BYTES 9 | |||
// AICPU record. The header fields default to the magic number and the AICPU tag.
// streamId and taskId are 16 bits wide in this record.
struct MsprofAicpuProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_AICPU_DATA_TAG; | |||
uint16_t streamId; | |||
uint16_t taskId; | |||
uint64_t runStartTime; | |||
uint64_t runStartTick; | |||
uint64_t computeStartTime; | |||
uint64_t memcpyStartTime; | |||
uint64_t memcpyEndTime; | |||
uint64_t runEndTime; | |||
uint64_t runEndTick; | |||
uint32_t threadId; | |||
uint32_t deviceId; | |||
uint64_t submitTick; | |||
uint64_t scheduleTick; | |||
uint64_t tickBeforeRun; | |||
uint64_t tickAfterRun; | |||
uint32_t kernelType; | |||
uint32_t dispatchTime; | |||
uint32_t totalTime; | |||
uint16_t fftsThreadId; | |||
uint8_t version; | |||
uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by DP | |||
*/ | |||
#define MSPROF_DP_DATA_RESERVE_BYTES 16 | |||
#define MSPROF_DP_DATA_ACTION_LEN 16 | |||
#define MSPROF_DP_DATA_SOURCE_LEN 64 | |||
// DP record. The header fields default to the magic number and the DP tag.
struct MsprofDpProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_DP_DATA_TAG; | |||
uint32_t rsv; // Ensure 8-byte alignment | |||
uint64_t timeStamp; | |||
char action[MSPROF_DP_DATA_ACTION_LEN]; | |||
char source[MSPROF_DP_DATA_SOURCE_LEN]; | |||
uint64_t index; | |||
uint64_t size; | |||
uint8_t reserve[MSPROF_DP_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by HCCL | |||
*/ | |||
// All structs up to the matching `#pragma pack()` use a maximum member alignment of 4 bytes, so the
// 8-byte members that follow a single uint32_t (e.g. `notifyID` after `taskID`) are not padded to an
// 8-byte boundary. Do not reorder or resize fields: the layout is the record format.
#pragma pack(4) | |||
struct MsprofHcclProfNotify { | |||
uint32_t taskID; | |||
uint64_t notifyID; | |||
uint32_t stage; | |||
uint32_t remoteRank; | |||
uint32_t transportType; | |||
uint32_t role; // role {0: dst, 1:src} | |||
double durationEstimated; | |||
}; | |||
struct MsprofHcclProfReduce { | |||
uint32_t taskID; | |||
uint64_t src; | |||
uint64_t dst; | |||
uint64_t size; | |||
uint32_t op; // {0: sum, 1: mul, 2: max, 3: min} | |||
uint32_t dataType; // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64} | |||
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} | |||
uint32_t remoteRank; | |||
// NOTE(review): the SDMA/RDMA numbering in the comment below differs from the comments in
// MsprofHcclProfRDMA and MsprofHcclProfMemcpy ({0: RDMA, 1:SDMA}) - confirm which mapping is correct.
uint32_t transportType; // transport type {0: SDMA, 1: RDMA, 2:LOCAL} | |||
uint32_t role; // role {0: dst, 1:src} | |||
double durationEstimated; | |||
}; | |||
struct MsprofHcclProfRDMA { | |||
uint32_t taskID; | |||
uint64_t src; | |||
uint64_t dst; | |||
uint64_t size; | |||
uint64_t notifyID; | |||
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} | |||
uint32_t remoteRank; | |||
uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL} | |||
uint32_t role; // role {0: dst, 1:src} | |||
uint32_t type; // RDMA type {0: RDMASendNotify, 1:RDMASendPayload} | |||
double durationEstimated; | |||
}; | |||
struct MsprofHcclProfMemcpy { | |||
uint32_t taskID; | |||
uint64_t src; | |||
uint64_t dst; | |||
uint64_t size; | |||
uint64_t notifyID; | |||
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} | |||
uint32_t remoteRank; | |||
uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL} | |||
uint32_t role; // role {0: dst, 1:src} | |||
double durationEstimated; | |||
}; | |||
struct MsprofHcclProfStageStep { | |||
uint32_t rank; | |||
uint32_t rankSize; | |||
}; | |||
struct MsprofHcclProfFlag { | |||
uint64_t cclTag; | |||
uint64_t groupName; | |||
uint32_t localRank; | |||
uint32_t workFlowMode; | |||
}; | |||
/** | |||
* @name MsprofHcclProfData | |||
* @brief struct of data reported by hccl | |||
*/ | |||
// The header fields default to the magic number and the HCCL tag. `args` is a union of the argument
// structs above; this struct has no field that records which member is in use.
// NOTE(review): presumably `name` identifies the active member - confirm against the reader.
struct MsprofHcclProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_HCCL_DATA_TAG; | |||
uint32_t planeID; | |||
uint32_t deviceID; | |||
uint32_t streamID; | |||
double ts; | |||
char name[16]; | |||
union { | |||
MsprofHcclProfNotify notify; | |||
MsprofHcclProfReduce reduce; | |||
MsprofHcclProfStageStep stageStep; | |||
MsprofHcclProfMemcpy forMemcpy; | |||
MsprofHcclProfRDMA RDMA; | |||
MsprofHcclProfFlag flag; | |||
} args; | |||
}; | |||
// Restores the default packing for the declarations that follow.
#pragma pack() | |||
/** | |||
* @name MsprofStampInfo | |||
* @brief struct of data reported by msproftx | |||
*/ | |||
// Unlike the other record structs in this file, magicNumber and dataTag have no default member
// initializers here, so the code that fills the record must set them explicitly.
struct MsprofStampInfo { | |||
uint16_t magicNumber; | |||
uint16_t dataTag; | |||
uint32_t processId; | |||
uint32_t threadId; | |||
uint32_t category; //marker category | |||
uint32_t eventType; | |||
int32_t payloadType; | |||
// 8-byte payload viewable as one 64-bit value or as two 32-bit values.
// NOTE(review): presumably payloadType selects the view - confirm against the writer.
union PayloadValue //payload info for marker | |||
{ | |||
uint64_t ullValue; | |||
int64_t llValue; | |||
double dValue; | |||
uint32_t uiValue[2]; | |||
int32_t iValue[2]; | |||
float fValue[2]; | |||
} payload; | |||
uint64_t startTime; | |||
uint64_t endTime; | |||
int32_t messageType; | |||
char message[128]; | |||
uint8_t reserve0[4]; | |||
uint8_t reserve1[72]; | |||
}; | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // MSPROFILER_PROF_COMMON_H_ |