Merge pull request !2102 from yanghaoran/r1.6pull/2103/MERGE
@@ -22,6 +22,8 @@ | |||||
#include <mutex> | #include <mutex> | ||||
#include <unordered_map> | #include <unordered_map> | ||||
#include "graph/profiler.h" | #include "graph/profiler.h" | ||||
#include "external/ge/ge_api_types.h" | |||||
#include "toolchain/prof_callback.h" | |||||
namespace ge { | namespace ge { | ||||
namespace profiling { | namespace profiling { | ||||
enum { | enum { | ||||
@@ -46,6 +48,7 @@ enum { | |||||
kCopyH2D, | kCopyH2D, | ||||
kProfilingIndexEnd | kProfilingIndexEnd | ||||
}; | }; | ||||
// Hash id value 0, named as the invalid id; presumably used when no hash is available for a string — TODO confirm against QueryHashId. | |||||
constexpr uint64_t kInvalidHashId = 0ULL; | |||||
class ProfilingContext { | class ProfilingContext { | ||||
public: | public: | ||||
@@ -100,9 +103,16 @@ class ProfilingContext { | |||||
} | } | ||||
int64_t RegisterString(const std::string &str); | int64_t RegisterString(const std::string &str); | ||||
int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str); | |||||
void UpdateElementHashId(MsprofReporterCallback reporter_callback); | |||||
static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str, | |||||
uint64_t &hash_id); | |||||
// Returns the current number of entries held in strings_to_index_. | |||||
size_t GetRegisterStringNum() const { | |||||
return strings_to_index_.size(); | |||||
} | |||||
private: | private: | ||||
void RegisterString(int64_t index, const std::string &str); | |||||
void UpdateHashByStr(const std::string &str, const uint64_t hash); | |||||
void Init(); | void Init(); | ||||
private: | private: | ||||
@@ -0,0 +1,35 @@ | |||||
/** | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_OMG_MODEL_TOOL_H_ | |||||
#define INC_FRAMEWORK_OMG_MODEL_TOOL_H_ | |||||
#include <memory> | |||||
#include <string> | |||||
#include "framework/common/debug/ge_log.h" | |||||
#include "proto/ge_ir.pb.h" | |||||
namespace ge { | |||||
// Static-only helper class declaring loaders that take a model file path and report a ge::proto::ModelDef. | |||||
class GE_FUNC_VISIBILITY ModelTool { | |||||
public: | |||||
// model_def and modeldef_size are non-const reference out-parameters; presumably model_file is an offline .om model — TODO confirm in the implementation. | |||||
static Status GetModelInfoFromOm(const char *model_file, ge::proto::ModelDef &model_def, uint32_t &modeldef_size); | |||||
// model_def is a non-const reference out-parameter; presumably model_file is a protobuf text (pbtxt) dump — TODO confirm in the implementation. | |||||
static Status GetModelInfoFromPbtxt(const char *model_file, ge::proto::ModelDef &model_def); | |||||
}; | |||||
} // namespace ge | |||||
#endif // INC_FRAMEWORK_OMG_MODEL_TOOL_H_ |
@@ -1 +1 @@ | |||||
Subproject commit 2659f49dcb14c0773e10e17ee9896b7be4d8e7be | |||||
Subproject commit dc5ac26aac4c49b4e72cd91d4e6d6a57bbe03af4 |
@@ -145,9 +145,9 @@ struct ResultSummary { | |||||
#pragma pack(push, 1) | #pragma pack(push, 1) | ||||
struct AsyncWait { | struct AsyncWait { | ||||
uint8_t waitType; // wait type, FWk_ADPT_WAIT_TPYE_EVENT: event wait | |||||
uint32_t waitId; // wait id, GE refresh | |||||
uint32_t timeOut; // reserved | |||||
uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait | |||||
uint32_t waitId; // wait id, GE refresh | |||||
uint32_t timeOut; // reserved | |||||
uint64_t reserved; | uint64_t reserved; | ||||
}; | }; | ||||
#pragma pack(pop) | #pragma pack(pop) | ||||
@@ -79,9 +79,6 @@ typedef long LONG; | |||||
#define MMPA_THREAD_SCHED_OTHER SCHED_OTHER | #define MMPA_THREAD_SCHED_OTHER SCHED_OTHER | ||||
#define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN | #define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN | ||||
#define MMPA_PATH_SEPARATOR_STR "/" | |||||
#define MMPA_PATH_SEPARATOR_CHAR '/' | |||||
#define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER | #define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER | ||||
#define MMPA_MAX_NI 19 | #define MMPA_MAX_NI 19 | ||||
@@ -1,86 +1,83 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef MMPA_TYPEDEF_WIN_H | |||||
#define MMPA_TYPEDEF_WIN_H | |||||
#ifdef __cplusplus | |||||
#if __cplusplus | |||||
extern "C" { | |||||
#endif // __cplusplus | |||||
#endif // __cplusplus | |||||
#ifndef FALSE | |||||
#define FALSE 0 | |||||
#endif | |||||
#ifndef TRUE | |||||
#define TRUE 1 | |||||
#endif | |||||
#define EN_OK 0 | |||||
#define EN_ERR 1 | |||||
#define EN_ERROR (-1) | |||||
#define EN_INVALID_PARAM (-2) | |||||
#define EN_TIMEOUT (-3) | |||||
#define HANDLE_INVALID_VALUE (-1) | |||||
#define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||||
#define MMPA_MEM_MAX_LEN (0x7fffffff) | |||||
#define MMPA_PROCESS_ERROR (0x7fffffff) | |||||
#define MMPA_ONE_THOUSAND 1000 | |||||
#define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||||
#define SUMMER_TIME_OR_NOT (-1) | |||||
#define MMPA_ZERO 0 | |||||
#define MMPA_VALUE_ONE 1 | |||||
#define MMPA_SOCKET_MAIN_EDITION 2 | |||||
#define MMPA_SOCKET_SECOND_EDITION 0 | |||||
#define MMPA_PIPE_BUF_SIZE 1024 | |||||
#define MMPA_MAX_SCANDIR_COUNT 1024 | |||||
#define MAX_IOVEC_SIZE 32 | |||||
#define MMPA_PIPE_COUNT 2 | |||||
#define MMPA_THREADNAME_SIZE 16 | |||||
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||||
#define MMPA_MIN_OS_VERSION_SIZE 64 | |||||
#define MMPA_MAX_NI 19 | |||||
#define MMPA_MIDDLE_NI 5 | |||||
#define MMPA_LOW_NI (-5) | |||||
#define MMPA_MIN_NI (-20) | |||||
#define MMPA_MAX_FILE 128 | |||||
#define MMPA_PATH_SEPARATOR_STR "\\" | |||||
#define MMPA_PATH_SEPARATOR_CHAR '\\' | |||||
#define MMPA_MAX_THREAD_PIO 99 | |||||
#define MMPA_MIDDLE_THREAD_PIO 66 | |||||
#define MMPA_LOW_THREAD_PIO 33 | |||||
#define MMPA_MIN_THREAD_PIO 1 | |||||
#define MMPA_THREAD_SCHED_RR 0 | |||||
#define MMPA_THREAD_SCHED_FIFO 0 | |||||
#define MMPA_THREAD_SCHED_OTHER 0 | |||||
#define MMPA_THREAD_MIN_STACK_SIZE 0 | |||||
#define MM_MUTEX_INITIALIZER NULL | |||||
#ifdef __cplusplus | |||||
#if __cplusplus | |||||
} | |||||
#endif // __cplusplus | |||||
#endif // __cplusplus | |||||
#endif // MMPA_TYPEDEF_WIN_H | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef MMPA_TYPEDEF_WIN_H | |||||
#define MMPA_TYPEDEF_WIN_H | |||||
#ifdef __cplusplus | |||||
#if __cplusplus | |||||
extern "C" { | |||||
#endif // __cplusplus | |||||
#endif // __cplusplus | |||||
#ifndef FALSE | |||||
#define FALSE 0 | |||||
#endif | |||||
#ifndef TRUE | |||||
#define TRUE 1 | |||||
#endif | |||||
#define EN_OK 0 | |||||
#define EN_ERR 1 | |||||
#define EN_ERROR (-1) | |||||
#define EN_INVALID_PARAM (-2) | |||||
#define EN_TIMEOUT (-3) | |||||
#define HANDLE_INVALID_VALUE (-1) | |||||
#define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||||
#define MMPA_MEM_MAX_LEN (0x7fffffff) | |||||
#define MMPA_PROCESS_ERROR (0x7fffffff) | |||||
#define MMPA_ONE_THOUSAND 1000 | |||||
#define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||||
#define SUMMER_TIME_OR_NOT (-1) | |||||
#define MMPA_ZERO 0 | |||||
#define MMPA_VALUE_ONE 1 | |||||
#define MMPA_SOCKET_MAIN_EDITION 2 | |||||
#define MMPA_SOCKET_SECOND_EDITION 0 | |||||
#define MMPA_PIPE_BUF_SIZE 1024 | |||||
#define MMPA_MAX_SCANDIR_COUNT 1024 | |||||
#define MAX_IOVEC_SIZE 32 | |||||
#define MMPA_PIPE_COUNT 2 | |||||
#define MMPA_THREADNAME_SIZE 16 | |||||
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||||
#define MMPA_MIN_OS_VERSION_SIZE 64 | |||||
#define MMPA_MAX_NI 19 | |||||
#define MMPA_MIDDLE_NI 5 | |||||
#define MMPA_LOW_NI (-5) | |||||
#define MMPA_MIN_NI (-20) | |||||
#define MMPA_MAX_FILE 128 | |||||
#define MMPA_MAX_THREAD_PIO 99 | |||||
#define MMPA_MIDDLE_THREAD_PIO 66 | |||||
#define MMPA_LOW_THREAD_PIO 33 | |||||
#define MMPA_MIN_THREAD_PIO 1 | |||||
#define MMPA_THREAD_SCHED_RR 0 | |||||
#define MMPA_THREAD_SCHED_FIFO 0 | |||||
#define MMPA_THREAD_SCHED_OTHER 0 | |||||
#define MMPA_THREAD_MIN_STACK_SIZE 0 | |||||
#define MM_MUTEX_INITIALIZER NULL | |||||
#ifdef __cplusplus | |||||
#if __cplusplus | |||||
} | |||||
#endif // __cplusplus | |||||
#endif // __cplusplus | |||||
#endif // MMPA_TYPEDEF_WIN_H | ||||| |
@@ -143,6 +143,74 @@ REG_OP(BatchNorm) | |||||
.OP_END_FACTORY_REG(BatchNorm) | .OP_END_FACTORY_REG(BatchNorm) | ||||
/** | /** | ||||
* @brief After the mean and reciprocal of standard deviation(invert_std) are separately calculated on each device, | |||||
* the mean and reciprocal of standard deviation(invert_std) data on each device are normalized, | |||||
* a total mean and reciprocal of standard deviation(invert_std) are returned, and running_var is updated. | |||||
* @par Inputs: | |||||
* include: | |||||
* @li mean_all: A Tensor. The mean of each device. Must be one of the following types: float16, float32. | |||||
* @li invert_std_all: A Tensor. Reciprocal of the variances of each device. Must be one of the following types: float16, float32. | |||||
* @li count_all: A Tensor. Number of data for each device. Must be one of the following types: float16, float32. | |||||
* @li mean_broadcast: A Tensor. The overall average and broadcast. Must be one of the following types: float16, float32. | |||||
* @li count_sum: A Tensor. General statistics. Must be one of the following types: float16, float32. | |||||
* @li running_var: A Tensor. Runtime variance. Must be one of the following types: float16, float32. \n | |||||
* @par Attributes: | |||||
* Two Attributes, including: | |||||
* @li momentum: An optional float. Defaults to 0.1 (the value registered by ATTR below). \n | |||||
* @li epsilon: An optional float. Defaults to 0.001 (the value registered by ATTR below). \n | |||||
* @par Outputs: | |||||
* include: | |||||
* @li invert_std: A Tensor. It's inverse of total variance. | |||||
* @li running_var_update: A Tensor. It's moving variance of each device after the update. \n | |||||
* @par Third-party framework compatibility | |||||
* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate | |||||
* compatible with the Pytorch operator BatchNormGatherStatsWithCounts. | |||||
*/ | |||||
REG_OP(SyncBatchNormGatherStatsWithCounts) | |||||
.INPUT(mean_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(invert_std_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(count_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(mean_broadcast, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(running_var, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(invert_std, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(running_var_update, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.ATTR(momentum, Float, 0.1) | |||||
.ATTR(epsilon, Float, 0.001) | |||||
.OP_END_FACTORY_REG(SyncBatchNormGatherStatsWithCounts) | |||||
/** | |||||
* @brief Update running_mean. | |||||
* @par Inputs: | |||||
* include: | |||||
* @li mean: A Tensor. The mean of each device. Must be one of the following types: float16, float32. | |||||
* @li running_mean: A Tensor. Runtime Mean. Must be one of the following types: float16, float32. \n | |||||
* @par Attributes: | |||||
* One Attribute, including: | |||||
* @li momentum: An optional float. Defaults to 0.1 (the value registered by ATTR below). \n | |||||
* @par Outputs: | |||||
* include: | |||||
* @li running_mean_update: A Tensor. It's moving mean of each device after the update. \n | |||||
* @par Third-party framework compatibility | |||||
* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate | |||||
* compatible with the Pytorch operator BatchNormGatherStatsWithCounts. | |||||
*/ | |||||
REG_OP(SyncBNTrainingUpdate) | |||||
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(running_mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(running_mean_update, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.ATTR(momentum, Float, 0.1) | |||||
.OP_END_FACTORY_REG(SyncBNTrainingUpdate) | |||||
/** | |||||
*@brief part of SyncBatchNormBackward . \n | *@brief part of SyncBatchNormBackward . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -516,6 +516,34 @@ REG_OP(ReduceSumD) | |||||
.OP_END_FACTORY_REG(ReduceSumD) | .OP_END_FACTORY_REG(ReduceSumD) | ||||
/** | /** | ||||
*@brief Calculate the total mean based on the mean of each device . \n | |||||
*@par Inputs: | |||||
* Three inputs, including: | |||||
*@li x: A Tensor. Must be one of the following types: float16, float32 . | |||||
*@li count: A Tensor. Must be one of the following types: float16, float32 . | |||||
*@li count_sum: A Tensor. Must be one of the following types: float16, float32 . \n | |||||
*@par Attributes: | |||||
*@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. | |||||
*@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||||
*@par Outputs: | |||||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the TensorFlow operator Sum. | |||||
* NOTE(review): this operator takes extra "count"/"count_sum" inputs, so the TensorFlow Sum reference looks copied from another reduce op — confirm. | |||||
*/ | |||||
REG_OP(ReduceMeanWithCount) | |||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(count, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||||
.REQUIRED_ATTR(axes, ListInt) | |||||
.ATTR(keep_dims, Bool, false) | |||||
.OP_END_FACTORY_REG(ReduceMeanWithCount) | |||||
/** | |||||
*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n | *@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -1363,6 +1391,64 @@ REG_OP(ReduceStdV2Update) | |||||
.ATTR(unbiased, Bool, true) | .ATTR(unbiased, Bool, true) | ||||
.ATTR(keepdim, Bool, false) | .ATTR(keepdim, Bool, false) | ||||
.OP_END_FACTORY_REG(ReduceStdV2Update) | .OP_END_FACTORY_REG(ReduceStdV2Update) | ||||
/** | |||||
*@brief Computes the log of the sum of exponentials of elements across dimensions of a tensor. | |||||
* Reduces "x" along the dimensions given in "axes". | |||||
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each | |||||
* entry in "axes". If "keep_dims" is true, the reduced dimensions | |||||
* are retained with length 1. | |||||
* | |||||
*@par Inputs: | |||||
* Two inputs, including: | |||||
*@li x: A Tensor. Must be one of the following types: | |||||
* float32, float16, int32, int64, uint32, uint64, double | |||||
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n | |||||
* | |||||
*@par Attributes: | |||||
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||||
* | |||||
*@par Outputs: | |||||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||||
* | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the ONNX operator ReduceLogSumExp. | |||||
*/ | |||||
REG_OP(ReduceLogSumExp) | |||||
.INPUT(x, TensorType::NumberType()) | |||||
.INPUT(axes, TensorType::IndexNumberType()) | |||||
.OUTPUT(y, TensorType::NumberType()) | |||||
.ATTR(keep_dims, Bool, false) | |||||
.OP_END_FACTORY_REG(ReduceLogSumExp) | |||||
/** | |||||
*@brief Computes the log of the sum of elements across dimensions of a tensor. | |||||
* Reduces "x" along the dimensions given in "axes". | |||||
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each | |||||
* entry in "axes". If "keep_dims" is true, the reduced dimensions | |||||
* are retained with length 1. | |||||
* | |||||
*@par Inputs: | |||||
* Two inputs, including: | |||||
*@li x: A Tensor. Must be one of the following types: | |||||
* float32, float16, int32, int64, uint32, uint64, double | |||||
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n | |||||
* | |||||
*@par Attributes: | |||||
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||||
* | |||||
*@par Outputs: | |||||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||||
* | |||||
*@par Third-party framework compatibility | |||||
* Compatible with the ONNX operator ReduceLogSum. | |||||
*/ | |||||
REG_OP(ReduceLogSum) | |||||
.INPUT(x, TensorType::NumberType()) | |||||
.INPUT(axes, TensorType::IndexNumberType()) | |||||
.OUTPUT(y, TensorType::NumberType()) | |||||
.ATTR(keep_dims, Bool, false) | |||||
.OP_END_FACTORY_REG(ReduceLogSum) | |||||
} //namespace ge | } //namespace ge | ||||
#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ | #endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ |
@@ -1,8 +1,17 @@ | |||||
/* | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||||
* Description: handle perf data | |||||
* Author: xp | |||||
* Create: 2019-10-13 | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | */ | ||||
#ifndef MSPROFILER_PROF_CALLBACK_H_ | #ifndef MSPROFILER_PROF_CALLBACK_H_ | ||||
@@ -0,0 +1,450 @@ | |||||
/* | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||||
* Description: handle perf data | |||||
* Author: Huawei Technologies Co., Ltd. | |||||
* Create: 2019-10-13 | |||||
*/ | |||||
#ifndef MSPROFILER_PROF_COMMON_H_ | |||||
#define MSPROFILER_PROF_COMMON_H_ | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif // __cplusplus | |||||
#include <stdint.h> | |||||
// Magic value used as the default for the magicNumber field of the report structs in this header. | |||||
#define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a | |||||
// Data tags, used as defaults for the dataTag field of the report structs; each reporting component owns a 20-value range. | |||||
enum MsprofDataTag { | |||||
MSPROF_ACL_DATA_TAG = 0, //acl data tag, range: 0~19 | |||||
MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, //ge data tag, range: 20~39 | |||||
MSPROF_GE_DATA_TAG_FUSION = 21, | |||||
MSPROF_GE_DATA_TAG_INFER = 22, | |||||
MSPROF_GE_DATA_TAG_TASK = 23, | |||||
MSPROF_GE_DATA_TAG_TENSOR = 24, | |||||
MSPROF_GE_DATA_TAG_STEP = 25, | |||||
MSPROF_GE_DATA_TAG_ID_MAP = 26, | |||||
MSPROF_GE_DATA_TAG_HOST_SCH = 27, | |||||
MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59 | |||||
MSPROF_RUNTIME_DATA_TAG_TRACK = 41, | |||||
MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79 | |||||
MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99 | |||||
MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119 | |||||
MSPROF_MSPROFTX_DATA_TAG = 120, //msproftx data tag, range: 120~139 | |||||
MSPROF_DATA_TAG_MAX = 65536, //data tag value type is uint16_t, so this is one past the largest storable tag (65535) | |||||
}; | |||||
/** | |||||
* @brief struct of mixed data: a type byte followed by either a hash id or an inline string | |||||
*/ | |||||
#define MSPROF_MIX_DATA_RESERVE_BYTES 7 | |||||
#define MSPROF_MIX_DATA_STRING_LEN 120 | |||||
// Values for MsprofMixData::type, selecting which member of the data union is meaningful. | |||||
enum MsprofMixDataType { | |||||
MSPROF_MIX_DATA_HASH_ID = 0, | |||||
MSPROF_MIX_DATA_STRING, | |||||
}; | |||||
// Field sizes sum to 1 + 7 + 120 = 128 bytes; rsv presumably keeps the union 8-byte aligned — TODO confirm. | |||||
struct MsprofMixData { | |||||
uint8_t type; // MsprofMixDataType | |||||
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; | |||||
union { | |||||
uint64_t hashId; | |||||
char dataStr[MSPROF_MIX_DATA_STRING_LEN]; | |||||
} data; | |||||
}; | |||||
// NOTE(review): `using` is C++-only syntax, so this header cannot be included from C even though it is wrapped in extern "C". | |||||
using MixData = struct MsprofMixData; | |||||
/** | |||||
* @brief profiling command info | |||||
*/ | |||||
#define MSPROF_MAX_DEV_NUM 64 | |||||
struct MsprofCommandHandle { | |||||
uint64_t profSwitch; | |||||
uint64_t profSwitchHi; | |||||
uint32_t devNums; // presumably the number of valid entries in devIdList — TODO confirm | |||||
uint32_t devIdList[MSPROF_MAX_DEV_NUM]; | |||||
uint32_t modelId; | |||||
uint32_t type; | |||||
}; | |||||
/** | |||||
* @brief struct of data reported by acl | |||||
*/ | |||||
#define MSPROF_ACL_DATA_RESERVE_BYTES 32 | |||||
#define MSPROF_ACL_API_NAME_LEN 64 | |||||
// Values for MsprofAclProfData::apiType; numbering starts at 1. | |||||
enum MsprofAclApiType { | |||||
MSPROF_ACL_API_TYPE_OP = 1, | |||||
MSPROF_ACL_API_TYPE_MODEL, | |||||
MSPROF_ACL_API_TYPE_RUNTIME, | |||||
MSPROF_ACL_API_TYPE_OTHERS, | |||||
}; | |||||
// Field sizes sum to 128 bytes, so reserve appears to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofAclProfData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_ACL_DATA_TAG; | |||||
uint32_t apiType; // enum MsprofAclApiType | |||||
uint64_t beginTime; | |||||
uint64_t endTime; | |||||
uint32_t processId; | |||||
uint32_t threadId; | |||||
char apiName[MSPROF_ACL_API_NAME_LEN]; | |||||
uint8_t reserve[MSPROF_ACL_DATA_RESERVE_BYTES]; | |||||
}; | |||||
/** | |||||
* @brief struct of data reported by GE | |||||
*/ | |||||
#define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104 | |||||
// Model-load record; field sizes sum to 256 bytes, so reserve appears to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofGeProfModelLoadData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD; | |||||
uint32_t modelId; | |||||
MixData modelName; | |||||
uint64_t startTime; | |||||
uint64_t endTime; | |||||
uint8_t reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES]; | |||||
}; | |||||
#define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8 | |||||
#define MSPROF_GE_FUSION_OP_NUM 8 | |||||
// GE fusion record; field sizes sum to 256 bytes, so reserve appears to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofGeProfFusionData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION; | |||||
uint32_t modelId; | |||||
MixData fusionName; | |||||
uint64_t inputMemSize; | |||||
uint64_t outputMemSize; | |||||
uint64_t weightMemSize; | |||||
uint64_t workspaceMemSize; | |||||
uint64_t totalMemSize; | |||||
uint64_t fusionOpNum; // presumably the number of valid entries in fusionOp — TODO confirm | |||||
uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM]; | |||||
uint8_t reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES]; | |||||
}; | |||||
#define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64 | |||||
// GE inference record with start/end timestamps for input data, inference and output data; field sizes sum to 256 bytes — TODO confirm fixed-size intent. | |||||
struct MsprofGeProfInferData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER; | |||||
uint32_t modelId; | |||||
MixData modelName; | |||||
uint32_t requestId; | |||||
uint32_t threadId; | |||||
uint64_t inputDataStartTime; | |||||
uint64_t inputDataEndTime; | |||||
uint64_t inferStartTime; | |||||
uint64_t inferEndTime; | |||||
uint64_t outputDataStartTime; | |||||
uint64_t outputDataEndTime; | |||||
uint8_t reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES]; | |||||
}; | |||||
#define MSPROF_GE_TASK_DATA_RESERVE_BYTES 16 | |||||
#define MSPROF_GE_OP_TYPE_LEN 56 | |||||
// Values for MsprofGeProfTaskData::taskType. | |||||
enum MsprofGeTaskType { | |||||
MSPROF_GE_TASK_TYPE_AI_CORE = 0, | |||||
MSPROF_GE_TASK_TYPE_AI_CPU, | |||||
MSPROF_GE_TASK_TYPE_AIV, | |||||
}; | |||||
// Values for MsprofGeProfTaskData::shapeType. | |||||
enum MsprofGeShapeType { | |||||
MSPROF_GE_SHAPE_TYPE_STATIC = 0, | |||||
MSPROF_GE_SHAPE_TYPE_DYNAMIC, | |||||
}; | |||||
// Same member layout as MsprofMixData but with a 56-byte string, so field sizes sum to 64 bytes. | |||||
struct MsprofGeOpType { | |||||
uint8_t type; // MsprofMixDataType | |||||
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; | |||||
union { | |||||
uint64_t hashId; | |||||
char dataStr[MSPROF_GE_OP_TYPE_LEN]; | |||||
} data; | |||||
}; | |||||
// GE task record; field sizes sum to 256 bytes, so reserve appears to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofGeProfTaskData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK; | |||||
uint32_t taskType; // MsprofGeTaskType | |||||
MixData opName; | |||||
MsprofGeOpType opType; | |||||
uint64_t curIterNum; | |||||
uint64_t timeStamp; | |||||
uint32_t shapeType; // MsprofGeShapeType | |||||
uint32_t blockDims; | |||||
uint32_t modelId; | |||||
uint32_t streamId; | |||||
uint32_t taskId; | |||||
uint32_t threadId; | |||||
uint8_t reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES]; | |||||
}; | |||||
#define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8 | |||||
#define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8 | |||||
#define MSPROF_GE_TENSOR_DATA_NUM 5 | |||||
// Values for MsprofGeTensorData::tensorType. | |||||
enum MsprofGeTensorType { | |||||
MSPROF_GE_TENSOR_TYPE_INPUT = 0, | |||||
MSPROF_GE_TENSOR_TYPE_OUTPUT, | |||||
}; | |||||
// Description of one tensor; shape holds at most MSPROF_GE_TENSOR_DATA_SHAPE_LEN dimensions. | |||||
struct MsprofGeTensorData { | |||||
uint32_t tensorType; // MsprofGeTensorType | |||||
uint32_t format; | |||||
uint32_t dataType; | |||||
uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN]; | |||||
}; | |||||
// GE tensor record carrying up to MSPROF_GE_TENSOR_DATA_NUM tensor descriptions; field sizes sum to 256 bytes — TODO confirm fixed-size intent. | |||||
struct MsprofGeProfTensorData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR; | |||||
uint32_t modelId; | |||||
uint64_t curIterNum; | |||||
uint32_t streamId; | |||||
uint32_t taskId; | |||||
uint32_t tensorNum; // presumably the number of valid entries in tensorData — TODO confirm | |||||
MsprofGeTensorData tensorData[MSPROF_GE_TENSOR_DATA_NUM]; | |||||
uint8_t reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES]; | |||||
}; | |||||
#define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27 | |||||
// Values for MsprofGeProfStepData::tag. | |||||
enum MsprofGeStepTag { | |||||
MSPROF_GE_STEP_TAG_BEGIN = 0, | |||||
MSPROF_GE_STEP_TAG_END, | |||||
}; | |||||
// GE step record; field sizes sum to 64 bytes, so reserve appears to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofGeProfStepData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP; | |||||
uint32_t modelId; | |||||
uint32_t streamId; | |||||
uint32_t taskId; | |||||
uint64_t timeStamp; | |||||
uint64_t curIterNum; | |||||
uint32_t threadId; | |||||
uint8_t tag; // MsprofGeStepTag | |||||
uint8_t reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES]; | |||||
}; | |||||
#define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6 | |||||
// GE record relating graphId, modelId and sessionId; field sizes sum to 32 bytes — TODO confirm fixed-size intent. | |||||
struct MsprofGeProfIdMapData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP; | |||||
uint32_t graphId; | |||||
uint32_t modelId; | |||||
uint32_t sessionId; | |||||
uint64_t timeStamp; | |||||
uint16_t mode; | |||||
uint8_t reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES]; | |||||
}; | |||||
#define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24 | |||||
// GE host-scheduling record; field sizes sum to 64 bytes, so reserve appears to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofGeProfHostSchData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH; | |||||
uint32_t threadId; // record in start event | |||||
uint64_t element; | |||||
uint64_t event; | |||||
uint64_t startTime; // record in start event | |||||
uint64_t endTime; // record in end event | |||||
uint8_t reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES]; | |||||
}; | |||||
/** | |||||
* @brief struct of data reported by RunTime | |||||
*/ | |||||
#define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106 | |||||
#define MSPROF_RUNTIME_TASK_ID_NUM 10 | |||||
#define MSPROF_RUNTIME_API_NAME_LEN 64 | |||||
// Runtime API record; field sizes sum to 256 bytes, so reserve appears to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofRuntimeProfApiData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API; | |||||
uint32_t threadId; | |||||
uint64_t entryTime; | |||||
uint64_t exitTime; | |||||
uint64_t dataSize; | |||||
uint8_t apiName[MSPROF_RUNTIME_API_NAME_LEN]; | |||||
uint32_t retCode; | |||||
uint32_t streamId; | |||||
uint32_t taskNum; // presumably the number of valid entries in taskId — TODO confirm | |||||
uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM]; | |||||
uint16_t memcpyDirection; | |||||
uint8_t reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES]; | |||||
}; | |||||
#define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10 | |||||
#define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32 | |||||
// Runtime track record; field sizes sum to 64 bytes, so reserve appears to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofRuntimeProfTrackData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK; | |||||
uint32_t threadId; | |||||
uint64_t timeStamp; | |||||
char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN]; | |||||
uint32_t taskId; | |||||
uint16_t streamId; // 16-bit here, whereas MsprofRuntimeProfApiData::streamId is 32-bit | |||||
uint8_t reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES]; | |||||
}; | |||||
/** | |||||
* @brief struct of data reported by AICPU | |||||
*/ | |||||
#define MSPROF_AICPU_DATA_RESERVE_BYTES 9 | |||||
// AICPU record; field sizes sum to 128 bytes, so reserve appears to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofAicpuProfData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_AICPU_DATA_TAG; | |||||
uint16_t streamId; | |||||
uint16_t taskId; | |||||
uint64_t runStartTime; | |||||
uint64_t runStartTick; | |||||
uint64_t computeStartTime; | |||||
uint64_t memcpyStartTime; | |||||
uint64_t memcpyEndTime; | |||||
uint64_t runEndTime; | |||||
uint64_t runEndTick; | |||||
uint32_t threadId; | |||||
uint32_t deviceId; | |||||
uint64_t submitTick; | |||||
uint64_t scheduleTick; | |||||
uint64_t tickBeforeRun; | |||||
uint64_t tickAfterRun; | |||||
uint32_t kernelType; | |||||
uint32_t dispatchTime; | |||||
uint32_t totalTime; | |||||
uint16_t fftsThreadId; | |||||
uint8_t version; | |||||
uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES]; | |||||
}; | |||||
/** | |||||
* @brief struct of data reported by DP | |||||
*/ | |||||
#define MSPROF_DP_DATA_RESERVE_BYTES 16 | |||||
#define MSPROF_DP_DATA_ACTION_LEN 16 | |||||
#define MSPROF_DP_DATA_SOURCE_LEN 64 | |||||
// DP record; field sizes sum to 128 bytes, so reserve appears to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofDpProfData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_DP_DATA_TAG; | |||||
uint32_t rsv; // Ensure 8-byte alignment | |||||
uint64_t timeStamp; | |||||
char action[MSPROF_DP_DATA_ACTION_LEN]; | |||||
char source[MSPROF_DP_DATA_SOURCE_LEN]; | |||||
uint64_t index; | |||||
uint64_t size; | |||||
uint8_t reserve[MSPROF_DP_DATA_RESERVE_BYTES]; | |||||
}; | |||||
/** | |||||
* @brief struct of data reported by HCCL | |||||
*/ | |||||
// 4-byte packing applies to every struct from here down to the matching "#pragma pack()", so 64-bit members may sit on 4-byte boundaries. | |||||
#pragma pack(4) | |||||
// Arguments of a notify task; one alternative of MsprofHcclProfData::args. | |||||
struct MsprofHcclProfNotify { | |||||
uint32_t taskID; | |||||
uint64_t notifyID; | |||||
uint32_t stage; | |||||
uint32_t remoteRank; | |||||
uint32_t transportType; | |||||
uint32_t role; // role {0: dst, 1:src} | |||||
double durationEstimated; | |||||
}; | |||||
// Arguments of a reduce task; one alternative of MsprofHcclProfData::args. | |||||
struct MsprofHcclProfReduce { | |||||
uint32_t taskID; | |||||
uint64_t src; | |||||
uint64_t dst; | |||||
uint64_t size; | |||||
uint32_t op; // {0: sum, 1: mul, 2: max, 3: min} | |||||
uint32_t dataType; // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64} | |||||
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} | |||||
uint32_t remoteRank; | |||||
// NOTE(review): this mapping lists 0 as SDMA, while MsprofHcclProfRDMA/MsprofHcclProfMemcpy list 0 as RDMA — confirm which is correct. | |||||
uint32_t transportType; // transport type {0: SDMA, 1: RDMA, 2:LOCAL} | |||||
uint32_t role; // role {0: dst, 1:src} | |||||
double durationEstimated; | |||||
}; | |||||
// Arguments of an RDMA task; one alternative of MsprofHcclProfData::args. | |||||
struct MsprofHcclProfRDMA { | |||||
uint32_t taskID; | |||||
uint64_t src; | |||||
uint64_t dst; | |||||
uint64_t size; | |||||
uint64_t notifyID; | |||||
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} | |||||
uint32_t remoteRank; | |||||
uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL} | |||||
uint32_t role; // role {0: dst, 1:src} | |||||
uint32_t type; // RDMA type {0: RDMASendNotify, 1:RDMASendPayload} | |||||
double durationEstimated; | |||||
}; | |||||
// Arguments of a memcpy task; one alternative of MsprofHcclProfData::args. | |||||
struct MsprofHcclProfMemcpy { | |||||
uint32_t taskID; | |||||
uint64_t src; | |||||
uint64_t dst; | |||||
uint64_t size; | |||||
uint64_t notifyID; | |||||
uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'} | |||||
uint32_t remoteRank; | |||||
uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL} | |||||
uint32_t role; // role {0: dst, 1:src} | |||||
double durationEstimated; | |||||
}; | |||||
// Rank information; one alternative of MsprofHcclProfData::args. | |||||
struct MsprofHcclProfStageStep { | |||||
uint32_t rank; | |||||
uint32_t rankSize; | |||||
}; | |||||
// Flag information; one alternative of MsprofHcclProfData::args. cclTag and groupName are 64-bit integers, presumably hash ids — TODO confirm. | |||||
struct MsprofHcclProfFlag { | |||||
uint64_t cclTag; | |||||
uint64_t groupName; | |||||
uint32_t localRank; | |||||
uint32_t workFlowMode; | |||||
}; | |||||
/** | |||||
* @name MsprofHcclProfData | |||||
* @brief struct of data reported by hccl | |||||
*/ | |||||
struct MsprofHcclProfData { | |||||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||||
uint16_t dataTag = MSPROF_HCCL_DATA_TAG; | |||||
uint32_t planeID; | |||||
uint32_t deviceID; | |||||
uint32_t streamID; | |||||
double ts; | |||||
char name[16]; | |||||
// No discriminator field for this union is declared in the struct; presumably name identifies the active member — TODO confirm. | |||||
union { | |||||
MsprofHcclProfNotify notify; | |||||
MsprofHcclProfReduce reduce; | |||||
MsprofHcclProfStageStep stageStep; | |||||
MsprofHcclProfMemcpy forMemcpy; | |||||
MsprofHcclProfRDMA RDMA; | |||||
MsprofHcclProfFlag flag; | |||||
} args; | |||||
}; | |||||
#pragma pack() | |||||
/** | |||||
* @name MsprofStampInfo | |||||
* @brief struct of data reported by msproftx | |||||
*/ | |||||
// Unlike the other report structs in this header, magicNumber and dataTag have no default initializers here, so the producer must set them. | |||||
// Field sizes sum to 256 bytes, so the reserve arrays appear to pad the record to a fixed size — TODO confirm. | |||||
struct MsprofStampInfo { | |||||
uint16_t magicNumber; | |||||
uint16_t dataTag; | |||||
uint32_t processId; | |||||
uint32_t threadId; | |||||
uint32_t category; //marker category | |||||
uint32_t eventType; | |||||
int32_t payloadType; // presumably selects which PayloadValue member is valid — TODO confirm | |||||
union PayloadValue //payload info for marker | |||||
{ | |||||
uint64_t ullValue; | |||||
int64_t llValue; | |||||
double dValue; | |||||
uint32_t uiValue[2]; | |||||
int32_t iValue[2]; | |||||
float fValue[2]; | |||||
} payload; | |||||
uint64_t startTime; | |||||
uint64_t endTime; | |||||
int32_t messageType; | |||||
char message[128]; | |||||
uint8_t reserve0[4]; | |||||
uint8_t reserve1[72]; | |||||
}; | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif // MSPROFILER_PROF_COMMON_H_ |