From 82e6f4774f05078820445232eca776d5f4866649 Mon Sep 17 00:00:00 2001
From: yanghaoran <yanghaoran2@huawei.com>
Date: Thu, 23 Dec 2021 19:21:13 +0800
Subject: [PATCH] upgrade Ascend package 23 Dec 21

---
 inc/framework/common/profiling_definitions.h       |  12 +-
 inc/framework/omg/model_tool.h                     |  35 ++
 metadef                                            |   2 +-
 third_party/fwkacllib/inc/cce/fwk_adpt_struct.h    |   6 +-
 .../inc/mmpa/sub_inc/mmpa_typedef_linux.h          |   3 -
 .../fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h  | 169 ++++----
 third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h  |  68 ++++
 third_party/fwkacllib/inc/ops/reduce_ops.h         |  86 ++++
 .../fwkacllib/inc/toolchain/prof_callback.h        |  19 +-
 third_party/fwkacllib/inc/toolchain/prof_common.h  | 450 +++++++++++++++++++++
 10 files changed, 751 insertions(+), 99 deletions(-)
 create mode 100644 inc/framework/omg/model_tool.h
 create mode 100644 third_party/fwkacllib/inc/toolchain/prof_common.h
diff --git a/inc/framework/common/profiling_definitions.h b/inc/framework/common/profiling_definitions.h
index e6da4ca8..81a50f9d 100644
--- a/inc/framework/common/profiling_definitions.h
+++ b/inc/framework/common/profiling_definitions.h
@@ -22,6 +22,8 @@
 #include <mutex>
 #include <unordered_map>
 #include "graph/profiler.h"
+#include "external/ge/ge_api_types.h"
+#include "toolchain/prof_callback.h"
 namespace ge {
 namespace profiling {
 enum {
@@ -46,6 +48,7 @@ enum {
   kCopyH2D,
   kProfilingIndexEnd
 };
+constexpr uint64_t kInvalidHashId = 0ULL;
 
 class ProfilingContext {
  public:
@@ -100,9 +103,16 @@ class ProfilingContext {
   }
 
   int64_t RegisterString(const std::string &str);
+  int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str);
+  void UpdateElementHashId(MsprofReporterCallback reporter_callback);
+  static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str,
+                            uint64_t &hash_id);
+  size_t GetRegisterStringNum() const {
+    return strings_to_index_.size();
+  }
 
  private:
-  void RegisterString(int64_t index, const std::string &str);
+  void UpdateHashByStr(const std::string &str, const uint64_t hash);
   void Init();
 
  private:
diff --git a/inc/framework/omg/model_tool.h b/inc/framework/omg/model_tool.h
new file mode 100644
index 00000000..24554e65
--- /dev/null
+++ b/inc/framework/omg/model_tool.h
@@ -0,0 +1,35 @@
+/**
+ * Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INC_FRAMEWORK_OMG_MODEL_TOOL_H_
+#define INC_FRAMEWORK_OMG_MODEL_TOOL_H_
+
+#include <memory>
+#include <string>
+
+#include "framework/common/debug/ge_log.h"
+#include "proto/ge_ir.pb.h"
+
+namespace ge {
+class GE_FUNC_VISIBILITY ModelTool {
+ public:
+  static Status GetModelInfoFromOm(const char *model_file, ge::proto::ModelDef &model_def, uint32_t &modeldef_size);
+
+  static Status GetModelInfoFromPbtxt(const char *model_file, ge::proto::ModelDef &model_def);
+};
+}  // namespace ge
+
+#endif  // INC_FRAMEWORK_OMG_MODEL_TOOL_H_
diff --git a/metadef b/metadef
index 2659f49d..dc5ac26a 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 2659f49dcb14c0773e10e17ee9896b7be4d8e7be
+Subproject commit dc5ac26aac4c49b4e72cd91d4e6d6a57bbe03af4
diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
index ec92a036..5255383f 100644
--- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
+++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
@@ -145,9 +145,9 @@ struct ResultSummary {
 
 #pragma pack(push, 1)
 struct AsyncWait {
-  uint8_t waitType;  // wait type, FWk_ADPT_WAIT_TPYE_EVENT: event wait
-  uint32_t waitId;  // wait id, GE refresh
-  uint32_t timeOut;  // reserved
+  uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait
+  uint32_t waitId; // wait id, GE refresh
+  uint32_t timeOut; // reserved
   uint64_t reserved;
 };
 #pragma pack(pop)
diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h
index 9c6f6499..9df5b9ce 100644
--- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h
+++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h
@@ -79,9 +79,6 @@ typedef long LONG;
 #define MMPA_THREAD_SCHED_OTHER SCHED_OTHER
 #define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN
 
-#define MMPA_PATH_SEPARATOR_STR "/"
-#define MMPA_PATH_SEPARATOR_CHAR '/'
-
 #define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
 
 #define MMPA_MAX_NI 19
diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h
index 9f8a72cd..1627d7a9 100644
--- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h
+++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h
@@ -1,86 +1,83 @@
-﻿/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MMPA_TYPEDEF_WIN_H
-#define MMPA_TYPEDEF_WIN_H
-
-#ifdef __cplusplus
-#if __cplusplus
-extern "C" {
-#endif  // __cpluscplus
-#endif  // __cpluscplus
-
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-
-#define EN_OK 0
-#define EN_ERR 1
-#define EN_ERROR (-1)
-#define EN_INVALID_PARAM (-2)
-#define EN_TIMEOUT (-3)
-
-#define HANDLE_INVALID_VALUE (-1)
-#define INVALID_SOCKET_HANDLE INVALID_SOCKET
-#define MMPA_MEM_MAX_LEN (0x7fffffff)
-#define MMPA_PROCESS_ERROR (0x7fffffff)
-
-#define MMPA_ONE_THOUSAND 1000
-#define MMPA_COMPUTER_BEGIN_YEAR 1900
-#define SUMMER_TIME_OR_NOT (-1)
-#define MMPA_ZERO 0
-#define MMPA_VALUE_ONE 1
-#define MMPA_SOCKET_MAIN_EDITION 2
-#define MMPA_SOCKET_SECOND_EDITION 0
-#define MMPA_PIPE_BUF_SIZE 1024
-#define MMPA_MAX_SCANDIR_COUNT 1024
-#define MAX_IOVEC_SIZE 32
-#define MMPA_PIPE_COUNT 2
-#define MMPA_THREADNAME_SIZE 16
-#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1)
-#define MMPA_MIN_OS_VERSION_SIZE 64
-
-#define MMPA_MAX_NI 19
-#define MMPA_MIDDLE_NI 5
-#define MMPA_LOW_NI (-5)
-#define MMPA_MIN_NI (-20)
-#define MMPA_MAX_FILE 128
-
-#define MMPA_PATH_SEPARATOR_STR "\\"
-#define MMPA_PATH_SEPARATOR_CHAR '\\'
-
-#define MMPA_MAX_THREAD_PIO 99
-#define MMPA_MIDDLE_THREAD_PIO 66
-#define MMPA_LOW_THREAD_PIO 33
-#define MMPA_MIN_THREAD_PIO 1
-
-#define MMPA_THREAD_SCHED_RR 0
-#define MMPA_THREAD_SCHED_FIFO 0
-#define MMPA_THREAD_SCHED_OTHER 0
-#define MMPA_THREAD_MIN_STACK_SIZE 0
-
-#define MM_MUTEX_INITIALIZER NULL
-
-#ifdef __cplusplus
-#if __cplusplus
-}
-#endif  // __cpluscplus
-#endif  // __cpluscplus
-#endif  // _MMPA_TYPEDEF_WIN_H_
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MMPA_TYPEDEF_WIN_H
+#define MMPA_TYPEDEF_WIN_H
+
+#ifdef __cplusplus
+#if __cplusplus
+extern "C" {
+#endif  // __cplusplus
+#endif  // __cplusplus
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#define EN_OK 0
+#define EN_ERR 1
+#define EN_ERROR (-1)
+#define EN_INVALID_PARAM (-2)
+#define EN_TIMEOUT (-3)
+
+#define HANDLE_INVALID_VALUE (-1)
+#define INVALID_SOCKET_HANDLE INVALID_SOCKET
+#define MMPA_MEM_MAX_LEN (0x7fffffff)
+#define MMPA_PROCESS_ERROR (0x7fffffff)
+
+#define MMPA_ONE_THOUSAND 1000
+#define MMPA_COMPUTER_BEGIN_YEAR 1900
+#define SUMMER_TIME_OR_NOT (-1)
+#define MMPA_ZERO 0
+#define MMPA_VALUE_ONE 1
+#define MMPA_SOCKET_MAIN_EDITION 2
+#define MMPA_SOCKET_SECOND_EDITION 0
+#define MMPA_PIPE_BUF_SIZE 1024
+#define MMPA_MAX_SCANDIR_COUNT 1024
+#define MAX_IOVEC_SIZE 32
+#define MMPA_PIPE_COUNT 2
+#define MMPA_THREADNAME_SIZE 16
+#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1)
+#define MMPA_MIN_OS_VERSION_SIZE 64
+
+#define MMPA_MAX_NI 19
+#define MMPA_MIDDLE_NI 5
+#define MMPA_LOW_NI (-5)
+#define MMPA_MIN_NI (-20)
+#define MMPA_MAX_FILE 128
+
+#define MMPA_MAX_THREAD_PIO 99
+#define MMPA_MIDDLE_THREAD_PIO 66
+#define MMPA_LOW_THREAD_PIO 33
+#define MMPA_MIN_THREAD_PIO 1
+
+#define MMPA_THREAD_SCHED_RR 0
+#define MMPA_THREAD_SCHED_FIFO 0
+#define MMPA_THREAD_SCHED_OTHER 0
+#define MMPA_THREAD_MIN_STACK_SIZE 0
+
+#define MM_MUTEX_INITIALIZER NULL
+
+#ifdef __cplusplus
+#if __cplusplus
+}
+#endif  // __cplusplus
+#endif  // __cplusplus
+#endif  // MMPA_TYPEDEF_WIN_H
diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
index 398c6568..7a28a738 100644
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -143,6 +143,74 @@ REG_OP(BatchNorm)
     .OP_END_FACTORY_REG(BatchNorm)
 
 /**
+* @brief After the mean and reciprocal of standard deviation(invert_std) are separately calculated on each device,
+* the mean and reciprocal of standard deviation(invert_std) data on each device are normalized,
+* a total mean and reciprocal of standard deviation(invert_std) are returned, and running_var are updated.
+
+* @par Inputs:
+* include:
+* @li mean_all: A Tensor. The mean of each device. Must be one of the following types: float16, float32.
+* @li invert_std_all: A Tensor. Reciprocal of the variances of each device. Must be one of the following types: float16, float32.
+* @li count_all: A Tensor. Number of data for each device. Must be one of the following types: float16, float32.
+* @li mean_broadcast: A Tensor. The overall average and broadcast. Must be one of the following types: float16, float32.
+* @li count_sum: A Tensor. General statistics. Must be one of the following types: float16, float32.
+* @li running_var: A Tensor. Runtime variance. Must be one of the following types: float16, float32. \n
+
+* @par Attributes:
+* Two Attributes, including:
+* @li momentum: An optional float. Defaults to 0.1. \n
+* @li epsilon: An optional float. Defaults to 0.001. \n
+
+* @par Outputs:
+* include:
+* @li invert_std: A Tensor. It's inverse of total variance.
+* @li running_var_update: A Tensor. It's moving variance of each device after the update. \n
+
+* @par Third-party framework compatibility
+* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate
+* compatible with the Pytorch operator BatchNormGatherStatsWithCounts.
+*/
+REG_OP(SyncBatchNormGatherStatsWithCounts)
+    .INPUT(mean_all, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(invert_std_all, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count_all, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean_broadcast, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(running_var, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(invert_std, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(running_var_update, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(momentum, Float, 0.1)
+    .ATTR(epsilon, Float, 0.001)
+    .OP_END_FACTORY_REG(SyncBatchNormGatherStatsWithCounts)
+
+/**
+* @brief update running_mean.
+
+* @par Inputs:
+* include:
+* @li mean: A Tensor. The mean of each device. Must be one of the following types: float16, float32.
+* @li running_mean: A Tensor. Runtime Mean. Must be one of the following types: float16, float32. \n
+
+* @par Attributes:
+* One Attribute, including:
+* @li momentum: An optional float. Defaults to 0.1. \n
+
+* @par Outputs:
+* include:
+* @li running_mean_update: A Tensor. It's moving mean of each device after the update. \n
+
+* @par Third-party framework compatibility
+* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate
+* compatible with the Pytorch operator BatchNormGatherStatsWithCounts.
+*/
+REG_OP(SyncBNTrainingUpdate)
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(running_mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(running_mean_update, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(momentum, Float, 0.1)
+    .OP_END_FACTORY_REG(SyncBNTrainingUpdate)
+
+/**
 *@brief part of SyncBatchNormBackward . \n
 
 *@par Inputs:
diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h
index a273953c..78a423b5 100644
--- a/third_party/fwkacllib/inc/ops/reduce_ops.h
+++ b/third_party/fwkacllib/inc/ops/reduce_ops.h
@@ -516,6 +516,34 @@ REG_OP(ReduceSumD)
     .OP_END_FACTORY_REG(ReduceSumD)
 
 /**
+*@brief Calculate the total mean based on the mean of each device . \n
+
+*@par Inputs:
+* Three inputs, including:
+*@li x: A Tensor. Must be one of the following types: float16, float32 .
+*@li count: A Tensor. Must be one of the following types: float16, float32 .
+*@li count_sum: A Tensor. Must be one of the following types: float16, float32 . \n
+
+*@par Attributes:
+*@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce.
+*@li keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n
+
+*@par Outputs:
+*y: The reduced tensor. Has the same type and format as input "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operator Sum.
+*/
+REG_OP(ReduceMeanWithCount)
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(axes, ListInt)
+    .ATTR(keep_dims, Bool, false)
+    .OP_END_FACTORY_REG(ReduceMeanWithCount)
+
+/**
 *@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n
 
 *@par Inputs:
@@ -1363,6 +1391,64 @@ REG_OP(ReduceStdV2Update)
     .ATTR(unbiased, Bool, true)
     .ATTR(keepdim, Bool, false)
     .OP_END_FACTORY_REG(ReduceStdV2Update)
+
+/**
+*@brief Computes the log and sum and exp of elements across dimensions of a tensor.
+* Reduces "x" along the dimensions given in "axes".
+* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each
+* entry in "axes". If "keep_dims" is true, the reduced dimensions
+* are retained with length 1.
+*
+*@par Inputs:
+* Two inputs, including:
+*@li x: A Tensor. Must be one of the following types:
+*     float32, float16, int32, int64, uint32, uint64, double
+*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n
+*
+*@par Attributes:
+*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n
+*
+*@par Outputs:
+*y: The reduced tensor. Has the same type and format as input "x" . \n
+*
+*@par Third-party framework compatibility
+* Compatible with the Onnx operator ReduceLogSumExp.
+*/
+REG_OP(ReduceLogSumExp)
+    .INPUT(x, TensorType::NumberType())
+    .INPUT(axes, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::NumberType())
+    .ATTR(keep_dims, Bool, false)
+    .OP_END_FACTORY_REG(ReduceLogSumExp)
+
+/**
+*@brief Computes the log and sum of elements across dimensions of a tensor.
+* Reduces "x" along the dimensions given in "axes".
+* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each
+* entry in "axes". If "keep_dims" is true, the reduced dimensions
+* are retained with length 1.
+*
+*@par Inputs:
+* Two inputs, including:
+*@li x: A Tensor. Must be one of the following types:
+*     float32, float16, int32, int64, uint32, uint64, double
+*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n
+*
+*@par Attributes:
+*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n
+*
+*@par Outputs:
+*y: The reduced tensor. Has the same type and format as input "x" . \n
+*
+*@par Third-party framework compatibility
+* Compatible with the Onnx operator ReduceLogSum.
+*/
+REG_OP(ReduceLogSum)
+    .INPUT(x, TensorType::NumberType())
+    .INPUT(axes, TensorType::IndexNumberType())
+    .OUTPUT(y, TensorType::NumberType())
+    .ATTR(keep_dims, Bool, false)
+    .OP_END_FACTORY_REG(ReduceLogSum)
 } //namespace ge
 
 #endif  // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_
diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h
index 6398d075..cccf76dc 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_callback.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h
@@ -1,8 +1,17 @@
-/*
- * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
- * Description: handle perf data
- * Author: xp
- * Create: 2019-10-13
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 #ifndef MSPROFILER_PROF_CALLBACK_H_
diff --git a/third_party/fwkacllib/inc/toolchain/prof_common.h b/third_party/fwkacllib/inc/toolchain/prof_common.h
new file mode 100644
index 00000000..93a4cff9
--- /dev/null
+++ b/third_party/fwkacllib/inc/toolchain/prof_common.h
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved.
+ * Description: handle perf data
+ * Author: Huawei Technologies Co., Ltd.
+ * Create: 2019-10-13
+ */
+#ifndef MSPROFILER_PROF_COMMON_H_
+#define MSPROFILER_PROF_COMMON_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#include <stdint.h>
+
+#define MSPROF_DATA_HEAD_MAGIC_NUM  0x5a5a
+
+enum MsprofDataTag {
+    MSPROF_ACL_DATA_TAG = 0,            //acl data tag, range: 0~19
+    MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, //ge data tag, range: 20~39
+    MSPROF_GE_DATA_TAG_FUSION = 21,
+    MSPROF_GE_DATA_TAG_INFER = 22,
+    MSPROF_GE_DATA_TAG_TASK = 23,
+    MSPROF_GE_DATA_TAG_TENSOR = 24,
+    MSPROF_GE_DATA_TAG_STEP = 25,
+    MSPROF_GE_DATA_TAG_ID_MAP = 26,
+    MSPROF_GE_DATA_TAG_HOST_SCH = 27,
+    MSPROF_RUNTIME_DATA_TAG_API = 40,   //runtime data tag, range: 40~59
+    MSPROF_RUNTIME_DATA_TAG_TRACK = 41,
+    MSPROF_AICPU_DATA_TAG = 60,         //aicpu data tag, range: 60~79
+    MSPROF_HCCL_DATA_TAG = 80,          //hccl data tag, range: 80~99
+    MSPROF_DP_DATA_TAG = 100,           //dp data tag, range: 100~119
+    MSPROF_MSPROFTX_DATA_TAG = 120,     //msproftx data tag, range: 120~139
+    MSPROF_DATA_TAG_MAX = 65536,        //data tag value type is uint16_t; NOTE(review): 65536 exceeds the uint16_t maximum (65535) - confirm it is meant as an exclusive upper bound
+};
+
+/**
+ * @brief struct of mixed data
+ */
+#define MSPROF_MIX_DATA_RESERVE_BYTES 7
+#define MSPROF_MIX_DATA_STRING_LEN 120
+enum MsprofMixDataType {
+    MSPROF_MIX_DATA_HASH_ID = 0,
+    MSPROF_MIX_DATA_STRING,
+};
+struct MsprofMixData {
+    uint8_t type;  // MsprofMixDataType
+    uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES];
+    union {
+        uint64_t hashId;
+        char dataStr[MSPROF_MIX_DATA_STRING_LEN];
+    } data;
+};
+using MixData = struct MsprofMixData;
+
+/**
+ * @brief profiling command info
+ */
+#define MSPROF_MAX_DEV_NUM 64
+struct MsprofCommandHandle {
+    uint64_t profSwitch;
+    uint64_t profSwitchHi;
+    uint32_t devNums;
+    uint32_t devIdList[MSPROF_MAX_DEV_NUM];
+    uint32_t modelId;
+    uint32_t type;
+};
+
+/**
+ * @brief struct of data reported by acl
+ */
+#define MSPROF_ACL_DATA_RESERVE_BYTES 32
+#define MSPROF_ACL_API_NAME_LEN 64
+enum MsprofAclApiType {
+    MSPROF_ACL_API_TYPE_OP = 1,
+    MSPROF_ACL_API_TYPE_MODEL,
+    MSPROF_ACL_API_TYPE_RUNTIME,
+    MSPROF_ACL_API_TYPE_OTHERS,
+};
+struct MsprofAclProfData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_ACL_DATA_TAG;
+    uint32_t apiType;       // enum MsprofAclApiType
+    uint64_t beginTime;
+    uint64_t endTime;
+    uint32_t processId;
+    uint32_t threadId;
+    char apiName[MSPROF_ACL_API_NAME_LEN];
+    uint8_t  reserve[MSPROF_ACL_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by GE
+ */
+#define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104
+struct MsprofGeProfModelLoadData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD;
+    uint32_t modelId;
+    MixData  modelName;
+    uint64_t startTime;
+    uint64_t endTime;
+    uint8_t  reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8
+#define MSPROF_GE_FUSION_OP_NUM 8
+struct MsprofGeProfFusionData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION;
+    uint32_t modelId;
+    MixData  fusionName;
+    uint64_t inputMemSize;
+    uint64_t outputMemSize;
+    uint64_t weightMemSize;
+    uint64_t workspaceMemSize;
+    uint64_t totalMemSize;
+    uint64_t fusionOpNum;
+    uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM];
+    uint8_t  reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64
+struct MsprofGeProfInferData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER;
+    uint32_t modelId;
+    MixData  modelName;
+    uint32_t requestId;
+    uint32_t threadId;
+    uint64_t inputDataStartTime;
+    uint64_t inputDataEndTime;
+    uint64_t inferStartTime;
+    uint64_t inferEndTime;
+    uint64_t outputDataStartTime;
+    uint64_t outputDataEndTime;
+    uint8_t  reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_TASK_DATA_RESERVE_BYTES 16
+#define MSPROF_GE_OP_TYPE_LEN 56
+enum MsprofGeTaskType {
+    MSPROF_GE_TASK_TYPE_AI_CORE = 0,
+    MSPROF_GE_TASK_TYPE_AI_CPU,
+    MSPROF_GE_TASK_TYPE_AIV,
+};
+enum MsprofGeShapeType {
+    MSPROF_GE_SHAPE_TYPE_STATIC = 0,
+    MSPROF_GE_SHAPE_TYPE_DYNAMIC,
+};
+struct MsprofGeOpType {
+    uint8_t type;  // MsprofMixDataType
+    uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES];
+    union {
+        uint64_t hashId;
+        char dataStr[MSPROF_GE_OP_TYPE_LEN];
+    } data;
+};
+struct MsprofGeProfTaskData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK;
+    uint32_t taskType;      // MsprofGeTaskType
+    MixData  opName;
+    MsprofGeOpType opType;
+    uint64_t curIterNum;
+    uint64_t timeStamp;
+    uint32_t shapeType;     // MsprofGeShapeType
+    uint32_t blockDims;
+    uint32_t modelId;
+    uint32_t streamId;
+    uint32_t taskId;
+    uint32_t threadId;
+    uint8_t  reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8
+#define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8
+#define MSPROF_GE_TENSOR_DATA_NUM 5
+enum MsprofGeTensorType {
+    MSPROF_GE_TENSOR_TYPE_INPUT = 0,
+    MSPROF_GE_TENSOR_TYPE_OUTPUT,
+};
+struct MsprofGeTensorData {
+    uint32_t tensorType;    // MsprofGeTensorType
+    uint32_t format;
+    uint32_t dataType;
+    uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN];
+};
+
+struct MsprofGeProfTensorData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR;
+    uint32_t modelId;
+    uint64_t curIterNum;
+    uint32_t streamId;
+    uint32_t taskId;
+    uint32_t tensorNum;
+    MsprofGeTensorData tensorData[MSPROF_GE_TENSOR_DATA_NUM];
+    uint8_t  reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27
+enum MsprofGeStepTag {
+    MSPROF_GE_STEP_TAG_BEGIN = 0,
+    MSPROF_GE_STEP_TAG_END,
+};
+struct MsprofGeProfStepData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP;
+    uint32_t modelId;
+    uint32_t streamId;
+    uint32_t taskId;
+    uint64_t timeStamp;
+    uint64_t curIterNum;
+    uint32_t threadId;
+    uint8_t  tag;           // MsprofGeStepTag
+    uint8_t  reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6
+struct MsprofGeProfIdMapData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP;
+    uint32_t graphId;
+    uint32_t modelId;
+    uint32_t sessionId;
+    uint64_t timeStamp;
+    uint16_t mode;
+    uint8_t  reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24
+struct MsprofGeProfHostSchData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH;
+    uint32_t threadId;      // record in start event
+    uint64_t element;
+    uint64_t event;
+    uint64_t startTime;     // record in start event
+    uint64_t endTime;       // record in end event
+    uint8_t  reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by RunTime
+ */
+#define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106
+#define MSPROF_RUNTIME_TASK_ID_NUM 10
+#define MSPROF_RUNTIME_API_NAME_LEN 64
+struct MsprofRuntimeProfApiData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API;
+    uint32_t threadId;
+    uint64_t entryTime;
+    uint64_t exitTime;
+    uint64_t dataSize;
+    uint8_t  apiName[MSPROF_RUNTIME_API_NAME_LEN];
+    uint32_t retCode;
+    uint32_t streamId;
+    uint32_t taskNum;
+    uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM];
+    uint16_t memcpyDirection;
+    uint8_t  reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES];
+};
+
+#define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10
+#define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32
+struct MsprofRuntimeProfTrackData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK;
+    uint32_t threadId;
+    uint64_t timeStamp;
+    char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN];
+    uint32_t taskId;
+    uint16_t streamId;
+    uint8_t  reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by AICPU
+ */
+#define MSPROF_AICPU_DATA_RESERVE_BYTES 9
+struct MsprofAicpuProfData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_AICPU_DATA_TAG;
+    uint16_t streamId;
+    uint16_t taskId;
+    uint64_t runStartTime;
+    uint64_t runStartTick;
+    uint64_t computeStartTime;
+    uint64_t memcpyStartTime;
+    uint64_t memcpyEndTime;
+    uint64_t runEndTime;
+    uint64_t runEndTick;
+    uint32_t threadId;
+    uint32_t deviceId;
+    uint64_t submitTick;
+    uint64_t scheduleTick;
+    uint64_t tickBeforeRun;
+    uint64_t tickAfterRun;
+    uint32_t kernelType;
+    uint32_t dispatchTime;
+    uint32_t totalTime;
+    uint16_t fftsThreadId;
+    uint8_t  version;
+    uint8_t  reserve[MSPROF_AICPU_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by DP
+ */
+#define MSPROF_DP_DATA_RESERVE_BYTES 16
+#define MSPROF_DP_DATA_ACTION_LEN 16
+#define MSPROF_DP_DATA_SOURCE_LEN 64
+struct MsprofDpProfData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_DP_DATA_TAG;
+    uint32_t rsv;   // Ensure 8-byte alignment
+    uint64_t timeStamp;
+    char action[MSPROF_DP_DATA_ACTION_LEN];
+    char source[MSPROF_DP_DATA_SOURCE_LEN];
+    uint64_t index;
+    uint64_t size;
+    uint8_t  reserve[MSPROF_DP_DATA_RESERVE_BYTES];
+};
+
+/**
+ * @brief struct of data reported by HCCL
+ */
+#pragma pack(4)
+struct MsprofHcclProfNotify {
+    uint32_t taskID;
+    uint64_t notifyID;
+    uint32_t stage;
+    uint32_t remoteRank;
+    uint32_t transportType;
+    uint32_t role; // role {0: dst, 1:src}
+    double durationEstimated;
+};
+
+struct MsprofHcclProfReduce {
+    uint32_t taskID;
+    uint64_t src;
+    uint64_t dst;
+    uint64_t size;
+    uint32_t op;       // {0: sum, 1: mul, 2: max, 3: min}
+    uint32_t dataType; // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64}
+    uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
+    uint32_t remoteRank;
+    uint32_t transportType; //  transport type {0: SDMA, 1: RDMA, 2:LOCAL}; NOTE(review): MsprofHcclProfRDMA and MsprofHcclProfMemcpy document {0: RDMA, 1:SDMA} - confirm which order is correct
+    uint32_t role;          // role {0: dst, 1:src}
+    double durationEstimated;
+};
+
+struct MsprofHcclProfRDMA {
+    uint32_t taskID;
+    uint64_t src;
+    uint64_t dst;
+    uint64_t size;
+    uint64_t notifyID;
+    uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
+    uint32_t remoteRank;
+    uint32_t transportType; //  transport type {0: RDMA, 1:SDMA, 2:LOCAL}
+    uint32_t role;          // role {0: dst, 1:src}
+    uint32_t type;          // RDMA type {0: RDMASendNotify, 1:RDMASendPayload}
+    double durationEstimated;
+};
+
+struct MsprofHcclProfMemcpy {
+    uint32_t taskID;
+    uint64_t src;
+    uint64_t dst;
+    uint64_t size;
+    uint64_t notifyID;
+    uint32_t linkType; // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
+    uint32_t remoteRank;
+    uint32_t transportType; // transport type {0: RDMA, 1:SDMA, 2:LOCAL}
+    uint32_t role;          // role {0: dst, 1:src}
+    double durationEstimated;
+};
+
+struct MsprofHcclProfStageStep {
+    uint32_t rank;
+    uint32_t rankSize;
+};
+
+struct MsprofHcclProfFlag {
+    uint64_t cclTag;
+    uint64_t groupName;
+    uint32_t localRank;
+    uint32_t workFlowMode;
+};
+
+/**
+ * @name MsprofHcclProfData
+ * @brief struct of data reported by hccl
+ */
+struct MsprofHcclProfData {
+    uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM;
+    uint16_t dataTag = MSPROF_HCCL_DATA_TAG;
+    uint32_t planeID;
+    uint32_t deviceID;
+    uint32_t streamID;
+    double ts;
+    char name[16];
+    union {
+        MsprofHcclProfNotify notify;
+        MsprofHcclProfReduce reduce;
+        MsprofHcclProfStageStep stageStep;
+        MsprofHcclProfMemcpy forMemcpy;
+        MsprofHcclProfRDMA RDMA;
+        MsprofHcclProfFlag flag;
+    } args;
+};
+#pragma pack()
+
+/**
+ * @name  MsprofStampInfo
+ * @brief struct of data reported by msproftx
+ */
+struct MsprofStampInfo {
+    uint16_t magicNumber;
+    uint16_t dataTag;
+    uint32_t processId;
+    uint32_t threadId;
+    uint32_t category;         //marker category
+    uint32_t  eventType;
+    int32_t payloadType;
+    union PayloadValue         //payload info for marker
+    {
+        uint64_t ullValue;
+        int64_t llValue;
+        double dValue;
+        uint32_t uiValue[2];
+        int32_t iValue[2];
+        float fValue[2];
+    } payload;
+    uint64_t startTime;
+    uint64_t endTime;
+    int32_t messageType;
+    char message[128];
+    uint8_t reserve0[4];
+    uint8_t reserve1[72];
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // MSPROFILER_PROF_COMMON_H_