From fc33462be4b01fb7ed2431caf83139814d5c38bc Mon Sep 17 00:00:00 2001
From: zhaozhixuan <zhaozhixuan2@hisilicon.com>
Date: Sun, 30 May 2021 23:30:08 +0800
Subject: [PATCH] Add subscribe support for step_info profiling.

---
 ge/common/profiling/profiling_manager.cc     | 10 ++++++----
 ge/graph/load/model_manager/davinci_model.cc | 13 ++++---------
 ge/hybrid/executor/hybrid_model_executor.cc  |  4 ++--
 third_party/fwkacllib/inc/runtime/base.h     |  6 ++++++
 4 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index f1c3c87b..d615187f 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -323,11 +323,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfileStepInfo(
   uint64_t index_id, uint64_t model_id, uint16_t tag_id, rtStream_t stream, int32_t device_id) {
 #ifdef DAVINCI_SUPPORT_PROFILING
-  rtError_t rt_ret = RT_ERROR_NONE;
-#ifndef ONLY_COMPILE_OPEN_SRC
+  if (!is_load_profiling_ && subscribe_count_ == 0) {
+    GELOGD("Profiling is not turned on, no need to profile step info.");
+    return SUCCESS;
+  }
+
   GELOGD("Profiling Step Info TraceTask execute async start, index_id = %lu, model_id = %lu, tag_id = %u",
          index_id, model_id, tag_id);
-  rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream);
+  rtError_t rt_ret = rtProfilerTraceEx(index_id, model_id, tag_id, stream);
   if (rt_ret != RT_ERROR_NONE) {
     GELOGE(RT_FAILED, "[Call][rtProfilerTraceEx]Failed, ret 0x%X", rt_ret);
     REPORT_CALL_ERROR("E19999", "Call rtProfilerTraceEx failed, ret 0x%X", rt_ret);
@@ -335,7 +338,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Profil
   }
   GELOGD("Profiling Step Info TraceTask execute async success, index_id = %lu, model_id = %lu, tag_id = %u",
          index_id, model_id, tag_id);
-#endif
 
   mmTimespec timespec = mmGetTickCount();
   // 1000 ^ 3 converts second to nanosecond
diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index b52796c8..e76db039 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -3977,7 +3977,6 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
   is_dynamic_ = input_data.is_dynamic_batch;
 
   bool profiling_model_execute_on = ProfilingManager::Instance().ProfilingModelExecuteOn();
-  bool profiling_model_load_on = ProfilingManager::Instance().ProfilingModelLoadOn();
   GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_START));
   Status ret = CopyModelData(input_data, output_data, is_dynamic_);
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret,
@@ -3991,10 +3990,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
     uint64_t model_id = static_cast<uint64_t>(model_id_);
     int32_t device_id = static_cast<int32_t>(device_id_);
     // tag_id 0 means step begin, 1 meas step end.
-    if (profiling_model_load_on) {
-      GE_CHK_STATUS_RET_NOLOG(
-        ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id));
-    }
+    GE_CHK_STATUS_RET_NOLOG(
+      ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 0, rt_model_stream_, device_id));
 
     GELOGD("rtModelExecute do");
     GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START));
@@ -4003,10 +4000,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
     GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END));
     GELOGD("rtModelExecute end");
 
-    if (profiling_model_load_on) {
-      GE_CHK_STATUS_RET_NOLOG(
-        ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 1, rt_model_stream_, device_id));
-    }
+    GE_CHK_STATUS_RET_NOLOG(
+      ProfilingManager::Instance().ProfileStepInfo(index_id, model_id, 1, rt_model_stream_, device_id));
     iterator_count_++;
   }
 
diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc
index d4d97840..d8939175 100755
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -90,7 +90,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
   int32_t device_id = static_cast<int32_t>(device_id_);
   auto &prof_mgr = ProfilingManager::Instance();
   // tag_id 0 means step begin, 1 meas step end.
-  if (!model_->IsSingleOp() && prof_mgr.ProfilingModelLoadOn()) {
+  if (!model_->IsSingleOp()) {
     GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 0, stream_, device_id));
   }
 
@@ -98,7 +98,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
                         "Failed to execute partitioned call.");
   RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End");
 
-  if (!model_->IsSingleOp() && prof_mgr.ProfilingModelLoadOn()) {
+  if (!model_->IsSingleOp()) {
     GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id));
   }
 
diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h
index 5b246eed..40bc91f7 100644
--- a/third_party/fwkacllib/inc/runtime/base.h
+++ b/third_party/fwkacllib/inc/runtime/base.h
@@ -156,6 +156,12 @@ RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtSt
 
 /**
  * @ingroup profiling_base
+ * @brief ts sends a keypoint for step info.
+ */
+RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagId, rtStream_t stream);
+
+/**
+ * @ingroup profiling_base
  * @brief ts set profiling reporter callback.
  */
 RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback);