Browse Source

Synchronize latest Ascend software suite 29 Dec 2020

tags/v1.1.0
yanghaoran 4 years ago
parent
commit
43faf5dbf3
100 changed files with 2201 additions and 1099 deletions
  1. +73
    -0
      Third_Party_Open_Source_Software_Notice
  2. +2
    -1
      cmake/intf_pub_linux.cmake
  3. +6
    -0
      ge/CMakeLists.txt
  4. +19
    -0
      ge/client/ge_api.cc
  5. +1
    -14
      ge/common/dump/dump_op.cc
  6. +2
    -2
      ge/common/ge/tbe_plugin_manager.cc
  7. +1
    -1
      ge/common/profiling/ge_profiling.cc
  8. +6
    -8
      ge/common/profiling/profiling_manager.cc
  9. +0
    -2
      ge/common/proto/op_mapping_info.proto
  10. +8
    -0
      ge/common/proto/tensorflow/attr_value.proto
  11. +8
    -0
      ge/common/proto/tensorflow/function.proto
  12. +8
    -0
      ge/common/proto/tensorflow/graph.proto
  13. +8
    -0
      ge/common/proto/tensorflow/graph_library.proto
  14. +8
    -0
      ge/common/proto/tensorflow/node_def.proto
  15. +8
    -0
      ge/common/proto/tensorflow/op_def.proto
  16. +8
    -0
      ge/common/proto/tensorflow/resource_handle.proto
  17. +8
    -0
      ge/common/proto/tensorflow/tensor.proto
  18. +8
    -0
      ge/common/proto/tensorflow/tensor_shape.proto
  19. +8
    -0
      ge/common/proto/tensorflow/types.proto
  20. +8
    -0
      ge/common/proto/tensorflow/versions.proto
  21. +83
    -38
      ge/executor/ge_executor.cc
  22. +0
    -2
      ge/executor/proto/op_mapping_info.proto
  23. +2
    -0
      ge/ge_inference.mk
  24. +3
    -2
      ge/ge_local_engine/engine/host_cpu_engine.cc
  25. +2
    -0
      ge/ge_runner.mk
  26. +42
    -20
      ge/generator/ge_generator.cc
  27. +1
    -1
      ge/graph/build/memory/graph_mem_assigner.cc
  28. +54
    -0
      ge/graph/build/model_builder.cc
  29. +6
    -0
      ge/graph/build/model_builder.h
  30. +6
    -4
      ge/graph/build/stream_graph_optimizer.cc
  31. +1
    -1
      ge/graph/build/task_generator.cc
  32. +67
    -10
      ge/graph/load/graph_loader.cc
  33. +6
    -0
      ge/graph/load/graph_loader.h
  34. +0
    -6
      ge/graph/load/new_model_manager/data_dumper.cc
  35. +146
    -138
      ge/graph/load/new_model_manager/davinci_model.cc
  36. +7
    -33
      ge/graph/load/new_model_manager/davinci_model.h
  37. +234
    -40
      ge/graph/load/new_model_manager/model_manager.cc
  38. +7
    -1
      ge/graph/load/new_model_manager/model_manager.h
  39. +74
    -58
      ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  40. +0
    -2
      ge/graph/load/new_model_manager/task_info/kernel_task_info.h
  41. +28
    -2
      ge/graph/manager/graph_manager.cc
  42. +3
    -0
      ge/graph/manager/graph_mem_allocator.cc
  43. +1
    -2
      ge/graph/optimize/graph_optimize.cc
  44. +5
    -23
      ge/graph/passes/attach_stream_label_pass.cc
  45. +1
    -3
      ge/graph/passes/attach_stream_label_pass.h
  46. +1
    -1
      ge/graph/passes/base_pass.cc
  47. +64
    -0
      ge/graph/passes/dimension_adjust_pass.cc
  48. +4
    -0
      ge/graph/passes/dimension_adjust_pass.h
  49. +4
    -8
      ge/graph/passes/dynamic_single_op_reset_shape_pass.cc
  50. +1
    -1
      ge/graph/passes/dynamic_single_op_reset_shape_pass.h
  51. +57
    -7
      ge/graph/passes/enter_pass.cc
  52. +2
    -1
      ge/graph/passes/enter_pass.h
  53. +1
    -4
      ge/graph/passes/folding_pass.cc
  54. +0
    -10
      ge/graph/passes/merge_to_stream_merge_pass.cc
  55. +30
    -44
      ge/graph/passes/multi_batch_clone_pass.cc
  56. +11
    -11
      ge/graph/passes/multi_batch_clone_pass.h
  57. +89
    -173
      ge/graph/passes/next_iteration_pass.cc
  58. +3
    -13
      ge/graph/passes/next_iteration_pass.h
  59. +106
    -0
      ge/graph/passes/remove_same_const_pass.cc
  60. +28
    -0
      ge/graph/passes/remove_same_const_pass.h
  61. +8
    -4
      ge/graph/passes/switch_to_stream_switch_pass.cc
  62. +51
    -0
      ge/graph/passes/useless_control_out_remove_pass.cc
  63. +29
    -0
      ge/graph/passes/useless_control_out_remove_pass.h
  64. +40
    -0
      ge/graph/preprocess/graph_preprocess.cc
  65. +344
    -58
      ge/graph/preprocess/multi_batch_copy_graph.cc
  66. +15
    -1
      ge/graph/preprocess/multi_batch_copy_graph.h
  67. +10
    -1
      ge/host_kernels/dynamic_stitch_kernel.cc
  68. +2
    -0
      ge/hybrid/executor/hybrid_execution_context.h
  69. +1
    -0
      ge/hybrid/executor/hybrid_model_executor.cc
  70. +22
    -34
      ge/hybrid/executor/node_state.cc
  71. +1
    -2
      ge/hybrid/executor/node_state.h
  72. +8
    -1
      ge/hybrid/executor/subgraph_executor.cc
  73. +4
    -5
      ge/hybrid/executor/worker/execution_engine.cc
  74. +18
    -103
      ge/hybrid/executor/worker/shape_inference_engine.cc
  75. +0
    -4
      ge/hybrid/executor/worker/shape_inference_engine.h
  76. +3
    -0
      ge/hybrid/executor/worker/task_compile_engine.cc
  77. +26
    -0
      ge/hybrid/model/hybrid_model_builder.cc
  78. +1
    -0
      ge/hybrid/model/hybrid_model_builder.h
  79. +34
    -57
      ge/hybrid/model/node_item.cc
  80. +0
    -5
      ge/hybrid/model/node_item.h
  81. +0
    -22
      ge/hybrid/node_executor/task_context.cc
  82. +0
    -2
      ge/hybrid/node_executor/task_context.h
  83. +3
    -4
      ge/ir_build/atc_ir_common.cc
  84. +1
    -1
      ge/ir_build/atc_ir_common.h
  85. +1
    -1
      ge/offline/main.cc
  86. +8
    -0
      ge/proto/caffe/caffe.proto
  87. +0
    -2
      ge/proto/op_mapping_info.proto
  88. +8
    -0
      ge/proto/tensorflow/attr_value.proto
  89. +8
    -0
      ge/proto/tensorflow/function.proto
  90. +8
    -0
      ge/proto/tensorflow/graph.proto
  91. +8
    -0
      ge/proto/tensorflow/graph_library.proto
  92. +8
    -0
      ge/proto/tensorflow/node_def.proto
  93. +8
    -0
      ge/proto/tensorflow/op_def.proto
  94. +8
    -0
      ge/proto/tensorflow/resource_handle.proto
  95. +8
    -0
      ge/proto/tensorflow/tensor.proto
  96. +8
    -0
      ge/proto/tensorflow/tensor_shape.proto
  97. +8
    -0
      ge/proto/tensorflow/types.proto
  98. +8
    -0
      ge/proto/tensorflow/versions.proto
  99. +4
    -4
      ge/single_op/task/op_task.cc
  100. +101
    -101
      inc/external/acl/error_codes/rt_error_codes.h

+ 73
- 0
Third_Party_Open_Source_Software_Notice View File

@@ -458,3 +458,76 @@ Copyright (c) Facebook Inc. and Microsoft Corporation.

License: MIT License
Please see above.



Software: caffe 1.0

License: BSD 2-Clause License

Open Source Software Licensed Under the BSD 2-Clause License

GraphEngine uses source code files from caffe so as to support model format conversion from caffe model to GraphEngine model.
Please see below for the full list of source code files from caffe that are used by GraphEngine.
The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
----------------------------------------------------------------------------------------
1. caffe.proto master
All contributions by the University of California:
Copyright (c) 2014-2017 The Regents of the University of California (Regents)
All rights reserved.


Terms of the BSD 2-Clause License:
--------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.



Software: tensorflow 1.15.0

License: Apache-2.0 License

Open Source Software Licensed Under the Apache-2.0 License


GraphEngine uses source code files from tensorflow so as to support model format conversion from tensorflow model to GraphEngine model.
Please see below for the full list of source code files from tensorflow that are used by GraphEngine.
The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
----------------------------------------------------------------------------------------
1. attr_value.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

2. function.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

3. graph.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

4. node_def.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

5. op_def.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

6. resource_handle.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

7. tensor.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

8. tensor_shape.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

9. types.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

10. versions.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Terms of the Apache-2.0 License:
Please see above.

+ 2
- 1
cmake/intf_pub_linux.cmake View File

@@ -4,7 +4,7 @@ endif()

add_library(intf_pub INTERFACE)

target_compile_options(intf_pub INTERFACE
target_compile_options(intf_pub INTERFACE
-Wall
-fPIC
$<IF:$<STREQUAL:${CMAKE_SYSTEM_NAME},centos>,-fstack-protector-all,-fstack-protector-strong>
@@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE
$<$<CONFIG:Debug>:CFG_BUILD_DEBUG>
WIN64=1
LINUX=0
LOG_CPP
)
target_link_options(intf_pub INTERFACE
-Wl,-z,relro


+ 6
- 0
ge/CMakeLists.txt View File

@@ -157,6 +157,8 @@ set(TRAIN_SRC_LIST
"graph/passes/compile_nodes_pass.cc"
"graph/passes/constant_folding_pass.cc"
"graph/passes/constant_fuse_same_pass.cc"
"graph/passes/remove_same_const_pass.cc"
"graph/passes/useless_control_out_remove_pass.cc"
"graph/passes/control_trigger_pass.cc"
"graph/passes/dimension_adjust_pass.cc"
"graph/passes/dimension_compute_pass.cc"
@@ -522,6 +524,8 @@ set(INFER_SRC_LIST
"graph/passes/assign_pass.cc"
"graph/passes/addn_pass.cc"
"graph/passes/common_subexpression_elimination_pass.cc"
"graph/passes/remove_same_const_pass.cc"
"graph/passes/useless_control_out_remove_pass.cc"
"graph/passes/transop_symmetry_elimination_pass.cc"
"graph/passes/save_pass.cc"
"graph/passes/switch_dead_branch_elimination.cc"
@@ -616,6 +620,7 @@ target_compile_definitions(ge_runner PRIVATE
FMK_SUPPORT_DUMP
DAVINCI_CLOUD
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_runner PRIVATE
@@ -683,6 +688,7 @@ target_compile_definitions(ge_compiler PRIVATE
FMK_HOST_INFER
COMPILE_OMG_PACKAGE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_compiler PRIVATE


+ 19
- 0
ge/client/ge_api.cc View File

@@ -32,6 +32,9 @@
#include "graph/common/ge_call_wrapper.h"
#include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "toolchain/plog.h"
#endif

using domi::OpRegistry;
using std::map;
@@ -129,6 +132,11 @@ Status GEInitializeImpl(const std::map<string, string> &options) {

// Initialize GE, prepare for execution, call GELib::Initialize
Status GEInitialize(const std::map<string, string> &options) {
#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
}
#endif
return GEInitializeImpl(options);
}

@@ -143,6 +151,11 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
std::string val = option.second.GetString();
str_options[key] = val;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
}
#endif
return GEInitializeImpl(str_options);
}

@@ -187,6 +200,12 @@ Status GEFinalize() {
// to avoid memory fragment, use malloc_trim to back free stack to system
malloc_trim(0);

#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportFinalize() != SUCCESS) {
GELOGW("Dlog report device log finalize failed.");
}
#endif

GELOGT(TRACE_STOP, "GEFinalize finished");
return ret;
}


+ 1
- 14
ge/common/dump/dump_op.cc View File

@@ -94,9 +94,6 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
for (auto dim : output_descs.at(i).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -121,9 +118,6 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
for (auto dim : input_descs.at(i).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -220,15 +214,8 @@ Status DumpOp::LaunchDumpOp() {
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
dump_path.c_str());
uint32_t task_id = 0;
uint32_t stream_id = 0;
rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret);
}

aicpu::dump::Task task;
task.set_task_id(task_id);
task.set_stream_id(stream_id);
task.mutable_op()->set_op_name(op_desc_->GetName());
task.mutable_op()->set_op_type(op_desc_->GetType());
if (dump_properties_.GetDumpMode() == kDumpOutput) {


+ 2
- 2
ge/common/ge/tbe_plugin_manager.cc View File

@@ -184,7 +184,7 @@ void TBEPluginManager::LoadCustomOpLib() {
std::string fmk_type = std::to_string(domi::TENSORFLOW);
auto it = options_.find(ge::FRAMEWORK_TYPE);
if (it != options_.end()) {
fmk_type = it->second;
fmk_type = it->second;
}
std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
GELOGI("The size of registration_datas is: %zu", registration_datas.size());
@@ -192,7 +192,7 @@ void TBEPluginManager::LoadCustomOpLib() {
if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) {
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
(void)domi::OpRegistry::Instance()->Register(reg_data);
domi::OpRegistry::Instance()->Register(reg_data);
}
}
}


+ 1
- 1
ge/common/profiling/ge_profiling.cc View File

@@ -182,7 +182,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
command.module_index = prof_config_param->profSwitch;
}
GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(),
command.module_index);
command.module_index);
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
}


+ 6
- 8
ge/common/profiling/profiling_manager.cc View File

@@ -38,8 +38,10 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
} // namespace

namespace ge {
ProfilingManager::ProfilingManager()
: is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) {
ProfilingManager::ProfilingManager() : is_load_profiling_(false),
is_execute_profiling_(false),
is_training_trace_(false),
subscribe_count_(0) {
prof_cb_.msprofCtrlCallback = nullptr;
prof_cb_.msprofReporterCallback = nullptr;
}
@@ -100,8 +102,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
return INTERNAL_ERROR;
}
is_execute_profiling_ = true;
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options,
options.profiling_options.c_str());
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(),
prof_conf.options, options.profiling_options.c_str());
} else {
(void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
(void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX);
@@ -141,9 +143,6 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
}
try {
Json prof_options = Json::parse(options);
if (options.find(kTrainingTrace) == std::string::npos) {
return ge::SUCCESS;
}
const std::string training_trace = prof_options[kTrainingTrace];
if (training_trace.empty()) {
GELOGI("Training trace will not take effect.");
@@ -842,7 +841,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP
return;
}
}
return;
}



+ 0
- 2
ge/common/proto/op_mapping_info.proto View File

@@ -15,7 +15,6 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -24,7 +23,6 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 8
- 0
ge/common/proto/tensorflow/attr_value.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/function.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/graph.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/graph_library.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/node_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/op_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/resource_handle.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/tensor.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/tensor_shape.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

// Protocol buffer representing the shape of tensors.

syntax = "proto3";


+ 8
- 0
ge/common/proto/tensorflow/types.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/versions.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 83
- 38
ge/executor/ge_executor.cc View File

@@ -209,33 +209,19 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,

namespace ge {
bool GeExecutor::isInit_ = false;

static void InitOpsProtoManger() {
string opsproto_path;
const char *path_env = std::getenv("ASCEND_OPP_PATH");
if (path_env != nullptr) {
string path = path_env;
string file_path = RealPath(path.c_str());
if (file_path.empty()) {
GELOGE(FAILED, "File path %s is invalid.", path.c_str());
return;
class ModelListenerAdapter : public ModelListener {
public:
domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode,
std::vector<ge::OutputTensorInfo> &outputs) {
if (listener == nullptr) {
GELOGE(ge::FAILED, "listener is null.");
return FAILED;
}
opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
GELOGI("Get opsproto so path from env : %s", path.c_str());
} else {
string path_base = PluginManager::GetPath();
GELOGI("path_base is %s", path_base.c_str());
path_base = path_base.substr(0, path_base.rfind('/'));
path_base = path_base.substr(0, path_base.rfind('/') + 1);
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}

GELOGI("Get opsproto path is %s", opsproto_path.c_str());
OpsProtoManager *manager = OpsProtoManager::Instance();
map<string, string> option_tmp;
option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
(void)manager->Initialize(option_tmp);
}
return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs);
}

std::shared_ptr<ge::ModelListener> listener;
};

GeExecutor::GeExecutor() {}

@@ -246,16 +232,6 @@ Status GeExecutor::Initialize() {
return ge::SUCCESS;
}

OpTilingManager::GetInstance().LoadSo();

Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
if (initHostCpuEngineStatus != SUCCESS) {
GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine");
return initHostCpuEngineStatus;
}

InitOpsProtoManger();

std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
mem_type.push_back(RT_MEMORY_P2P_DDR);
auto ret = MemManager::Instance().Initialize(mem_type);
@@ -560,6 +536,60 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add
return SUCCESS;
}

// Load model
Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key,
int32_t priority, std::shared_ptr<ge::ModelListener> listener) {
GELOGI("load model offline begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

string filePath = RealPath(path.c_str());
if (filePath.empty()) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID,
"File path is invalid. please check your text file '%s'.", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
}

std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
if (listener_adapter == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
listener_adapter->listener = listener;

Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GeExecutor] LoadModelFromFile failed");
return ACL_ERROR_GE_LOAD_MODEL;
}
return SUCCESS;
}

Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
std::shared_ptr<ge::ModelListener> listener) {
GELOGI("Load model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
if (listener_adapter == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
listener_adapter->listener = listener;

Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GeExecutor] LoadModel failed.");
return ACL_ERROR_GE_LOAD_MODEL;
}
return ret;
}

Status GeExecutor::UnloadModel(uint32_t model_id) {
GELOGD("unload model %u begin.", model_id);
if (!isInit_) {
@@ -569,7 +599,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id);
return ACL_ERROR_GE_INTERNAL_ERROR;
return ret;
}

std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model =
@@ -587,11 +617,26 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
ret = GraphLoader::UnloadModel(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id);
return ACL_ERROR_GE_UNLOAD_MODEL;
return ret;
}
return SUCCESS;
}

Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) {
GELOGI("run model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

InputData inputs;
GetDomiInputData(input_data, inputs);
OutputData outputs;
GetDomiOutputData(output_data, outputs);

return GraphExecutor::DataInput(inputs, outputs);
}

// Get input and output descriptor
Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) {


+ 0
- 2
ge/executor/proto/op_mapping_info.proto View File

@@ -15,7 +15,6 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -24,7 +23,6 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 2
- 0
ge/ge_inference.mk View File

@@ -191,6 +191,8 @@ OMG_HOST_SRC_FILES := \
graph/passes/control_trigger_pass.cc \
graph/passes/cond_pass.cc \
graph/passes/cond_remove_pass.cc \
graph/passes/remove_same_const_pass.cc \
graph/passes/useless_control_out_remove_pass.cc \
graph/passes/for_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/assign_pass.cc \


+ 3
- 2
ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -39,7 +39,7 @@ namespace {
} \
ge_tensor = MakeShared<GeTensor>(out_desc); \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \
GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \
return MEMALLOC_FAILED; \
@@ -50,7 +50,8 @@ namespace {
} else { \
ge_tensor = outputs[i]; \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \
GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \
reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \
} \
auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
auto tensor_name = op_desc->GetOutputNameByIndex(i); \


+ 2
- 0
ge/ge_runner.mk View File

@@ -126,6 +126,8 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/compile_nodes_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/remove_same_const_pass.cc \
graph/passes/useless_control_out_remove_pass.cc \
graph/passes/control_trigger_pass.cc \
graph/passes/dimension_adjust_pass.cc \
graph/passes/dimension_compute_pass.cc \


+ 42
- 20
ge/generator/ge_generator.cc View File

@@ -262,16 +262,26 @@ static Status CheckShapeReset(const OpDescPtr &op_desc, bool &change_shape_flag)
change_shape_flag = true;
}
}
for (size_t i = 0; i < op_desc->GetAllOutputsDesc().size(); i++) {
auto output_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i));
GE_CHECK_NOTNULL(output_desc);
// pass scalar output desc
auto dims = output_desc->GetShape().GetDims();
if (dims.size() == kDynamicDimSize && dims[0] == kDynamicDimValue) {
change_shape_flag = true;
}
}
return SUCCESS;
}

static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor> &inputs_dynamic) {
static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor> &inputs_dynamic) {
for (auto input : inputs) {
auto input_desc = input.GetTensorDesc();
GeShape shape_ori = input_desc.GetShape();

std::vector<int64_t> dynamic_shape_dims = {kDynamicDimValue};
GeShape dynamic_shape(dynamic_shape_dims);
std::vector<std::pair<int64_t, int64_t>> dynamic_shape_range;

ge::GeTensor inputTensor;
ge::GeTensorDesc desc(input_desc);
@@ -279,12 +289,20 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor>
bool is_const = false;
(void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
if (!is_const && shape_ori.GetDims().size() > 0) {
int64_t storage_format = FORMAT_NCHW;
if (ge::AttrUtils::GetInt(desc, ge::ATTR_NAME_STORAGE_FORMAT, storage_format) &&
!ge::AttrUtils::SetListInt(desc, ge::ATTR_NAME_STORAGE_SHAPE, dynamic_shape_dims)) {
GELOGE(FAILED, "Set attr ATTR_NAME_STORAGE_SHAPE fail.");
return FAILED;
}
desc.SetShape(dynamic_shape);
desc.SetShapeRange(dynamic_shape_range);
}

inputTensor.SetTensorDesc(desc);
inputs_dynamic.push_back(inputTensor);
}
return SUCCESS;
}

class GeGenerator::Impl {
@@ -528,6 +546,24 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) {
return true;
}

static Status SetModelNameForDump(GeRootModelPtr ge_root_model) {
ModelHelper model_helper;
string model_name = "";
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
model_name);
if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
return PARAM_INVALID;
}
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
ge_model->SetName(model_name);
return SUCCESS;
}

Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
ModelBufferData &model, bool is_offline) {
rtContext_t ctx = nullptr;
@@ -536,7 +572,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
GELOGD("Current ctx is null.");
ctx = nullptr;
}

GeRootModelPtr ge_root_model = nullptr;
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
@@ -560,22 +595,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
impl_->build_step_.c_str());
return SUCCESS;
}

GE_CHECK_NOTNULL(ge_root_model);
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
ModelHelper model_helper;
string model_name = "";
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
model_name);
if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
return PARAM_INVALID;
ret = SetModelNameForDump(ge_root_model);
if (ret != SUCCESS) {
return ret;
}
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
ge_model->SetName(model_name);
ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model);
if (ret != SUCCESS) {
GELOGE(ret, "Save model failed");
@@ -584,11 +608,9 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
}
return ret;
}

if (ctx != nullptr) {
(void)rtCtxSetCurrent(ctx);
}

return SUCCESS;
}

@@ -682,8 +704,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) {
vector<GeTensor> inputs_dynamic;
vector<GeTensor> outputs_dynamic;
ResetTensorVecShape(inputs, inputs_dynamic);
ResetTensorVecShape(outputs, outputs_dynamic);
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(inputs, inputs_dynamic));
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic));
GE_CHK_STATUS_RET_NOLOG(
impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic));
} else {


+ 1
- 1
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() {
MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);

if (mem_assigner->GetP2PMemOffset() >= 0) {
if (mem_assigner->GetP2PMemOffset() > 0) {
MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
}


+ 54
- 0
ge/graph/build/model_builder.cc View File

@@ -224,6 +224,7 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_
GeTensorDesc &tensor_desc = weight->MutableTensorDesc();
size_t output_size = weight->GetData().size();
TensorUtils::SetDataOffset(tensor_desc, mem_offset);
GELOGD("Node: %s, weight size: %zu.", node->GetName().c_str(), output_size);
mem_offset += output_size;
}
return SUCCESS;
@@ -581,9 +582,13 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
// Add TBE Kernels and custom aicpu op bin
std::set<std::string> tbe_name_set;
std::set<std::string> aicpu_name_set;
std::set<std::string> aicpu_op_types;
std::set<std::string> aicpu_tf_op_types;
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
// check aicpu op type
CollectCheckAicpuAttr(node_op_desc, aicpu_op_types, aicpu_tf_op_types);
TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
std::string kernel_name;
@@ -605,6 +610,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
tbe_kernel_store_.AddTBEKernel(tbe_kernel);
}

SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types);

for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
@@ -796,4 +803,51 @@ Status ModelBuilder::CompileSingleOp() {
GE_TIMESTAMP_CALLNUM_END(BatchCompileOp, "GraphBuild::CompileOp");
return ge::SUCCESS;
}

void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types) {
std::string aicpu_optype;
bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype);
std::vector<std::string> tf_optypes;
bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes);
if (has_attr_check_cpu && !aicpu_optype.empty()) {
aicpu_op_types.insert(aicpu_optype);
}

if (has_attr_check_tf && !tf_optypes.empty()) {
aicpu_tf_op_types.insert(tf_optypes.begin(), tf_optypes.end());
}

return;
}

void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types) {
std::vector<std::string> aicpu_optype_list;
std::vector<std::string> aicpu_tf_optype_list;
if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) {
GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size());
aicpu_op_types.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
}

if (ge::AttrUtils::GetListStr(&model, "needCheckTf", aicpu_tf_optype_list)) {
GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size());
aicpu_tf_op_types.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
}

// reset list with set
aicpu_optype_list.assign(aicpu_op_types.begin(), aicpu_op_types.end());
aicpu_tf_optype_list.assign(aicpu_tf_op_types.begin(), aicpu_tf_op_types.end());
GELOGI(
"Check Aicpu op types ComputeGraph: %s aicpu_op_types: %zu, aicpu_optype_list: %zu, aicpu_tf_op_types: %zu, "
"aicpu_tf_optype_list:%zu.",
compute_graph_->GetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(),
aicpu_tf_optype_list.size());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return,
"Set attr needCheckCpu fail.");

GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return,
"Set attr needCheckTf fail.");
return;
}
} // namespace ge

+ 6
- 0
ge/graph/build/model_builder.h View File

@@ -83,6 +83,12 @@ class ModelBuilder {

Status CompileSingleOp();

void CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types);

void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types);

uint64_t session_id_;

map<int64_t, size_t> mem_type_to_mem_offset_;


+ 6
- 4
ge/graph/build/stream_graph_optimizer.cc View File

@@ -66,13 +66,13 @@ bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &com
if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
label_set.insert(batch_label);
} else {
GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(),
GELOGD("Node %s[%s] has no batch_label, subgraph %s, stream id: %ld ", cur_node->GetName().c_str(),
cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id);
continue;
}

GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
GELOGD("Node %s in subgraph %s stream id: %ld, batch_label: %s, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, batch_label.c_str(), comp_graph->GetDirectNodesSize());
}
if (stream_set.size() > 1 || label_set.size() > 1) {
GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.",
@@ -126,12 +126,14 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
run_context.graphStreamList.size());
return FAILED;
}

run_context.stream = run_context.graphStreamList[stream_id];
std::string batch_label;
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, "
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str());

for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) {
GE_CHECK_NOTNULL(*iter);
Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context);


+ 1
- 1
ge/graph/build/task_generator.cc View File

@@ -54,7 +54,7 @@ const uint64_t kProfilingFpStartLogid = 1;
const uint64_t kProfilingBpEndLogid = 2;
const uint64_t kProfilingArStartLogid = 3;
const uint64_t kProfilingArEndLogid = 4;
const uint64_t kProfilingIterEndLogid = 255;
const uint64_t kProfilingIterEndLogid = 65535;
const int64_t kHashFactor = 100000;
const int64_t kInvalidGroupId = -1;
} // namespace


+ 67
- 10
ge/graph/load/graph_loader.cc View File

@@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
ModelData &model_data) {
Status ret;
if (!CheckInputPathValid(path)) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return GE_EXEC_MODEL_PATH_INVALID;
}

GELOGI("Load model begin, model path is: %s", path.c_str());
if (!key_path.empty() && !CheckInputPathValid(key_path)) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return ACL_ERROR_GE_PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return GE_EXEC_MODEL_KEY_PATH_INVALID;
}

ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
@@ -144,6 +144,63 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
return SUCCESS;
}

Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id) {
Status ret;
ModelData model_data;
ret = LoadDataFromFile(path, key_path, priority, model_data);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
return ret;
}

ret = LoadModel(model_data, listener, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModel: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
}

if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}

return ret;
}

Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id) {
GELOGI("Load model begin, model_id:%u.", model_id);

// For GeOp, Open Device 0 here.
GE_CHK_RT_RET(rtSetDevice(0));
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(model_id, model_data, listener);
if (ret != SUCCESS) {
GE_CHK_RT(rtDeviceReset(0));
GELOGE(ret, "LoadModel: Load failed.");
return ret;
}
ret = model_manager->Start(model_id);
if (ret != SUCCESS) {
if (model_manager->Unload(model_id) != SUCCESS) {
GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start.");
}
GELOGE(ret, "LoadModel: Start failed.");
return ret;
}
GELOGI("LoadModel: Start model success, model_id:%u.", model_id);
return SUCCESS;
}

Status GraphLoader::CommandHandle(const Command &command) {
try {
auto model_manager = ModelManager::GetInstance();
@@ -168,16 +225,16 @@ Status GraphLoader::CommandHandle(const Command &command) {
}

Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr,
size_t mem_size, void *weight_ptr, size_t weight_size) {
size_t memsize, void *weight_ptr, size_t weightsize) {
GELOGI("Load model begin, model_id:%u.", model_id);
// For ACL, Open Device from App.
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(
model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size);
model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id);
return ACL_ERROR_GE_LOAD_MODEL;
GELOGE(ret, "Load model failed, model_id:%u.", model_id);
return ret;
}
GELOGI("Load model success, model_id:%u.", model_id);
return SUCCESS;
@@ -202,8 +259,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id);
return ACL_ERROR_GE_LOAD_MODEL;
GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id);
return ret;
}

GELOGI("Load model with queue success, model_id:%u.", model_id);


+ 6
- 0
ge/graph/load/graph_loader.h View File

@@ -44,6 +44,12 @@ class GraphLoader {

static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size);

static Status LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id);

static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id);

static Status CommandHandle(const Command &command);

static Status GetMemoryInfo(int64_t &free);


+ 0
- 6
ge/graph/load/new_model_manager/data_dumper.cc View File

@@ -319,9 +319,6 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -479,9 +476,6 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
GELOGI("Get aipp input size according to attr is %ld", input_size);


+ 146
- 138
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -289,8 +289,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh
if (weight_ptr == nullptr) {
weights_mem_base_ = MallocWeightsMem(weights_size);
if (weights_mem_base_ == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
}
is_inner_weight_base_ = true;
}
@@ -307,8 +307,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh

Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (is_feature_map_mem_has_inited_) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "call InitFeatureMapMem more than once .");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(FAILED, "call InitFeatureMapMem more than once .");
return FAILED;
}
is_feature_map_mem_has_inited_ = true;

@@ -316,8 +316,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;

if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
return FAILED;
}

mem_base_ = static_cast<uint8_t *>(dev_ptr);
@@ -327,8 +327,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (TotalMemSize() && mem_base_ == nullptr) {
mem_base_ = MallocFeatureMapMem(data_size);
if (mem_base_ == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
}
GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]",
runtime_param_.graph_id, mem_base_, data_size);
@@ -343,8 +343,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (p2p_data_size != 0) {
p2p_mem_base_ = MallocP2PMem(p2p_data_size);
if (p2p_mem_base_ == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size);
return GE_EXEC_ALLOC_P2P_MEM_FAILED;
}
GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
p2p_mem_base_, p2p_data_size);
@@ -485,6 +485,8 @@ Status DavinciModel::DoTaskSink() {

GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");

GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");

GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");

GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
@@ -710,7 +712,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
}

// collect profiling for ge
GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed");
auto &profiling_manager = ProfilingManager::Instance();
if (profiling_manager.ProfilingModelLoadOn()) {
Status p_ret = ReportProfilingData();
@@ -2087,61 +2088,12 @@ Status DavinciModel::SyncVarData() {
return ret;
}

Status DavinciModel::InitModelProfile() {
for (const auto &task : task_list_) {
GE_CHECK_NOTNULL(task);
const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo();
// when type is RT_MODEL_TASK_KERNEL, ctx is not null
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) {
continue;
}

GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size());
op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID()));
}

std::set<uint32_t> task_id_set;
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (const auto &task : task_list_) {
GE_CHECK_NOTNULL(task);
const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo();
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) {
continue;
}

if (task_id_set.count(task->GetTaskID()) > 0) {
continue;
}

const auto &op_desc = GetOpByIndex(fusion_op_info->op_index);
GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index);

ProfileInfo profile;
profile.fusion_info = *fusion_op_info;
Range range = op_id_map_.equal_range(fusion_op_info->op_index);
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) {
profile.task_count++;
task_id_set.insert(range_idx->second);
}

// memory info
TaskMemInfo &mem_info = profile.memory_info;
const auto input_size = ModelUtils::GetInputSize(op_desc);
const auto output_size = ModelUtils::GetOutputSize(op_desc);
const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc);
const auto weight_size = ModelUtils::GetWeightSize(op_desc);
mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), 0);
mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), 0);
mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), 0);
mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), 0);
mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size;

profile_list_.emplace_back(profile);
inline int64_t SumSize(const vector<int64_t> &size_list) {
int64_t sum_size = 0;
for (const int64_t &size : size_list) {
sum_size += size;
}

GELOGI("fusion task size: %zu, profile info size: %zu", op_id_map_.size(), profile_list_.size());
return SUCCESS;
return sum_size;
}

Status DavinciModel::SinkModelProfile() {
@@ -2149,12 +2101,18 @@ Status DavinciModel::SinkModelProfile() {
auto &prof_mgr = ProfilingManager::Instance();
ReporterData reporter_data{};
// report model data tag name
std::string tag_name("model_load_info_" + std::to_string(this->Id()));
std::string tag_name;
tag_name.append("model_load_info_").append(std::to_string(this->Id()));
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");

// Model Header
std::string name = om_name_.empty() ? name_ : om_name_;
string name;
if (!om_name_.empty()) {
name = om_name_;
} else {
name = name_;
}
size_t name_len = name.size();
reporter_data.deviceId = device_id_;
reporter_data.data = (unsigned char *)&name_len;
@@ -2186,71 +2144,128 @@ Status DavinciModel::SinkModelProfile() {
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

int32_t task_num = task_list_.size();
std::multimap<uint32_t, uint32_t> op_id_map;
std::set<uint32_t> task_id_set;
for (int32_t i = 0; i < task_num; i++) {
auto task = task_list_[i];
GE_CHECK_NOTNULL(task);
auto fusion_op_info = task->GetFusionOpInfo();
// when type is RT_MODEL_TASK_KERNEL, ctx is not null
if (fusion_op_info != nullptr) {
uint32_t op_num = fusion_op_info->original_op_names.size();
uint32_t task_id = task->GetTaskID();
if (op_num > 0) {
GELOGI("task.id = %u, opNum = %u", task_id, op_num);
op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id));
}
}
}

struct memoryInfo {
int64_t input_size;
int64_t output_size;
int64_t weight_size;
int64_t workspace_size;
int64_t total_size;

memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {}
};

using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (const ProfileInfo &profile : profile_list_) {
// op name after fusion
string fusion_op_name = profile.fusion_info.op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
uint32_t op_num = profile.fusion_info.original_op_names.size();
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = profile.fusion_info.original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
for (int32_t i = 0; i < task_num; i++) {
auto task = task_list_[i];
GE_CHECK_NOTNULL(task);
auto fusion_op_info = task->GetFusionOpInfo();
if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) {
uint32_t task_id = task->GetTaskID();
uint32_t op_num = fusion_op_info->original_op_names.size();
uint32_t task_count = 0;
if (task_id_set.count(task_id) != 0) {
continue;
}

uint32_t op_id = fusion_op_info->op_index;
Range range = op_id_map.equal_range(op_id);
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) {
task_count++;
uint32_t task_id = range_idx->second;
task_id_set.insert(task_id);
}

// op name after fusion
string fusion_op_name = fusion_op_info->op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// stream id info
uint32_t streamId = profile.fusion_info.stream_id;
reporter_data.data = (unsigned char *)&streamId;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
reporter_data.data = (unsigned char *)&profile.memory_info;
reporter_data.dataLen = sizeof(profile.memory_info);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// task info
reporter_data.data = (unsigned char *)&profile.task_count;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = fusion_op_info->original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// stream id info
uint32_t streamId = task->GetStreamId();
reporter_data.data = (unsigned char *)&streamId;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
struct memoryInfo memory_info;
uint32_t op_index = fusion_op_info->op_index;
auto iter = op_list_.find(op_index);
GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index);
auto op_desc = iter->second;
memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc));
memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc));
memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc));
memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc));
memory_info.total_size =
memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size;
reporter_data.data = (unsigned char *)&memory_info;
reporter_data.dataLen = sizeof(struct memoryInfo);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// task info
reporter_data.data = (unsigned char *)&task_count;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Range task_range = op_id_map.equal_range(op_id);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}
}
}

return SUCCESS;
}

@@ -2824,19 +2839,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const
return SUCCESS;
}

Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
for (size_t i = 0; i < total_io_addrs.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs[i]);
Status DavinciModel::UpdateKnownZeroCopyAddr() {
for (size_t i = 0; i < total_io_addrs_.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]);
if (it_in != knonw_input_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_input_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]);
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_input_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]);
}
auto it_out = knonw_output_data_info_.find(total_io_addrs[i]);
auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]);
if (it_out != knonw_output_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_output_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]);
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_output_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]);
}
}
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success.");
@@ -2865,7 +2880,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
} else {
total_io_addrs_ = orig_total_io_addrs_;
}
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed.");

if (total_args_size_ == 0) {
GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_);
@@ -2932,14 +2947,7 @@ Status DavinciModel::MallocKnownArgs() {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
// malloc dynamic and static hybrid memory
if (total_hybrid_args_size_ != 0) {
rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

// malloc fixed addr memory, eg: rts op
if (total_fixed_addr_size_ != 0) {
GELOGI("Begin to allocate fixed addr.");


+ 7
- 33
ge/graph/load/new_model_manager/davinci_model.h View File

@@ -76,20 +76,6 @@ struct timeInfo {
int64_t dumpEndTime;
};

struct TaskMemInfo {
int64_t input_size{0};
int64_t output_size{0};
int64_t weight_size{0};
int64_t workspace_size{0};
int64_t total_size{0};
};

struct ProfileInfo {
FusionOpInfo fusion_info;
TaskMemInfo memory_info;
uint32_t task_count{0};
};

enum ExecuteMode {
INITIALIZATION,
SYNCHRONIZATION,
@@ -240,6 +226,8 @@ class DavinciModel {
const vector<OpDescPtr> &GetDataList() const { return data_op_list_; }

// get Op
const map<uint32_t, OpDescPtr> &GetOpList() const { return op_list_; }

OpDescPtr GetOpByIndex(uint32_t index) const {
if (op_list_.find(index) == op_list_.end()) {
return nullptr;
@@ -448,6 +436,10 @@ class DavinciModel {

int64_t GetLoadEndTime() { return load_end_time_; }

Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);

Status ReportProfilingData();

void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
@@ -484,14 +476,6 @@ class DavinciModel {
void SetTotalIOAddrs(vector<void *> &io_addrs) {
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
}
void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
uint32_t GetHybridArgsSize() {
return total_hybrid_args_size_;
}
void *GetCurrentHybridArgsAddr(uint32_t offset) {
void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
return cur_args;
}
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
int64_t GetFixedAddrsSize(string tensor_name);
void *GetCurrentFixedAddr(int64_t offset) const {
@@ -510,7 +494,7 @@ class DavinciModel {
Status MallocKnownArgs();
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs);
Status UpdateKnownZeroCopyAddr();
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
@@ -812,11 +796,6 @@ class DavinciModel {

void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);

Status InitModelProfile();
Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);

Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs);

@@ -952,8 +931,6 @@ class DavinciModel {
void *args_ = nullptr;
void *args_host_ = nullptr;
void *fixed_addrs_ = nullptr;
void *hybrid_addrs_ = nullptr;
uint32_t total_hybrid_args_size_ = 0;
int64_t total_fixed_addr_size_ = 0;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;
@@ -993,9 +970,6 @@ class DavinciModel {
// key: input_index: input is merge node; value: each gear info and each output shape
std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
std::vector<std::vector<int64_t>> all_gears_info_;

std::multimap<uint32_t, uint32_t> op_id_map_;
std::vector<ProfileInfo> profile_list_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

+ 234
- 40
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -18,6 +18,7 @@

#include <string>

#include "aicpu/aicpu_schedule/aicpu_op_type_list.h"
#include "common/dump/dump_manager.h"
#include "common/l2_cache_optimize.h"
#include "common/profiling/profiling_manager.h"
@@ -30,6 +31,7 @@
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "model/ge_root_model.h"
#include "graph/common/local_context.h"
#include "graph/utils/attr_utils.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "hybrid/hybrid_davinci_model.h"

@@ -52,6 +54,7 @@ const char *const kDeleteCustOp = "deleteCustOp";
const int kTimeSpecNano = 1000000000;
const int kTimeSpecMiro = 1000000;
const int kSessionMaxBias = 100;
const int kOpNameMaxSize = 100;
struct CustAicpuSoBuf {
uint64_t kernelSoBuf;
uint32_t kernelSoBufLen;
@@ -89,7 +92,6 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) {
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto iter = model_aicpu_kernel_.find(model_key);
if (iter != model_aicpu_kernel_.end()) {
GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id);
@@ -177,7 +179,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
}

void ModelManager::DestroyAicpuSession(uint64_t session_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
auto it = sess_ids_.find(session_id);
if (it == sess_ids_.end()) {
GELOGI("The session: %lu not created.", session_id);
@@ -206,7 +208,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
}

ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
if (hybrid_davinci_model != hybrid_model_map_.end()) {
uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
@@ -216,8 +218,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {

auto it = model_map_.find(model_id);
if (it == model_map_.end()) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
return GE_EXEC_MODEL_ID_INVALID;
}
uint64_t session_id = it->second->GetSessionId();
DestroyAicpuSession(session_id);
@@ -226,7 +228,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {

ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id);
@@ -239,7 +241,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_
}

ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
@@ -251,7 +253,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i
}

ModelManager::~ModelManager() {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
model_map_.clear();
model_aicpu_kernel_.clear();
cust_aicpu_so_.clear();
@@ -359,18 +361,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge

void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
model_map_[id] = davinci_model;
}

void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
hybrid_model_map_[id] = hybrid_model;
}

Status ModelManager::DeleteModel(uint32_t id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);

auto it = model_map_.find(id);
auto hybrid_model_it = hybrid_model_map_.find(id);
@@ -385,22 +387,22 @@ Status ModelManager::DeleteModel(uint32_t id) {
} else if (hybrid_model_it != hybrid_model_map_.end()) {
(void)hybrid_model_map_.erase(hybrid_model_it);
} else {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return GE_EXEC_MODEL_ID_INVALID;
}

return SUCCESS;
}

std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);

auto it = model_map_.find(id);
return (it == model_map_.end()) ? nullptr : it->second;
}

std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);

auto it = hybrid_model_map_.find(id);
return (it == hybrid_model_map_.end()) ? nullptr : it->second;
@@ -903,7 +905,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);

davinci_model->SetModelDescVersion(new_model_desc);
@@ -971,9 +973,8 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
}

Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
auto davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetCurShape Failed, Invalid Model ID %u!", model_id);
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
davinci_model->GetCurShape(batch_info, dynamic_type);
return SUCCESS;
}
@@ -986,8 +987,7 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetModelAttr Failed, Invalid Model ID %u!", model_id);
GE_CHECK_NOTNULL(davinci_model);
davinci_model->GetModelAttr(dynamic_output_shape_info);
return SUCCESS;
}
@@ -997,8 +997,9 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!",
model_id);

return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
}

@@ -1013,14 +1014,18 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.", model_id);
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);

return davinci_model->GetAIPPInfo(index, aipp_info);
}

Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.", model_id);
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);

return davinci_model->GetAippType(index, type, aipp_index);
}

@@ -1053,15 +1058,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
mmTimespec timespec = mmGetTickCount();

ModelHelper model_helper;
Status ret = model_helper.LoadRootModel(model);
if (model_helper.GetModelType()) {
bool is_shape_unknown = false;
GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown),
"CheckIsUnknownShape failed, model id:%u", model_id);
if (is_shape_unknown || GetContext().GetHostExecFlag()) {
return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener);
}
}
Status ret = model_helper.LoadModel(model);
if (ret != SUCCESS) {
GELOGE(ret, "load model failed.");
return ret;
@@ -1075,8 +1072,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
} catch (...) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise");
return INTERNAL_ERROR;
}
ret = davinci_model->Assign(ge_model);
if (ret != SUCCESS) {
@@ -1088,7 +1085,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
davinci_model->SetDeviceId(device_id);
@@ -1220,7 +1217,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"Invalid model id %u, check whether model has been loaded or not.", model_id);
"Invalid model id %u, check weather model has been loaded or not.", model_id);

if (davinci_model->NeedDestroyAicpuKernel()) {
GELOGI("Start to destroy specified aicpu kernel.");
@@ -1243,7 +1240,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
}

Status ModelManager::CreateAicpuSession(uint64_t session_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
auto it = sess_ids_.find(session_id);
// never been created by any model
if (it == sess_ids_.end()) {
@@ -1462,7 +1459,8 @@ void ModelManager::GenModelId(uint32_t *id) {
if (id == nullptr) {
return;
}
std::lock_guard<std::recursive_mutex> lock(map_mutex_);

std::lock_guard<std::mutex> lock(map_mutex_);
*id = ++max_model_id_;
}

@@ -1534,4 +1532,200 @@ Status ModelManager::EnableExceptionDump(const std::map<string, string> &options
return SUCCESS;
}

// Launches the aicpu "checkOpType" kernel to verify that every operator type in
// aicpu_optype_list (CPU kernels) and aicpu_tf_optype_list (TF kernels) is
// supported on the device.
//
// @param aicpu_optype_list     op types to validate against the CPU kernel set.
// @param aicpu_tf_optype_list  op types to validate against the TF kernel set.
// @return SUCCESS when all op types are supported (or there is nothing to check);
//         FAILED when the device reports unsupported op types; an RT error
//         status when a runtime call fails.
Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list,
                                              std::vector<std::string> &aicpu_tf_optype_list) {
  std::string kernel_name = "checkOpType";
  GELOGI("LaunchKernelCheckAicpuOpType in, kernel name %s", kernel_name.c_str());
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  std::vector<SysOpInfo> req_aicpu_op_info_list;
  std::vector<SysOpInfo> res_aicpu_op_info_list;
  std::vector<ReturnCode> res_ret_code_list;

  if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) {
    GELOGI("No need to check aicpu op type.");
    return SUCCESS;
  }

  vector<void *> allocated_mem;
  rtError_t status;
  rtStream_t stream = nullptr;
  void *args = nullptr;

  void *d_req_op_list = nullptr;
  void *d_res_op_list = nullptr;
  void *d_ret_code_list = nullptr;

  // Install the cleanup guard BEFORE the first allocation so that every exit
  // path below (including the early rtMalloc / stream-sync failure returns)
  // frees the device buffers and destroys the stream. Previously the guard was
  // only created at the tail, so early error returns leaked device memory.
  std::function<void()> callback = [&]() {
    for (auto mem : allocated_mem) {
      GE_CHK_RT(rtFree(mem));
    }
    if (stream != nullptr) {
      GE_CHK_RT(rtStreamDestroy(stream));
    }
  };
  GE_MAKE_GUARD(release, callback);

  size_t aicpu_op_nums = aicpu_optype_list.size();
  size_t tf_op_nums = aicpu_tf_optype_list.size();
  size_t op_nums = aicpu_op_nums + tf_op_nums;
  // malloc sysOpInfoList in SysOpCheckInfo
  status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(d_req_op_list);

  // malloc sysOpInfoList in SysOpCheckResp
  status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(d_res_op_list);

  // malloc returnCodeList in SysOpCheckResp
  status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(d_ret_code_list);

  // Stage each CPU-kernel op type name in device memory and describe it.
  for (const auto &op_type : aicpu_optype_list) {
    SysOpInfo op_info;
    // malloc op_type name in SysOpInfo
    void *d_op_type_name = nullptr;
    status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM);
    if (status != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
      return RT_ERROR_TO_GE_STATUS(status);
    }
    allocated_mem.push_back(d_op_type_name);
    GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE));
    op_info.opType = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
    op_info.opLen = op_type.length();
    op_info.kernelsType = CPU_KERNEL;
    req_aicpu_op_info_list.emplace_back(op_info);
  }

  // Stage each TF-kernel op type name in device memory and describe it.
  for (const auto &op_type : aicpu_tf_optype_list) {
    SysOpInfo op_info;
    // malloc op_type name in SysOpInfo
    void *d_op_type_name = nullptr;
    status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM);
    if (status != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
      return RT_ERROR_TO_GE_STATUS(status);
    }
    allocated_mem.push_back(d_op_type_name);
    GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE));
    op_info.opType = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
    op_info.opLen = op_type.size();
    op_info.kernelsType = TF_KERNEL;
    req_aicpu_op_info_list.emplace_back(op_info);
  }
  GELOGI("Check aicpu op all attr size: %zu, real attr size: %zu.", op_nums, req_aicpu_op_info_list.size());
  GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(),
                     sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE));

  SysOpCheckInfo op_check_info_req = { 0 };
  SysOpCheckResp op_check_info_res = { 0 };
  op_check_info_req.opListNum = op_nums;
  op_check_info_req.offSetLen = sizeof(SysOpCheckInfo);
  op_check_info_req.sysOpInfoList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_req_op_list));

  op_check_info_res.opListNum = 0;
  op_check_info_res.isWithoutJson = 0;
  op_check_info_res.returnCodeList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_ret_code_list));
  op_check_info_res.sysOpInfoList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_res_op_list));

  // Device args buffer layout: [SysOpCheckInfo][SysOpCheckResp].
  uint32_t args_size = sizeof(SysOpCheckInfo) + sizeof(SysOpCheckResp);
  status = rtMalloc(&args, args_size, RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(args);
  GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast<void *>(&op_check_info_req),
                     sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE));
  GE_CHK_RT(rtMemcpy(
      reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) +
                                                      op_check_info_req.offSetLen)),
      sizeof(SysOpCheckResp), reinterpret_cast<void *>(&op_check_info_res), sizeof(SysOpCheckResp),
      RT_MEMCPY_HOST_TO_DEVICE));
  GE_CHK_RT(rtStreamCreate(&stream, 0));
  GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream));

  status = rtStreamSynchronize(stream);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }

  // Read the response block back from the device (it sits right after the
  // request block inside the args buffer).
  SysOpCheckResp *d_op_check_info_res = reinterpret_cast<SysOpCheckResp *>(reinterpret_cast<void *>(
      static_cast<uintptr_t>(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen)));
  (void)memset_s(&op_check_info_res, sizeof(SysOpCheckResp), 0, sizeof(SysOpCheckResp));
  GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp),
                     RT_MEMCPY_DEVICE_TO_HOST));

  if (op_check_info_res.isWithoutJson) {
    GELOGI("No need to check aicpu in this scenario.");
    return SUCCESS;
  }
  uint64_t res_op_nums = op_check_info_res.opListNum;
  GELOGI("Check aicpu type, is without json: %d, res op num: %lu.", op_check_info_res.isWithoutJson, res_op_nums);
  if (res_op_nums != 0) {
    res_ret_code_list.clear();
    res_ret_code_list.resize(res_op_nums);
    res_aicpu_op_info_list.clear();
    res_aicpu_op_info_list.resize(res_op_nums);
    GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums,
                       reinterpret_cast<void *>(static_cast<uintptr_t>(op_check_info_res.returnCodeList)),
                       sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
    GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums,
                       reinterpret_cast<void *>(static_cast<uintptr_t>(op_check_info_res.sysOpInfoList)),
                       sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
    if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) {
      GELOGE(FAILED, "Number of retcode is not equal to number of op type.");
      return FAILED;
    }
    // Build a human-readable failure report listing every unsupported op.
    std::string fail_reason;
    for (uint64_t i = 0; i < res_op_nums; i++) {  // uint64_t index: matches res_op_nums' width
      ReturnCode ret_code = res_ret_code_list.at(i);
      SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i);
      GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType,
             aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
      std::vector<char> op_name(kOpNameMaxSize, '\0');
      // Clamp the copy length to the host buffer capacity (leaving a trailing
      // NUL) so an oversized device-reported opLen cannot overflow op_name.
      size_t copy_len = static_cast<size_t>(aicpu_info.opLen);
      if (copy_len >= static_cast<size_t>(kOpNameMaxSize)) {
        copy_len = static_cast<size_t>(kOpNameMaxSize) - 1;
      }
      GE_CHK_RT(rtMemcpy(op_name.data(), kOpNameMaxSize, reinterpret_cast<void *>(aicpu_info.opType),
                         copy_len, RT_MEMCPY_DEVICE_TO_HOST));
      std::string kernel_type =
          (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL";
      string op_name_str(op_name.data());
      fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type +
                     " ret code:" + std::to_string(static_cast<int>(ret_code)) +
                     "<0: op_type, 1: format, 2: datatype> \n";
    }
    fail_reason += "not support.";
    GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str());
    return FAILED;
  }

  GELOGI("Cpu kernel launch check optype task success.");
  return SUCCESS;
}

// Validates the model's aicpu operator types on the device when the model
// carries the "needCheckCpu" / "needCheckTf" attribute lists; returns SUCCESS
// immediately when neither attribute is present.
Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) {
  std::vector<std::string> cpu_op_types;
  std::vector<std::string> tf_op_types;
  const bool has_cpu_check_attr = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", cpu_op_types);
  const bool has_tf_check_attr = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", tf_op_types);
  if (has_cpu_check_attr || has_tf_check_attr) {
    GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(cpu_op_types, tf_op_types),
                      "Launch check aicpu op type failed.");
    return SUCCESS;
  }
  GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str());
  return SUCCESS;
}

} // namespace ge

+ 7
- 1
ge/graph/load/new_model_manager/model_manager.h View File

@@ -295,6 +295,11 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

ge::Status LaunchKernelCustAicpuSo(const string &kernel_name);

ge::Status LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list,
std::vector<std::string> &aicpu_tf_optype_list);

ge::Status CheckAicpuOpList(GeModelPtr ge_model);

ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);

ge::Status GenSessionId(uint64_t &session_id);
@@ -353,7 +358,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_;
std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_;
uint32_t max_model_id_;
std::recursive_mutex map_mutex_;
std::mutex map_mutex_;
std::mutex sess_ids_mutex_;
std::mutex session_id_create_mutex_;
static::std::mutex exeception_infos_mutex_;
uint64_t session_id_bias_;


+ 74
- 58
ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -90,18 +90,20 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names;
fusion_op_info_.op_name = op_desc_->GetName());

string session_graph_model_id;
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
// get bin_file_key
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
// new aicpu kernel(rtCpuKernelLaunch) no need to check function
if (kernel_type_ == ccKernelType::CCE_AI_CORE) {
rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s",
kernel_def.stub_func().c_str());
return RT_ERROR_TO_GE_STATUS(rt_ret););
} else if (kernel_type_ == ccKernelType::TE) {
// get bin_file_key
string session_graph_model_id;
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key);
return RT_ERROR_TO_GE_STATUS(rt_ret););
@@ -370,11 +372,7 @@ Status KernelTaskInfo::SuperKernelDistribute() {
Status KernelTaskInfo::Distribute() {
GELOGD("KernelTaskInfo Distribute Start.");
if (davinci_model_->IsKnownNode()) {
if (kernel_type_ == ccKernelType::TE) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
}
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
}
rtError_t rt_ret = RT_ERROR_NONE;
@@ -430,31 +428,36 @@ Status KernelTaskInfo::UpdateArgs() {
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);

vector<void *> io_addrs;
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
if (kernel_type_ == ccKernelType::TE) {
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
davinci_model_->SetTotalIOAddrs(io_addrs);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
auto addrs_size = sizeof(uint64_t) * io_addrs.size();
errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}
// copy args to device
rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
} else {
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > output_data_addrs.size()) {
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
output_data_addrs.size(), output_index);
return FAILED;
}
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
for (size_t i = 0; i < output_data_addrs.size(); ++i) {
if (i == output_index) {
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
io_addrs.emplace_back(fixed_addr);
continue;
}
io_addrs.emplace_back(output_data_addrs[i]);
}
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
}
}

davinci_model_->SetTotalIOAddrs(io_addrs);
GELOGI("KernelTaskInfo::UpdateArgs success.");
return SUCCESS;
}
@@ -530,18 +533,33 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
}

Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
const domi::KernelDef &kernel_def = task_def.kernel();
domi::KernelDef kernel_def = task_def.kernel();
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);

// get opcontext stored in model
const domi::KernelContext &context = kernel_def.context();
kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type_ == ccKernelType::TE) {
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
davinci_model->SetHybridArgsSize(kernel_def.args_size());
GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
// get opdesc
op_desc_ = davinci_model->GetOpByIndex(context.op_index());
GE_CHECK_NOTNULL(op_desc_);
// alloc fixed addr
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > op_desc_->GetOutputsSize()) {
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(),
output_index);
return FAILED;
}
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
auto tensor_desc = op_desc_->GetOutputDesc(output_index);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
fixed_addr_offset_);
}
return SUCCESS;
}
@@ -870,7 +888,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}

// copy args to new host memory
args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]);
GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_)
errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
if (sec_ret != EOK) {
@@ -878,23 +896,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return FAILED;
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

if (davinci_model_->IsKnownNode()) {
return SUCCESS;
}
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();

vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);
vector<void *> io_addrs;
@@ -911,6 +914,19 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

// malloc device memory for args
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {


+ 0
- 2
ge/graph/load/new_model_manager/task_info/kernel_task_info.h View File

@@ -159,9 +159,7 @@ class KernelTaskInfo : public TaskInfo {
OpDescPtr op_desc_;
DavinciModel *davinci_model_;
uint32_t args_offset_ = 0;
uint32_t hybrid_args_offset_ = 0;
int64_t fixed_addr_offset_ = 0;
std::unique_ptr<uint8_t[]> args_addr = nullptr;
bool call_save_dump_ = false;

// aicpu ext_info device mem


+ 28
- 2
ge/graph/manager/graph_manager.cc View File

@@ -23,15 +23,25 @@
#include <sstream>
#include <string>
#include <thread>
#include <utility>

#include "common/ge/ge_util.h"
#include "common/math/math_util.h"
#include "common/thread_pool.h"
#include "common/util.h"
#include "external/graph/types.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/common/ge_types.h"
#include "analyzer/analyzer.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/common/local_context.h"
#include "graph/common/transop_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/ge_global_options.h"
#include "graph/ge_local_context.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/util/rt_context_util.h"
#include "graph/partition/dynamic_shape_partition.h"
#include "graph/passes/enter_pass.h"
@@ -51,6 +61,8 @@
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/input_output_connection_identify_pass.h"
#include "graph/passes/iterator_op_pass.h"
@@ -65,6 +77,8 @@
#include "graph/passes/permute_pass.h"
#include "graph/passes/prune_pass.h"
#include "graph/passes/ref_identity_delete_op_pass.h"
#include "graph/passes/replace_with_empty_const_pass.h"
#include "graph/passes/remove_same_const_pass.h"
#include "graph/passes/reshape_recovery_pass.h"
#include "graph/passes/reshape_remove_pass.h"
#include "graph/passes/same_transdata_breadth_fusion_pass.h"
@@ -74,11 +88,14 @@
#include "graph/passes/switch_logic_remove_pass.h"
#include "graph/passes/switch_to_stream_switch_pass.h"
#include "graph/passes/transop_breadth_fusion_pass.h"
#include "graph/passes/transop_depth_fusion_pass.h"
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h"
#include "graph/passes/transop_symmetry_elimination_pass.h"
#include "graph/passes/transop_without_reshape_fusion_pass.h"
#include "graph/passes/transpose_transdata_pass.h"
#include "graph/passes/useless_control_out_remove_pass.h"
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/variable_ref_useless_control_out_delete_pass.h"
#include "graph/passes/end_of_sequence_add_control_pass.h"
@@ -89,6 +106,9 @@
#include "graph/passes/memcpy_addr_async_pass.h"
#include "graph/build/label_allocator.h"
#include "graph/utils/tensor_adapter.h"
#include "graph/utils/type_utils.h"
#include "graph/graph_util.h"
#include "graph/types.h"
#include "inc/pass_manager.h"
#include "init/gelib.h"
#include "ir_build/atc_ir_common.h"
@@ -532,7 +552,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy);
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
compute_graph->GetGraphID(), subgraph, compute_graph->GetName(), session_id,
compute_graph->GetGraphID(), subgraph,
compute_graph->GetName(), session_id,
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@@ -547,7 +568,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy);
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
compute_graph->GetGraphID(), subgraph, compute_graph->GetName(), session_id,
compute_graph->GetGraphID(), subgraph,
compute_graph->GetName(), session_id,
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@@ -2130,6 +2152,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
TransposeTransDataPass transpose_transdata_pass;
TransOpSymmetryEliminationPass symmetry_elimination_pass;
DimensionComputePass dimension_compute_pass;
UselessControlOutRemovePass useless_control_out_remove_pass;
names_to_passes.emplace_back("EnterPass", &enter_pass);
names_to_passes.emplace_back("AddNPass", &addn_pass);
names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination);
@@ -2143,6 +2166,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
names_to_passes.emplace_back("DimensionComputePass", &dimension_compute_pass);
names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass);
names_to_passes.emplace_back("DimensionAdjustPass", &dimension_adjust_pass);
names_to_passes.emplace_back("UselessControlOutRemovePass", &useless_control_out_remove_pass);
GE_TIMESTAMP_START(names_to_passes);
ret = GEPass(compute_graph).Run(names_to_passes);
GE_TIMESTAMP_END(names_to_passes, "GraphManager::OptimizeStage1_2");
@@ -2183,6 +2207,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass",
new (std::nothrow) VariableRefUselessControlOutDeletePass))
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ReshapeRecoveryPass", new (std::nothrow) ReshapeRecoveryPass))
GE_CHK_STATUS_RET(
graph_pass.AddPass("OptimizeStage1_3::RemoveSameConstPass", new (std::nothrow) RemoveSameConstPass))
if (options_.train_graph_flag) {
// Priority: The GlobalStepInsertPass should work before graph partitioner.
// Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory


+ 3
- 0
ge/graph/manager/graph_mem_allocator.cc View File

@@ -16,7 +16,10 @@

#include "graph/manager/graph_mem_allocator.h"

#include <set>
#include <string>

#include "framework/common/debug/ge_log.h"
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"



+ 1
- 2
ge/graph/optimize/graph_optimize.cc View File

@@ -76,8 +76,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) {
}
}

Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph,
const std::string &engine_name) {
Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std::string &engine_name) {
if (compute_graph == nullptr) {
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeSubGraph]: compute_graph is nullptr.");
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL;


+ 5
- 23
ge/graph/passes/attach_stream_label_pass.cc View File

@@ -18,6 +18,8 @@
#include "ge/ge_api_types.h"
#include "graph/common/omg_util.h"

using std::string;

namespace ge {
Status AttachStreamLabelPass::Run(ComputeGraphPtr graph) {
GELOGD("AttachStreamLabelPass Enter.");
@@ -187,21 +189,10 @@ Status AttachStreamLabelPass::UpdateEnterNode() {
}

std::stack<NodePtr> enter_nodes;
std::string batch_label;
for (const auto &enter_node : pair.second) {
enter_nodes.emplace(enter_node);
std::string tmp_label;
(void)AttrUtils::GetStr(enter_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label);
if (!tmp_label.empty()) {
if (batch_label.empty()) {
batch_label = tmp_label;
} else if (batch_label != tmp_label) {
GELOGE(FAILED, "multi batch_label exist, label1=%s, label2=%s.", batch_label.c_str(), tmp_label.c_str());
return FAILED;
}
}
}
if (UpdateLoopBranch(enter_nodes, active_label_list[0], batch_label) != SUCCESS) {
if (UpdateLoopBranch(enter_nodes, active_label_list[0]) != SUCCESS) {
GELOGE(FAILED, "Update stream_label for loop_branch failed.");
return FAILED;
}
@@ -226,10 +217,7 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector<NodePtr> &enter_no
}

for (const auto &enter_node : enter_nodes) {
GE_CHECK_NOTNULL(enter_node->GetOpDesc());
if (enter_node->GetOpDesc()->HasAttr(ATTR_NAME_STREAM_LABEL)) {
GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed.");
}
GE_CHK_STATUS_RET(SetStreamLabel(enter_node, stream_label), "Set stream label failed.");
}
return SUCCESS;
}
@@ -241,8 +229,7 @@ Status AttachStreamLabelPass::SetEnterLabel(const std::vector<NodePtr> &enter_no
/// @param [in] batch_label
/// @return Status
///
Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack<NodePtr> &enter_nodes, const std::string &stream_label,
const std::string &batch_label) {
Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack<NodePtr> &enter_nodes, const string &stream_label) {
std::stack<NodePtr> nodes(enter_nodes);
NodePtr cur_node = nullptr;
while (!nodes.empty()) {
@@ -251,11 +238,6 @@ Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack<NodePtr> &enter_
for (const NodePtr &out_node : cur_node->GetOutAllNodes()) {
OpDescPtr out_desc = out_node->GetOpDesc();
GE_CHECK_NOTNULL(out_desc);
std::string tmp_label;
(void)AttrUtils::GetStr(out_desc, ATTR_NAME_BATCH_LABEL, tmp_label);
if (!tmp_label.empty() && (tmp_label != batch_label)) {
continue;
}
std::string out_type = out_desc->GetType();
bool need_skip =
out_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (out_type == ENTER) || (out_type == REFENTER) ||


+ 1
- 3
ge/graph/passes/attach_stream_label_pass.h View File

@@ -58,11 +58,9 @@ class AttachStreamLabelPass : public GraphPass {
/// @brief Update stream_label for loop_branch
/// @param [in] enter_nodes
/// @param [in] stream_label
/// @param [in] batch_label
/// @return Status
///
static Status UpdateLoopBranch(const std::stack<NodePtr> &enter_nodes, const std::string &stream_label,
const std::string &batch_label);
static Status UpdateLoopBranch(const std::stack<NodePtr> &enter_nodes, const std::string &stream_label);

///
/// @brief Update stream_label start with enter nodes


+ 1
- 1
ge/graph/passes/base_pass.cc View File

@@ -96,7 +96,7 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder
node->GetName().c_str(), node->GetType().c_str());
continue;
}
if (node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str());
nodes_re_pass.insert(node_to_re_pass);
} else {


+ 64
- 0
ge/graph/passes/dimension_adjust_pass.cc View File

@@ -80,7 +80,71 @@ Status DimensionAdjustPass::Run(ge::NodePtr &node) {
}
}

ret = DealWithInNodes(node);
if (ret != SUCCESS) {
GELOGE(ret, "DealWithInNodes of %s failed.", node->GetName().c_str());
return ret;
}

std::vector<int> data_relink_io_map = {kDataInputIndex};
return IsolateAndDeleteNode(node, data_relink_io_map);
}

Status DimensionAdjustPass::DealWithInNodes(NodePtr &node) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
auto graph = node->GetOwnerComputeGraph();
auto in_data_anchors = node->GetAllInDataAnchors();
for (auto &in_data_anchor : in_data_anchors) {
if (in_data_anchor == nullptr) {
continue;
}
auto in_node_anchor = in_data_anchor->GetPeerOutAnchor();
if (in_node_anchor == nullptr) {
continue;
}
auto in_node = in_node_anchor->GetOwnerNode();
if (in_node->GetType() == SWITCHN) {
auto identity_name = node->GetName() + "_ctrl_identity_" + std::to_string(in_data_anchor->GetIdx());
auto identity =
AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph);
GE_CHECK_NOTNULL(identity);
GELOGI("Create new identity node[%s] after node %s[type: %s] success.", identity->GetName().c_str(),
in_node->GetName().c_str(), in_node->GetType().c_str());
GE_CHK_STATUS_RET(GraphUtils::AddEdge(in_node_anchor, identity->GetInDataAnchor(0)))
GE_CHECK_NOTNULL(identity->GetOutControlAnchor());
if (identity->GetOutControlAnchor()->IsLinkedWith(node->GetInControlAnchor())) {
continue;
}
GE_CHK_STATUS_RET(GraphUtils::AddEdge(identity->GetOutControlAnchor(), node->GetInControlAnchor()))
}
}

return SUCCESS;
}

NodePtr DimensionAdjustPass::AddIdentityNodeToGraph(const string &name, const GeTensorDesc &tensor,
ComputeGraphPtr &graph) {
if (graph == nullptr) {
GELOGE(INTERNAL_ERROR, "Comput graph ptr is null in creating identity node.");
return nullptr;
}

OpDescPtr desc = MakeShared<OpDesc>("", "");
if (desc == nullptr) {
GELOGE(MEMALLOC_FAILED, "Failed to create op desc.");
return nullptr;
}

desc->SetName(name);
desc->SetType(IDENTITY);
auto ret = desc->AddInputDesc(tensor);
auto ret2 = desc->AddOutputDesc(tensor);
if ((ret != GRAPH_SUCCESS) || (ret2 != GRAPH_SUCCESS)) {
GELOGE(INTERNAL_ERROR, "Failed to add input/output desc in creating identity.");
return nullptr;
}

return graph->AddNodeFront(desc);
}
} // namespace ge

+ 4
- 0
ge/graph/passes/dimension_adjust_pass.h View File

@@ -34,6 +34,10 @@ namespace ge {
class DimensionAdjustPass : public BaseNodePass {
public:
Status Run(ge::NodePtr &node) override;

private:
Status DealWithInNodes(ge::NodePtr &node);
NodePtr AddIdentityNodeToGraph(const std::string &name, const GeTensorDesc &tensor, ComputeGraphPtr &graph);
};
} // namespace ge



+ 4
- 8
ge/graph/passes/dynamic_single_op_reset_shape_pass.cc View File

@@ -113,16 +113,13 @@ Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) {
GE_CHECK_NOTNULL(op_desc);
std::vector<int64_t> dynamic_shape_dims = {kDynamicShapeDim};
GeShape dynamic_shape(dynamic_shape_dims);
bool reset_shape_flag = false;
if (ResetInputTensorShape(op_desc, dynamic_shape, reset_shape_flag) == SUCCESS && reset_shape_flag) {
(void)ResetOutputTensorShape(op_desc, dynamic_shape);
}
(void)ResetInputTensorShape(op_desc, dynamic_shape);
(void)ResetOutputTensorShape(op_desc, dynamic_shape);
return SUCCESS;
}

Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape,
bool &reset_shape_flag) {
reset_shape_flag = false;
Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc,
const GeShape &dynamic_shape) {
GE_CHECK_NOTNULL(op_desc);
for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) {
auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
@@ -136,7 +133,6 @@ Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc,
if (CheckIfConstInput(input_desc)) {
continue;
}
reset_shape_flag = true;
input_desc->SetShape(dynamic_shape);
}
return SUCCESS;


+ 1
- 1
ge/graph/passes/dynamic_single_op_reset_shape_pass.h View File

@@ -27,7 +27,7 @@ class DynamicSingleOpResetShapePass : public GraphPass {

private:
Status ResetOpShape(OpDescPtr &op_desc);
Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, bool &reset_shape_flag);
Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape);
Status ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape);
Status CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu);
bool CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc);


+ 57
- 7
ge/graph/passes/enter_pass.cc View File

@@ -23,6 +23,7 @@

namespace {
const size_t kOutNodesNum = 1;
const size_t kInCtrlNodesNum = 1;
}

namespace ge {
@@ -55,6 +56,7 @@ Status EnterPass::Run(NodePtr &node) {
if (out_ctrl_node == nullptr) {
continue;
}
GELOGI("Remove control edge from %s to %s.", node->GetName().c_str(), out_ctrl_node->GetName().c_str());
if (GraphUtils::RemoveEdge(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Remove Enter ctrl output fail, %s->%s", node->GetName().c_str(),
out_ctrl_node->GetName().c_str());
@@ -62,8 +64,12 @@ Status EnterPass::Run(NodePtr &node) {
}
}
} else {
if (OptimizeEnter(node, in_node) != SUCCESS) {
GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str());
if (OptimizeEnterWithOnlyDataOut(node, in_node) != SUCCESS) {
GELOGE(FAILED, "Optimize enter node[%s] with only out data node failed.", node->GetName().c_str());
return FAILED;
}
if (UnlinkCtrlEdgeBeforeConst(node) != SUCCESS) {
GELOGE(FAILED, "Unlink control edge before const of node[%s]'s out nodes failed.", node->GetName().c_str());
return FAILED;
}
}
@@ -72,7 +78,7 @@ Status EnterPass::Run(NodePtr &node) {
return SUCCESS;
}

Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) {
Status EnterPass::OptimizeEnterWithOnlyDataOut(NodePtr &node, NodePtr &in_node) {
if ((in_node->GetOutAllNodes().size() != kOutNodesNum) || !node->GetOutControlNodes().empty()) {
return SUCCESS;
}
@@ -83,17 +89,61 @@ Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) {
}

GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0));
GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0)));
GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0)))
const auto &out_data_anchor = node->GetOutDataAnchor(0);
GE_CHECK_NOTNULL(out_data_anchor);
for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor));
GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor));
GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor))
GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor))
}
GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node));
GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(node->GetOwnerComputeGraph(), node))
AddNodeDeleted(node);
AddRePassNodesWithInOut(in_node);

return SUCCESS;
}

Status EnterPass::UnlinkCtrlEdgeBeforeConst(NodePtr &node) {
auto out_ctrl_nodes = node->GetOutControlNodes();
if (out_ctrl_nodes.empty()) {
return SUCCESS;
}
auto out_ctrl_anchor = node->GetOutControlAnchor();
GE_CHECK_NOTNULL(out_ctrl_anchor);

for (auto &out_ctrl_node : out_ctrl_nodes) {
GE_CHECK_NOTNULL(out_ctrl_node);
if ((out_ctrl_node->GetType() != CONSTANT) && (out_ctrl_node->GetType() != CONSTANTOP)) {
continue;
}
auto in_ctrl_nodes = out_ctrl_node->GetInControlNodes();
if (in_ctrl_nodes.size() != kInCtrlNodesNum) {
continue;
}

// Skip when has merge out
bool has_merge_out = false;
auto out_nodes_of_const = out_ctrl_node->GetOutAllNodes();
for (const auto &out_node_of_const : out_nodes_of_const) {
GE_CHECK_NOTNULL(out_node_of_const);
if (out_node_of_const->GetType() == MERGE || out_node_of_const->GetType() == REFMERGE) {
has_merge_out = true;
break;
}
}
if (has_merge_out) {
continue;
}

GELOGI("Unlink control edge from %s to %s.", node->GetName().c_str(), out_ctrl_node->GetName().c_str());
GE_CHK_STATUS_RET(out_ctrl_anchor->Unlink(out_ctrl_node->GetInControlAnchor()))
for (auto &out_node_of_const : out_nodes_of_const) {
if (!out_ctrl_anchor->IsLinkedWith(out_node_of_const->GetInControlAnchor())) {
GELOGI("Link control edge from %s to %s.", node->GetName().c_str(), out_node_of_const->GetName().c_str());
GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(out_node_of_const->GetInControlAnchor()))
}
}
}
return SUCCESS;
}
} // namespace ge

+ 2
- 1
ge/graph/passes/enter_pass.h View File

@@ -25,7 +25,8 @@ class EnterPass : public BaseNodePass {
Status Run(NodePtr &node) override;

private:
Status OptimizeEnter(NodePtr &node, NodePtr &in_node);
Status OptimizeEnterWithOnlyDataOut(NodePtr &node, NodePtr &in_node);
Status UnlinkCtrlEdgeBeforeConst(NodePtr &node);
};
} // namespace ge
#endif // GE_GRAPH_PASSES_ENTER_PASS_H_

+ 1
- 4
ge/graph/passes/folding_pass.cc View File

@@ -173,10 +173,7 @@ Status FoldingPass::DealWithInNodes(NodePtr &node) {
continue;
}
auto in_node = in_node_anchor->GetOwnerNode();
if (in_node == nullptr) {
continue;
}
if ((in_node->GetType() == SWITCH) || (in_node->GetType() == REFSWITCH)) {
if ((in_node->GetType() == SWITCH) || (in_node->GetType() == REFSWITCH) || (in_node->GetType() == SWITCHN)) {
GELOGI("The in_node name is %s, and node type is %s.", in_node->GetName().c_str(), in_node->GetType().c_str());
auto ret = in_node_anchor->Unlink(in_data_anchor);
if (ret != SUCCESS) {


+ 0
- 10
ge/graph/passes/merge_to_stream_merge_pass.cc View File

@@ -89,16 +89,6 @@ Status MergeToStreamMergePass::ReplaceMergeNode(const ComputeGraphPtr &graph, co
GE_CHK_STATUS_RET(SetNextIteration(stream_merge, next_iteration_name), "Set next iteration failed");
}

if (merge_op_desc->HasAttr(ATTR_NAME_BATCH_LABEL)) {
string batch_label;
(void)AttrUtils::GetStr(merge_op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
if (!batch_label.empty()) {
auto stream_merge_desc = stream_merge->GetOpDesc();
GE_CHECK_NOTNULL(stream_merge_desc);
(void)AttrUtils::SetStr(stream_merge_desc, ATTR_NAME_BATCH_LABEL, batch_label);
}
}

return AddActiveNodes(graph, stream_merge);
}



+ 30
- 44
ge/graph/passes/multi_batch_clone_pass.cc View File

@@ -503,12 +503,24 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) {

///
/// @ingroup ge
/// @brief Set shape to Data node in branch.
/// @param [in] const NodePtr &data: data in branch.
/// @brief Update Data node in Subgraph.
/// @param [in] const NodePtr &data: data in Subgraph.
/// @param [in] size_t index: The batch index.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::UpdateShapeToData(const NodePtr &data, size_t index) {
Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index) {
int node_index = -1;
if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_INDEX, node_index)) {
GELOGE(FAILED, "Failed to get index from data[%s]", data->GetName().c_str());
return FAILED;
}

int parent_index = node_index + 1;
if (!AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED, "Failed to set parent index for node %s", data->GetName().c_str());
return FAILED;
}

auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
const auto &dims = data_shape.GetDims();
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
@@ -580,13 +592,15 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const
subgraph->SetParentGraph(graph);
graph->AddSubgraph(subgraph->GetName(), subgraph);
all_branch_output_[subgraph] = subgraph->FindFirstNodeMatchType(NETOUTPUT);
GE_CHK_STATUS_RET(UpdateSubgraphOutput(all_branch_output_[subgraph]),
"Update %s failed", all_branch_output_[subgraph]->GetName().c_str());

const string key_name = "branches" + std::to_string(i);
op_desc->AddSubgraphName(key_name);
op_desc->SetSubgraphInstanceName(i, subgraph->GetName());

for (const auto &data : input_nodes) {
GE_CHK_STATUS_RET(UpdateShapeToData(data, i), "Update %s failed", subgraph->GetName().c_str());
GE_CHK_STATUS_RET(UpdateSubgraphData(data, i), "Update %s failed", subgraph->GetName().c_str());
}
}

@@ -595,55 +609,27 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const
const auto &op_desc = n->GetOpDesc();
op_desc->SetName(n->GetName() + kMultiBatchNodePostfix + "0");
if (n->GetType() == DATA) {
GE_CHK_STATUS_RET(UpdateShapeToData(n, 0), "Update %s failed", branch->GetName().c_str());
GE_CHK_STATUS_RET(UpdateSubgraphData(n, 0), "Update %s failed", branch->GetName().c_str());
}
}

return PostProcSubgraph(graph);
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Assign parent index for branches.
/// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
/// @brief Update output_node in Subgraph.
/// @param [in] const NodePtr &output_node: output_node in Subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::PostProcSubgraph(const ComputeGraphPtr &graph) {
auto func_desc = case_node_->GetOpDesc();
domi::ParseSubgraphFuncV2 parse_func_v2 = nullptr;
auto post_func = domi::OpRegistry::Instance()->GetParseSubgraphPostFunc(func_desc->GetType());
if (post_func == nullptr) {
GELOGW("The subgraph post func for node %s type %s is null.", case_node_->GetName().c_str(),
case_node_->GetType().c_str());
if (domi::OpRegistry::Instance()->GetParseSubgraphPostFunc(func_desc->GetType(), parse_func_v2) != SUCCESS ||
parse_func_v2 == nullptr) {
GELOGW("The subgraph new post func v2 for node %s type %s is null", case_node_->GetName().c_str(),
case_node_->GetType().c_str());
return FAILED;
}
}

for (const auto &name : func_desc->GetSubgraphInstanceNames()) {
const auto &subgraph = graph->GetSubgraph(name);
if (subgraph == nullptr) {
GELOGE(FAILED, "Subgraph not found, name: %s", name.c_str());
return FAILED;
}

std::string subgraph_name;
GE_CHK_STATUS_RET(func_desc->GetSubgraphNameByInstanceName(subgraph->GetName(), subgraph_name),
"Subgraph: %s get subgraph name failed.", subgraph->GetName().c_str());

auto graph = GraphUtils::CreateGraphFromComputeGraph(subgraph);
Status ret = FAILED;
if (post_func != nullptr) {
ret = post_func(subgraph_name, graph);
} else if (parse_func_v2 != nullptr) {
ret = parse_func_v2(subgraph_name.c_str(), graph);
}
if (ret != SUCCESS) {
GELOGE(FAILED, "Failed to post-process subgraph %s on node %s type %s", graph.GetName().c_str(),
case_node_->GetName().c_str(), case_node_->GetType().c_str());
Status MultiBatchClonePass::UpdateSubgraphOutput(const NodePtr &output_node) {
const auto &op_desc = output_node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
for (size_t index = 0; index < op_desc->GetInputsSize(); ++index) {
GeTensorDescPtr tensor = op_desc->MutableInputDesc(index);
GE_CHECK_NOTNULL(tensor);
if (!AttrUtils::SetInt(tensor, ATTR_NAME_PARENT_NODE_INDEX, index)) {
GELOGE(FAILED, "Failed to set parent index for node %s", output_node->GetName().c_str());
return FAILED;
}
}


+ 11
- 11
ge/graph/passes/multi_batch_clone_pass.h View File

@@ -105,12 +105,20 @@ class MultiBatchClonePass : public GraphPass {

///
/// @ingroup ge
/// @brief Set shape to Data node in branch.
/// @param [in] const NodePtr &data: data in branch.
/// @brief Update Data node in Subgraph.
/// @param [in] const NodePtr &data: data in Subgraph.
/// @param [in] size_t index: The batch index.
/// @return 0: SUCCESS / others: FAILED
///
Status UpdateShapeToData(const NodePtr &data, size_t index);
Status UpdateSubgraphData(const NodePtr &data, size_t index);

///
/// @ingroup ge
/// @brief Update output_node in Subgraph.
/// @param [in] const NodePtr &output_node: output_node in Subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status UpdateSubgraphOutput(const NodePtr &output_node);

///
/// @ingroup ge
@@ -133,14 +141,6 @@ class MultiBatchClonePass : public GraphPass {

///
/// @ingroup ge
/// @brief Assign parent index for branches.
/// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
/// @return 0: SUCCESS / others: FAILED
///
Status PostProcSubgraph(const ComputeGraphPtr &graph);

///
/// @ingroup ge
/// @brief Remove subgraph suspend output anchor.
/// @param [in] ComputeGraphPtr &graph: Parent compute graph.
/// @return 0: SUCCESS / others: FAILED


+ 89
- 173
ge/graph/passes/next_iteration_pass.cc View File

@@ -19,6 +19,8 @@
#include "common/ge/ge_util.h"
#include "graph/common/omg_util.h"

using std::string;

namespace ge {
Status NextIterationPass::Run(ComputeGraphPtr graph) {
GELOGD("NextIterationPass Enter");
@@ -35,10 +37,6 @@ Status NextIterationPass::Run(ComputeGraphPtr graph) {
return INTERNAL_ERROR;
}
}
if (GroupWithNoBatch(graph) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Group enter_nodes failed without batch_label attr.");
return INTERNAL_ERROR;
}

if (FindWhileGroups() != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Find while groups failed.");
@@ -73,75 +71,22 @@ Status NextIterationPass::GroupEnterNode(const NodePtr &enter_node) {
return FAILED;
}

std::string batch_label;
(void)ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label);
if (batch_label.empty()) {
auto frame_iter = frame_enter_map_.find(frame_name);
if (frame_iter == frame_enter_map_.end()) {
std::vector<NodePtr> enter_nodes;
enter_nodes.emplace_back(enter_node);
frame_enter_map_[frame_name] = enter_nodes;
} else {
frame_iter->second.emplace_back(enter_node);
}
return SUCCESS;
string batch_label;
if (ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label)) {
frame_name += batch_label;
}

auto group_iter = loop_group_map_.find(frame_name);
if (group_iter == loop_group_map_.end()) {
auto iter = loop_group_map_.find(frame_name);
if (iter == loop_group_map_.end()) {
LoopCondGroupPtr loop_group = MakeShared<LoopCondGroup>();
if (loop_group == nullptr) {
GELOGE(FAILED, "MakeShared for LoopCondGroup failed.");
return FAILED;
}
loop_group->enter_nodes.emplace_back(enter_node);
loop_group_map_[frame_name][batch_label] = loop_group;
loop_group_map_[frame_name] = loop_group;
} else {
auto batch_iter = group_iter->second.find(batch_label);
if (batch_iter == group_iter->second.end()) {
LoopCondGroupPtr loop_group = MakeShared<LoopCondGroup>();
if (loop_group == nullptr) {
GELOGE(FAILED, "MakeShared for LoopCondGroup failed.");
return FAILED;
}
loop_group->enter_nodes.emplace_back(enter_node);
group_iter->second[batch_label] = loop_group;
} else {
batch_iter->second->enter_nodes.emplace_back(enter_node);
}
}

return SUCCESS;
}

///
/// @brief Group Enter nodes without batch_label attr
/// @param [in] compute_graph
/// @return Status
///
Status NextIterationPass::GroupWithNoBatch(const ComputeGraphPtr &graph) {
if (frame_enter_map_.empty()) {
GELOGI("All enter nodes in graph %s has batch_label attr.", graph->GetName().c_str());
return SUCCESS;
}
for (const auto &item : frame_enter_map_) {
const std::string &frame_name = item.first;
auto iter = loop_group_map_.find(frame_name);
if (iter == loop_group_map_.end()) {
LoopCondGroupPtr loop_group = MakeShared<LoopCondGroup>();
if (loop_group == nullptr) {
GELOGE(FAILED, "MakeShared for LoopCondGroup failed.");
return FAILED;
}
loop_group->enter_nodes = item.second;
loop_group_map_[frame_name][""] = loop_group;
} else {
for (auto &batch_item : iter->second) {
for (const auto &enter_node : item.second) {
batch_item.second->enter_nodes.emplace_back(enter_node);
}
}
}
iter->second->enter_nodes.emplace_back(enter_node);
}

return SUCCESS;
@@ -154,55 +99,39 @@ Status NextIterationPass::GroupWithNoBatch(const ComputeGraphPtr &graph) {
Status NextIterationPass::FindWhileGroups() {
for (const auto &loop_group_iter : loop_group_map_) {
const std::string &frame_name = loop_group_iter.first;
for (const auto &batch_iter : loop_group_iter.second) {
const std::string &batch_label = batch_iter.first;
for (const auto &enter_node : batch_iter.second->enter_nodes) {
for (const auto &out_node : enter_node->GetOutAllNodes()) {
GELOGI("Find while_group for enter_node %s, frame_name:%s, batch_label:%s.", enter_node->GetName().c_str(),
frame_name.c_str(), batch_label.c_str());
if ((out_node->GetType() != MERGE) && (out_node->GetType() != REFMERGE)) {
continue;
}
std::string tmp_label;
GE_CHECK_NOTNULL(out_node->GetOpDesc());
(void)AttrUtils::GetStr(out_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label);
bool need_skip = !(batch_label.empty() || tmp_label.empty() || (batch_label == tmp_label));
if (need_skip) {
continue;
}

NodePtr next_node = nullptr;
if (FindTargetNode(out_node, NEXTITERATION, true, batch_label, next_node) != SUCCESS) {
GELOGE(INTERNAL_ERROR,
"Get NextIteration node failed: inputs of Merge should be Enter/NextIteration, current_Merge=%s",
out_node->GetName().c_str());
return INTERNAL_ERROR;
}
batch_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node));

NodePtr switch_node = nullptr;
if (FindTargetNode(out_node, SWITCH, false, batch_label, switch_node) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Get Switch node failed: output of Merge should be Switch, current_Merge=%s",
out_node->GetName().c_str());
return INTERNAL_ERROR;
}
if (switch_node == nullptr) {
continue;
}

NodePtr loop_cond = nullptr;
if (FindTargetNode(switch_node, LOOPCOND, true, batch_label, loop_cond) != SUCCESS) {
GELOGE(INTERNAL_ERROR,
"Get LoopCond node failed: pred input of Switch should be LoopCond, current_Switch=%s",
switch_node->GetName().c_str());
return INTERNAL_ERROR;
}
if (batch_iter.second->loop_cond == nullptr) {
batch_iter.second->loop_cond = loop_cond;
} else if (batch_iter.second->loop_cond != loop_cond) {
GELOGE(FAILED, "Multi LoopCond nodes exist.");
return FAILED;
}
for (const auto &enter_node : loop_group_iter.second->enter_nodes) {
for (const auto &out_node : enter_node->GetOutAllNodes()) {
const string &type = out_node->GetType();
if ((type != MERGE) && (type != REFMERGE)) {
continue;
}

NodePtr next_node = nullptr;
if (FindTargetNode(out_node, NEXTITERATION, true, next_node) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Get NextIteration node failed, frame_name: %s", frame_name.c_str());
return INTERNAL_ERROR;
}
loop_group_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node));

NodePtr switch_node = nullptr;
if (FindTargetNode(out_node, SWITCH, false, switch_node) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Get Switch node failed, frame_name: %s.", frame_name.c_str());
return INTERNAL_ERROR;
}
if (switch_node == nullptr) {
continue;
}

NodePtr loop_cond = nullptr;
if (FindTargetNode(switch_node, LOOPCOND, true, loop_cond) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Get LoopCond node failed, frame_name: %s.", frame_name.c_str());
return INTERNAL_ERROR;
}
if (loop_group_iter.second->loop_cond == nullptr) {
loop_group_iter.second->loop_cond = loop_cond;
} else if (loop_group_iter.second->loop_cond != loop_cond) {
GELOGE(FAILED, "Multi LoopCond nodes exist, frame_name: %s.", frame_name.c_str());
return FAILED;
}
}
}
@@ -223,18 +152,16 @@ bool NextIterationPass::VerifyWhileGroup() {
GELOGE(INTERNAL_ERROR, "Verify while group failed, frame_name is empty.");
return false;
}
for (const auto &batch_iter : loop_group_iter.second) {
if (batch_iter.second->loop_cond == nullptr) {
GELOGE(INTERNAL_ERROR, "Verify while group failed, LoopCond is null, frame_name: %s.", frame_name.c_str());
return false;
}
if (loop_group_iter.second->loop_cond == nullptr) {
GELOGE(INTERNAL_ERROR, "Verify while group failed, LoopCond is null, frame_name: %s.", frame_name.c_str());
return false;
}

for (const auto &pair_iter : batch_iter.second->merge_next_pairs) {
if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) {
GELOGE(INTERNAL_ERROR, "Verify while group failed, merge_node/next_node is null, frame_name: %s.",
frame_name.c_str());
return false;
}
for (const auto &pair_iter : loop_group_iter.second->merge_next_pairs) {
if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) {
GELOGE(INTERNAL_ERROR, "Verify while group failed, merge_node/next_node is null, frame_name: %s.",
frame_name.c_str());
return false;
}
}
}
@@ -249,56 +176,53 @@ bool NextIterationPass::VerifyWhileGroup() {
///
Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) {
for (const auto &loop_cond_iter : loop_group_map_) {
for (const auto &batch_iter : loop_cond_iter.second) {
const std::string &cond_name = batch_iter.second->loop_cond->GetName();
GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str());

// Create Active node, Enter->Active->Merge, NextIteration->Active->Merge
NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE);
NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE);
if ((enter_active == nullptr) || (next_active == nullptr)) {
GELOGE(INTERNAL_ERROR, "Create active node failed, cond_name: %s.", cond_name.c_str());
const std::string &cond_name = loop_cond_iter.second->loop_cond->GetName();
GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str());

// Create Active node, Enter->Active->Merge, NextIteration->Active->Merge
NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE);
NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE);
if ((enter_active == nullptr) || (next_active == nullptr)) {
GELOGE(INTERNAL_ERROR, "Create active node failed, cond_name: %s.", cond_name.c_str());
return INTERNAL_ERROR;
}

for (const auto &enter_node : loop_cond_iter.second->enter_nodes) {
// Enter --> Active
if (GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), enter_active->GetInControlAnchor()) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Add control edge from %s to %s failed.", enter_node->GetName().c_str(),
enter_active->GetName().c_str());
return INTERNAL_ERROR;
}
}

for (const auto &enter_node : batch_iter.second->enter_nodes) {
// Enter --> Active
if (GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), enter_active->GetInControlAnchor()) !=
GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Add control edge failed.");
return INTERNAL_ERROR;
}
for (const auto &pair : loop_cond_iter.second->merge_next_pairs) {
NodePtr merge_node = pair.first;
NodePtr next_node = pair.second;
// Active --> Merge
if (GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Add control edge failed.");
return INTERNAL_ERROR;
}

for (const auto &pair : batch_iter.second->merge_next_pairs) {
NodePtr merge_node = pair.first;
NodePtr next_node = pair.second;
// Active --> Merge
if (GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) !=
GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Add control edge failed.");
return INTERNAL_ERROR;
}

// NextIteration --> Active
if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Add control edge failed.");
return INTERNAL_ERROR;
}

// break link between NextIteration and Merge
if (BreakNextIteration(next_node, merge_node) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Break NextIteration failed");
return INTERNAL_ERROR;
}
// NextIteration --> Active
if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Add control edge failed.");
return INTERNAL_ERROR;
}

if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) ||
(SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) {
GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed.");
// break link between NextIteration and Merge
if (BreakNextIteration(next_node, merge_node) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Break NextIteration failed");
return INTERNAL_ERROR;
}
}

if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) ||
(SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) {
GELOGE(INTERNAL_ERROR, "Set attr ACTIVE_LABEL_LIST failed.");
return INTERNAL_ERROR;
}
}

return SUCCESS;
@@ -365,12 +289,11 @@ Status NextIterationPass::BreakNextIteration(const NodePtr &next_node, NodePtr &
/// @param [in] node
/// @param [in] target_type
/// @param [in] is_input
/// @param [in] batch_label
/// @param [out] target_node
/// @return Status
///
Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input,
const std::string &batch_label, NodePtr &target_node) {
NodePtr &target_node) {
if (node == nullptr) {
GELOGE(PARAM_INVALID, "node is null.");
return PARAM_INVALID;
@@ -387,12 +310,6 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string
}

for (const auto &tmp_node : nodes) {
std::string tmp_label;
(void)AttrUtils::GetStr(tmp_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, tmp_label);
bool need_skip = !(batch_label.empty() || tmp_label.empty() || (batch_label == tmp_label));
if (need_skip) {
continue;
}
const std::string type = tmp_node->GetType();
if ((target_type == LOOPCOND) && (type == target_type)) {
target_node = tmp_node;
@@ -415,7 +332,6 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string
/// @return SUCCESS
///
Status NextIterationPass::ClearStatus() {
frame_enter_map_.clear();
loop_group_map_.clear();
return SUCCESS;
}


+ 3
- 13
ge/graph/passes/next_iteration_pass.h View File

@@ -47,13 +47,6 @@ class NextIterationPass : public GraphPass {
Status GroupEnterNode(const NodePtr &enter_node);

///
/// @brief Group Enter nodes without batch_label attr
/// @param [in] compute_graph
/// @return Status
///
Status GroupWithNoBatch(const ComputeGraphPtr &graph);

///
/// @brief Find while groups
/// @return Status
///
@@ -97,13 +90,10 @@ class NextIterationPass : public GraphPass {
/// @param [out] target_node
/// @return Status
///
Status FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input,
const std::string &batch_label, NodePtr &target_node);
Status FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input, NodePtr &target_node);

// map<frame_name, vector<enter_node>>
std::unordered_map<std::string, std::vector<NodePtr>> frame_enter_map_;
// map<frame_name, map<batch_label, LoopCondGroup>>
std::unordered_map<std::string, std::unordered_map<std::string, LoopCondGroupPtr>> loop_group_map_;
// map<frame_name, LoopCondGroup>
std::unordered_map<std::string, LoopCondGroupPtr> loop_group_map_;
};
} // namespace ge
#endif // GE_GRAPH_PASSES_NEXT_ITERATION_PASS_H_

+ 106
- 0
ge/graph/passes/remove_same_const_pass.cc View File

@@ -0,0 +1,106 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "remove_same_const_pass.h"

#include <sstream>
#include <string>
#include <set>

#include "common/base64.h"
#include "ge_local_engine/engine/host_cpu_engine.h"
#include "graph/utils/node_utils.h"

namespace ge {
namespace {
// Build the common-subexpression-elimination key for a const node.
// The key combines the node type, the sorted names of its control inputs
// and the serialized attributes, so two consts with an identical key are
// candidates to be fused into one.
std::string GetCseKey(const NodePtr &node) {
  // Collect control-input names through a std::set so the key does not
  // depend on anchor ordering.
  std::set<std::string> sorted_ctrl_names;
  for (const auto &in_ctrl_node : node->GetInControlNodes()) {
    sorted_ctrl_names.insert(in_ctrl_node->GetName());
  }

  std::stringstream key_stream;
  key_stream << node->GetType() << "control-inputs-";
  for (const auto &ctrl_name : sorted_ctrl_names) {
    key_stream << ctrl_name << "-";
  }
  key_stream << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc());
  return key_stream.str();
}

// Returns true when the node is one of the const-like types handled by this pass.
bool IsConstType(const NodePtr &node) {
  const std::string node_type = node->GetType();
  return (node_type == CONSTANT) || (node_type == CONSTANTOP);
}
} // namespace
///
/// @brief Fuse duplicated const nodes: consts sharing the same CSE key
///        (type + sorted control inputs + attributes) are replaced by one
///        representative node, and the duplicates are removed from the graph.
/// @param [in] graph: compute graph to process.
/// @return SUCCESS / INTERNAL_ERROR
///
Status RemoveSameConstPass::Run(ComputeGraphPtr graph) {
  GELOGD("Begin to run RemoveSameConstPass on the graph");
  GE_CHECK_NOTNULL(graph);
  // Maps a CSE key to the first const node seen with that key; later nodes
  // with the same key are fused into this representative.
  std::map<std::string, NodePtr> keys_to_node;
  for (const auto &node : graph->GetDirectNode()) {
    GE_CHECK_NOTNULL(node);
    if (!IsConstType(node)) {
      continue;
    }
    // Unknown-shape consts are skipped: best-effort, failure to query the
    // status only disqualifies the node from fusion.
    bool is_unknown = false;
    auto ret = NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown);
    if (ret != GRAPH_SUCCESS) {
      GELOGW("Get node unknown status failed, node name:%s, type:%s.",
             node->GetName().c_str(), node->GetType().c_str());
      continue;
    }
    if (is_unknown) {
      GELOGI("Current node %s, type %s is unknown shape which should be skip.",
             node->GetName().c_str(), node->GetType().c_str());
      continue;
    }
    auto key = GetCseKey(node);
    GELOGD("The const node %s cse key %s", node->GetName().c_str(), ge::base64::EncodeToBase64(key).c_str());
    auto iter = keys_to_node.find(key);
    if (iter == keys_to_node.end()) {
      // First node with this key becomes the representative.
      keys_to_node[key] = node;
      continue;
    }

    // Defensive check: identical keys should imply identical output counts.
    if (node->GetAllOutDataAnchorsSize() != iter->second->GetAllOutDataAnchorsSize()) {
      GELOGW("The const node %s and %s have the same CSE key, but different output anchor count, skip to fusion them",
             iter->second->GetName().c_str(), node->GetName().c_str());
      continue;
    }

    // Identity mapping: output i of the duplicate is rewired to output i of
    // the representative.
    std::vector<int> output_map(node->GetAllOutDataAnchorsSize());
    for (size_t i = 0; i < node->GetAllOutDataAnchorsSize(); ++i) {
      output_map[i] = static_cast<int>(i);  // explicit narrowing, values are small anchor indices
    }

    ret = GraphUtils::ReplaceNodeAnchors(iter->second, node, {}, output_map);
    if (ret != GRAPH_SUCCESS) {
      // Fixed: the original format string had two %s but three arguments;
      // the error code now has its own specifier.
      GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s, ret %u", node->GetName().c_str(),
             iter->second->GetName().c_str(), ret);
      return INTERNAL_ERROR;
    }

    NodeUtils::UnlinkAll(*node);

    ret = GraphUtils::RemoveNodeWithoutRelink(graph, node);
    if (ret != GRAPH_SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Failed to remove node %s from graph", node->GetName().c_str());
      return INTERNAL_ERROR;
    }

    GELOGI("Remove const node %s by RemoveSameConstPass, replace it with node %s", node->GetName().c_str(),
           iter->second->GetName().c_str());
  }
  return SUCCESS;
}
} // namespace ge

+ 28
- 0
ge/graph/passes/remove_same_const_pass.h View File

@@ -0,0 +1,28 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GE_GRAPH_PASSES_REMOVE_SAME_CONST_PASS_H_
#define GE_GRAPH_PASSES_REMOVE_SAME_CONST_PASS_H_

#include "graph/types.h"
#include "inc/graph_pass.h"

namespace ge {
// Graph pass that removes duplicated const nodes: consts with an identical
// CSE key (type, control inputs, attributes) are fused into a single
// representative node and the duplicates are deleted from the graph.
class RemoveSameConstPass : public GraphPass {
 public:
  Status Run(ge::ComputeGraphPtr graph) override ;
};
} // namespace ge
#endif //GE_GRAPH_PASSES_REMOVE_SAME_CONST_PASS_H_

+ 8
- 4
ge/graph/passes/switch_to_stream_switch_pass.cc View File

@@ -17,8 +17,13 @@
#include "graph/passes/switch_to_stream_switch_pass.h"
#include <stack>
#include "common/ge/ge_util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/common/types.h"
#include "ge/ge_api_types.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/utils/type_utils.h"

@@ -120,13 +125,12 @@ void SwitchToStreamSwitchPass::MarkCycleDependence(
if (visited.count(tmp_node) > 0) {
continue;
}
GELOGD("MarkCycleDependence: tmp_node=%s.", tmp_node->GetName().c_str());
for (const NodePtr &out_node : tmp_node->GetOutAllNodes()) {
if (switch_nodes.find(out_node) == switch_nodes.end()) {
out_nodes.push(out_node);
continue;
}
GELOGD("MarkCycleDependence: tmp_node=%s, switch_node=%s.",
tmp_node->GetName().c_str(), out_node->GetName().c_str());
GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS,
GELOGW("set cyclic dependence attr failed."); return );
auto map_iter = switch_cyclic_map_.find(out_node);
@@ -598,7 +602,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons
///
Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node,
const std::set<NodePtr> &same_cond_switch) {
GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(),
GELOGI("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(),
cast_node->GetName().c_str());
std::string orig_switch_name = switch_node->GetName();
OpDescPtr switch_desc = switch_node->GetOpDesc();
@@ -649,7 +653,7 @@ Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_no
///
Status SwitchToStreamSwitchPass::ModifySwitchOutCtlEdges(const NodePtr &switch_node, const NodePtr &stream_switch,
const NodePtr &active_node) {
GELOGD("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(),
GELOGI("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(),
stream_switch->GetName().c_str(), active_node->GetName().c_str());
auto find_res = switch_node_map_.find(switch_node);
GE_IF_BOOL_EXEC(find_res == switch_node_map_.end(), {


+ 51
- 0
ge/graph/passes/useless_control_out_remove_pass.cc View File

@@ -0,0 +1,51 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/passes/useless_control_out_remove_pass.h"

#include "graph/debug/ge_attr_define.h"
#include "graph/utils/graph_utils.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"

namespace ge {
// For const nodes without control inputs: delete the node entirely when it
// feeds nothing, otherwise strip its (useless) outgoing control edges.
Status UselessControlOutRemovePass::Run(NodePtr &node) {
  GE_CHECK_NOTNULL(node);

  const std::string node_type = node->GetType();
  if ((node_type != CONSTANT) && (node_type != CONSTANTOP)) {
    return SUCCESS;
  }
  GELOGD("UselessControlOutRemovePass running, node: %s.", node->GetName().c_str());

  // const has no control input
  if (!node->GetInControlNodes().empty()) {
    return SUCCESS;
  }

  if (node->GetOutDataNodes().empty()) {
    // It is an isolated const, just remove it.
    GELOGI("Delete isolated const: %s.", node->GetName().c_str());
    GE_CHK_STATUS_RET(IsolateAndDeleteNode(node, {}))
    AddNodeDeleted(node);
    return SUCCESS;
  }

  // The const still has data consumers; only its control outputs are useless.
  auto out_ctrl_anchor = node->GetOutControlAnchor();
  if ((out_ctrl_anchor != nullptr) && !out_ctrl_anchor->GetPeerAnchors().empty()) {
    GELOGI("Node: %s unlink all out control edge.", node->GetName().c_str());
    out_ctrl_anchor->UnlinkAll();
  }

  return SUCCESS;
}
} // namespace ge

+ 29
- 0
ge/graph/passes/useless_control_out_remove_pass.h View File

@@ -0,0 +1,29 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_PASSES_USELESS_CONTROL_OUT_REMOVE_PASS_H_
#define GE_GRAPH_PASSES_USELESS_CONTROL_OUT_REMOVE_PASS_H_

#include "graph/passes/base_pass.h"

namespace ge {
// Node pass that, for const nodes with no control inputs, deletes fully
// isolated consts and removes useless outgoing control edges from consts
// that still have data consumers.
class UselessControlOutRemovePass : public BaseNodePass {
 public:
  Status Run(NodePtr &node) override;
};
} // namespace ge

#endif // GE_GRAPH_PASSES_USELESS_CONTROL_OUT_REMOVE_PASS_H_

+ 40
- 0
ge/graph/preprocess/graph_preprocess.cc View File

@@ -18,6 +18,7 @@
#include <map>
#include <set>
#include <string>
#include <utility>
#include "common/formats/format_transfers/format_transfer_fractal_nz.h"
#include "common/formats/format_transfers/format_transfer_fractal_z.h"
#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h"
@@ -27,9 +28,13 @@
#include "common/helper/model_helper.h"
#include "common/math/math_util.h"
#include "common/op/ge_op_utils.h"
#include "common/util/error_manager/error_manager.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "framework/common/debug/ge_log.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/common/local_context.h"
#include "graph/common/transop_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/shape_refiner.h"
#include "graph/manager/graph_var_manager.h"
@@ -39,21 +44,29 @@
#include "graph/passes/aicpu_constant_folding_pass.h"
#include "graph/passes/assert_pass.h"
#include "graph/passes/assign_pass.h"
#include "graph/passes/base_pass.h"
#include "graph/passes/common_subexpression_elimination_pass.h"
#include "graph/passes/cond_pass.h"
#include "graph/passes/cond_remove_pass.h"
#include "graph/passes/constant_folding_pass.h"
#include "graph/passes/constant_fuse_same_pass.h"
#include "graph/passes/control_trigger_pass.h"
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/dropout_pass.h"
#include "graph/passes/enter_pass.h"
#include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/for_pass.h"
#include "graph/passes/get_original_format_pass.h"
#include "graph/passes/guarantee_const_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/infershape_pass.h"
#include "graph/passes/iterator_op_pass.h"
#include "graph/passes/merge_pass.h"
#include "graph/passes/net_output_pass.h"
#include "graph/passes/next_iteration_pass.h"
#include "graph/passes/no_use_reshape_remove_pass.h"
#include "graph/passes/parallel_concat_start_op_pass.h"
#include "graph/passes/placeholder_with_default_pass.h"
@@ -68,18 +81,45 @@
#include "graph/passes/shape_operate_op_remove_pass.h"
#include "graph/passes/snapshot_pass.h"
#include "graph/passes/stop_gradient_pass.h"
#include "graph/passes/subgraph_pass.h"
#include "graph/passes/switch_data_edges_bypass.h"
#include "graph/passes/switch_dead_branch_elimination.h"
#include "graph/passes/switch_logic_remove_pass.h"
#include "graph/passes/merge_to_stream_merge_pass.h"
#include "graph/passes/switch_to_stream_switch_pass.h"
#include "graph/passes/attach_stream_label_pass.h"
#include "graph/passes/unused_const_pass.h"
#include "graph/passes/unused_op_remove_pass.h"
#include "graph/passes/var_is_initialized_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/preprocess/insert_op/util_insert_aipp_op.h"
#include "graph/types.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "inc/pass_manager.h"
#include "init/gelib.h"
#include "multi_batch_copy_graph.h"
#include "runtime/dev.h"

#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/link_gen_mask_nodes_pass.h"
#include "graph/passes/permute_pass.h"
#include "graph/passes/reshape_remove_pass.h"
#include "graph/passes/same_transdata_breadth_fusion_pass.h"
#include "graph/passes/transop_breadth_fusion_pass.h"
#include "graph/passes/transop_depth_fusion_pass.h"
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h"

#include "graph/passes/cast_remove_pass.h"
#include "graph/passes/data_pass.h"
#include "graph/passes/transop_without_reshape_fusion_pass.h"
#include "graph/passes/transpose_transdata_pass.h"
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/mark_agnostic_pass.h"


namespace ge {
namespace {
static std::map<std::string, ge::DataType> output_type_str_to_datatype = {


+ 344
- 58
ge/graph/preprocess/multi_batch_copy_graph.cc View File

@@ -44,6 +44,8 @@
using std::set;
using std::string;
using std::vector;
using std::map;
using std::queue;

namespace ge {
namespace multibatch {
@@ -57,10 +59,15 @@ const int kDataInIndex = 0;
const int kMergeDataOutIndex = 0;
const int kStaticOutput = -1;
const int kDivisionConst = 2;
const int32_t kOneInDataNode = 1;
const int32_t kFindNoMatch = 0;


inline bool IsDataLikeType(const std::string &node_type) { return (node_type == DATA) || (node_type == AIPP); }

inline bool IsEnterType(const string &node_type) { return (node_type == ENTER) || (node_type == REFENTER); }
const set<string> unchange_types({CONSTANT, CONSTANTOP, ENTER, REFENTER});

inline bool IsGetNextType(const NodePtr &node) {
std::string original_type;
GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS,
@@ -218,12 +225,6 @@ Status MultiBatchGraphCopyer::CopyGraph() {
return ret;
}

ret = InsertIdentityAfterSwitchN();
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to insert identity nodes after switchn node.");
return INTERNAL_ERROR;
}

GELOGI("Begin to remove useless nodes by prune pass after copy process");
PrunePass prune_pass;
ret = prune_pass.Run(graph_);
@@ -240,6 +241,18 @@ Status MultiBatchGraphCopyer::Init() {
return ret;
}

ret = RelinkConstCtrlEdge();
if (ret != SUCCESS) {
GELOGE(FAILED, "Relink const's control edge failed.");
return FAILED;
}

ret = ExtractUnchangedStructureOutofCycle();
if (ret != SUCCESS) {
GELOGE(FAILED, "Extract unchanged structure out of cycle failed.");
return FAILED;
}

for (auto &node : graph_->GetAllNodes()) {
origin_all_nodes_.emplace_back(node);
if (IsDataLikeType(node->GetType())) {
@@ -252,6 +265,281 @@ Status MultiBatchGraphCopyer::Init() {
return SUCCESS;
}

// Detach const nodes from the control-edge topology before copying the graph:
// each const's incoming control edges are re-routed to bypass the const
// (predecessor -> consumer directly), and the const's own outgoing control
// edges are dropped. Consts feeding Merge nodes are left untouched.
// NOTE(review): the early-continue for Merge consumers presumably protects
// loop-structure semantics — confirm against the multi-batch copy design.
Status MultiBatchGraphCopyer::RelinkConstCtrlEdge() {
  for (auto &node : graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    if ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) {
      // Consts with no data consumers are left alone here.
      if (node->GetOutDataNodes().empty()) {
        continue;
      }
      if (!node->GetInControlNodes().empty()) {
        auto in_ctrl_nodes = node->GetInControlNodes();
        auto out_nodes = node->GetOutAllNodes();
        // Skip consts that feed a Merge/RefMerge: their control edges are kept.
        bool has_merge_out = false;
        for (const auto &out_node : out_nodes) {
          GE_CHECK_NOTNULL(out_node);
          if (out_node->GetType() == MERGE || out_node->GetType() == REFMERGE) {
            has_merge_out = true;
            break;
          }
        }
        if (has_merge_out) {
          continue;
        }
        // Drop all incoming control edges of the const; predecessors were
        // captured in in_ctrl_nodes above, so the links can be rebuilt below.
        auto in_ctrl_anchor = node->GetInControlAnchor();
        GE_CHECK_NOTNULL(in_ctrl_anchor);
        in_ctrl_anchor->UnlinkAll();
        // Re-route: each former control predecessor is linked directly to each
        // of the const's consumers (except Enter/RefEnter), avoiding duplicates.
        for (auto &in_ctrl_node : in_ctrl_nodes) {
          auto out_ctrl_anchor_of_in_ctrl_node = in_ctrl_node->GetOutControlAnchor();
          GE_CHECK_NOTNULL(out_ctrl_anchor_of_in_ctrl_node);
          for (auto &out_node : out_nodes) {
            if (IsEnterType(out_node->GetType())) {
              continue;
            }
            if (!out_ctrl_anchor_of_in_ctrl_node->IsLinkedWith(out_node->GetInControlAnchor())) {
              GE_CHK_STATUS_RET(out_ctrl_anchor_of_in_ctrl_node->LinkTo(out_node->GetInControlAnchor()))
            }
          }
        }
      }
      // Finally drop the const's own outgoing control edges.
      auto out_ctrl_anchor = node->GetOutControlAnchor();
      if (out_ctrl_anchor != nullptr) {
        out_ctrl_anchor->UnlinkAll();
      }
    }
  }

  return SUCCESS;
}

// Pulls shape-unchanging nodes out of while-loop cycles: nodes fed only by
// Enter nodes (or other unchanged nodes) are hoisted above their Enters, and
// fresh Enter nodes are re-inserted below them. Iterates until no candidate
// remains, then removes Enter nodes that ended up with no outputs.
Status MultiBatchGraphCopyer::ExtractUnchangedStructureOutofCycle() {
  // Group candidate Enter nodes by their frame name.
  map<string, vector<NodePtr>> enters_by_frame;
  if (GetEnterNodesGroupByFrame(enters_by_frame) != SUCCESS) {
    GELOGE(FAILED, "Get enter nodes grouped by frame_name failed.");
    return FAILED;
  }

  // Seed the work queue with nodes directly below the Enters that qualify.
  queue<NodePtr> work_queue;
  if (GetNodeNeedExtract(enters_by_frame, work_queue) != SUCCESS) {
    GELOGE(FAILED, "Get nodes needed to extract failed.");
    return FAILED;
  }

  while (!work_queue.empty()) {
    auto current = work_queue.front();
    work_queue.pop();

    // Step 1: detach the node from its Enter producers, wiring it to the
    // Enters' own inputs instead. Remembers one Enter's desc for re-insertion.
    OpDescPtr moved_enter_desc = nullptr;
    if (MoveInEntersInDataAnchorDown(current, moved_enter_desc) != SUCCESS) {
      GELOGE(FAILED, "Move in enter nodes' in data anchors down of %s failed.", current->GetName().c_str());
      return FAILED;
    }

    // Step 2: put new Enter nodes on each of the node's output anchors.
    set<NodePtr> downstream_nodes;
    if (InsertEnterAfterNode(current, moved_enter_desc, downstream_nodes) != SUCCESS) {
      GELOGE(FAILED, "Insert enter node after %s failed.", current->GetName().c_str());
      return FAILED;
    }

    // Step 3: push the node's incoming control edges down to its consumers.
    if (MoveCtrlEdgeToOutNodes(current, downstream_nodes) != SUCCESS) {
      GELOGE(FAILED, "Move %s's control edge to out nodes failed.", current->GetName().c_str());
      return FAILED;
    }

    // Consumers that are now fed only by unchanged producers become candidates.
    for (auto &downstream : downstream_nodes) {
      GE_CHECK_NOTNULL(downstream);
      if (AllInDataNodesUnchangeAndNoMergeOut(downstream)) {
        work_queue.push(downstream);
      }
    }
  }

  // Clean up Enter nodes that lost all of their outputs during extraction.
  if (DeleteEnterWithoutDataOut() != SUCCESS) {
    GELOGE(FAILED, "Delete enter node without out data nodes failed.");
    return FAILED;
  }

  return SUCCESS;
}

// Collects Enter/RefEnter nodes that carry no control edges, grouped by their
// ENTER_ATTR_FRAME_NAME attribute. Enters already wired with control edges are
// skipped (they stay in place). Fails if an Enter is missing the frame-name attr.
// @param frame_enter  out: frame name -> Enter nodes of that frame
Status MultiBatchGraphCopyer::GetEnterNodesGroupByFrame(map<string, vector<NodePtr>> &frame_enter) {
  for (auto &node : graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    if (!IsEnterType(node->GetType())) {
      continue;
    }
    // Enters with control edges are not safe to regroup; leave them untouched.
    if (!node->GetInControlNodes().empty() || !node->GetOutControlNodes().empty()) {
      continue;
    }
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    string frame_name;
    if (!AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) {
      // Fixed format specifier: was a bare "%", an invalid printf conversion
      // that dropped the node name from the log.
      GELOGE(FAILED, "Get attr frame_name of enter[%s] failed.", node->GetName().c_str());
      return FAILED;
    }
    frame_enter[frame_name].emplace_back(node);
  }

  return SUCCESS;
}

// Seeds the extraction queue: for every Enter node in every frame, its data
// consumers that qualify (all data inputs unchanged, no Merge output) are
// enqueued as extraction candidates.
// @param frame_enter       frame name -> Enter nodes (from GetEnterNodesGroupByFrame)
// @param nodes_to_extract  out: queue of candidate nodes, in discovery order
Status MultiBatchGraphCopyer::GetNodeNeedExtract(const map<string, vector<NodePtr>> &frame_enter,
                                                 queue<NodePtr> &nodes_to_extract) {
  for (const auto &one_group : frame_enter) {
    // Bind by const reference: the original copied the whole NodePtr vector
    // for every frame for no benefit.
    const auto &enters = one_group.second;
    for (const auto &enter : enters) {
      for (const auto &out_data_node : enter->GetOutDataNodes()) {
        GE_CHECK_NOTNULL(out_data_node);
        if (AllInDataNodesUnchangeAndNoMergeOut(out_data_node)) {
          nodes_to_extract.push(out_data_node);
        }
      }
    }
  }

  return SUCCESS;
}

// Returns true when `node` is safe to hoist out of a loop body: none of its
// data consumers is a Merge/RefMerge, and either it has exactly one data input
// or every data producer is of a shape-unchanging type (unchange_types).
bool MultiBatchGraphCopyer::AllInDataNodesUnchangeAndNoMergeOut(const NodePtr &node) {
  // Reject if any downstream data node is Merge-like (or missing).
  for (const auto &succ : node->GetOutDataNodes()) {
    if (succ == nullptr) {
      return false;
    }
    const auto &succ_type = succ->GetType();
    if ((succ_type == MERGE) || (succ_type == REFMERGE)) {
      return false;
    }
  }

  const auto preds = node->GetInDataNodes();
  // A single-data-input node qualifies regardless of what feeds it.
  if (preds.size() == kOneInDataNode) {
    return true;
  }

  // Otherwise every data producer must be a known shape-unchanging type.
  for (const auto &pred : preds) {
    if ((pred == nullptr) || (unchange_types.count(pred->GetType()) == kFindNoMatch)) {
      return false;
    }
  }

  return true;
}

// Detaches `node` from every Enter that feeds it through a data edge, and
// rewires the Enter's own data inputs directly into `node` instead — i.e. the
// Enter's in-anchors are "moved down" past the Enter onto `node`.
// NOTE(review): `enter_desc` is overwritten each time an Enter producer is
// found, so on return it holds the desc of the LAST Enter seen — presumably
// all Enters feeding one node share an equivalent desc; confirm with callers
// (it is used as the template for re-inserted Enters in InsertEnterAfterNode).
Status MultiBatchGraphCopyer::MoveInEntersInDataAnchorDown(NodePtr &node, OpDescPtr &enter_desc) {
  auto in_data_anchors = node->GetAllInDataAnchors();
  for (auto &in_data_anchor : in_data_anchors) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_CHECK_NOTNULL(peer_out_data_anchor);
    auto peer_in_data_node = peer_out_data_anchor->GetOwnerNode();
    if (IsEnterType(peer_in_data_node->GetType())) {
      // Cut the Enter -> node data edge...
      GE_CHK_STATUS_RET(peer_out_data_anchor->Unlink(in_data_anchor))
      GELOGD("Unlink data edge from %s to %s.", peer_in_data_node->GetName().c_str(), node->GetName().c_str());
      // ...and connect each of the Enter's producers straight to node,
      // skipping edges that already exist.
      auto enter_in_data_anchors = peer_in_data_node->GetAllInDataAnchors();
      for (auto &enter_in_data_anchor : enter_in_data_anchors) {
        auto peer_out_data_anchor_of_enter = enter_in_data_anchor->GetPeerOutAnchor();
        GE_CHECK_NOTNULL(peer_out_data_anchor_of_enter);
        if (peer_out_data_anchor_of_enter->IsLinkedWith(in_data_anchor)) {
          continue;
        }
        GE_CHK_STATUS_RET(peer_out_data_anchor_of_enter->LinkTo(in_data_anchor))
        GELOGD("Relink data edge from %s to %s.", peer_out_data_anchor_of_enter->GetOwnerNode()->GetName().c_str(),
               node->GetName().c_str());
      }
      // Remember this Enter's desc as the template for later re-insertion.
      enter_desc = peer_in_data_node->GetOpDesc();
      GE_CHECK_NOTNULL(enter_desc);
    }
  }

  return SUCCESS;
}

// Re-inserts Enter nodes below `node`: for each of node's output data anchors
// that has consumers, one new Enter (cloned from `copy_desc`) is created and
// spliced between the anchor and all of its former peers. No-op when
// `copy_desc` is null (node had no Enter producer to move down).
// @param node       node that was hoisted out of the loop body
// @param copy_desc  template OpDesc of the Enter being re-inserted (may be null)
// @param out_nodes  out: all former data consumers of `node`
Status MultiBatchGraphCopyer::InsertEnterAfterNode(NodePtr &node, const OpDescPtr &copy_desc, set<NodePtr> &out_nodes) {
  if (copy_desc == nullptr) {
    return SUCCESS;
  }
  // Snapshot out-anchor -> [(peer in-anchor, peer node)] before any relinking.
  map<OutDataAnchorPtr, vector<std::pair<InDataAnchorPtr, NodePtr>>> outanchors_inanchors_nodes;
  auto out_data_anchors = node->GetAllOutDataAnchors();
  for (auto &out_data_anchor : out_data_anchors) {
    auto peer_in_data_anchors = out_data_anchor->GetPeerInDataAnchors();
    for (auto peer_in_data_anchor : peer_in_data_anchors) {
      GE_CHECK_NOTNULL(peer_in_data_anchor);
      auto peer_in_data_node = peer_in_data_anchor->GetOwnerNode();
      out_nodes.emplace(peer_in_data_node);
      outanchors_inanchors_nodes[out_data_anchor].emplace_back(std::make_pair(peer_in_data_anchor, peer_in_data_node));
    }
  }

  int32_t i = 0;
  auto node_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(node_desc);
  // Insert one enter node after node's per out data anchor
  for (auto &outanchor_inanchors_nodes : outanchors_inanchors_nodes) {
    string name = node->GetName() + "_" + ENTER + "_" + std::to_string(i++);
    GELOGD("Create Enter op %s after %s.", name.c_str(), node->GetName().c_str());
    auto enter_desc = AttrUtils::CopyOpDesc(copy_desc);
    // Fix: CopyOpDesc may fail and return null; the original dereferenced it
    // unchecked in SetName below.
    GE_CHECK_NOTNULL(enter_desc);
    enter_desc->SetName(name);
    // The Enter's tensor descs mirror the producing anchor's output desc.
    GE_CHK_STATUS_RET(
        enter_desc->UpdateInputDesc("x", node_desc->GetOutputDesc(outanchor_inanchors_nodes.first->GetIdx())))
    GE_CHK_STATUS_RET(
        enter_desc->UpdateOutputDesc("y", node_desc->GetOutputDesc(outanchor_inanchors_nodes.first->GetIdx())))
    auto enter_node = graph_->AddNode(enter_desc);
    GE_CHECK_NOTNULL(enter_node);
    // Wire node -> enter, then enter -> each original consumer.
    GE_CHK_STATUS_RET(outanchor_inanchors_nodes.first->LinkTo(enter_node->GetInDataAnchor(kDataInIndex)))
    GE_CHECK_NOTNULL(enter_node->GetOutDataAnchor(kDataInIndex));
    for (auto &inanchor_node : outanchor_inanchors_nodes.second) {
      GE_CHK_STATUS_RET(outanchor_inanchors_nodes.first->Unlink(inanchor_node.first))
      GE_CHK_STATUS_RET(enter_node->GetOutDataAnchor(kDataInIndex)->LinkTo(inanchor_node.first))
      GELOGD("Unlink from %s to %s, link from %s to %s then to %s.", node->GetName().c_str(),
             inanchor_node.second->GetName().c_str(), node->GetName().c_str(), enter_node->GetName().c_str(),
             inanchor_node.second->GetName().c_str());
    }
  }

  return SUCCESS;
}

// Move node's in control edges to out data nodes
Status MultiBatchGraphCopyer::MoveCtrlEdgeToOutNodes(NodePtr &node, set<NodePtr> &out_nodes) {
auto in_ctrl_anchor = node->GetInControlAnchor();
GE_CHECK_NOTNULL(in_ctrl_anchor);
auto peer_out_ctrl_anchors = in_ctrl_anchor->GetPeerOutControlAnchors();
for (auto &peer_out_ctrl_anchor : peer_out_ctrl_anchors) {
GE_CHK_STATUS_RET(peer_out_ctrl_anchor->Unlink(in_ctrl_anchor))
GELOGD("Unlink control edge from %s to %s.", peer_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(),
node->GetName().c_str());
for (auto &out_node : out_nodes) {
auto in_ctrl_anchor_of_out_node = out_node->GetInControlAnchor();
GE_CHECK_NOTNULL(in_ctrl_anchor_of_out_node);
if (!peer_out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor_of_out_node)) {
GE_CHK_STATUS_RET(peer_out_ctrl_anchor->LinkTo(in_ctrl_anchor_of_out_node))
GELOGD("Link control edge from %s to %s.", peer_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(),
out_node->GetName().c_str());
}
}
}

return SUCCESS;
}

// Removes Enter/RefEnter nodes that have no outputs at all (neither data nor
// control), isolating each before deletion so the graph stays consistent.
Status MultiBatchGraphCopyer::DeleteEnterWithoutDataOut() {
  for (auto &candidate : graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(candidate);
    if (!IsEnterType(candidate->GetType())) {
      continue;
    }
    // NOTE: checks all outputs (data + control), despite the function name.
    if (!candidate->GetOutAllNodes().empty()) {
      continue;
    }
    GELOGD("Delete enter node: %s which has no output.", candidate->GetName().c_str());
    GE_CHK_STATUS_RET(GraphUtils::IsolateNode(candidate, {}))
    GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph_, candidate))
  }

  return SUCCESS;
}

void MultiBatchGraphCopyer::LabelStatusForData(const NodePtr &data) {
auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
GELOGI("Label status for %s, shape_dims is %s.", data->GetName().c_str(),
@@ -297,6 +585,9 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() {
LabelStatusForGetNextSink(data);
}
}

map<string, vector<NodePtr>> frame_enters;
InitStatus(frame_enters);
bool changed = true;
// If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch
while (changed) {
@@ -306,12 +597,13 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() {
if (iter != origin_nodes_status_.end()) {
continue;
}
for (auto &in_node : node->GetInAllNodes()) {
bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() &&
origin_nodes_status_[in_node.get()] == kNodeInBatchBranch;
if (is_in_batch) {
origin_nodes_status_[node.get()] = kNodeInBatchBranch;
changed = true;
for (auto &in_node : node->GetInDataNodes()) {
if (origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end()) {
if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end()) {
origin_nodes_status_[node.get()] == kNodeInBatchBranch;
ResetEnterStatus(frame_enters, node);
changed = true;
}
break;
}
}
@@ -320,6 +612,45 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() {
return SUCCESS;
}

// Prepares state for the in-batch-branch labeling pass: groups Enter nodes by
// frame name, and marks every data node with a non-static output shape as
// kNodeInBatchBranch.
// @param frame_enters  out: frame name -> Enter nodes of that frame
void MultiBatchGraphCopyer::InitStatus(map<string, vector<NodePtr>> &frame_enters) {
  // Bucket Enter/RefEnter nodes by their frame attribute (missing attr: skip).
  for (const auto &node : origin_all_nodes_) {
    if (!IsEnterType(node->GetType())) {
      continue;
    }
    const auto op_desc = node->GetOpDesc();
    if (op_desc == nullptr) {
      continue;
    }
    string frame_name;
    if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) {
      frame_enters[frame_name].emplace_back(node);
    }
  }

  // Dynamic-shaped data nodes seed the batch-branch status map.
  for (const auto &data : origin_data_nodes_) {
    const auto dims = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape().GetDims();
    if (!IsAllDimsPositive(dims)) {
      origin_nodes_status_[data.get()] = kNodeInBatchBranch;
    }
  }
}

// When an Enter node is labeled as in-batch-branch, every other Enter of the
// same frame must be labeled too, so the whole frame is treated consistently.
// Non-Enter nodes are ignored.
void MultiBatchGraphCopyer::ResetEnterStatus(map<string, vector<NodePtr>> &frame_enters, const NodePtr &node) {
  if (!IsEnterType(node->GetType())) {
    return;
  }

  for (const auto &frame_and_enters : frame_enters) {
    const auto &enters = frame_and_enters.second;
    // Locate the frame containing this enter; only one frame can match.
    if (std::find(enters.begin(), enters.end(), node) == enters.end()) {
      continue;
    }
    for (const auto &enter : enters) {
      origin_nodes_status_[enter.get()] = kNodeInBatchBranch;
    }
    break;
  }
}

Status MultiBatchGraphCopyer::LabelStatus() {
if (LabelInBatchBranchStatus() != SUCCESS) {
GELOGE(PARAM_INVALID, "Failed to label no in batch branch");
@@ -1360,52 +1691,6 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) {
return SUCCESS;
}

// Inserts an Identity node between each SwitchN output and each of its data
// consumers (except Merge nodes inserted by the multi-batch pass itself), and
// adds a control edge Identity -> consumer. Propagates ATTR_NAME_BATCH_LABEL
// from the consumer onto the new Identity when present.
Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() {
  for (auto &node : graph_->GetAllNodes()) {
    if (node->GetType() != SWITCHN) {
      continue;
    }
    auto switchn_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(switchn_desc);
    size_t identity_num = 0;  // counter used only to make identity names unique
    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      // Fix: the tensor desc must come from THIS output anchor's index. The
      // original indexed GetOutputDesc with a counter that advanced once per
      // peer in-anchor (and not on the merge-skip path), so an output anchor
      // with multiple consumers gave later identities the wrong desc.
      auto data_desc = switchn_desc->GetOutputDesc(out_data_anchor->GetIdx());
      for (auto &in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
        auto out_node = in_data_anchor->GetOwnerNode();
        auto op_desc = out_node->GetOpDesc();
        GE_CHECK_NOTNULL(op_desc);
        // Merge nodes created by the multi-batch pass already order correctly.
        if ((out_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) {
          GELOGD("No need to insert identity between %s and %s.", node->GetName().c_str(), out_node->GetName().c_str());
          continue;
        }

        auto identity_desc =
            MakeShared<OpDesc>(node->GetName() + "_identity_" + std::to_string(identity_num++), IDENTITY);
        GE_CHECK_NOTNULL(identity_desc);

        // Keep the batch label consistent with the consumer node, if it has one.
        string batch_label;
        if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) {
          if (!AttrUtils::SetStr(identity_desc, ATTR_NAME_BATCH_LABEL, batch_label)) {
            GELOGE(FAILED, "Set attr ATTR_NAME_BATCH_LABEL failed, node:%s.", identity_desc->GetName().c_str());
            return FAILED;
          }
        }

        GE_CHK_STATUS_RET(identity_desc->AddInputDesc("x", data_desc));
        GE_CHK_STATUS_RET(identity_desc->AddOutputDesc("y", data_desc));

        auto identity_node = graph_->AddNode(identity_desc);
        GE_CHECK_NOTNULL(identity_node);
        // Data edge SwitchN -> Identity, plus control edge Identity -> consumer
        // so the consumer still waits on this branch.
        GE_CHK_STATUS_RET(out_data_anchor->LinkTo(identity_node->GetInDataAnchor(0)));
        GE_CHECK_NOTNULL(identity_node->GetOutControlAnchor());
        GE_CHK_STATUS_RET(identity_node->GetOutControlAnchor()->LinkTo(out_node->GetInControlAnchor()));
      }
    }
  }

  return SUCCESS;
}

Status ProcessMultiBatch(ComputeGraphPtr &graph) {
if (GetLocalOmgContext().dynamic_node_type.empty()) {
const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN");
@@ -1415,6 +1700,7 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) {
return pass_manager.Run(graph);
}
}

if (!GetLocalOmgContext().need_multi_batch) {
GELOGI("No need to process_multi for no_train graph.");
return SUCCESS;


+ 15
- 1
ge/graph/preprocess/multi_batch_copy_graph.h View File

@@ -18,6 +18,7 @@
#include <map>
#include <queue>
#include <vector>
#include <set>

#include "external/ge/ge_api_error_codes.h"

@@ -64,12 +65,26 @@ class MultiBatchGraphCopyer {
private:
Status Init();
Status CheckArguments();
Status RelinkConstCtrlEdge();

Status ExtractUnchangedStructureOutofCycle();
Status GetEnterNodesGroupByFrame(std::map<std::string, std::vector<NodePtr>> &frame_enter);
Status GetNodeNeedExtract(const std::map<std::string, std::vector<NodePtr>> &frame_enter,
std::queue<NodePtr> &nodes_to_extract);
bool AllInDataNodesUnchangeAndNoMergeOut(const NodePtr &node);
Status MoveInEntersInDataAnchorDown(NodePtr &node, OpDescPtr &enter_desc);
Status InsertEnterAfterNode(NodePtr &node, const OpDescPtr &enter_desc, std::set<NodePtr> &out_nodes);
Status MoveCtrlEdgeToOutNodes(NodePtr &node, std::set<NodePtr> &out_nodes);
Status DeleteEnterWithoutDataOut();

// label status for origin_all_nodes_
Status LabelStatus();
Status LabelInBatchBranchStatus();
void LabelStatusForData(const NodePtr &data);
void LabelStatusForGetNextSink(const NodePtr &data);
void InitStatus(std::map<std::string, std::vector<NodePtr>> &frame_enters);
void ResetEnterStatus(std::map<std::string, std::vector<NodePtr>> &frame_enters, const NodePtr &node);

// add nodes functions
Status CreateNewNodes();

@@ -81,7 +96,6 @@ class MultiBatchGraphCopyer {
Status InsertSwitchNForData(const NodePtr &node, const size_t &out_anchor_index, const size_t &peer_in_anchor_index,
std::vector<std::pair<Node *, NodePtr>> &dynamic_out_to_switchn);

Status InsertIdentityAfterSwitchN();
Status UpdateMaxShapeToData(const NodePtr &node, size_t out_anchor_index);
Status UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index);



+ 10
- 1
ge/host_kernels/dynamic_stitch_kernel.cc View File

@@ -33,6 +33,8 @@ namespace {
const int kDoubleAttrN = 2;
const int kFirstOutputDescIdx = 0;
const int kMergedShapeSecondDim = 1;
const size_t kNullTensorDimNum = 1;
const int64_t kNullTensorDimValue = 0;
const std::set<DataType> kSupportedTypeSet = {DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE};
} // namespace
@@ -177,7 +179,14 @@ Status DynamicStitchKernel::StitchDataFollowIndices(int64_t data_unit, const vec
int64_t src_offset = 0;
std::set<int32_t> indices_set;
for (int i = 0; i < n_; i++) {
auto indices_shape_size = input[i]->GetTensorDesc().GetShape().GetShapeSize();
GeShape indices_shape = input[i]->GetTensorDesc().GetShape();
size_t indices_dim_num = indices_shape.GetDimNum();
// skip null indices tensor
if (indices_dim_num == kNullTensorDimNum && indices_shape.GetDim(0) == kNullTensorDimValue) {
GELOGD("Input indices[%d] has null tensor, skip it.", i);
continue;
}
auto indices_shape_size = indices_shape.GetShapeSize();
// to normalize logic, assume scalar as vector with shape of [1].
indices_shape_size = (indices_shape_size == 0) ? 1 : indices_shape_size;
// all index for input is less than size of input


+ 2
- 0
ge/hybrid/executor/hybrid_execution_context.h View File

@@ -22,6 +22,7 @@
#include "common/blocking_queue.h"
#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "graph/ge_local_context.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/common/tensor_value.h"
#include "hybrid/executor/hybrid_profiler.h"
@@ -38,6 +39,7 @@ struct GraphExecutionContext {

uint64_t session_id = 0;
const HybridModel *model = nullptr;
const GEThreadLocalContext *ge_context = nullptr;
rtStream_t stream = nullptr;
rtContext_t rt_context = nullptr;
rtContext_t rt_gen_context = nullptr;


+ 1
- 0
ge/hybrid/executor/hybrid_model_executor.cc View File

@@ -95,6 +95,7 @@ Status HybridModelExecutor::InitExecutionContext() {
context_.stream = stream_;
context_.model = model_;
context_.session_id = ::ge::GetContext().SessionId();
context_.ge_context = &GetThreadLocalContext();
GELOGD("session id from model = %lu, from context = %lu", model_->GetSessionId(), context_.session_id);
context_.allocator = NpuMemoryAllocator::GetAllocator(device_id_);
GE_CHECK_NOTNULL(context_.allocator);


+ 22
- 34
ge/hybrid/executor/node_state.cc View File

@@ -18,7 +18,6 @@
#include <chrono>
#include "framework/common/debug/log.h"
#include "graph/compute_graph.h"
#include "graph/utils/tensor_utils.h"
#include "hybrid_execution_context.h"
#include "subgraph_context.h"

@@ -36,31 +35,29 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
this->num_pending_shapes_);
}

Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) {
Status ShapeInferenceState::UpdateInputShape(int idx,
const GeShape &ori_shape,
const GeShape &shape) {
if (node_item.IsInputShapeStatic(idx)) {
GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]",
node_item.NodeName().c_str(),
idx,
node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(),
target.GetShape().ToString().c_str());
shape.ToString().c_str());
return SUCCESS;
}

int64_t tensor_size = -1;
(void) TensorUtils::GetSize(target, tensor_size);
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld",
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]",
node_item.NodeName().c_str(),
idx,
target.GetShape().ToString().c_str(),
target.GetOriginShape().ToString().c_str(),
tensor_size);
shape.ToString().c_str(),
ori_shape.ToString().c_str());

std::lock_guard<std::mutex> lk(mu_);
auto tensor_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(tensor_desc);
tensor_desc->SetShape(target.GetShape());
tensor_desc->SetOriginShape(target.GetOriginShape());
(void) TensorUtils::SetSize(*tensor_desc, tensor_size);
tensor_desc->SetShape(shape);
tensor_desc->SetOriginShape(ori_shape);
if (--num_pending_shapes_ == 0) {
ready_cv_.notify_all();
}
@@ -113,24 +110,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
for (auto &p : shape_futures) {
auto idx = p.first;
auto &future = p.second;
GeShape shape;
GeShape ori_shape;
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx);
auto src_tensor_desc = future.GetTensorDesc();
GE_CHECK_NOTNULL(src_tensor_desc);
GE_CHK_STATUS_RET(future.Get(ori_shape, shape),
"[%s] Get shape failed. index = %u",
node_item.NodeName().c_str(),
idx);
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx);

auto input_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(input_desc);
int64_t tensor_size = -1;
(void) TensorUtils::GetSize(*src_tensor_desc, tensor_size);
GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu",
GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]",
node_item.NodeName().c_str(),
idx,
src_tensor_desc->GetShape().ToString().c_str(),
src_tensor_desc->GetOriginShape().ToString().c_str(),
tensor_size);
input_desc->SetShape(src_tensor_desc->GetShape());
input_desc->SetOriginShape(src_tensor_desc->GetOriginShape());
(void) TensorUtils::SetSize(*input_desc, tensor_size);
shape.ToString().c_str(),
ori_shape.ToString().c_str());
auto input_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(input_desc);
input_desc->SetShape(std::move(shape));
input_desc->SetOriginShape(ori_shape);
}

return SUCCESS;
@@ -193,14 +190,5 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str());
return SUCCESS;
}

GeTensorDescPtr ShapeFuture::GetTensorDesc() {
GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
if (!subgraph_context_->Await(src_node_)) {
GELOGE(INTERNAL_ERROR, "cancelled");
return nullptr;
}
return src_node_->GetOpDesc()->MutableOutputDesc(src_index_);
}
} // namespace hybrid
} // namespace ge

+ 1
- 2
ge/hybrid/executor/node_state.h View File

@@ -35,7 +35,6 @@ class ShapeFuture {
ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context);
~ShapeFuture() = default;
Status Get(GeShape &ori_shape, GeShape &shape);
GeTensorDescPtr GetTensorDesc();

private:
NodePtr src_node_;
@@ -46,7 +45,7 @@ class ShapeFuture {
struct ShapeInferenceState {
explicit ShapeInferenceState(const NodeItem &node_item);

Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc);
Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape);

void UpdateInputShapeFuture(int idx, ShapeFuture &&future);



+ 8
- 1
ge/hybrid/executor/subgraph_executor.cc View File

@@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
GE_CHECK_NOTNULL(tensor_desc);
auto node_state = subgraph_context_->GetOrCreateNodeState(input_node);
GE_CHECK_NOTNULL(node_state);
node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc);
node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape());
}
}

@@ -268,6 +268,13 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
} else {
node_state.SetKernelTask(node_item.kernel_task);
}

GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node),
"[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str());
return SUCCESS;
}



+ 4
- 5
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -20,9 +20,12 @@
#include "graph/utils/tensor_adapter.h"
#include "graph/debug/ge_attr_define.h"
#include "hybrid/node_executor/node_executor.h"
#include "hybrid/executor//worker//shape_inference_engine.h"
#include "common/dump/dump_manager.h"
#include "common/dump/dump_op.h"
#include "common/types.h"
#include "common/ge_types.h"
#include "common/profiling/profiling_manager.h"
#include "runtime/base.h"

namespace ge {
namespace hybrid {
@@ -345,10 +348,6 @@ Status NodeDoneCallback::OnNodeDone() {
}

GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item));
if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) {
// update output tensor sizes
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item));
}
// PropagateOutputs for type == DEPEND_COMPUTE
if (node_item.shape_inference_type == DEPEND_COMPUTE) {
if (graph_context_->trace_enabled) {


+ 18
- 103
ge/hybrid/executor/worker/shape_inference_engine.cc View File

@@ -17,15 +17,9 @@
#include "hybrid/executor/worker/shape_inference_engine.h"
#include "graph/shape_refiner.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "common/math/math_util.h"
#include "hybrid/node_executor/node_executor.h"

namespace ge {
namespace {
const int kAlignment = 32;
}
namespace hybrid {
ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context)
: execution_context_(execution_context),
@@ -46,9 +40,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
}

if (node_item.fused_subgraph != nullptr) {
GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph));
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item));
return SUCCESS;
return InferShapeForSubgraph(node_item, *node_item.fused_subgraph);
}

// Skip shape inference for node of type DEPEND_COMPUTE
@@ -71,15 +63,21 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
std::lock_guard<std::mutex> lk(mu_);
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start");
GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
"Invoke InferShapeAndType failed.");
"Invoke InferShapeAndType failed.");
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End");
}
// Check again to make sure shape is valid after shape inference
if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) {
bool is_unknown_shape = false;
GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape),
"Failed to get shape status. node = %s",
node_item.NodeName().c_str());

// update output tensor sizes after shape inference
// error if shape is still unknown and not of type DEPEND_SHAPE_RANGE
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE));
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
GE_CHK_BOOL_RET_STATUS(!is_unknown_shape,
INTERNAL_ERROR,
"[%s] Shape is still unknown after shape inference.",
node_item.NodeName().c_str());
}

GELOGD("[%s] [HybridTrace] After shape inference. Node = %s",
node_item.NodeName().c_str(),
@@ -129,6 +127,8 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
// propagate each output
for (int i = 0; i < node_item.num_outputs; ++i) {
auto output_desc = node_item.op_desc->MutableOutputDesc(i);
const auto &shape = output_desc->MutableShape();
const auto &ori_shape = output_desc->GetOriginShape();
auto &output_nodes = node_item.outputs[i];

// propagate output to all sub-inputs
@@ -149,7 +149,9 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first,
std::move(future));
} else {
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc));
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first,
ori_shape,
shape));
}
}
}
@@ -228,92 +230,5 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) {
}
return SUCCESS;
}

Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
std::vector<int64_t> &shape,
bool fallback_with_range) {
const auto &tensor_shape = tensor_desc.MutableShape();
if (tensor_shape.IsUnknownShape()) {
if (!fallback_with_range) {
GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]",
tensor_shape.ToString().c_str());
return INTERNAL_ERROR;
}

GELOGD("Calc output size by range");
std::vector<std::pair<int64_t, int64_t>> shape_range;
GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range");
if (shape_range.size() != shape.size()) {
GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)",
shape_range.size(),
shape.size());
return INTERNAL_ERROR;
}

for (size_t dim_index = 0; dim_index < shape.size(); ++dim_index) {
if (shape[dim_index] == ge::UNKNOWN_DIM) {
shape[dim_index] = shape_range[dim_index].second;
}
}

GELOGD("After canonicalization, shape = [%s], before = [%s]",
GeShape(shape).ToString().c_str(),
tensor_shape.ToString().c_str());
}

return SUCCESS;
}

Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,
const std::vector<int64_t> &shape,
int64_t &tensor_size) {
GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str());
uint32_t type_size;
if (!TypeUtils::GetDataTypeLength(data_type, type_size)) {
GELOGE(INTERNAL_ERROR, "Failed to get data type size");
return INTERNAL_ERROR;
}

tensor_size = type_size;
for (const auto &dim : shape) {
GE_CHECK_GE(dim, 0);
GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
"Shape size overflow, shape = [%s]",
GeShape(shape).ToString().c_str());
tensor_size *= dim;
}

GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
"Tensor size is too large: %ld, shape = [%s]",
tensor_size,
GeShape(shape).ToString().c_str());
tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment;
return SUCCESS;
}

Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) {
auto op_desc = node_item.GetOpDesc();
for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) {
auto tensor_desc = op_desc->MutableOutputDesc(output_index);
GE_CHECK_NOTNULL(tensor_desc);
const auto &shape = tensor_desc->MutableShape();
// modify on copy
auto dims = shape.GetDims();
GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range),
"[%s] Failed to canonicalize shape for output %zu",
node_item.NodeName().c_str(),
output_index);

int64_t tensor_size;
GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size),
"[%s] Failed to calc tensor size for output %zu",
node_item.NodeName().c_str(),
output_index);
GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size);
(void) TensorUtils::SetSize(*tensor_desc, tensor_size);
}

return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 0
- 4
ge/hybrid/executor/worker/shape_inference_engine.h View File

@@ -34,11 +34,7 @@ class ShapeInferenceEngine {

Status PropagateOutputShapes(const NodeItem &node_item);

static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false);

private:
static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector<int64_t> &shape, bool fallback_with_range);
static Status CalcTensorSize(DataType data_type, const std::vector<int64_t> &shape, int64_t &tensor_size);
static Status UpdatePeerNodeShape(const Node &node);
Status AwaitDependentNodes(NodeState &node_state);



+ 3
- 0
ge/hybrid/executor/worker/task_compile_engine.cc View File

@@ -26,6 +26,9 @@ Status TaskCompileEngine::Compile(NodeState &node_state, GraphExecutionContext *
RECORD_COMPILE_EVENT(context, node_item.NodeName().c_str(), "[Compile] Start");
GE_CHK_RT_RET(rtCtxSetCurrent(context->rt_gen_context));

if (context->ge_context != nullptr) {
GetThreadLocalContext() = *context->ge_context;
}
shared_ptr<NodeTask> kernel_task;
auto ret = node_item.node_executor->CompileTask(*context->model, node_item.node, kernel_task);
RECORD_COMPILE_EVENT(context, node_state.GetName().c_str(), "[Compile] End");


+ 26
- 0
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -21,6 +21,7 @@
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/new_model_manager/model_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/manager/host_mem_manager.h"
#include "graph/manager/trans_var_data_utils.h"
@@ -921,6 +922,7 @@ Status HybridModelBuilder::InitWeights() {
}

Status HybridModelBuilder::LoadTasks() {
GE_CHK_STATUS_RET(CheckAicpuOpList(), "Check Aicpu op failed.");
for (auto &it : hybrid_model_.node_items_) {
auto &node_item = it.second;
auto &node_ptr = node_item->node;
@@ -1557,5 +1559,29 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item,

return SUCCESS;
}

Status HybridModelBuilder::CheckAicpuOpList() {
std::vector<std::string> aicpu_optype_list;
std::vector<std::string> aicpu_tf_optype_list;
std::set<std::string> aicpu_optype_set;
std::set<std::string> aicpu_tf_optype_set;
for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) {
auto &ge_model = it.second;
GE_CHECK_NOTNULL(ge_model);
if (ge::AttrUtils::GetListStr(*ge_model, "needCheckCpu", aicpu_optype_list)) {
aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
}

if (ge::AttrUtils::GetListStr(*ge_model, "needCheckTf", aicpu_tf_optype_list)) {
aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
}
}
// reset list with set
aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list),
"Launch check aicpu op type failed.");
return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 1
- 0
ge/hybrid/model/hybrid_model_builder.h View File

@@ -78,6 +78,7 @@ class HybridModelBuilder {
Status ParseVarOutputs(NodeItem &node_item);
Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item);
Status RecoverGraphUnknownFlag();
Status CheckAicpuOpList();

const char* GetGraphName() const {
return hybrid_model_.model_name_.c_str();


+ 34
- 57
ge/hybrid/model/node_item.cc View File

@@ -22,7 +22,6 @@
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/node_utils.h"
#include "hybrid/node_executor/node_executor.h"
#include "hybrid/executor/worker/shape_inference_engine.h"

namespace ge {
namespace hybrid {
@@ -48,7 +47,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr
GE_CHECK_NOTNULL(dst_op_desc);
auto in_idx = node_and_anchor.second->GetIdx();
auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx);
fused_subgraph.input_mapping[static_cast<int>(parent_index)].emplace_back(tensor_desc);
fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc);
GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx);
}

@@ -65,7 +64,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap
return FAILED;
}

fused_subgraph.output_mapping.emplace(static_cast<int>(parent_index), op_desc);
fused_subgraph.output_mapping.emplace(parent_index, op_desc);
return SUCCESS;
}

@@ -127,7 +126,12 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite
return SUCCESS;
}

void NodeItem::ResolveOptionalInputs() {
Status NodeItem::Init() {
GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
num_inputs = static_cast<int>(op_desc->GetInputsSize());
num_outputs = static_cast<int>(op_desc->GetOutputsSize());

if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) {
has_optional_inputs = true;
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
@@ -139,18 +143,7 @@ void NodeItem::ResolveOptionalInputs() {
}
}
}
}

Status NodeItem::InitInputsAndOutputs() {
GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
num_inputs = static_cast<int>(op_desc->GetInputsSize());
num_outputs = static_cast<int>(op_desc->GetOutputsSize());
ResolveOptionalInputs();
return SUCCESS;
}

Status NodeItem::ResolveDynamicState() {
(void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
if (!is_dynamic) {
@@ -158,54 +151,38 @@ Status NodeItem::ResolveDynamicState() {
"[%s] Failed to get shape status.",
node->GetName().c_str());
}
return SUCCESS;
}

Status NodeItem::ResolveStaticInputsAndOutputs() {
for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static_.push_back(false);
} else {
num_static_input_shapes++;
is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
if (is_dynamic) {
for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static_.push_back(false);
} else {
num_static_input_shapes++;
is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
}
}
}

for (int i = 0; i < num_outputs; ++i) {
const auto &output_desc = op_desc->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
if (output_desc->MutableShape().IsUnknownShape()) {
is_output_shape_static = false;
break;
for (int i = 0; i < num_outputs; ++i) {
const auto &output_desc = op_desc->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
if (output_desc->MutableShape().IsUnknownShape()) {
is_output_shape_static = false;
break;
}
}
}

if (is_output_shape_static) {
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this));
}
return SUCCESS;
}

void NodeItem::ResolveUnknownShapeType() {
if (IsControlOp() || node_type == PARTITIONEDCALL) {
shape_inference_type = DEPEND_COMPUTE;
} else {
int32_t unknown_shape_type_val = 0;
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
}
}
if (IsControlOp() || node_type == PARTITIONEDCALL) {
shape_inference_type = DEPEND_COMPUTE;
} else {
int32_t unknown_shape_type_val = 0;
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
}

Status NodeItem::Init() {
GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs());
GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState());
if (is_dynamic) {
ResolveUnknownShapeType();
GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs());
GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str());
}



+ 0
- 5
ge/hybrid/model/node_item.h View File

@@ -103,11 +103,6 @@ struct NodeItem {
private:
explicit NodeItem(NodePtr node);
Status Init();
Status InitInputsAndOutputs();
void ResolveOptionalInputs();
Status ResolveDynamicState();
Status ResolveStaticInputsAndOutputs();
void ResolveUnknownShapeType();

std::vector<bool> is_input_shape_static_;
std::vector<uint32_t> input_desc_indices_;


+ 0
- 22
ge/hybrid/node_executor/task_context.cc View File

@@ -148,10 +148,6 @@ Status TaskContext::AllocateWorkspaces() {
}

Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const {
if (callback_fun == nullptr) {
GELOGW("[%s] Callback is NULL", GetNodeName());
return SUCCESS;
}
auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun);
if (ret != SUCCESS) {
GELOGE(ret, "[%s] Failed to register callback", GetNodeName());
@@ -388,20 +384,6 @@ const char *TaskContext::GetNodeName() const {
return node_item_->NodeName().c_str();
}

void TaskContext::ReleaseInputsAndOutputs() {
for (int i = 0; i < node_item_->num_inputs; ++i) {
auto tensor = inputs_start_ + i;
tensor->Destroy();
GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i);
}

for (int i = 0; i < node_item_->num_outputs; ++i) {
auto tensor = outputs_start_ + i;
tensor->Destroy();
GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i);
}
}

void TaskContext::ReleaseInput(int index) {
auto input_tensor = MutableInput(index);
if (input_tensor != nullptr) {
@@ -474,9 +456,5 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con
const DumpProperties &TaskContext::GetDumpProperties() const {
return execution_context_->dump_properties;
}

bool TaskContext::NeedCallback() {
return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0;
}
} // namespace hybrid
} // namespace ge

+ 0
- 2
ge/hybrid/node_executor/task_context.h View File

@@ -50,8 +50,6 @@ class TaskContext {
ConstGeTensorDescPtr GetOutputDesc(int index) const;
GeTensorDescPtr MutableInputDesc(int index) const;
GeTensorDescPtr MutableOutputDesc(int index) const;
void ReleaseInputsAndOutputs();
bool NeedCallback();
void ReleaseInput(int index);
const TensorValue *GetInput(int index) const;
const TensorValue *GetOutput(int index) const;


+ 3
- 4
ge/ir_build/atc_ir_common.cc View File

@@ -63,19 +63,18 @@ vector<string> SplitInputShape(const std::string &input_shape) {
}
} // namespace

Status CheckInputFormat(const string &input_format) {
Status CheckInputFormat(const std::string &input_format) {
if (input_format.empty()) {
return ge::SUCCESS;
}
if (!ge::TypeUtils::IsFormatValid(input_format.c_str())) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format is invalid!"});
GELOGE(ge::PARAM_INVALID, "input format [%s] is invalid!", input_format.c_str());
"E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format not found"});
GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str());
return ge::PARAM_INVALID;
}
return ge::SUCCESS;
}

bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
std::string &dynamic_batch_size) {
int32_t size = 0;


+ 1
- 1
ge/ir_build/atc_ir_common.h View File

@@ -75,7 +75,7 @@ Status CheckInsertOpConfParamValid(const std::string insert_op_conf);
Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory);
Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream);
Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode);
Status CheckInputFormat(const string &input_format);
Status CheckInputFormat(const std::string &input_format);
void PrintOptionMap(std::map<std::string, std::string> &options, std::string tips);
void EraseEndSemicolon(std::string &param);
}


+ 1
- 1
ge/offline/main.cc View File

@@ -305,7 +305,7 @@ class GFlagUtils {
" --debug_dir Set the save path of operator compilation intermediate files.\n"
"Default value: ./kernel_meta\n"
" --op_compiler_cache_dir Set the save path of operator compilation cache files.\n"
"Default value: $HOME/atc_data/kernel_cache\n"
"Default value: $HOME/atc_data\n"
" --op_compiler_cache_mode Set the operator compilation cache mode."
"Options are disable(default), enable and force(force to refresh the cache)");



+ 8
- 0
ge/proto/caffe/caffe.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software caffe, version 1.0 https://github.com/BVLC/caffe
*
* This file is included by GraphEngine so as to support model format conversion from caffe model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto2";

package domi.caffe;


+ 0
- 2
ge/proto/op_mapping_info.proto View File

@@ -15,7 +15,6 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -24,7 +23,6 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 8
- 0
ge/proto/tensorflow/attr_value.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/function.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/graph.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/graph_library.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/node_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/op_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/resource_handle.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/tensor.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/tensor_shape.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

// Protocol buffer representing the shape of tensors.

syntax = "proto3";


+ 8
- 0
ge/proto/tensorflow/types.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/proto/tensorflow/versions.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 4
- 4
ge/single_op/task/op_task.cc View File

@@ -119,11 +119,11 @@ Status OpTask::DoUpdateArgTable(const SingleOpModelParam &param, bool keep_works
uintptr_t *arg_base = nullptr;
size_t arg_num = 0;
GetIoAddr(arg_base, arg_num);
if (arg_num != all_addresses.size()) {
GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect = %zu, but got = %zu",
if (arg_num < all_addresses.size()) {
GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu",
op_desc_->GetName().c_str(),
arg_num,
all_addresses.size());
all_addresses.size(),
arg_num);
return INTERNAL_ERROR;
}



+ 101
- 101
inc/external/acl/error_codes/rt_error_codes.h View File

@@ -1,101 +1,101 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
#define __INC_EXTERNEL_RT_ERROR_CODES_H__
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
static const int32_t ACL_RT_SUCCESS = 0; // success
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
#ifdef __cplusplus
}
#endif
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __INC_EXTERNEL_RT_ERROR_CODES_H__
#define __INC_EXTERNEL_RT_ERROR_CODES_H__
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
static const int32_t ACL_RT_SUCCESS = 0; // success
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
#ifdef __cplusplus
}
#endif
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save