Browse Source

synchronize with latest Ascend software suite 28 Jun 2020

tags/v0.6.0-beta
yanghaoran 5 years ago
parent
commit
aef44e1557
100 changed files with 4461 additions and 1153 deletions
  1. +9
    -0
      inc/external/ge/ge_api.h
  2. +15
    -7
      inc/external/ge/ge_api_types.h
  3. +1
    -0
      inc/external/graph/types.h
  4. +6
    -5
      inc/external/register/register_fmk_types.h
  5. +2
    -2
      inc/framework/common/debug/log.h
  6. +2
    -0
      inc/framework/common/ge_inner_error_codes.h
  7. +71
    -5
      inc/framework/common/ge_types.h
  8. +2
    -3
      inc/framework/common/helper/om_file_helper.h
  9. +1
    -0
      inc/framework/common/types.h
  10. +10
    -0
      inc/framework/executor/ge_executor.h
  11. +0
    -113
      inc/framework/ge_runtime_dummy/davinci_model.h
  12. +0
    -58
      inc/framework/ge_runtime_dummy/model_runner.h
  13. +0
    -72
      inc/framework/ge_runtime_dummy/op_info.h
  14. +0
    -394
      inc/framework/ge_runtime_dummy/task_info.h
  15. +6
    -0
      inc/framework/omg/omg.h
  16. +2
    -2
      inc/framework/omg/omg_inner_types.h
  17. +1
    -2
      inc/graph/compute_graph.h
  18. +4
    -0
      inc/graph/debug/ge_attr_define.h
  19. +3
    -0
      inc/graph/ge_tensor.h
  20. +24
    -19
      inc/graph/utils/graph_utils.h
  21. +2
    -0
      inc/graph/utils/type_utils.h
  22. +22
    -128
      src/common/graph/compute_graph.cc
  23. +1
    -1
      src/common/graph/format_refiner.cc
  24. +5
    -1
      src/common/graph/ge_attr_define.cc
  25. +5
    -0
      src/common/graph/ge_attr_value.cc
  26. +18
    -0
      src/common/graph/ge_tensor.cc
  27. +182
    -0
      src/common/graph/graph.mk
  28. +39
    -0
      src/common/graph/model_serialize.cc
  29. +3
    -0
      src/common/graph/module.mk
  30. +2
    -0
      src/common/graph/tensor.cc
  31. +18
    -18
      src/common/graph/utils/graph_utils.cc
  32. +0
    -1
      src/common/graph/utils/op_desc_utils.cc
  33. +2
    -0
      src/common/graph/utils/tensor_utils.cc
  34. +20
    -2
      src/common/graph/utils/type_utils.cc
  35. +12
    -9
      src/ge/CMakeLists.txt
  36. +26
    -3
      src/ge/client/ge_api.cc
  37. +111
    -0
      src/ge/client/module.mk
  38. +1
    -2
      src/ge/common/auth/file_saver.cc
  39. +25
    -19
      src/ge/common/ge/plugin_manager.cc
  40. +293
    -0
      src/ge/common/ge/tbe_plugin_manager.cc
  41. +73
    -0
      src/ge/common/ge/tbe_plugin_manager.h
  42. +241
    -0
      src/ge/common/ge_common.mk
  43. +3
    -3
      src/ge/common/helper/model_cache_helper.cc
  44. +1
    -1
      src/ge/common/helper/model_helper.cc
  45. +6
    -6
      src/ge/common/helper/om_file_helper.cc
  46. +8
    -2
      src/ge/common/model_saver.cc
  47. +3
    -0
      src/ge/common/module.mk
  48. +2
    -0
      src/ge/common/op/ge_op_utils.cc
  49. +16
    -8
      src/ge/common/profiling/profiling_manager.cc
  50. +1
    -0
      src/ge/common/types.cc
  51. +11
    -13
      src/ge/common/util.cc
  52. +2
    -2
      src/ge/engine_manager/dnnengine_manager.cc
  53. +1
    -0
      src/ge/executor/CMakeLists.txt
  54. +142
    -27
      src/ge/executor/ge_executor.cc
  55. +202
    -0
      src/ge/executor/module.mk
  56. +407
    -0
      src/ge/ge_inference.mk
  57. +3
    -3
      src/ge/ge_local_engine/engine/host_cpu_engine.cc
  58. +59
    -0
      src/ge/ge_local_engine/module.mk
  59. +1
    -1
      src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc
  60. +1
    -1
      src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc
  61. +429
    -0
      src/ge/ge_runner.mk
  62. +333
    -0
      src/ge/ge_train.mk
  63. +75
    -22
      src/ge/generator/ge_generator.cc
  64. +41
    -60
      src/ge/graph/build/memory/block_mem_assigner.cc
  65. +11
    -8
      src/ge/graph/build/memory/block_mem_assigner.h
  66. +98
    -0
      src/ge/graph/build/memory/module.mk
  67. +32
    -5
      src/ge/graph/build/model_builder.cc
  68. +16
    -6
      src/ge/graph/build/stream_allocator.cc
  69. +1
    -0
      src/ge/graph/build/stream_allocator.h
  70. +21
    -15
      src/ge/graph/build/task_generator.cc
  71. +71
    -0
      src/ge/graph/execute/graph_execute.cc
  72. +11
    -0
      src/ge/graph/execute/graph_execute.h
  73. +1
    -1
      src/ge/graph/label/while_label_maker.cc
  74. +90
    -0
      src/ge/graph/load/new_model_manager/aipp_utils.cc
  75. +48
    -0
      src/ge/graph/load/new_model_manager/aipp_utils.h
  76. +3
    -17
      src/ge/graph/load/new_model_manager/data_dumper.cc
  77. +265
    -10
      src/ge/graph/load/new_model_manager/davinci_model.cc
  78. +28
    -0
      src/ge/graph/load/new_model_manager/davinci_model.h
  79. +130
    -4
      src/ge/graph/load/new_model_manager/model_manager.cc
  80. +27
    -1
      src/ge/graph/load/new_model_manager/model_manager.h
  81. +1
    -0
      src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc
  82. +28
    -5
      src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
  83. +2
    -0
      src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
  84. +29
    -0
      src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  85. +5
    -0
      src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
  86. +5
    -3
      src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
  87. +36
    -31
      src/ge/graph/manager/graph_manager.cc
  88. +2
    -1
      src/ge/graph/manager/graph_manager.h
  89. +1
    -0
      src/ge/graph/manager/graph_manager_utils.cc
  90. +7
    -1
      src/ge/graph/manager/graph_manager_utils.h
  91. +1
    -1
      src/ge/graph/manager/graph_var_manager.cc
  92. +1
    -1
      src/ge/graph/manager/graph_var_manager.h
  93. +5
    -1
      src/ge/graph/manager/util/rt_context_util.h
  94. +4
    -4
      src/ge/graph/optimize/graph_optimize.cc
  95. +397
    -0
      src/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc
  96. +55
    -0
      src/ge/graph/optimize/optimizer/allreduce_fusion_pass.h
  97. +3
    -2
      src/ge/graph/partition/dynamic_shape_partition.cc
  98. +12
    -17
      src/ge/graph/partition/graph_partition.cc
  99. +2
    -2
      src/ge/graph/passes/aicpu_constant_folding_pass.cc
  100. +5
    -2
      src/ge/graph/passes/atomic_addr_clean_pass.cc

+ 9
- 0
inc/external/ge/ge_api.h View File

@@ -79,6 +79,15 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session {

///
/// @ingroup ge_graph
/// @brief build graph in the session with specific session id
/// @param [in] graphId: graph id
/// @param [in] inputs: input data
/// @return Status result of function
///
Status BuildGraph(uint32_t graphId, const std::vector<InputTensorInfo> &inputs);

///
/// @ingroup ge_graph
/// @brief run graph in the session with specific session id asynchronously
/// @param [in] graphId: graph id
/// @param [in] inputs: input data


+ 15
- 7
inc/external/ge/ge_api_types.h View File

@@ -157,6 +157,9 @@ const std::string OUTPUT_DATATYPE = "ge.outputDatatype";
// configure opSelectImplmode to set the op select implmode
const std::string OP_SELECT_IMPL_MODE = "ge.opSelectImplmode";

// configure optypelist_for_implmode to set which op types use implmode
const std::string OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode";

// configure whether to enable hcom parallel by session constructor options param,
// its value should be "0" or "1", default value is "0"
const std::string HCOM_PARALLEL = "ge.hcomParallel";
@@ -258,12 +261,12 @@ using RunAsyncCallback = std::function<void(Status, std::vector<ge::OutputTensor
namespace ir_option {
static const char *const INPUT_FORMAT = "input_format";
static const char *const INPUT_SHAPE = "input_shape";
static const char *const OP_NAME_MAP = "op_name_map";
static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize;
static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize;
static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str();
static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str();
static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY;
static const char *const HEAD_STREAM = ge::HEAD_STREAM.c_str();
static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str();
static const char *const CORE_TYPE = ge::CORE_TYPE.c_str();
static const char *const SOC_VERSION = ge::SOC_VERSION.c_str();
@@ -280,16 +283,20 @@ static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf";
static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str();
static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str();
static const char *const LOG_LEVEL = "log";
static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str();

// for interface: aclgrphBuildModel
const std::set<std::string> ir_builder_suppported_options = {
INPUT_FORMAT, INPUT_SHAPE, DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE,
INSERT_OP_FILE, OUTPUT_TYPE, BUFFER_OPTIMIZE, ENABLE_COMPRESS_WEIGHT,
COMPRESS_WEIGHT_CONF, OUT_NODES, INPUT_FP16_NODES, LOG_LEVEL};
INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, DYNAMIC_BATCH_SIZE,
DYNAMIC_IMAGE_SIZE, INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY,
AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, INPUT_FP16_NODES,
LOG_LEVEL};
// for interface: aclgrphBuildInitialize
const std::set<std::string> global_options = {HEAD_STREAM,
CORE_TYPE,
const std::set<std::string> global_options = {CORE_TYPE,
SOC_VERSION,
BUFFER_OPTIMIZE,
ENABLE_COMPRESS_WEIGHT,
COMPRESS_WEIGHT_CONF,
PRECISION_MODE,
EXEC_DISABLE_REUSED_MEMORY,
AUTO_TUNE_MODE,
@@ -298,7 +305,8 @@ const std::set<std::string> global_options = {HEAD_STREAM,
FUSION_SWITCH_FILE,
ENABLE_SMALL_CHANNEL,
QUANT_OPTIMIZE,
OP_SELECT_IMPL_MODE};
OP_SELECT_IMPL_MODE,
OPTYPELIST_FOR_IMPLMODE};
} // namespace ir_option
} // namespace ge



+ 1
- 0
inc/external/graph/types.h View File

@@ -143,6 +143,7 @@ enum Format {
FORMAT_DHWNC,
FORMAT_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format
FORMAT_FRACTAL_ZN_LSTM,
FORMAT_FRACTAL_Z_G,
FORMAT_RESERVED,
FORMAT_ALL
};


+ 6
- 5
inc/external/register/register_fmk_types.h View File

@@ -25,11 +25,12 @@ namespace domi {
/// @brief AI framework types
///
enum FrameworkType {
FMK_TYPE_C = 0,
FMK_TYPE_MINDSPORE = 1,
FMK_TYPE_T = 3,
FMK_TYPE_A_NN,
FMK_TYPE_RESERVED,
CAFFE = 0,
MINDSPORE = 1,
TENSORFLOW = 3,
ANDROID_NN,
ONNX,
FRAMEWORK_RESERVED,
};
} // namespace domi



+ 2
- 2
inc/framework/common/debug/log.h View File

@@ -231,7 +231,7 @@ using cce::ccStatus_t;
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \
exec_expr; \
} \
}
};

// If expr is not RT_ERROR_NONE, print the log and return
#define GE_CHK_RT_RET(expr) \
@@ -259,7 +259,7 @@ using cce::ccStatus_t;
if (expr) { \
exec_expr; \
} \
}
};

// If make_shared is abnormal, print the log and execute the statement
#define GE_MAKE_SHARED(exec_expr0, exec_expr1) \


+ 2
- 0
inc/framework/common/ge_inner_error_codes.h View File

@@ -280,6 +280,8 @@ GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_REDUCE_SCATTER_FAILED, 47, "call hccl hcom r

// Executor module error code definition
GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized.");
GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 2, "GE AIPP is not exist.");
GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 3, "GE Dynamic AIPP is not support to query temporarily.");

// Generator module error code definition
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed.");


+ 71
- 5
inc/framework/common/ge_types.h View File

@@ -33,11 +33,11 @@ enum RuntimeType { HOST = 0, DEVICE = 1 };
enum PerfLevel { GEN_TASK_WITH_FUSION = -1, GEN_TASK_WITHOUT_L2FUSION = 3, GEN_TASK_WITHOUT_FUSION = 4 };

enum FrameworkType {
FMK_TYPE_C = 0,
FMK_TYPE_MINDSPORE = 1,
FMK_TYPE_T = 3,
FMK_TYPE_A_NN,
FMK_TYPE_RESERVED,
CAFFE = 0,
MINDSPORE = 1,
TENSORFLOW = 3,
ANDROID_NN,
FRAMEWORK_RESERVED,
};

enum OpEngineType {
@@ -111,6 +111,72 @@ struct InputOutputDescInfo {
ShapeDescription shape_info;
};

// Definition of model io dims
struct InputOutputDims {
std::string name;
size_t dim_num;
uint32_t size;
std::vector<int64_t> dims;
};

// Definition of origin input info
struct OriginInputInfo {
Format format;
DataType data_type;
uint32_t dim_num;
};

// The structure of AIPP info
struct AippConfigInfo {
int8_t input_format;
int32_t src_image_size_w;
int32_t src_image_size_h;
int8_t crop;
int32_t load_start_pos_w;
int32_t load_start_pos_h;
int32_t crop_size_w;
int32_t crop_size_h;
int8_t resize;
int32_t resize_output_w;
int32_t resize_output_h;
int8_t padding;
int32_t left_padding_size;
int32_t right_padding_size;
int32_t top_padding_size;
int32_t bottom_padding_size;
int8_t csc_switch;
int8_t rbuv_swap_switch;
int8_t ax_swap_switch;
int8_t single_line_mode;
int32_t matrix_r0c0;
int32_t matrix_r0c1;
int32_t matrix_r0c2;
int32_t matrix_r1c0;
int32_t matrix_r1c1;
int32_t matrix_r1c2;
int32_t matrix_r2c0;
int32_t matrix_r2c1;
int32_t matrix_r2c2;
int32_t output_bias_0;
int32_t output_bias_1;
int32_t output_bias_2;
int32_t input_bias_0;
int32_t input_bias_1;
int32_t input_bias_2;
int32_t mean_chn_0;
int32_t mean_chn_1;
int32_t mean_chn_2;
int32_t mean_chn_3;
float min_chn_0;
float min_chn_1;
float min_chn_2;
float min_chn_3;
float var_reci_chn_0;
float var_reci_chn_1;
float var_reci_chn_2;
float var_reci_chn_3;
};

// The structure of offline ModelData
struct ModelData {
void *model_data = nullptr; // Model binary data start addr


+ 2
- 3
inc/framework/common/helper/om_file_helper.h View File

@@ -59,15 +59,14 @@ class OmFileLoadHelper {

Status GetModelPartition(ModelPartitionType type, ModelPartition &partition);

OmFileContext context_;

private:
Status CheckModelValid(const ge::ModelData &model) const;

Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size);

bool is_inited_{false};

public:
OmFileContext context_;
};

class OmFileSaveHelper {


+ 1
- 0
inc/framework/common/types.h View File

@@ -160,6 +160,7 @@ REGISTER_OPTYPE_DECLARE(SLICE, "Slice");
REGISTER_OPTYPE_DECLARE(SLICED, "SliceD");
REGISTER_OPTYPE_DECLARE(FLOORDIV, "FloorDiv");
REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze");
REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze");
REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice");
REGISTER_OPTYPE_DECLARE(RANGE, "Range");
REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals");


+ 10
- 0
inc/framework/executor/ge_executor.h View File

@@ -96,6 +96,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
///
ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);

ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info);

///
/// @ingroup ge
/// @brief Set dynamic image info
@@ -110,6 +112,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
const std::vector<kAippDynamicBatchPara> &aippBatchPara,
const kAippDynamicPara &aippParms);

ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
ge::Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info);

ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc);

@@ -206,6 +211,11 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {

static ge::Status ReleaseSingleOpResource(void *stream);

ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count);
ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);

private:
static bool isInit_;
};


+ 0
- 113
inc/framework/ge_runtime_dummy/davinci_model.h View File

@@ -1,113 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_
#define INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_

#include <memory>
#include <vector>

#include "ge_runtime/op_info.h"
#include "ge_runtime/task_info.h"

namespace ge {
namespace model_runner {
// In-memory description of a Davinci model handed to the runtime: the task
// list to execute, per-category op lists (data / output / constant / variable),
// stream bookkeeping and the logical memory layout. Purely a value holder —
// all data is copied in at construction and every accessor is read-only.
class DavinciModel {
 public:
  // All list parameters are copied; the numeric sizes/bases/counts default to
  // 0 so a partially specified model remains constructible.
  DavinciModel(const std::vector<std::shared_ptr<TaskInfo>> &task_info_list,
               const std::vector<std::shared_ptr<OpInfo>> &data_info_list,
               const std::vector<std::shared_ptr<OpInfo>> &output_info_list,
               const std::vector<std::shared_ptr<OpInfo>> &constant_info_list,
               const std::vector<model_runner::OpInfoPtr> &variable_info_list,
               const std::vector<uint32_t> &wait_active_stream_list,
               const std::vector<uint32_t> &force_copy_stream_list, uint64_t mem_size = 0, uint64_t weight_size = 0,
               uint64_t var_size = 0, uintptr_t logic_mem_base = 0, uintptr_t logic_weight_base = 0,
               uintptr_t logic_var_base = 0, uint32_t stream_num = 0, uint32_t batch_num = 0, uint32_t event_num = 0,
               int32_t priority = 0)
      : task_info_list_(task_info_list),
        data_info_list_(data_info_list),
        output_info_list_(output_info_list),
        constant_info_list_(constant_info_list),
        variable_info_list_(variable_info_list),
        wait_active_stream_list_(wait_active_stream_list),
        force_copy_stream_list_(force_copy_stream_list),
        mem_size_(mem_size),
        weight_size_(weight_size),
        var_size_(var_size),
        logic_mem_base_(logic_mem_base),
        logic_weight_base_(logic_weight_base),
        logic_var_base_(logic_var_base),
        stream_num_(stream_num),
        batch_num_(batch_num),
        event_num_(event_num),
        priority_(priority) {}
  ~DavinciModel() {}

  // Sizes of the feature-map, weight and variable memory regions.
  uint64_t GetMemSize() const { return mem_size_; }
  uint64_t GetWeightSize() const { return weight_size_; }
  uint64_t GetVarSize() const { return var_size_; }

  // Logical base addresses the task addresses were generated against.
  uintptr_t GetLogicMemBase() const { return logic_mem_base_; }
  uintptr_t GetLogicWeightBase() const { return logic_weight_base_; }
  uintptr_t GetLogicVarBase() const { return logic_var_base_; }

  uint32_t GetStreamNum() const { return stream_num_; }
  uint32_t GetBatchNum() const { return batch_num_; }
  uint32_t GetEventNum() const { return event_num_; }

  const std::vector<uint32_t> &GetWaitActiveStreams() const { return wait_active_stream_list_; }
  const std::vector<uint32_t> &GetForceCopyStreams() const { return force_copy_stream_list_; }

  int32_t GetPriority() const { return priority_; }

  const std::vector<std::shared_ptr<TaskInfo>> &GetTaskInfoList() const { return task_info_list_; }
  const std::vector<std::shared_ptr<OpInfo>> &GetDataInfoList() const { return data_info_list_; }
  const std::vector<std::shared_ptr<OpInfo>> &GetOutputInfoList() const { return output_info_list_; }
  // BUGFIX: this accessor previously returned output_info_list_, so callers
  // asking for the constant ops silently received the output ops instead.
  const std::vector<std::shared_ptr<OpInfo>> &GetConstantInfoList() const { return constant_info_list_; }
  const std::vector<model_runner::OpInfoPtr> &GetVariableInfoList() const { return variable_info_list_; }

 private:
  std::vector<std::shared_ptr<TaskInfo>> task_info_list_;
  std::vector<std::shared_ptr<OpInfo>> data_info_list_;
  std::vector<std::shared_ptr<OpInfo>> output_info_list_;
  std::vector<std::shared_ptr<OpInfo>> constant_info_list_;
  std::vector<model_runner::OpInfoPtr> variable_info_list_;

  std::vector<uint32_t> wait_active_stream_list_;
  std::vector<uint32_t> force_copy_stream_list_;

  uint64_t mem_size_;
  uint64_t weight_size_;
  uint64_t var_size_;

  uintptr_t logic_mem_base_;
  uintptr_t logic_weight_base_;
  uintptr_t logic_var_base_;

  uint32_t stream_num_;
  uint32_t batch_num_;
  uint32_t event_num_;

  int32_t priority_;

  // Non-copyable: this object is shared via shared_ptr by the runner.
  DavinciModel &operator=(const DavinciModel &) = delete;
  DavinciModel(const DavinciModel &) = delete;
};
} // namespace model_runner
} // namespace ge

#endif // INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_

+ 0
- 58
inc/framework/ge_runtime_dummy/model_runner.h View File

@@ -1,58 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_
#define INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_

#include <memory>
#include <unordered_map>
#include <vector>

#include "common/ge_inner_error_codes.h"
#include "common/ge_types.h"
#include "ge_runtime/davinci_model.h"

namespace ge {
namespace model_runner {
class RuntimeModel;

// Process-wide singleton that owns every loaded runtime model, keyed by
// model_id. Declarations only — implementations live in the runtime library.
class ModelRunner {
 public:
  // Sole access point; construction and destruction are private.
  static ModelRunner &Instance();

  // Loads `davinci_model` on the given device/session under `model_id`.
  // NOTE(review): `listener` presumably receives run/completion callbacks —
  // confirm in the implementation; not visible from this header.
  bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id,
                        std::shared_ptr<DavinciModel> davinci_model, std::shared_ptr<ModelListener> listener);

  // Task ids generated for a previously loaded model.
  const std::vector<uint32_t> &GetTaskIdList(uint32_t model_id) const;

  bool UnloadModel(uint32_t model_id);

  bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data);

  // Fills input/output tensor descriptions and format codes for a loaded
  // model. NOTE(review): `zero_copy` looks like it selects the zero-copy
  // description variant — confirm against the implementation.
  bool GetInputOutputDescInfo(uint32_t model_id, bool zero_copy, std::vector<InputOutputDescInfo> *input_desc,
                              std::vector<InputOutputDescInfo> *output_desc, std::vector<uint32_t> *input_format,
                              std::vector<uint32_t> *output_format);

 private:
  ModelRunner() = default;
  ~ModelRunner() = default;

  // model_id -> loaded runtime model.
  std::unordered_map<uint32_t, std::shared_ptr<RuntimeModel>> runtime_models_;
};
} // namespace model_runner
} // namespace ge

#endif // INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_

+ 0
- 72
inc/framework/ge_runtime_dummy/op_info.h View File

@@ -1,72 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_
#define INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_

#include <memory>
#include <string>
#include <vector>

namespace ge {
namespace model_runner {
// Lightweight tensor descriptor: shape plus datatype/format codes and the
// buffer size. Plain aggregate; field order is part of the ABI.
struct TensorInfo {
  // Element count = product of all dims; returns 0 for an empty shape.
  // NOTE: no overflow check is performed on the product.
  int64_t GetShapeSize() const {
    if (dims.empty()) {
      return 0;
    }
    int64_t res = 1;
    for (auto dim : dims) {
      res *= dim;
    }
    return res;
  }

  // Extent of dimension `index`, or 0 when out of range.
  // Marked const (it is a pure read accessor) so it can be called on const
  // descriptors; previously non-const for no reason.
  int64_t GetDim(uint32_t index) const {
    if (index >= dims.size()) {
      return 0;
    }
    return dims[index];
  }

  std::vector<int64_t> dims;
  uint32_t datatype;      // enum code; semantics defined by the runtime, not visible here
  uint32_t format;        // enum code; semantics defined by the runtime, not visible here
  uint32_t real_dim_cnt;
  uint32_t size;          // buffer size (presumably bytes) — TODO confirm against producer
  bool is_output;
};

// Description of a single op instance: identity (index/name/type), its I/O
// buffer addresses and tensor descriptors, weights, and source-edge wiring.
// Plain aggregate; field order is part of the ABI.
struct OpInfo {
  uint32_t index;
  std::string name;
  std::string type;
  bool var_is_broadcast;
  std::vector<uintptr_t> input_addrs;    // device addresses as integers
  std::vector<uintptr_t> output_addrs;
  std::vector<TensorInfo> input_tensors;
  std::vector<TensorInfo> output_tensors;
  std::vector<TensorInfo> weight_tensors;
  // src_name/src_index run in parallel: producer op name and its output slot.
  // NOTE(review): parallelism inferred from naming — confirm against callers.
  std::vector<std::string> src_name;
  std::vector<int64_t> src_index;
  std::string weight_data;               // weight payload carried in a std::string buffer
};

using TensorInfoPtr = std::shared_ptr<TensorInfo>;
using OpInfoPtr = std::shared_ptr<OpInfo>;
} // namespace model_runner
} // namespace ge
#endif // INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_

+ 0
- 394
inc/framework/ge_runtime_dummy/task_info.h View File

@@ -1,394 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_
#define INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_

#include <stdint.h>
#include <functional>
#include <memory>
#include <string>
#include <vector>

#include "cce/taskdown_api.h"

namespace ge {
namespace model_runner {
// Discriminator for TaskInfo subclasses. The sentinel is pinned to 23 so new
// task types can be inserted before it without renumbering existing values.
enum TaskInfoType {
  CCE = 0,
  TBE,
  AICPU,
  LABEL_SET,
  LABEL_SWITCH,
  LABEL_GOTO,
  EVENT_RECORD,
  EVENT_WAIT,
  FUSION_START,
  FUSION_END,
  HCCL,
  PROFILER_TRACE,
  MEMCPY_ASYNC,
  STREAM_SWITCH,
  STREAM_ACTIVE,
  // Insert new task type here
  REVSERVED = 23  // sic: misspelling of "RESERVED"; identifier kept — it is part of the API
};

// Abstract base for all task descriptors: every task records the id of the
// stream it runs on plus a TaskInfoType discriminator. Only constructible by
// subclasses (protected constructor); deletable through the base (virtual dtor).
class TaskInfo {
 public:
  virtual ~TaskInfo() {}
  uint32_t stream_id() const { return stream_id_; }
  TaskInfoType type() const { return type_; }

 protected:
  TaskInfo(uint32_t stream_id, TaskInfoType type) : stream_id_(stream_id), type_(type) {}

 private:
  uint32_t stream_id_;
  TaskInfoType type_;
};

// Task descriptor for a CCE kernel launch. Pure value holder: the op context,
// stub function name, block dimension and the raw argument / flow-table
// buffers are copied in at construction; accessors return the stored copies.
class CceTaskInfo : public TaskInfo {
 public:
  CceTaskInfo(uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, uint32_t block_dim,
              const std::vector<uint8_t> &args, uint32_t args_size, const std::vector<uint8_t> &sm_desc,
              const std::vector<uint8_t> &flow_table, const std::vector<uint8_t> &args_offset, bool is_flowtable)
      : TaskInfo(stream_id, TaskInfoType::CCE),
        ctx_(ctx),
        stub_func_(stub_func),
        block_dim_(block_dim),
        args_(args),
        args_size_(args_size),
        sm_desc_(sm_desc),
        flow_table_(flow_table),
        args_offset_(args_offset),
        is_flowtable_(is_flowtable) {}
  ~CceTaskInfo() override {}

  // Returns the context BY VALUE (copies ctx_), unlike the other accessors.
  cce::ccOpContext cc_context() const { return ctx_; }
  std::string stub_func() const { return stub_func_; }
  uint32_t block_dim() const { return block_dim_; }
  const std::vector<uint8_t> &args() const { return args_; }
  uint32_t args_size() const { return args_size_; }
  const std::vector<uint8_t> &sm_desc() const { return sm_desc_; }
  const std::vector<uint8_t> &flow_table() const { return flow_table_; }
  const std::vector<uint8_t> &args_offset() const { return args_offset_; }
  bool is_flowtable() const { return is_flowtable_; }

 private:
  cce::ccOpContext ctx_;
  std::string stub_func_;
  uint32_t block_dim_;
  std::vector<uint8_t> args_;
  uint32_t args_size_;  // tracked separately from args_.size(); relation not visible here
  std::vector<uint8_t> sm_desc_;
  std::vector<uint8_t> flow_table_;
  std::vector<uint8_t> args_offset_;
  bool is_flowtable_;
};

// Task descriptor for a TBE kernel launch: stub name, block dimension, raw
// argument buffers, the compiled kernel binary and the device I/O / workspace
// addresses. SetBinary is the only mutator (binary can be attached later).
class TbeTaskInfo : public TaskInfo {
 public:
  TbeTaskInfo(uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, const std::vector<uint8_t> &args,
              uint32_t args_size, const std::vector<uint8_t> &sm_desc, void *binary, uint32_t binary_size,
              const std::vector<uint8_t> &meta_data, const std::vector<void *> &input_data_addrs,
              const std::vector<void *> &output_data_addrs, const std::vector<void *> &workspace_addrs)
      : TaskInfo(stream_id, TaskInfoType::TBE),
        stub_func_(stub_func),
        block_dim_(block_dim),
        args_(args),
        args_size_(args_size),
        sm_desc_(sm_desc),
        binary_(binary),
        binary_size_(binary_size),
        meta_data_(meta_data),
        input_data_addrs_(input_data_addrs),
        output_data_addrs_(output_data_addrs),
        workspace_addrs_(workspace_addrs) {}
  ~TbeTaskInfo() override {}

  const std::string &stub_func() const { return stub_func_; }
  uint32_t block_dim() const { return block_dim_; }
  const std::vector<uint8_t> &args() const { return args_; }
  uint32_t args_size() const { return args_size_; }
  const std::vector<uint8_t> &sm_desc() const { return sm_desc_; }
  void *binary() const { return binary_; }
  uint32_t binary_size() const { return binary_size_; }
  const std::vector<uint8_t> &meta_data() const { return meta_data_; }
  const std::vector<void *> &input_data_addrs() const { return input_data_addrs_; }
  const std::vector<void *> &output_data_addrs() const { return output_data_addrs_; }
  const std::vector<void *> &workspace_addrs() const { return workspace_addrs_; }

  // Replaces the kernel binary pointer and its size together.
  void SetBinary(void *binary, uint32_t binary_size) {
    binary_ = binary;
    binary_size_ = binary_size;
  }

 private:
  std::string stub_func_;
  uint32_t block_dim_;
  std::vector<uint8_t> args_;
  uint32_t args_size_;
  std::vector<uint8_t> sm_desc_;
  // NOTE(review): raw pointer — ownership of the binary is not visible from
  // this header; presumably non-owning (never freed here). Confirm in runtime.
  void *binary_;
  uint32_t binary_size_;
  std::vector<uint8_t> meta_data_;
  std::vector<void *> input_data_addrs_;
  std::vector<void *> output_data_addrs_;
  std::vector<void *> workspace_addrs_;
};

// Task descriptor for an AI-CPU kernel: identifies the kernel by shared-object
// and kernel name, carries the serialized node definition and the device I/O
// buffer addresses. Pure value holder; all accessors are read-only.
class AicpuTaskInfo : public TaskInfo {
 public:
  // FIX: `so_name` was declared with unqualified `string`; spelled out as
  // std::string for consistency with the rest of this header (same type, so
  // the interface is unchanged for all callers).
  AicpuTaskInfo(uint32_t stream_id, const std::string &so_name, const std::string &kernel_name,
                const std::string &node_def, const std::vector<void *> &input_data_addrs,
                const std::vector<void *> &output_data_addrs)
      : TaskInfo(stream_id, TaskInfoType::AICPU),
        so_name_(so_name),
        kernel_name_(kernel_name),
        node_def_(node_def),
        input_data_addrs_(input_data_addrs),
        output_data_addrs_(output_data_addrs) {}
  ~AicpuTaskInfo() override {}

  const std::string &so_name() const { return so_name_; }
  const std::string &kernel_name() const { return kernel_name_; }
  const std::string &node_def() const { return node_def_; }
  const std::vector<void *> &input_data_addrs() const { return input_data_addrs_; }
  const std::vector<void *> &output_data_addrs() const { return output_data_addrs_; }

 private:
  std::string so_name_;
  std::string kernel_name_;
  std::string node_def_;  // serialized node definition; format not visible here
  std::vector<void *> input_data_addrs_;
  std::vector<void *> output_data_addrs_;
};

// Common base for the label-manipulation tasks: adds a label id on top of
// TaskInfo. Concrete kind (set/switch/goto) is chosen by the subclass ctor.
class LabelTaskInfo : public TaskInfo {
 public:
  uint32_t label_id() const { return label_id_; }

 protected:
  LabelTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t label_id)
      : TaskInfo(stream_id, type), label_id_(label_id) {}
  virtual ~LabelTaskInfo() override {}

  uint32_t label_id_;
};

// Defines (sets) a label on the given stream.
class LabelSetTaskInfo : public LabelTaskInfo {
 public:
  LabelSetTaskInfo(uint32_t stream_id, uint32_t label_id)
      : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SET, label_id) {}
  ~LabelSetTaskInfo() override {}
};

// Conditional branch to a label on the given stream.
class LabelSwitchTaskInfo : public LabelTaskInfo {
 public:
  LabelSwitchTaskInfo(uint32_t stream_id, uint32_t label_id)
      : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SWITCH, label_id) {}
  ~LabelSwitchTaskInfo() override {}
};

// Unconditional jump to a label on the given stream.
class LabelGotoTaskInfo : public LabelTaskInfo {
 public:
  LabelGotoTaskInfo(uint32_t stream_id, uint32_t label_id)
      : LabelTaskInfo(stream_id, TaskInfoType::LABEL_GOTO, label_id) {}
  ~LabelGotoTaskInfo() override {}
};

// Common base for event tasks: adds an event id on top of TaskInfo. Concrete
// kind (record/wait) is chosen by the subclass constructor.
class EventTaskInfo : public TaskInfo {
 public:
  uint32_t event_id() const { return event_id_; }

 protected:
  EventTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t event_id)
      : TaskInfo(stream_id, type), event_id_(event_id) {}
  virtual ~EventTaskInfo() override {}

  uint32_t event_id_;
};

// Records (signals) the event on the given stream.
class EventRecordTaskInfo : public EventTaskInfo {
 public:
  EventRecordTaskInfo(uint32_t stream_id, uint32_t event_id)
      : EventTaskInfo(stream_id, TaskInfoType::EVENT_RECORD, event_id) {}
  ~EventRecordTaskInfo() override {}
};

// Makes the given stream wait until the event is recorded.
class EventWaitTaskInfo : public EventTaskInfo {
 public:
  EventWaitTaskInfo(uint32_t stream_id, uint32_t event_id)
      : EventTaskInfo(stream_id, TaskInfoType::EVENT_WAIT, event_id) {}
  ~EventWaitTaskInfo() override {}
};

// Marks the start of a fusion region on the given stream; carries no payload
// beyond the stream id and its type tag.
class FusionStartTaskInfo : public TaskInfo {
 public:
  explicit FusionStartTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_START) {}
  ~FusionStartTaskInfo() override {}
};

// Marks the end of a fusion region on the given stream.
class FusionEndTaskInfo : public TaskInfo {
 public:
  explicit FusionEndTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_END) {}
  ~FusionEndTaskInfo() override {}
};

// Task describing an HCCL (collective communication) operation.
// Pure data holder: buffers/workspace addresses, op parameters, and the
// hcom callbacks used to bind/unbind the model and distribute the task.
// NOTE(review): the void* members are raw device/host addresses owned by
// the caller — this class does not manage their lifetime.
class HcclTaskInfo : public TaskInfo {
 public:
  // Fix: hccl_type was previously taken as 'const std::string' (by value),
  // forcing a needless copy at the call site; pass by const reference.
  // Source-compatible with all existing callers.
  HcclTaskInfo(uint32_t stream_id, const std::string &hccl_type, void *input_data_addr, void *output_data_addr,
               void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num,
               const std::vector<uint8_t> &private_def, void *ops_kernel_store, int32_t count, int64_t root_id,
               int64_t op_type, int64_t data_type, std::function<bool(void *, void *)> hcom_bind_model,
               std::function<bool(void *)> hcom_unbind_model,
               std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task)
      : TaskInfo(stream_id, TaskInfoType::HCCL),
        hccl_type_(hccl_type),
        input_data_addr_(input_data_addr),
        output_data_addr_(output_data_addr),
        workspace_addr_(workspace_addr),
        workspace_size_(workspace_size),
        hccl_stream_num_(hccl_stream_num),
        private_def_(private_def),
        ops_kernel_store_(ops_kernel_store),
        count_(count),
        root_id_(root_id),
        op_type_(op_type),
        data_type_(data_type),
        hcom_bind_model_(hcom_bind_model),
        hcom_unbind_model_(hcom_unbind_model),
        hcom_distribute_task_(hcom_distribute_task) {}
  ~HcclTaskInfo() override {}

  const std::string &hccl_type() const { return hccl_type_; }
  void *input_data_addr() const { return input_data_addr_; }
  void *output_data_addr() const { return output_data_addr_; }
  void *workspace_addr() const { return workspace_addr_; }
  int64_t workspace_size() const { return workspace_size_; }
  int64_t hccl_stream_num() const { return hccl_stream_num_; }
  const std::vector<uint8_t> &private_def() const { return private_def_; }
  void *ops_kernel_store() const { return ops_kernel_store_; }
  int32_t count() const { return count_; }
  int64_t root_id() const { return root_id_; }
  int64_t op_type() const { return op_type_; }
  int64_t data_type() const { return data_type_; }
  // The three accessors below return the std::function by value (a copy) to
  // keep the existing public interface unchanged.
  std::function<bool(void *, void *)> hcom_bind_model() const { return hcom_bind_model_; }
  std::function<bool(void *)> hcom_unbind_model() const { return hcom_unbind_model_; }
  std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task() const {
    return hcom_distribute_task_;
  }

 private:
  std::string hccl_type_;
  void *input_data_addr_;
  void *output_data_addr_;
  void *workspace_addr_;
  int64_t workspace_size_;
  int64_t hccl_stream_num_;
  std::vector<uint8_t> private_def_;
  void *ops_kernel_store_;
  int32_t count_;
  int64_t root_id_;
  int64_t op_type_;
  int64_t data_type_;
  std::function<bool(void *, void *)> hcom_bind_model_;
  std::function<bool(void *)> hcom_unbind_model_;
  std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task_;
};

// Task that emits a profiler trace point identified by log_id on a stream.
// NOTE(review): "flat" appears to be a flags/format word — confirm with the
// runtime API before relying on its meaning.
class ProfilerTraceTaskInfo : public TaskInfo {
 public:
  ProfilerTraceTaskInfo(uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat)
      : TaskInfo(stream_id, TaskInfoType::PROFILER_TRACE), log_id_(log_id), notify_(notify), flat_(flat) {}
  ~ProfilerTraceTaskInfo() override = default;

  uint64_t log_id() const { return log_id_; }
  bool notify() const { return notify_; }
  uint32_t flat() const { return flat_; }

 private:
  uint64_t log_id_;
  bool notify_;
  uint32_t flat_;
};

// Task describing an asynchronous memory copy on the given stream.
// Mirrors the rtMemcpyAsync-style argument list: destination buffer and its
// capacity, source buffer, byte count, and copy kind.
class MemcpyAsyncTaskInfo : public TaskInfo {
 public:
  MemcpyAsyncTaskInfo(uint32_t stream_id, void *dst, uint64_t dst_max, void *src, uint64_t count, uint32_t kind)
      : TaskInfo(stream_id, TaskInfoType::MEMCPY_ASYNC),
        dst_(dst),
        dst_max_(dst_max),
        src_(src),
        count_(count),
        kind_(kind) {}
  ~MemcpyAsyncTaskInfo() override {}

  void *dst() const { return dst_; }
  uint64_t dst_max() const { return dst_max_; }
  void *src() const { return src_; }
  uint64_t count() const { return count_; }
  uint32_t kind() const { return kind_; }

 private:
  void *dst_;
  uint64_t dst_max_;
  void *src_;
  uint64_t count_;
  // Fix: was declared int32_t while the constructor parameter and the
  // kind() accessor both use uint32_t — an implicit sign conversion in
  // both directions. Member type now matches the interface.
  uint32_t kind_;
};

// Task that conditionally activates another stream: compares the value at
// input_addr against the value at value_addr using condition 'cond', and on
// success switches execution to true_stream_id.
class StreamSwitchTaskInfo : public TaskInfo {
 public:
  StreamSwitchTaskInfo(uint32_t stream_id, int64_t true_stream_id, void *input_addr, void *value_addr, int64_t cond,
                       int64_t data_type)
      : TaskInfo(stream_id, TaskInfoType::STREAM_SWITCH),
        true_stream_id_(true_stream_id),
        input_addr_(input_addr),
        value_addr_(value_addr),
        cond_(cond),
        data_type_(data_type) {}
  ~StreamSwitchTaskInfo() override = default;

  int64_t true_stream_id() const { return true_stream_id_; }
  void *input_addr() const { return input_addr_; }
  void *value_addr() const { return value_addr_; }
  int64_t cond() const { return cond_; }
  int64_t data_type() const { return data_type_; }

 private:
  int64_t true_stream_id_;
  void *input_addr_;
  void *value_addr_;
  int64_t cond_;
  int64_t data_type_;
};

// Task that activates another stream (identified by active_stream_id) from
// the given stream.
class StreamActiveTaskInfo : public TaskInfo {
 public:
  StreamActiveTaskInfo(uint32_t stream_id, uint32_t active_stream_id)
      : TaskInfo(stream_id, TaskInfoType::STREAM_ACTIVE), active_stream_id_(active_stream_id) {}
  ~StreamActiveTaskInfo() override = default;

  uint32_t active_stream_id() const { return active_stream_id_; }

 private:
  uint32_t active_stream_id_;
};
} // namespace model_runner
} // namespace ge

#endif // INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_

+ 6
- 0
inc/framework/omg/omg.h View File

@@ -23,6 +23,7 @@
#include <vector>
#include "framework/common/types.h"
#include "framework/omg/omg_inner_types.h"
#include "framework/omg/parser/parser_inner_ctx.h"
#include "proto/ge_ir.pb.h"
#include "proto/om.pb.h"

@@ -99,6 +100,11 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const

Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
std::vector<std::string> &output_nodes_name);

void UpdateOmgCtxWithParserCtx();

void UpdateParserCtxWithOmgCtx();

} // namespace ge

namespace domi {


+ 2
- 2
inc/framework/omg/omg_inner_types.h View File

@@ -31,7 +31,7 @@
using domi::DOMI_TENSOR_ND;
using domi::DOMI_TENSOR_RESERVED;
using domi::domiTensorFormat_t;
using domi::FMK_TYPE_RESERVED;
using domi::FRAMEWORK_RESERVED;
using domi::FrameworkType;
using std::map;
using std::string;
@@ -100,7 +100,7 @@ struct OmgContext {
std::string ddk_version;
// preferential format used by the entire network
domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED;
domi::FrameworkType type = domi::FMK_TYPE_RESERVED;
domi::FrameworkType type = domi::FRAMEWORK_RESERVED;
RunMode run_mode = ONLY_PRE_CHECK;
bool train_flag = false;
// whether to use FP16 high precision


+ 1
- 2
inc/graph/compute_graph.h View File

@@ -80,6 +80,7 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A
Vistor<NodePtr> GetOutputNodes() const;

NodePtr FindNode(const std::string &name) const;
NodePtr FindFirstNodeMatchType(const std::string &name) const;
// AddNode with NodePtr
NodePtr AddNode(NodePtr node);
NodePtr AddNode(OpDescPtr op);
@@ -235,8 +236,6 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A
std::vector<NodePtr> &stack);
graphStatus BFSTopologicalSorting(std::vector<NodePtr> &node_vec, std::map<NodePtr, uint32_t> &map_in_edge_num,
std::deque<NodePtr> &stack);
graphStatus BFSTopologicalSortingWithGroup(std::vector<NodePtr> &node_vec,
std::map<NodePtr, uint32_t> &map_in_edge_num, std::deque<NodePtr> &stack);
graphStatus CollectBreadthOutNode(const NodePtr &node, std::map<NodePtr, uint32_t> &map_in_edge_num,
std::map<string, NodePtr> &breadth_node_map);
graphStatus TopologicalSortingGraph();


+ 4
- 0
inc/graph/debug/ge_attr_define.h View File

@@ -136,6 +136,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string NEW_AIPP_CONV_OP;

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_INPUTS;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS;

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME;

@@ -176,6 +179,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_LABEL;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS;

// to be deleted
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED;


+ 3
- 0
inc/graph/ge_tensor.h View File

@@ -102,6 +102,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc : public AttrH
Format GetOriginFormat() const;
void SetOriginFormat(Format originFormat);

void SetName(const std::string &name);
const std::string GetName() const;

DataType GetDataType() const;
void SetDataType(DataType dt);



+ 24
- 19
inc/graph/utils/graph_utils.h View File

@@ -22,6 +22,7 @@
#include <map>
#include <string>
#include <vector>
#include <list>
#include "graph/anchor.h"
#include "graph/node.h"
#include "graph/compute_graph.h"
@@ -111,21 +112,25 @@ enum IOType { kIn, kOut };

struct NodeIndexIO {
NodeIndexIO(ge::NodePtr node, uint32_t index, IOType io_type)
: node(std::move(node)), index(index), io_type(io_type) {}
: node_(std::move(node)), index_(index), io_type_(io_type) {
if (node_ != nullptr) {
value_ = node_->GetName() + (io_type_ == kOut ? "_out_" : "_in_") + std::to_string(index_);
}
}
NodeIndexIO(ge::NodePtr node, int index, IOType io_type)
: node(std::move(node)), index(static_cast<uint32_t>(index)), io_type(io_type) {}
: node_(std::move(node)), index_(static_cast<uint32_t>(index)), io_type_(io_type) {
if (node_ != nullptr) {
value_ = node_->GetName() + (io_type_ == kOut ? "_out_" : "_in_") + std::to_string(index_);
}
}
~NodeIndexIO() {}

NodePtr node = nullptr;
uint32_t index = 0;
IOType io_type = kOut;
NodePtr node_ = nullptr;
uint32_t index_ = 0;
IOType io_type_ = kOut;
std::string value_;

std::string ToString() const {
if ((node == nullptr) || (node->GetOwnerComputeGraph() == nullptr)) {
return "";
}
return node->GetName() + (io_type == kOut ? "_out_" : "_in_") + std::to_string(index);
}
std::string ToString() const { return value_; }
};

class GraphUtils {
@@ -310,7 +315,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus GetRefMapping(const ComputeGraphPtr &graph,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -340,7 +345,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus HandleInAnchorMapping(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -351,7 +356,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus HandleOutAnchorMapping(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -362,7 +367,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus HandleSubgraphInput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -373,7 +378,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus HandleMergeInput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -384,7 +389,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus HandleSubgraphOutput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///
@@ -397,7 +402,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol, std::string &symbol);

///
@@ -409,7 +414,7 @@ class GraphUtils {
/// @return success: GRAPH_SUCESS
///
static graphStatus UpdateRefMapping(const NodeIndexIO &cur_node_info, const NodeIndexIO &exist_node_info,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol);

///


+ 2
- 0
inc/graph/utils/type_utils.h View File

@@ -25,6 +25,7 @@
#include "graph/types.h"
#include "graph/usr_types.h"
#include "register/register_types.h"
#include "external/register/register_fmk_types.h"

namespace ge {
class TypeUtils {
@@ -39,6 +40,7 @@ class TypeUtils {
static Format SerialStringToFormat(const std::string &str);
static Format DataFormatToFormat(const std::string &str);
static Format DomiFormatToFormat(domi::domiTensorFormat_t domi_format);
static std::string FmkTypeToSerialString(domi::FrameworkType fmk_type);

static graphStatus Usr2DefQuantizeFactorParams(const UsrQuantizeFactorParams &usr, QuantizeFactorParams &def);
static graphStatus Def2UsrQuantizeFactorParams(const QuantizeFactorParams &def, UsrQuantizeFactorParams &usr);


+ 22
- 128
src/common/graph/compute_graph.cc View File

@@ -48,63 +48,6 @@ bool IsUseBFS() {
}
return false;
}
bool IsTailingOptimization() {
string is_tailing_optimization_option;
auto ret = GetContext().GetOption(ge::OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION, is_tailing_optimization_option);
if (ret == GRAPH_SUCCESS) {
GELOGI("Option ge.exec.isTailingOptimization is %s", is_tailing_optimization_option.c_str());
// "1" means it's True from frontend option
return is_tailing_optimization_option == "1";
}
GELOGW("OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION not set, use BFSTopologicalSorting by default.");
return false;
}
bool IsFusedNode(const NodePtr &node) {
bool is_fused_node = false;
AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_FLAG, is_fused_node);
return is_fused_node;
}
string GetGroupId(const NodePtr &node) {
string group_id;
AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, group_id);
return group_id;
}
bool IsGroupEnd(const NodePtr &node) {
if (GetGroupId(node).empty()) {
return false;
}
if (node->GetOutDataNodesSize() == 0) {
return true;
}
for (const auto &out_data_node : node->GetOutDataNodes()) {
if (IsFusedNode(out_data_node)) {
return true;
}
}
return false;
}
void SplitNodeToStack(const std::map<string, NodePtr> &breadth_node_map, string current_group_id,
std::vector<NodePtr> &stack_input, std::deque<NodePtr> &group_stack, std::deque<NodePtr> &stack) {
for (const auto &name_node : breadth_node_map) {
// group first
string group_id;
if (AttrUtils::GetStr(name_node.second->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, group_id)) {
GELOGI("current node %s, group id: %s , current group id %s", name_node.second->GetName().c_str(),
group_id.c_str(), current_group_id.c_str());
if (!current_group_id.empty() && group_id != current_group_id) {
GELOGI("node go to input_stack back: %s", name_node.second->GetName().c_str());
(void)stack_input.insert(stack_input.begin(), name_node.second);
} else {
current_group_id = group_id;
GELOGI("node go to group_stack: %s", name_node.second->GetName().c_str());
(void)group_stack.push_front(name_node.second);
}
continue;
}
GELOGI("node go to stack: %s ", name_node.second->GetName().c_str());
(void)stack.push_front(name_node.second);
}
}
} // namespace

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraph::ComputeGraph(const std::string &name)
@@ -193,6 +136,19 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::FindNode(co
return nullptr;
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr
ComputeGraph::FindFirstNodeMatchType(const std::string &name) const {
for (const auto &node : nodes_) {
if (node == nullptr) {
continue;
}
if (node->GetType() == name) {
return node;
}
}
return nullptr;
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ComputeGraph::GraphAttrsAreEqual(
const ComputeGraph &r_graph) const {
// ProtoMsgOwner <::google::protobuf::Message> is temporarily ignored
@@ -642,9 +598,9 @@ ComputeGraph::UpdateInputMapping(const std::map<uint32_t, uint32_t> &input_mappi
///
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
ComputeGraph::UpdateOutputMapping(const std::map<uint32_t, uint32_t> &output_mapping) {
NodePtr net_output = FindNode(NODE_NAME_NET_OUTPUT);
NodePtr net_output = FindFirstNodeMatchType(NETOUTPUT);
if (net_output == nullptr) {
GE_LOGE("UpdateOutputMapping failed: node %s not exist in graph.", NODE_NAME_NET_OUTPUT);
GE_LOGE("UpdateOutputMapping failed: node type %s not exist in graph.", NETOUTPUT);
return GRAPH_FAILED;
}
OpDescPtr op_desc = net_output->GetOpDesc();
@@ -799,65 +755,6 @@ graphStatus ComputeGraph::BFSTopologicalSorting(std::vector<NodePtr> &node_vec,
return GRAPH_SUCCESS;
}

graphStatus ComputeGraph::BFSTopologicalSortingWithGroup(std::vector<NodePtr> &node_vec,
std::map<NodePtr, uint32_t> &map_in_edge_num,
std::deque<NodePtr> &stack) {
GELOGI("Runing_Bfs_Sort_With_Group");
std::string current_group_id;
std::vector<NodePtr> stack_input;
std::deque<NodePtr> group_stack;
std::deque<NodePtr> fused_node_stack;
std::map<string, NodePtr> breadth_node_map;
// Record the number of non data nodes but no input nodes
GE_CHK_BOOL_EXEC(SortNodes(stack_input, map_in_edge_num) == GRAPH_SUCCESS, return GRAPH_FAILED, "sort nodes failed");

// Only data nodes here
while (!stack_input.empty() || !stack.empty() || !group_stack.empty()) {
NodePtr node = nullptr;
if (!group_stack.empty()) {
// Traversal node in group has priority
node = group_stack.back();
group_stack.pop_back();
} else if (!stack.empty()) {
node = stack.back();
stack.pop_back();
} else {
node = stack_input.back();
stack_input.pop_back();
}

if (IsFusedNode(node) && current_group_id.empty()) {
current_group_id = node->GetName();
}
if (GetGroupId(node).empty() || GetGroupId(node) == current_group_id) {
node_vec.push_back(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
GELOGI("node_vec.push_back %s", node->GetOpDesc()->GetName().c_str());
} else {
if (current_group_id.empty()) {
current_group_id = GetGroupId(node);
node_vec.push_back(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
GELOGI("node_vec.push_back %s", node->GetOpDesc()->GetName().c_str());
} else {
GELOGI("current group id is %s ,node go to input_stack back: %s", current_group_id.c_str(),
node->GetName().c_str());
(void)stack_input.insert(stack_input.begin(), node);
continue;
}
}
CollectBreadthOutNode(node, map_in_edge_num, breadth_node_map);
SplitNodeToStack(breadth_node_map, current_group_id, stack_input, group_stack, stack);
breadth_node_map.clear();
// check the end of group
if (IsGroupEnd(node)) {
GELOGI("Current node %s is end of group %s.", node->GetName().c_str(), current_group_id.c_str());
current_group_id = "";
}
}
return GRAPH_SUCCESS;
}

graphStatus ComputeGraph::CollectBreadthOutNode(const NodePtr &node, std::map<NodePtr, uint32_t> &map_in_edge_num,
std::map<string, NodePtr> &breadth_node_map) {
for (const auto &anchor : node->GetAllOutDataAnchors()) {
@@ -907,7 +804,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::Topolog
}

std::vector<std::shared_ptr<ComputeGraph>> subgraphs;
(void)AllGraphNodes(subgraphs);
auto nodes = AllGraphNodes(subgraphs);
for (size_t i = 0; i < nodes.size(); i++) {
NodePtr node = nodes.at(i); // [node: should not be null]
node->GetOpDesc()->SetId(i); // [node->GetOpDesc(): should not be null]
}
if (sub_graph_.size() != subgraphs.size()) { // Graph Partition use subgraph, Keep original
GELOGW("Keep original subgraph for graph size %zu not equal %zu.", sub_graph_.size(), subgraphs.size());
return SUCCESS;
@@ -920,17 +821,10 @@ graphStatus ComputeGraph::TopologicalSortingGraph() {
std::vector<NodePtr> node_vec;
std::map<NodePtr, uint32_t> map_in_edge_num;
bool use_BFS = IsUseBFS();
bool is_tailing_optimization = IsTailingOptimization();
if (use_BFS) {
std::deque<NodePtr> stack;
if (is_tailing_optimization) {
if (BFSTopologicalSortingWithGroup(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) {
return GRAPH_FAILED;
}
} else {
if (BFSTopologicalSorting(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) {
return GRAPH_FAILED;
}
if (BFSTopologicalSorting(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) {
return GRAPH_FAILED;
}
} else {
std::vector<NodePtr> stack;


+ 1
- 1
src/common/graph/format_refiner.cc View File

@@ -41,7 +41,7 @@ using namespace ge;
using namespace std;
namespace ge {
namespace {
static const std::unordered_set<string> kChangeDimNodes = {RESHAPE, PERMUTE, EXPANDDIMS, SQUEEZE};
static const std::unordered_set<string> kChangeDimNodes = {PERMUTE, EXPANDDIMS, SQUEEZE};
static bool net_format_is_nd = true;
static Format g_user_set_format = FORMAT_ND;
static bool is_first_infer = true;


+ 5
- 1
src/common/graph/ge_attr_define.cc View File

@@ -118,6 +118,9 @@ const std::string ATTR_NAME_NAN_OPT = "nan_opt";
const std::string ATTR_NAME_AIPP = "aipp";
const std::string NEW_AIPP_CONV_OP = "new_conv_op_for_aipp";

const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs";
const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs";

const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id";
const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name";

@@ -150,6 +153,7 @@ const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG = "need_stream_cycle_event";
const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id";
const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start";
const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size";
const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims";

// To be deleted
const std::string ATTR_TO_BE_DELETED = "to_be_deleted";
@@ -1000,7 +1004,7 @@ const std::string ATTR_NAME_FUSION_TYPE_LIST = "_fusion_type_list";
const std::string ATTR_NAME_VALID_INPUT_SHAPE_LIST_LIST = "_valid_input_shape_list_list";
const std::string ATTR_NAME_VALID_OUTPUT_SHAPE_LIST_LIST = "_valid_output_shape_list_list";
const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST = "_input_offset_list_list";
const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_input_offset_list_list";
const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_output_offset_list_list";

// used for Horovod
const std::string ATTR_INTER_EVENT_IDENTIFY = "event_id";


+ 5
- 0
src/common/graph/ge_attr_value.cc View File

@@ -1233,6 +1233,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CloneOpDesc(
GELOGE(GRAPH_FAILED, "DelAttr _opt_input failed.");
}
}

if (!op_desc->output_name_idx_.empty()) {
op_desc->output_name_idx_.clear();
}

return op_desc;
}



+ 18
- 0
src/common/graph/ge_tensor.cc View File

@@ -464,6 +464,24 @@ void GeTensorDesc::SetFormat(Format format) {
}
}

void GeTensorDesc::SetName(const std::string &name) {
auto tensor_descriptor_msg = tensor_descriptor_.GetProtoMsg();
if (tensor_descriptor_msg != nullptr) {
tensor_descriptor_msg->set_name(name);
return;
}
GELOGW("[SetName]tensor_descriptor_msg is null.");
}

const std::string GeTensorDesc::GetName() const {
auto tensor_descriptor_msg = tensor_descriptor_.GetProtoMsg();
if (tensor_descriptor_msg != nullptr) {
return tensor_descriptor_msg->name();
}
GELOGW("[GetName]tensor_descriptor_msg is null.");
return "";
}

Format GeTensorDesc::GetOriginFormat() const {
std::string origin_format_str;
if (!AttrUtils::GetStr(this, TENSOR_UTILS_ORIGIN_FORMAT, origin_format_str)) {


+ 182
- 0
src/common/graph/graph.mk View File

@@ -0,0 +1,182 @@
LOCAL_PATH := $(call my-dir)

COMMON_LOCAL_SRC_FILES := \
./proto/om.proto \
./proto/ge_ir.proto \
./proto/ge_onnx.proto \
./proto/insert_op.proto \
./proto/task.proto \
./proto/fwk_adapter.proto \
./proto/op_mapping_info.proto \
./anchor.cc \
./ge_attr_value.cc \
./attr_value.cc \
./buffer.cc \
./compute_graph.cc \
./graph.cc \
./inference_context.cc \
./shape_refiner.cc \
./format_refiner.cc \
./ref_relation.cc \
./model.cc \
./model_serialize.cc \
./node.cc \
./op_desc.cc \
./operator.cc \
./operator_factory.cc \
./operator_factory_impl.cc \
./ge_attr_define.cc \
./ge_tensor.cc \
./detail/attributes_holder.cc \
./utils/anchor_utils.cc \
./utils/graph_utils.cc \
./utils/ge_ir_utils.cc \
./utils/node_utils.cc \
./utils/op_desc_utils.cc \
./utils/type_utils.cc \
./utils/tensor_utils.cc \
./tensor.cc \
./debug/graph_debug.cc \
./opsproto/opsproto_manager.cc \
../ops/op_imp.cpp \
option/ge_context.cc \
option/ge_local_context.cc \
./runtime_inference_context.cc \

COMMON_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/ge_ir.proto \
proto_inner/ge_onnx.proto \
proto/insert_op.proto \
proto/task.proto \
proto/fwk_adapter.proto \
proto/op_mapping_info.proto \
inc \
inc/external \
inc/external/graph \
inc/graph \
inc/common \
common \
common/graph \
third_party/protobuf/include \
libc_sec/include \
ops/built-in/op_proto/inc \


#compiler for host
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph

LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2
LOCAL_CPPFLAGS += -fexceptions

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_MULTILIB := 64
LOCAL_PROPRIETARY_MODULE := true

include $(BUILD_HOST_SHARED_LIBRARY)


#compiler for device
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph

LOCAL_CFLAGS += -O2

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \

LOCAL_LDFLAGS := -lrt -ldl

ifeq ($(device_os),android)
LOCAL_LDFLAGS := -ldl
endif

LOCAL_MULTILIB := 64
LOCAL_PROPRIETARY_MODULE := true

include $(BUILD_SHARED_LIBRARY)


# compile for ut/st
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph

LOCAL_CFLAGS +=

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_MULTILIB := 64
LOCAL_PROPRIETARY_MODULE := true

include $(BUILD_LLT_SHARED_LIBRARY)


#compiler for host static lib
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph

LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2
LOCAL_CPPFLAGS += -fexceptions

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_MULTILIB := 64
LOCAL_PROPRIETARY_MODULE := true

include $(BUILD_HOST_STATIC_LIBRARY)

#compiler for device static lib
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph

LOCAL_CFLAGS += -O2

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_MULTILIB := 64
LOCAL_PROPRIETARY_MODULE := true

include $(BUILD_STATIC_LIBRARY)

+ 39
- 0
src/common/graph/model_serialize.cc View File

@@ -130,6 +130,16 @@ bool ModelSerializeImp::SerializeOpDesc(const ConstOpDescPtr &op_desc, proto::Op
for (const std::string &name : op_desc->GetSubgraphInstanceNames()) {
op_def_proto->add_subgraph_name(name);
}

proto::AttrDef key;
proto::AttrDef value;
for (auto &item : op_desc->output_name_idx_) {
key.mutable_list()->add_s(item.first);
value.mutable_list()->add_i(item.second);
}
auto op_desc_attr = op_def_proto->mutable_attr();
op_desc_attr->insert({"_output_name_key", key});
op_desc_attr->insert({"_output_name_value", value});
}
return true;
}
@@ -228,6 +238,25 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ModelSerializeImp::Unseriali
}

bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_def_proto) {
std::vector<string> key;
std::vector<uint32_t> value;
if (op_def_proto.attr().count("_output_name_key") > 0) {
auto &output_name_key_list = op_def_proto.attr().at("_output_name_key").list();
for (const auto &item_s : output_name_key_list.s()) {
key.push_back(item_s);
}
auto op_desc_attr = op_def_proto.mutable_attr();
op_desc_attr->erase("_output_name_key");
}
if (op_def_proto.attr().count("_output_name_value") > 0) {
auto &output_name_value_list = op_def_proto.attr().at("_output_name_value").list();
for (const auto &item_i : output_name_value_list.i()) {
value.push_back(static_cast<uint32_t>(item_i));
}
auto op_desc_attr = op_def_proto.mutable_attr();
op_desc_attr->erase("_output_name_value");
}

op_desc = std::shared_ptr<OpDesc>(new (std::nothrow) OpDesc(protobuf_owner_, &op_def_proto));
GE_CHK_BOOL_EXEC(op_desc != nullptr, return false, "op_desc is nullptr.");

@@ -253,6 +282,16 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d
op_desc->SetSubgraphInstanceName(graph_index++, name);
}

if (key.size() != 0) {
if (key.size() != value.size()) {
GELOGE(GRAPH_FAILED, "twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size());
} else {
for (uint32_t i = 0; i < key.size(); ++i) {
op_desc->output_name_idx_.insert(std::pair<string, uint32_t>(key.at(i), value.at(i)));
}
}
}

return true;
}



+ 3
- 0
src/common/graph/module.mk View File

@@ -0,0 +1,3 @@
LOCAL_PATH := $(call my-dir)

include $(LOCAL_PATH)/graph.mk

+ 2
- 0
src/common/graph/tensor.cc View File

@@ -589,6 +589,7 @@ GeTensorDesc TensorAdapter::TensorDesc2GeTensorDesc(const TensorDesc &tensor_des
tensor_desc.GetDataType());
ge_tensor_desc.SetOriginShape(GeShape(tensor_desc.GetOriginShape().GetDims()));
ge_tensor_desc.SetOriginFormat(tensor_desc.GetOriginFormat());
ge_tensor_desc.SetName(tensor_desc.GetName());
std::vector<std::pair<int64_t, int64_t>> shape_range;
auto status = tensor_desc.GetShapeRange(shape_range);
if (status != GRAPH_SUCCESS) {
@@ -613,6 +614,7 @@ TensorDesc TensorAdapter::GeTensorDesc2TensorDesc(const GeTensorDesc &ge_tensor_
ge_tensor_desc.GetDataType());
tensor_desc.SetOriginShape(Shape(ge_tensor_desc.GetOriginShape().GetDims()));
tensor_desc.SetOriginFormat(ge_tensor_desc.GetOriginFormat());
tensor_desc.SetName(ge_tensor_desc.GetName());
std::vector<std::pair<int64_t, int64_t>> shape_range;
auto status = ge_tensor_desc.GetShapeRange(shape_range);
if (status != GRAPH_SUCCESS) {


+ 18
- 18
src/common/graph/utils/graph_utils.cc View File

@@ -1336,7 +1336,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraphPtr GraphUtils::FindR
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::GetRefMapping(const ComputeGraphPtr &graph,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(graph);
for (auto &node : graph->GetAllNodes()) {
@@ -1384,7 +1384,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr GraphUtils::FindNodeFromA
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(node);

@@ -1402,7 +1402,7 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node,
}

for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
NodeIndexIO cur_node_info = NodeIndexIO(node, in_data_anchor->GetIdx(), kIn);
NodeIndexIO cur_node_info(node, in_data_anchor->GetIdx(), kIn);
OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
if (peer_out_anchor == nullptr) {
std::string symbol = cur_node_info.ToString();
@@ -1410,7 +1410,7 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node,
symbol_to_anchors[symbol] = {cur_node_info};
anchor_to_symbol[symbol] = symbol;
} else {
NodeIndexIO exist_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
NodeIndexIO exist_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
GE_LOGE("Update symbol mapping failed.");
return GRAPH_FAILED;
@@ -1429,18 +1429,18 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(node);
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
NodeIndexIO cur_node_info = NodeIndexIO(node, out_data_anchor->GetIdx(), kOut);
NodeIndexIO cur_node_info(node, out_data_anchor->GetIdx(), kOut);
if (anchor_to_symbol.find(cur_node_info.ToString()) != anchor_to_symbol.end()) {
continue;
}

int32_t reuse_in_index = -1;
if (IsRefFromInput(out_data_anchor, reuse_in_index)) {
NodeIndexIO exist_node_info = NodeIndexIO(node, reuse_in_index, kIn);
NodeIndexIO exist_node_info(node, reuse_in_index, kIn);
if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
GE_LOGE("Update symbol mapping failed.");
return GRAPH_FAILED;
@@ -1448,7 +1448,7 @@ graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node,
} else {
std::string symbol = cur_node_info.ToString();
GELOGD("Add anchor %s, symbol %s.", cur_node_info.ToString().c_str(), symbol.c_str());
symbol_to_anchors.emplace(std::make_pair(symbol, std::vector<NodeIndexIO>{cur_node_info}));
symbol_to_anchors.emplace(std::make_pair(symbol, std::list<NodeIndexIO>{cur_node_info}));
anchor_to_symbol.emplace(std::make_pair(symbol, symbol));
}
}
@@ -1464,7 +1464,7 @@ graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
@@ -1482,8 +1482,8 @@ graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node,
OutDataAnchorPtr peer_out_anchor = parent_in_anchor->GetPeerOutAnchor();
if (peer_out_anchor != nullptr) {
// Data has and only has one input
NodeIndexIO cur_node_info = NodeIndexIO(node, 0, kIn);
NodeIndexIO exist_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
NodeIndexIO cur_node_info(node, 0, kIn);
NodeIndexIO exist_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
GE_LOGE("Update symbol mapping failed.");
return GRAPH_FAILED;
@@ -1501,7 +1501,7 @@ graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::HandleMergeInput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(node);
std::vector<NodeIndexIO> exist_node_infos;
@@ -1574,7 +1574,7 @@ graphStatus GraphUtils::HandleMergeInput(const NodePtr &node,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
GE_CHECK_NOTNULL(node);
ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph();
@@ -1595,8 +1595,8 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node,
}
GE_CHECK_NOTNULL(parent_node->GetOutDataAnchor(index));
// Union symbol of peer_out_anchor & parent_out_anchor
NodeIndexIO peer_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
NodeIndexIO parent_node_info = NodeIndexIO(parent_node, index, kOut);
NodeIndexIO peer_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut);
NodeIndexIO parent_node_info(parent_node, index, kOut);
std::string symbol;
if ((UnionSymbolMapping(peer_node_info, parent_node_info, symbol_to_anchors, anchor_to_symbol, symbol) !=
GRAPH_SUCCESS) ||
@@ -1606,7 +1606,7 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node,
return GRAPH_FAILED;
}

NodeIndexIO cur_node_info = NodeIndexIO(node, in_data_anchor->GetIdx(), kIn);
NodeIndexIO cur_node_info(node, in_data_anchor->GetIdx(), kIn);
GELOGD("Add anchor %s, symbol %s.", cur_node_info.ToString().c_str(), symbol.c_str());
symbol_to_anchors[symbol].emplace_back(cur_node_info);
anchor_to_symbol.emplace(std::make_pair(cur_node_info.ToString(), symbol));
@@ -1625,7 +1625,7 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol, std::string &symbol) {
std::string symbol1 = anchor_to_symbol[exist_node_info1.ToString()];
std::string symbol2 = anchor_to_symbol[exist_node_info2.ToString()];
@@ -1675,7 +1675,7 @@ graphStatus GraphUtils::UnionSymbolMapping(const NodeIndexIO &exist_node_info1,
/// @return success: GRAPH_SUCESS
///
graphStatus GraphUtils::UpdateRefMapping(const NodeIndexIO &cur_node_info, const NodeIndexIO &exist_node_info,
std::map<std::string, std::vector<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::list<NodeIndexIO>> &symbol_to_anchors,
std::map<std::string, std::string> &anchor_to_symbol) {
auto iter1 = anchor_to_symbol.find(exist_node_info.ToString());
if (iter1 == anchor_to_symbol.end()) {


+ 0
- 1
src/common/graph/utils/op_desc_utils.cc View File

@@ -524,7 +524,6 @@ OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) {
return nullptr;
}

GE_CHK_BOOL_EXEC(const_opdesc != nullptr, return nullptr, "const_opdesc is nullptr!");
CHECK_FALSE_EXEC(SetWeights(const_opdesc, tensor_ptr) == ge::GRAPH_SUCCESS, return nullptr);

const_opdesc->SetType(CONSTANT);


+ 2
- 0
src/common/graph/utils/tensor_utils.cc View File

@@ -273,6 +273,7 @@ static graphStatus CalcTensorElementCnt(const std::vector<int64_t> &dims, Format
case FORMAT_FRACTAL_Z:
graph_status = CalcElementCntOfFractalZ(dims, data_type, element_cnt);
break;
case FORMAT_NC1HWC0_C04:
case FORMAT_FRACTAL_NZ:
case FORMAT_FRACTAL_ZZ:
case FORMAT_NDHWC:
@@ -283,6 +284,7 @@ static graphStatus CalcTensorElementCnt(const std::vector<int64_t> &dims, Format
case FORMAT_FRACTAL_Z_3D_TRANSPOSE:
case FORMAT_NDC1HWC0:
case FORMAT_FRACTAL_Z_C04:
case FORMAT_FRACTAL_ZN_LSTM:
graph_status = CalcElementCntByDims(dims, element_cnt);
break;
default:


+ 20
- 2
src/common/graph/utils/type_utils.cc View File

@@ -59,6 +59,7 @@ static const std::map<Format, std::string> kFormatToStringMap = {
{FORMAT_CN, "CN"},
{FORMAT_NC, "NC"},
{FORMAT_FRACTAL_ZN_LSTM, "FRACTAL_ZN_LSTM"},
{FORMAT_FRACTAL_Z_G, "FRACTAL_Z_G"},
{FORMAT_RESERVED, "FORMAT_RESERVED"},
{FORMAT_ALL, "ALL"}};

@@ -98,8 +99,9 @@ static const std::unordered_set<std::string> kInternalFormat = {"NC1HWC0",
"FRACTAL_NZ",
"NDC1HWC0",
"FORMAT_FRACTAL_Z_3D",
"FORMAT_FRACTAL_Z_3D_TRANSPOSE"
"FORMAT_FRACTAL_ZN_LSTM"};
"FORMAT_FRACTAL_Z_3D_TRANSPOSE",
"FORMAT_FRACTAL_ZN_LSTM",
"FORMAT_FRACTAL_Z_G"};

static const std::map<std::string, Format> kDataFormatMap = {
{"NCHW", FORMAT_NCHW}, {"NHWC", FORMAT_NHWC}, {"NDHWC", FORMAT_NDHWC}, {"NCDHW", FORMAT_NCDHW}, {"ND", FORMAT_ND}};
@@ -143,6 +145,7 @@ static const std::map<std::string, Format> kStringToFormatMap = {
{"CN", FORMAT_CN},
{"NC", FORMAT_NC},
{"FRACTAL_ZN_LSTM", FORMAT_FRACTAL_ZN_LSTM},
{"FRACTAL_Z_G", FORMAT_FRACTAL_Z_G},
{"FORMAT_RESERVED", FORMAT_RESERVED},
{"ALL", FORMAT_ALL}};

@@ -235,6 +238,11 @@ static const std::map<ge::DataType, uint32_t> kDataTypeToLength = {
{DT_RESOURCE, sizeof(uint64_t)},
};

static const std::map<domi::FrameworkType, std::string> kFmkTypeToString = {
{domi::CAFFE, "caffe"}, {domi::MINDSPORE, "mindspore"}, {domi::TENSORFLOW, "tensorflow"},
{domi::ANDROID_NN, "android_nn"}, {domi::ONNX, "onnx"}, {domi::FRAMEWORK_RESERVED, "framework_reserved"},
};

bool TypeUtils::IsDataTypeValid(DataType dt) {
uint32_t num = static_cast<uint32_t>(dt);
GE_CHK_BOOL_EXEC((num <= DT_UNDEFINED), return false, "The DataType is invalid");
@@ -312,6 +320,16 @@ Format TypeUtils::DomiFormatToFormat(domi::domiTensorFormat_t domi_format) {
return FORMAT_RESERVED;
}

std::string TypeUtils::FmkTypeToSerialString(domi::FrameworkType fmk_type) {
auto it = kFmkTypeToString.find(fmk_type);
if (it != kFmkTypeToString.end()) {
return it->second;
} else {
GELOGW("Framework type not support %d.", fmk_type);
return "";
}
}

static inline void CopyDataFromBuffer(vector<uint8_t> &data, const Buffer &buffer) {
data.clear();
if (buffer.GetData() != nullptr && buffer.GetSize() != 0) {


+ 12
- 9
src/ge/CMakeLists.txt View File

@@ -45,7 +45,7 @@ include_directories(${GE_SOURCE_DIR}/inc/external)
include_directories(${GE_SOURCE_DIR}/inc/external/graph)
include_directories(${GE_SOURCE_DIR}/inc/framework)
include_directories(${GE_SOURCE_DIR}/inc/framework/common)
include_directories(${GE_SOURCE_DIR}/inc/runtime)
include_directories(${GE_SOURCE_DIR}/inc/graph)
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib)
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc)
include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce)
@@ -108,6 +108,10 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"graph/partition/engine_place.cc"
"graph/partition/graph_partition.cc"
"graph/passes/*.cc"
"graph/preprocess/graph_preprocess.cc"
"graph/preprocess/insert_op/ge_aipp_op.cc"
"graph/preprocess/insert_op/util_insert_aipp_op.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"host_kernels/add_kernel.cc"
"host_kernels/broadcast_args_kernel.cc"
"host_kernels/broadcast_gradient_args_kernel.cc"
@@ -144,10 +148,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"host_kernels/transdata_kernel.cc"
"host_kernels/transpose_kernel.cc"
"host_kernels/unpack_kernel.cc"
"graph/preprocess/graph_preprocess.cc"
"graph/preprocess/insert_op/ge_aipp_op.cc"
"graph/preprocess/insert_op/util_insert_aipp_op.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"host_kernels/unsqueeze_kernel.cc"
"hybrid/common/npu_memory_allocator.cc"
"hybrid/common/tensor_value.cc"
"hybrid/executor/*.cc"
@@ -155,6 +156,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"hybrid/hybrid_davinci_model.cc"
"hybrid/model/*.cc"
"hybrid/node_executor/aicore/*.cc"
"hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"hybrid/node_executor/aicpu/aicpu_node_executor.cc"
"hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
"hybrid/node_executor/hostcpu/ge_local_node_executor.cc"
@@ -246,6 +248,10 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"graph/partition/engine_place.cc"
"graph/partition/graph_partition.cc"
"graph/passes/*.cc"
"graph/preprocess/graph_preprocess.cc"
"graph/preprocess/insert_op/ge_aipp_op.cc"
"graph/preprocess/insert_op/util_insert_aipp_op.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"host_kernels/add_kernel.cc"
"host_kernels/broadcast_args_kernel.cc"
"host_kernels/broadcast_gradient_args_kernel.cc"
@@ -282,11 +288,8 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"host_kernels/transdata_kernel.cc"
"host_kernels/transpose_kernel.cc"
"host_kernels/unpack_kernel.cc"
"host_kernels/unsqueeze_kernel.cc"
"hybrid/hybrid_davinci_model_stub.cc"
"graph/preprocess/graph_preprocess.cc"
"graph/preprocess/insert_op/ge_aipp_op.cc"
"graph/preprocess/insert_op/util_insert_aipp_op.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"init/gelib.cc"
"ir_build/atc_ir_common.cc"
"ir_build/ge_ir_build.cc"


+ 26
- 3
src/ge/client/ge_api.cc View File

@@ -29,6 +29,7 @@
#include "graph/utils/type_utils.h"
#include "graph/manager/util/rt_context_util.h"
#include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h"

using domi::GetContext;
using domi::OpRegistry;
@@ -132,6 +133,9 @@ Status GEInitialize(const std::map<string, string> &options) {
}
GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid");

GE_TIMESTAMP_START(InitPreparation);
TBEPluginManager::Instance().InitPreparation(options);
GE_TIMESTAMP_END(InitPreparation, "GEInitialize::InitPreparation");
// call Initialize
GELOGT(TRACE_RUNNING, "Initializing environment");
GE_TIMESTAMP_START(GELibInitialize);
@@ -178,6 +182,10 @@ Status GEFinalize() {
ret = middle_ret;
}
}
middle_ret = TBEPluginManager::Instance().Finalize();
if (middle_ret != SUCCESS) {
ret = middle_ret;
}

if (kGeInitialized && ret == SUCCESS) {
// Unified destruct rt_context
@@ -262,10 +270,10 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph) {
}

Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) {
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, sessinon_id: %lu.", graph_id, sessionId_);
GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Sesson.");
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
return FAILED;
}
GELOGD("Adding graph to session");
@@ -340,7 +348,7 @@ void PrintOutputResult(std::vector<Tensor> &outputs) {
GELOGI("output data[%zu]=%lf", i, *(reinterpret_cast<double *>(outputs[0].GetData()) + i));
break;
default:
GELOGI("Output datatype %s is not support print.", TypeUtils::DataTypeToSerialString(data_type).c_str());
GELOGI("Output datatype %s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str());
return;
}
}
@@ -378,6 +386,21 @@ Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc
return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback);
}

Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
return FAILED;
}
GELOGT(TRACE_RUNNING, "Building Graph");
Status ret = instance_ptr->SessionManagerObj().BuildGraph(sessionId_, graph_id, inputs);
if (ret != SUCCESS) {
GELOGE(ret, "Session BuildGraph failed");
return FAILED;
}
return SUCCESS;
}

Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs,
RunAsyncCallback callback) {
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();


+ 111
- 0
src/ge/client/module.mk View File

@@ -0,0 +1,111 @@

# Android.mk build script for libge_client. The module is declared twice with
# the same name: once for the host toolchain (BUILD_HOST_SHARED_LIBRARY) and
# once for the device toolchain (BUILD_SHARED_LIBRARY).
LOCAL_PATH := $(call my-dir)

# Sources shared by the host and device variants.
COMMON_LOCAL_SRC_FILES := \
proto/ge_api.proto \
ge_api.cc \


# Include/proto paths for the host variant.
COMMON_LOCAL_C_INCLUDES := \
proto/ge_ir.proto \
proto/task.proto \
proto/om.proto \
proto/insert_op.proto \
$(LOCAL_PATH) ./ \
$(LOCAL_PATH)/../ \
$(LOCAL_PATH)/../../ \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/common \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/graph \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

# Include/proto paths for the device variant.
# NOTE(review): identical to COMMON_LOCAL_C_INCLUDES except for the relative
# order of inc/common and inc/framework — confirm whether this is intentional.
DEVICE_LOCAL_C_INCLUDES := \
proto/ge_ir.proto \
proto/task.proto \
proto/om.proto \
proto/insert_op.proto \
$(LOCAL_PATH) ./ \
$(LOCAL_PATH)/../ \
$(LOCAL_PATH)/../../ \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/common \
$(TOPDIR)inc/graph \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

#compiler for host infer
include $(CLEAR_VARS)

LOCAL_MODULE := libge_client
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1
# Debug build: no optimization, keep symbols.
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_compiler \
libge_common \


LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \

include $(BUILD_HOST_SHARED_LIBRARY)

#compiler for device
include $(CLEAR_VARS)

LOCAL_MODULE := libge_client
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION -DREUSE_MEMORY=1
LOCAL_MODULE_CLASS := SHARED_LIBRARIES

LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libruntime \
libge_compiler \
libge_common \


LOCAL_LDFLAGS := -lrt -ldl
LOCAL_CFLAGS += \
-Wall

include $(BUILD_SHARED_LIBRARY)

+ 1
- 2
src/ge/common/auth/file_saver.cc View File

@@ -40,9 +40,8 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) {
}

char real_path[PATH_MAX] = {0};
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_path.length() >= PATH_MAX, return FAILED, "File path is longer than PATH_MAX!");
GE_IF_BOOL_EXEC(realpath(file_path.c_str(), real_path) == nullptr,
GELOGI("File %s is not exit, it will be created.", file_path.c_str()));
GELOGI("File %s is not exist, it will be created.", file_path.c_str()));
// Open file
mode_t mode = S_IRUSR | S_IWUSR;
fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode);


+ 25
- 19
src/ge/common/ge/plugin_manager.cc View File

@@ -50,13 +50,13 @@ PluginManager::~PluginManager() { ClearHandles_(); }
string PluginManager::GetPath() {
Dl_info dl_info;
if (dladdr(reinterpret_cast<void *>(&PluginManager::GetPath), &dl_info) == 0) {
GELOGW("Failed to read so_path!");
GELOGW("Failed to read the shared library file path!");
return string();
} else {
std::string so_path = dl_info.dli_fname;
char path[PATH_MAX] = {0};
if (so_path.length() >= PATH_MAX) {
GELOGW("File path is too long!");
GELOGW("The shared library file path is too long!");
return string();
}
if (realpath(so_path.c_str(), path) == nullptr) {
@@ -93,11 +93,15 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
std::vector<std::string> path_vec;
SplitPath(path, path_vec);
for (const auto &single_path : path_vec) {
GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, GELOGE(GE_PLGMGR_PATH_INVALID, "File path is too long!");
GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX,
GELOGE(GE_PLGMGR_PATH_INVALID, "The shared library file path is too long!");
continue);
// load break when number of loaded so reach maximum
if (num_of_loaded_so >= kMaxNumOfSo) {
GELOGW("Number of loaded so reaches maximum, only the first %d are loaded!", kMaxNumOfSo);
GELOGW(
"The number of dynamic libraries loaded exceeds the kMaxNumOfSo,"
" and only the first %d shared libraries will be loaded.",
kMaxNumOfSo);
break;
}

@@ -110,11 +114,11 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec

int64_t file_size = 0;
if (ValidateSo(file_path_dlopen, size_of_loaded_so, file_size) != SUCCESS) {
GELOGW("Failed to validate so %s", file_path_dlopen.c_str());
GELOGW("Failed to validate the shared library: %s", file_path_dlopen.c_str());
continue;
}

GELOGI("dlopen so path name: %s. ", file_path_dlopen.c_str());
GELOGI("dlopen the shared library path name: %s.", file_path_dlopen.c_str());

// load continue when dlopen is failed
auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL);
@@ -128,14 +132,14 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
for (const auto &func_name : func_check_list) {
auto real_fn = (void (*)())dlsym(handle, func_name.c_str());
if (real_fn == nullptr) {
GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not exist!", func_name.c_str(),
GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(),
func_name.c_str());
is_valid = false;
break;
}
}
if (!is_valid) {
GE_LOGE_IF(dlclose(handle), "Failed to dlclose ret");
GE_LOGE_IF(dlclose(handle), "Failed to dlclose.");
continue;
}

@@ -146,13 +150,13 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
num_of_loaded_so++;
}

GELOGI("load so total num %u", num_of_loaded_so);
GELOGI("The total number of shared libraries loaded: %u", num_of_loaded_so);
for (auto name : so_list_) {
GELOGI("load %s successfully", name.c_str());
GELOGI("load shared library %s successfully", name.c_str());
}

if (num_of_loaded_so == 0) {
GELOGW("Failed to find any valid so in path %s!", path.c_str());
GELOGW("No loadable shared library found in the path: %s", path.c_str());
return SUCCESS;
}

@@ -163,7 +167,7 @@ Status PluginManager::ValidateSo(const string &file_path, int64_t size_of_loaded
// read file size
struct stat stat_buf;
if (stat(file_path.c_str(), &stat_buf) != 0) {
GELOGW("%s check fail.", file_path.c_str());
GELOGW("The shared library file check failed: %s", file_path.c_str());
return FAILED;
}

@@ -178,8 +182,8 @@ Status PluginManager::ValidateSo(const string &file_path, int64_t size_of_loaded
// load continue if the total size of so reaches maximum when it is loaded
if (size_of_loaded_so + file_size > kMaxSizeOfLoadedSo) {
GELOGW(
"%s is skipped because the size of loaded so reaches maximum if it is load! "
"(size: %ldB, size of loaded so: %ldB, maximum: %dB)",
"%s is skipped because the size of loaded share library reaches maximum if it is loaded! "
"(size: %ldB, size of loaded share library: %ldB, maximum: %dB)",
file_path.c_str(), file_size, size_of_loaded_so, kMaxSizeOfLoadedSo);
return FAILED;
}
@@ -227,7 +231,10 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_

// load break when number of loaded so reach maximum
if (num_of_loaded_so >= kMaxNumOfSo) {
GELOGW("Number of loaded so reaches maximum, only the first %d are loaded!", kMaxNumOfSo);
GELOGW(
"The number of dynamic libraries loaded exceeds the kMaxNumOfSo,"
" and only the first %d shared libraries will be loaded.",
kMaxNumOfSo);
break;
}

@@ -240,7 +247,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_

int64_t file_size = 0;
if (ValidateSo(file_path_dlopen, size_of_loaded_so, file_size) != SUCCESS) {
GELOGW("Failed to validate so %s", canonical_path_str.c_str());
GELOGW("Failed to validate the shared library: %s", canonical_path_str.c_str());
continue;
}

@@ -266,8 +273,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_
}
}
if (!is_valid) {
GE_LOGE_IF(dlclose(handle), "Dlclose ret fail");
GELOGW("Dlclose ret fail!");
GE_LOGE_IF(dlclose(handle), "Failed to dlclose.");
continue;
}

@@ -279,7 +285,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_
}
closedir(dir);
if (num_of_loaded_so == 0) {
GELOGW("Failed to find any valid so under %s!", path.c_str());
GELOGW("No loadable shared library found in the path: %s", path.c_str());
return SUCCESS;
}



+ 293
- 0
src/ge/common/ge/tbe_plugin_manager.cc View File

@@ -0,0 +1,293 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/ge/tbe_plugin_manager.h"

#include <dirent.h>
#include <unistd.h>
#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>

#include "common/ge/ge_util.h"
#include "framework/common/debug/log.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/engine/dnnengine.h"
#include "framework/omg/omg_inner_types.h"
#include "external/ge/ge_api_types.h"
#include "register/op_registry.h"
#include "graph/opsproto_manager.h"
#include "graph/utils/type_utils.h"

namespace ge {
// Options captured at InitPreparation() time; read by GetCustomOpPath()
// (e.g. the FRAMEWORK_TYPE entry selects the built-in plugin subdirectory).
std::map<string, string> TBEPluginManager::options_ = {};

// Returns the process-wide TBEPluginManager singleton.
// Meyers-singleton local static: initialization is thread-safe since C++11.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY TBEPluginManager &TBEPluginManager::Instance() {
  static TBEPluginManager singleton;
  return singleton;
}

// Closes every dlopen() handle recorded in handles_vec_ and empties the list.
// Returns FAILED if any dlclose() call reports an error, SUCCESS otherwise;
// all handles are attempted regardless of individual failures.
Status TBEPluginManager::ClearHandles_() {
  Status result = SUCCESS;
  for (auto &plugin_handle : handles_vec_) {
    if (dlclose(plugin_handle) == 0) {
      continue;
    }
    result = FAILED;
    GELOGW("Failed to close handle: %s", dlerror());
  }
  handles_vec_.clear();
  return result;
}

// Releases all plugin shared-library handles held by the manager.
// Thin public wrapper over ClearHandles_(); propagates its status.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status TBEPluginManager::Finalize() { return ClearHandles_(); }

// Resolves the directory containing the shared library this code is linked
// into, via dladdr() on one of our own symbols.
// Returns the canonical directory path including the trailing '/', or an
// empty string if the lookup, length check, or realpath() resolution fails.
string TBEPluginManager::GetPath() {
  Dl_info dl_info;
  if (dladdr(reinterpret_cast<void *>(&TBEPluginManager::GetPath), &dl_info) == 0) {
    GELOGW("Failed to read so path!");
    return string();
  }
  string so_path = dl_info.dli_fname;
  if (so_path.length() >= PATH_MAX) {
    GELOGW("File path is too long!");
    return string();
  }
  char resolved[PATH_MAX] = {0};
  if (realpath(so_path.c_str(), resolved) == nullptr) {
    GELOGW("Failed to get realpath of %s", so_path.c_str());
    return string();
  }
  so_path = resolved;
  // Strip the file name, keeping everything up to and including the last '/'.
  return so_path.substr(0, so_path.rfind('/') + 1);
}

// Buckets one ".so" path by suffix:
//  - "*lib_caffe_parser.so"      -> caffe_parser_path (out)
//  - "*_aicpu.so" / "*_online.so" -> aicpu run-path list in the omg context
//  - anything else               -> file_list (generic plugin libraries)
void TBEPluginManager::ProcessSoFullName(vector<string> &file_list, string &caffe_parser_path, string &full_name,
                                         const string &caffe_parser_so_suff, const string &aicpu_so_suff,
                                         const string &aicpu_host_so_suff) {
  // True when full_name ends with the given suffix.
  auto has_suffix = [&full_name](const string &suffix) {
    return full_name.size() >= suffix.size() &&
           full_name.compare(full_name.size() - suffix.size(), suffix.size(), suffix) == 0;
  };
  if (has_suffix(caffe_parser_so_suff)) {
    caffe_parser_path = full_name;
  } else if (has_suffix(aicpu_so_suff) || has_suffix(aicpu_host_so_suff)) {
    // aicpu so, Put the file path into the omgcontext and save into the model in the builder stage.
    domi::GetContext().aicpu_op_run_paths.push_back(full_name);
  } else {
    // Save parser so path into file_list vector
    file_list.push_back(full_name);
  }
}

// Recursively scans `path` for shared libraries.
// Each directory entry ending in ".so" is dispatched to ProcessSoFullName()
// (which buckets it as caffe parser / aicpu / generic plugin); any other
// entry is treated as a subdirectory and recursed into (non-directories are
// then rejected by the S_ISDIR check below).
// @param path               directory to scan; resolved via RealPath() first
// @param file_list          out: generic plugin .so paths found
// @param caffe_parser_path  out: path of lib_caffe_parser.so, if found
void TBEPluginManager::FindParserSo(const string &path, vector<string> &file_list, string &caffe_parser_path) {
  // Path, change to absolute path
  string real_path = RealPath(path.c_str());
  // Plugin path does not exist
  if (real_path.empty()) {
    GELOGW("RealPath is empty.");
    return;
  }
  // Only directories are scanned further.
  struct stat stat_buf;
  if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) {
    GELOGW("%s is not a dir.", real_path.c_str());
    return;
  }
  struct dirent *dent(0);
  DIR *dir = opendir(real_path.c_str());
  // Plugin path does not exist
  if (dir == nullptr) {
    GELOGW("Open directory %s failed.", real_path.c_str());
    return;
  }

  while ((dent = readdir(dir)) != nullptr) {
    // Skip the self/parent pseudo-entries to avoid infinite recursion.
    if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue;
    string name = dent->d_name;
    string full_name = real_path + "/" + name;
    const string so_suff = ".so";
    const string caffe_parser_so_suff = "lib_caffe_parser.so";
    const string aicpu_so_suff = "_aicpu.so";
    const string aicpu_host_so_suff = "_online.so";
    // Entries whose *name* ends in ".so" are classified; everything else is
    // assumed to be a subdirectory and descended into.
    if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) {
      ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff,
                        aicpu_host_so_suff);
    } else {
      FindParserSo(full_name, file_list, caffe_parser_path);
    }
  }
  closedir(dir);
}

void TBEPluginManager::GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path) {
// Support to split multiple so directories by ":"
vector<string> v_path = StringUtils::Split(path, ':');
for (size_t i = 0; i < v_path.size(); ++i) {
FindParserSo(v_path[i], file_list, caffe_parser_path);
GELOGI("CustomOpLib full name = %s", v_path[i].c_str());
}
}

// Builds the search path for custom op plugin libraries.
// The framework flavor (default "tensorflow") comes from the FRAMEWORK_TYPE
// entry of options_ and selects the built-in subdirectory. The base path is
// $ASCEND_OPP_PATH when set; otherwise it is derived from the location of
// the shared library this code lives in (two directory levels up + "ops/").
// @param customop_path  out: "<custom dir>/:<built-in dir>/<fmk_type>"
void TBEPluginManager::GetCustomOpPath(std::string &customop_path) {
  GELOGI("Enter get custom op path schedule");
  std::string fmk_type;
  domi::FrameworkType type = domi::TENSORFLOW;  // default when FRAMEWORK_TYPE is absent
  auto it = options_.find(FRAMEWORK_TYPE);
  if (it != options_.end()) {
    // The option value is the framework enum serialized as a decimal string.
    type = static_cast<domi::FrameworkType>(std::strtol(it->second.c_str(), nullptr, 10));
  }
  fmk_type = ge::TypeUtils::FmkTypeToSerialString(type);
  GELOGI("Framework type is %s.", fmk_type.c_str());

  const char *path_env = std::getenv("ASCEND_OPP_PATH");
  if (path_env != nullptr) {
    std::string path = path_env;
    customop_path = (path + "/framework/custom" + "/:") + (path + "/framework/built-in/" + fmk_type);
    GELOGI("Get custom so path from env : %s", path_env);
    return;
  }
  // Env var not set: derive the opp root from this library's own directory,
  // stripping the last two path components before appending "ops/...".
  std::string path_base = GetPath();
  GELOGI("path_base is %s", path_base.c_str());
  path_base = path_base.substr(0, path_base.rfind('/'));
  path_base = path_base.substr(0, path_base.rfind('/') + 1);
  customop_path = (path_base + "ops/framework/custom" + "/:") + (path_base + "ops/framework/built-in/" + fmk_type);
  return;
}

void TBEPluginManager::LoadCustomOpLib() {
LoadPluginSo();

std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
GELOGI("The size of registration_datas is: %zu", registration_datas.size());
for (OpRegistrationData reg_data : registration_datas) {
bool ret = CheckRegisterStatus(reg_data);
if (ret) {
GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(),
static_cast<uint32_t>(reg_data.GetImplyType()));
domi::OpRegistry::Instance()->Register(reg_data);
}
}
}

// Discovers and dlopen()s every plugin shared library on the custom op path.
// Successfully opened, not-yet-seen handles are recorded in handles_vec_ so
// Finalize()/ClearHandles_() can dlclose() them later. All failures are
// logged as warnings only; this function itself never fails.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo() {
  vector<string> file_list;
  string caffe_parser_path;
  std::string plugin_path;
  GetCustomOpPath(plugin_path);

  // Whether there are files in the plugin so path
  GetPluginSoFileList(plugin_path, file_list, caffe_parser_path);

  // No file
  if (file_list.empty()) {
    // Print log
    GELOGW("Can not find any plugin file in plugin_path: %s", plugin_path.c_str());
  }

  GELOGW("The shared library will not be checked. Please ensure that the source of the shared library is trusted.");

  // Load other so files except lib_caffe_parser.so in the plugin so path
  for (auto elem : file_list) {
    // `elem` is a mutable copy on purpose: Trim() modifies it in place.
    StringUtils::Trim(elem);

    // RTLD_NODELETE keeps the library mapped even if the handle is closed.
    void *handle = dlopen(elem.c_str(), RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE);
    if (handle == nullptr) {
      GELOGW("dlopen failed, plugin name:%s. Message(%s).", elem.c_str(), dlerror());
    } else if (find(handles_vec_.begin(), handles_vec_.end(), handle) == handles_vec_.end()) {
      // Close dl when the program exist, not close here
      GELOGI("Plugin load %s success.", elem.c_str());
      handles_vec_.push_back(handle);
    } else {
      // dlopen returned an already-recorded handle: library was loaded before.
      GELOGI("Plugin so has already been loaded, no need to load again.");
    }
  }
}

// Decides whether `reg_data` should (re-)register its op type.
// Returns false if ANY of the record's original op types is already
// registered (imply type != BUILDIN) with an implementation that should be
// kept: CCE when PARSER_PRIORITY=cce, TVM otherwise. Once set, the false
// result is sticky for the remaining op types. Returns true otherwise.
bool TBEPluginManager::CheckRegisterStatus(const OpRegistrationData &reg_data) {
  bool ret = true;
  // `static`: the environment is read once per process and cached.
  static char *parser_priority = std::getenv("PARSER_PRIORITY");
  static bool keep_cce = parser_priority != nullptr && string(parser_priority) == "cce";
  auto ori_optype_set = reg_data.GetOriginOpTypeSet();
  for (const auto &op_type : ori_optype_set) {
    domi::ImplyType imply_type = domi::OpRegistry::Instance()->GetImplyTypeByOriOpType(op_type);
    GELOGD("Enter into reg_data loop. op_type = %s , om_optype_ = %s", op_type.c_str(), reg_data.GetOmOptype().c_str());
    if (imply_type != domi::ImplyType::BUILDIN) {
      // Already registered: keep the existing entry unless the new record
      // carries the preferred implementation (CCE or TVM, per keep_cce).
      if ((keep_cce && reg_data.GetImplyType() != domi::ImplyType::CCE) ||
          (!keep_cce && reg_data.GetImplyType() != domi::ImplyType::TVM)) {
        GELOGD("op_type[%s] does not need to be changed, om_optype:%s.", op_type.c_str(),
               reg_data.GetOmOptype().c_str());
        ret = false;
      } else {
        GELOGI("op_type[%s] will be changed to om_optype:%s.", op_type.c_str(), reg_data.GetOmOptype().c_str());
      }
    } else {
      GELOGD("First register in ge initialize, original type: %s, om_optype: %s, imply type: %d.", op_type.c_str(),
             reg_data.GetOmOptype().c_str(), static_cast<int>(reg_data.GetImplyType()));
    }
  }
  return ret;
}

/// @brief Verify that every op registered with ImplyType::CUSTOM has a
///        matching aicpu shared object (lib<op_type>_aicpu.so) among the
///        configured aicpu run paths.
/// @return SUCCESS when all custom ops are covered, FAILED on the first miss.
Status TBEPluginManager::CheckCustomAiCpuOpLib() {
  std::vector<std::string> vec_op_type;
  domi::OpRegistry::Instance()->GetOpTypeByImplyType(vec_op_type, domi::ImplyType::CUSTOM);

  // True when |path| ends with |suffix| (plain byte-wise suffix match).
  const auto ends_with = [](const std::string &path, const std::string &suffix) {
    return path.size() >= suffix.size() &&
           path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0;
  };

  for (const std::string &op_type : vec_op_type) {
    const std::string ai_cpu_so_name = "lib" + op_type + "_aicpu.so";
    bool aicpu_so_exist = false;
    for (const std::string &bin_file_path : domi::GetContext().aicpu_op_run_paths) {
      if (ends_with(bin_file_path, ai_cpu_so_name)) {
        aicpu_so_exist = true;
        break;
      }
    }
    if (!aicpu_so_exist) {
      GELOGE(FAILED, "Can't find aicpu run so(%s), please check the plugin path!", ai_cpu_so_name.c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}

/// @brief Merge framework options into the shared option table, load the TBE
///        plugin libraries and verify the custom aicpu shared objects.
/// @param options key/value options merged into options_ (existing keys win).
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::InitPreparation(
    const std::map<string, string> &options) {
  options_.insert(options.begin(), options.end());
  // Load TBE plugin
  TBEPluginManager::Instance().LoadCustomOpLib();
  const Status check_status = CheckCustomAiCpuOpLib();
  if (check_status != SUCCESS) {
    GELOGE(check_status, "Check custom aicpu run so failed!");
  }
}
} // namespace ge

+ 73
- 0
src/ge/common/ge/tbe_plugin_manager.h View File

@@ -0,0 +1,73 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_
#define GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_

#include <dlfcn.h>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <type_traits>
#include <typeinfo>
#include <vector>

#include "external/ge/ge_api_error_codes.h"
#include "external/register/register.h"

namespace ge {
using SoHandlesVec = std::vector<void *>;
using std::function;
using std::map;
using std::string;
using std::vector;

// Singleton that discovers and dlopen()s TBE/aicpu plugin shared objects,
// drives custom-op registration, and keeps the option table the loaders read.
// (Declaration only; behavior claims below are taken from the .cc side.)
class TBEPluginManager {
public:
// Tears the manager down; presumably releases the dlopen handles recorded in
// handles_vec_ via ClearHandles_ — TODO confirm against the implementation.
Status Finalize();

// Get TBEPluginManager singleton instance
static TBEPluginManager &Instance();

// NOTE(review): returns some plugin-related path — exact semantics not
// visible from this declaration; confirm against the .cc implementation.
static string GetPath();

// Merges |options| into options_, loads the plugin libraries and validates
// the custom aicpu shared objects.
static void InitPreparation(const std::map<string, string> &options);

// dlopen()s every plugin .so found under the custom-op path and records each
// new handle in handles_vec_.
void LoadPluginSo();

private:
TBEPluginManager() = default;
~TBEPluginManager() = default;
// Closes/clears the recorded dlopen handles — TODO confirm.
Status ClearHandles_();

// Classifies a discovered .so full name into the caffe-parser path, the
// plugin file list, or the aicpu suffixed categories.
static void ProcessSoFullName(vector<string> &file_list, string &caffe_parser_path, string &full_name,
const string &caffe_parser_so_suff, const string &aicpu_so_suff,
const string &aicpu_host_so_suff);
// Scans |path| for parser/plugin shared objects.
static void FindParserSo(const string &path, vector<string> &file_list, string &caffe_parser_path);
// Fills |file_list| (and |caffe_parser_path|) with the .so files under |path|.
static void GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path);
// Builds the custom-op search path (framework custom + built-in dirs).
static void GetCustomOpPath(std::string &customop_path);
// Loads plugin .so files then registers op types that pass CheckRegisterStatus.
void LoadCustomOpLib();
// Verifies every ImplyType::CUSTOM op has a matching lib<op>_aicpu.so.
static Status CheckCustomAiCpuOpLib();
// Decides whether |reg_data| should be (re-)registered.
static bool CheckRegisterStatus(const OpRegistrationData &reg_data);

// dlopen handles kept open for the process lifetime.
SoHandlesVec handles_vec_;
// Framework options merged in by InitPreparation.
static std::map<string, string> options_;
};
} // namespace ge

#endif // GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_

+ 241
- 0
src/ge/common/ge_common.mk View File

@@ -0,0 +1,241 @@
LOCAL_PATH := $(call my-dir)

GE_COMMON_LOCAL_SRC_FILES := \
context/ctx.cc \
model_saver.cc \
ge/datatype_util.cc \
helper/om_file_helper.cc \
helper/model_helper.cc \
../model/ge_model.cc \
auth/file_saver.cc \
fp16_t.cc \
math/fp16_math.cc \
debug/memory_dumper.cc \
formats/utils/formats_trans_utils.cc \
formats/format_transfers/datatype_transfer.cc \
formats/format_transfers/format_transfer_transpose.cc \
formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
formats/format_transfers/format_transfer_fractal_z.cc \
formats/format_transfers/format_transfer_fractal_nz.cc \
formats/format_transfers/format_transfer_fractal_zz.cc \
formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
formats/format_transfers/format_transfer_fracz_nchw.cc \
formats/format_transfers/format_transfer_fracz_nhwc.cc \
formats/format_transfers/format_transfer_fracz_hwcn.cc \
formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
formats/format_transfers/format_transfer_nchw_fz_c04.cc \
formats/formats.cc \
ge_format_util.cc \
fmk_error_codes.cc \
util.cc \
properties_manager.cc \
types.cc\
model_parser/base.cc \
tbe_kernel_store.cc \
op/attr_value_util.cc \
op/ge_op_utils.cc \
thread_pool.cc \
ge/tbe_plugin_manager.cc \

GE_COMMON_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/ge_ir.proto \
proto/task.proto \
proto/insert_op.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/function.proto \
proto/tensorflow/versions.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/types.proto \
proto/tensorflow/resource_handle.proto \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/common/util \
$(TOPDIR)libc_sec/include \
$(TOPDIR)third_party/json/include \
$(TOPDIR)third_party/protobuf/include \
$(TOPDIR)third_party/openssl/include/x86/include \
$(TOPDIR)framework/domi \
$(TOPDIR)framework/domi/common \
$(TOPDIR)framework/domi/common/op

#compile host libge_common
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -fvisibility=hidden -DHOST_VISIBILITY
endif
ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libprotobuf \
libc_sec \
libslog \
libmmpa \
libgraph \
libregister \
liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_HOST_SHARED_LIBRARY)

#compile device libge_common
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -fvisibility=hidden -DDEV_VISIBILITY
endif
ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_SHARED_LIBRARIES := \
libprotobuf \
libc_sec \
libslog \
libmmpa \
libgraph \
libregister \
liberror_manager \

ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
else
LOCAL_LDFLAGS := -lrt -ldl
endif

include $(BUILD_SHARED_LIBRARY)

#compile host libge_common static lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := \
libgraph \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \
libmmpa \
libregister \
liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_HOST_STATIC_LIBRARY)

#compile device libge_common static_lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_common

LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
ifeq ($(host_os), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(host_os), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), euleros)
LOCAL_CFLAGS += -DOS_CENTOS
endif
ifeq ($(TARGET_OS), centos)
LOCAL_CFLAGS += -DOS_CENTOS
endif

LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := \
libgraph \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \
libmmpa \
libregister \
liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_STATIC_LIBRARY)

+ 3
- 3
src/ge/common/helper/model_cache_helper.cc View File

@@ -178,7 +178,7 @@ bool ModelCacheHelper::IsModelCacheHit() const {
return false;
}
if (!IsVarManagerSameAsCache(var_manager_json)) {
GELOGI("Graph id[%u] cache miss: the VarManager dos not match the cache info.", graph_id_);
GELOGI("Graph id[%u] cache miss: the VarManager does not match the cache info.", graph_id_);
return false;
}
GELOGI("Graph id[%u] cache hit.", graph_id_);
@@ -563,7 +563,7 @@ Status ModelCacheHelper::GetCacheInfo(CacheInfo &cache_info) const {
cache_info.graph_hash = cache_json[kGraphHash];
Json nodes_hash_json = cache_json[kNodeHash];
if (!(nodes_hash_json.is_null() || nodes_hash_json.is_array())) {
GELOGW("Nodes hash in cache be null or array.");
GELOGW("Nodes hash in cache should be null or array.");
return FAILED;
}
for (const auto &iter : nodes_hash_json) {
@@ -1670,7 +1670,7 @@ Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const {
ModelData model_data;
ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data);
if (ret != SUCCESS) {
GELOGW("LoadOmModelFromCache: Load model from file fialed. ret = %u", ret);
GELOGW("LoadOmModelFromCache: Load model from file failed. ret = %u", ret);
return ret;
}



+ 1
- 1
src/ge/common/helper/model_helper.cc View File

@@ -144,7 +144,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(),
platform_version.size() + 1);
if (err != EOK) {
GELOGE(MEMALLOC_FAILED, "ModelHelper SaveModel failed while while allocating memory for platform_version");
GELOGE(MEMALLOC_FAILED, "ModelHelper SaveModel failed while allocating memory for platform_version.");
return MEMALLOC_FAILED;
}
string version = reinterpret_cast<char *>(model_header.platform_version);


+ 6
- 6
src/ge/common/helper/om_file_helper.cc View File

@@ -52,7 +52,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(u
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type,
ModelPartition &partition) {
if (!is_inited_) {
GELOGE(PARAM_INVALID, "OmFileLoadHelper not Inited!");
GELOGE(PARAM_INVALID, "OmFileLoadHelper has not been initialized!");
return PARAM_INVALID;
}

@@ -67,7 +67,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod

if (!found) {
if (type != ModelPartitionType::TBE_KERNELS) {
GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas", static_cast<int>(type));
GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast<int>(type));
return FAILED;
}
}
@@ -77,7 +77,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod
Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const {
// Parameter validity check
if (model.model_data == nullptr) {
GELOGE(PARAM_INVALID, "Model_data must not be null");
GELOGE(PARAM_INVALID, "Model_data must not be null!");
return PARAM_INVALID;
}

@@ -103,7 +103,7 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const {

Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size) {
if (model_data == nullptr) {
GELOGE(PARAM_INVALID, "Param model_data must not be null");
GELOGE(PARAM_INVALID, "Param model_data must not be null!");
return PARAM_INVALID;
}
// Init partition table
@@ -131,7 +131,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
context_.partition_datas_.push_back(partition);

if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) {
GELOGE(PARAM_INVALID, "the current need partition sizes %zu greater than the model data size %u ",
GELOGE(PARAM_INVALID, "The partition size %zu is greater than the model data size %u.",
partition.size + mem_offset, model_data_size);
return PARAM_INVALID;
}
@@ -199,7 +199,7 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat

ModelPartitionTable *partition_table = GetPartitionTable();
if (partition_table == nullptr) {
GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile exe failed: partition_table is NULL");
GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile execute failed: partition_table is NULL.");
return ge::GE_GRAPH_SAVE_FAILED;
}
uint32_t size_of_table = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);


+ 8
- 2
src/ge/common/model_saver.cc View File

@@ -26,6 +26,7 @@
#include "framework/common/debug/log.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "common/util/error_manager/error_manager.h"

namespace ge {
const uint32_t kInteval = 2;
@@ -41,10 +42,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi
try {
model_str = model.dump(kInteval, ' ', false, Json::error_handler_t::ignore);
} catch (std::exception &e) {
GELOGE(FAILED, "Transfer json to string failed, reason: %s.", e.what());
ErrorManager::GetInstance().ATCReportErrMessage("E19007", {"exception"}, {e.what()});
GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what());
return FAILED;
} catch (...) {
GELOGE(FAILED, "Transfer json to string failed.");
ErrorManager::GetInstance().ATCReportErrMessage("E19008");
GELOGE(FAILED, "Failed to convert JSON to string.");
return FAILED;
}

@@ -57,6 +60,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi
mode_t mode = S_IRUSR | S_IWUSR;
int32_t fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode);
if (fd == EN_ERROR || fd == EN_INVALID_PARAM) {
ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"filepath", "errMsg"}, {file_path, strerror(errno)});
GELOGE(FAILED, "Open file failed. file path : %s, %s", file_path, strerror(errno));
return FAILED;
}
@@ -65,6 +69,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi
// Write data to file
mmSsize_t mmpa_ret = mmWrite(fd, const_cast<void *>((const void *)model_char), len);
if (mmpa_ret == EN_ERROR || mmpa_ret == EN_INVALID_PARAM) {
ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"mmpa_ret", "errMsg"},
{std::to_string(mmpa_ret), strerror(errno)});
// Need to both print the error info of mmWrite and mmClose, so return ret after mmClose
GELOGE(FAILED, "Write to file failed. errno = %d, %s", mmpa_ret, strerror(errno));
ret = FAILED;


+ 3
- 0
src/ge/common/module.mk View File

@@ -0,0 +1,3 @@
LOCAL_PATH := $(call my-dir)

include $(LOCAL_PATH)/ge_common.mk

+ 2
- 0
src/ge/common/op/ge_op_utils.cc View File

@@ -114,6 +114,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
OpUtils::ConvertAippParams(const GeAttrValue::NAMED_ATTRS &aipp_attr, domi::AippOpParams *aipp_params) {
GE_CHECK_NOTNULL(aipp_params);
AIPP_CONVERT_FORMAT_EX(aipp_mode, domi::AippOpParams::AippMode, int32_t, GeAttrValue::INT);
AIPP_CONVERT_INT(related_input_rank);

if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) {
AIPP_CONVERT_INT(max_src_image_size);
@@ -149,6 +150,7 @@ OpUtils::ConvertAippParams(const GeAttrValue::NAMED_ATTRS &aipp_attr, domi::Aipp
AIPP_CONVERT_LIST_FLOAT(var_reci_chn_0, true);
AIPP_CONVERT_LIST_FLOAT(var_reci_chn_1, true);
AIPP_CONVERT_LIST_FLOAT(var_reci_chn_2, true);
AIPP_CONVERT_LIST_FLOAT(var_reci_chn_3, true);

const bool csc_switch = aipp_params->csc_switch();
AIPP_CONVERT_LIST_INT(matrix_r0c0, csc_switch);


+ 16
- 8
src/ge/common/profiling/profiling_manager.cc View File

@@ -478,24 +478,32 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData(
const std::vector<TaskDescInfo> &task_desc_info, const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info) {
#ifdef DAVINCI_SUPPORT_PROFILING
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
int32_t logic_device_id = 0;
rtError_t rt_ret = rtGetDevice(&logic_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get device_id failed, current device_id:%d", device_id);
GELOGE(rt_ret, "runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id);
return;
}
GELOGI("current device_id:%d", device_id);
GELOGI("current logic_device_id:%d", logic_device_id);

auto ret = std::find(device_id_.begin(), device_id_.end(), device_id);
uint32_t phy_device_id = 0;
rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id);
return;
}
GELOGI("current phy_device_id:%d", phy_device_id);

auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id);
if (ret == device_id_.end()) {
GELOGE(FAILED, "get valid device_id failed, profiling report failed.");
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed.");
return;
}

GELOGI("start ProfilingTaskDescInfo.");
ProfilingTaskDescInfo(task_desc_info, device_id);
ProfilingTaskDescInfo(task_desc_info, phy_device_id);
GELOGI("start ProfilingGraphDescInfo.");
ProfilingGraphDescInfo(compute_graph_desc_info, device_id);
ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id);
GELOGI("Report profiling data for GE end.");
#endif
}


+ 1
- 0
src/ge/common/types.cc View File

@@ -116,6 +116,7 @@ REGISTER_OPTYPE_DEFINE(SLICE, "Slice");
REGISTER_OPTYPE_DEFINE(SLICED, "SliceD");
REGISTER_OPTYPE_DEFINE(FLOORDIV, "FloorDiv");
REGISTER_OPTYPE_DEFINE(SQUEEZE, "Squeeze");
REGISTER_OPTYPE_DEFINE(UNSQUEEZE, "Unsqueeze");
REGISTER_OPTYPE_DEFINE(STRIDEDSLICE, "StridedSlice");
REGISTER_OPTYPE_DEFINE(RANGE, "Range");
REGISTER_OPTYPE_DEFINE(RPNPROPOSALS, "RpnProposals");


+ 11
- 13
src/ge/common/util.cc View File

@@ -67,9 +67,8 @@ static bool ReadProtoFromCodedInputStream(CodedInputStream &coded_stream, Messag
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromBinaryFile(const char *file, Message *proto) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file == nullptr || proto == nullptr),
ErrorManager::GetInstance().ATCReportErrMessage("E19001");
return false, "Input parameter file or proto is nullptr!");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file == nullptr || proto == nullptr), return false,
"Input parameter file or proto is nullptr!");

std::string real_path = RealPath(file);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "pb file path '%s' not valid", file);
@@ -119,8 +118,9 @@ long GetFileLength(const std::string &input_file) {
ErrorManager::GetInstance().ATCReportErrMessage("E10037", {"filepath"}, {input_file});
return -1, "Open file[%s] failed", input_file.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0), ErrorManager::GetInstance().ATCReportErrMessage("E10038");
return -1, "File[%s] length is 0, not valid.", input_file.c_str());
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0),
ErrorManager::GetInstance().ATCReportErrMessage("E10038", {"filepath"}, {input_file});
return -1, "File[%s] size is 0, not valid.", input_file.c_str());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
file_length > kMaxFileSizeLimit, ErrorManager::GetInstance().ATCReportErrMessage(
@@ -207,7 +207,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std::
if (dir_path_len >= PATH_MAX) {
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"},
{directory_path, std::to_string(PATH_MAX)});
GELOGW("Path[%s] len is too long, it must smaller than %d", directory_path.c_str(), PATH_MAX);
GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), PATH_MAX);
return -1;
}
char tmp_dir_path[PATH_MAX] = {0};
@@ -338,14 +338,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
strlen(path) >= PATH_MAX,
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)});
return "", "Path[%s] len is too long, it must smaller than %d", path, PATH_MAX);
return "", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX);
// PATH_MAX is the system's own macro, indicating the maximum file path length supported
std::shared_ptr<char> resolved_path(new (std::nothrow) char[PATH_MAX](), std::default_delete<char[]>());

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
resolved_path == nullptr,
ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"filepath", "size"}, {path, std::to_string(PATH_MAX)});
return "", "Path[%s] new string object len[%d] failed.", path, PATH_MAX);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(resolved_path == nullptr, return "", "Path[%s] new string object len[%d] failed.",
path, PATH_MAX);

// Nullptr is returned when the path does not exist or there is no permission
// Return absolute path when path is accessible
@@ -384,7 +382,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const
!ValidateStr(real_path, mode),
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "path"}, {atc_param, real_path});
return false,
"Input parameter's value[%s] is illegal. The path[%s] can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' "
"Input parameter[--%s]'s value[%s] is illegal. The path can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' "
"and chinese character.",
atc_param.c_str(), real_path.c_str());

@@ -420,7 +418,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const
!ValidateStr(real_path, mode),
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "path"}, {atc_param, real_path});
return false,
"Input parameter's value[%s] is illegal. The path[%s] can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' "
"Input parameter[--%s]'s value[%s] is illegal. The path can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' "
"and chinese character.",
atc_param.c_str(), real_path.c_str());



+ 2
- 2
src/ge/engine_manager/dnnengine_manager.cc View File

@@ -75,7 +75,7 @@ Status DNNEngineManager::Initialize(const std::map<std::string, std::string> &op
return status;
}

GELOGI("The number of DNNEngineObjs are %zu.", engines_map_.size());
GELOGI("The number of DNNEngineObjs is %zu.", engines_map_.size());

// Engines initialize
for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) {
@@ -373,7 +373,7 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h
GELOGE(FAILED, "The json file %s is not exist, %s", file_path.c_str(), strerror(errno));
return FAILED;
} else {
GELOGW("The json file %s is not need", file_path.c_str());
GELOGW("The json file %s is not needed.", file_path.c_str());
return SUCCESS;
}
}


+ 1
- 0
src/ge/executor/CMakeLists.txt View File

@@ -30,6 +30,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"../common/profiling/profiling_manager.cc"
"../graph/execute/graph_execute.cc"
"../graph/load/graph_loader.cc"
"../graph/load/new_model_manager/aipp_utils.cc"
"../graph/load/new_model_manager/cpu_queue_schedule.cc"
"../graph/load/new_model_manager/data_dumper.cc"
"../graph/load/new_model_manager/data_inputer.cc"


+ 142
- 27
src/ge/executor/ge_executor.cc View File

@@ -38,6 +38,7 @@

namespace {
const size_t kDynamicBatchSizeVecSize = 1;
const size_t kStaticBatchInfoSize = 1;
const size_t kDynamicImageSizeVecSize = 2;
const size_t kDynamicImageSizeInputSize = 2;
const char *const kBatchLabel = "Batch_";
@@ -180,16 +181,16 @@ class ModelListenerAdapter : public ModelListener {
GeExecutor::GeExecutor() {}

Status GeExecutor::Initialize() {
GELOGI("Init ge_executor begin.");
GELOGI("Init GeExecutor begin.");
if (isInit_) {
GELOGW("Already inited, don't need to init again.");
GELOGW("Already initialized, no need to be initialized again.");
return ge::SUCCESS;
}

std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
auto ret = MemManager::Instance().Initialize(mem_type);
if (ret != SUCCESS) {
GELOGE(ret, "Memory Manager init fail.");
GELOGE(ret, "Memory Manager init failed.");
return ret;
}

@@ -200,14 +201,14 @@ Status GeExecutor::Initialize() {
ProfilingManager::Instance().Init(profiling_options);

isInit_ = true;
GELOGI("Init ge_executor over.");
GELOGI("Init GeExecutor over.");
return ge::SUCCESS;
}

Status GeExecutor::Finalize() {
GELOGI("Uninit ge_executor begin.");
GELOGI("Uninit GeExecutor begin.");
if (isInit_ == false) {
GELOGW("ge_executor needs to init begin.");
GELOGW("GeExecutor has not been initialized.");
return ge::SUCCESS;
}

@@ -217,7 +218,7 @@ Status GeExecutor::Finalize() {
ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE);
}

GELOGI("Uninit ge_executor over.");
GELOGI("Uninit GeExecutor over.");
return ge::SUCCESS;
}

@@ -236,6 +237,7 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad

// Verify whether the input dynamic batch matches the model gear
std::vector<std::vector<int64_t>> batch_info;
std::vector<uint64_t> batch_num{batch_size};
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get dynamic input info failed.");
@@ -247,6 +249,11 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad
return FAILED;
}

ret = GraphExecutor::SetDynamicSize(model_id, batch_num);
if (ret != SUCCESS) {
GELOGE(FAILED, "Set dynamic size failed");
return FAILED;
}
// memcpy dynamic_batch_size from host to device
if (rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) {
GELOGE(FAILED, "memcpy dynamic batch input data failed!");
@@ -270,6 +277,7 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad

// Verify whether the input dynamic resolution matches the model gear
std::vector<std::vector<int64_t>> batch_info;
std::vector<uint64_t> batch_num{image_height, image_width};
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get dynamic input info failed.");
@@ -281,6 +289,11 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad
return FAILED;
}

ret = GraphExecutor::SetDynamicSize(model_id, batch_num);
if (ret != SUCCESS) {
GELOGE(FAILED, "Set dynamic size failed");
return FAILED;
}
// Memcpy dynamic resolution height from host to device
if (rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) !=
RT_ERROR_NONE) {
@@ -298,6 +311,20 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad
return SUCCESS;
}

Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) {
GELOGI("Begin to get current shape");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}
Status ret = GraphExecutor::GetCurShape(model_id, batch_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get current shape failed");
return FAILED;
}
return SUCCESS;
}

Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
const std::vector<kAippDynamicBatchPara> &aippBatchPara,
const kAippDynamicPara &aippParms) {
@@ -346,13 +373,13 @@ Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path,
int32_t priority, std::shared_ptr<ge::ModelListener> listener) {
GELOGI("load model offline begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

string filePath = RealPath(path.c_str());
if (filePath.empty()) {
GELOGE(ge::FAILED, "fileath is invalid. please check your text file '%s'.", path.c_str());
GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str());
return ge::FAILED;
}

@@ -375,7 +402,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
std::shared_ptr<ge::ModelListener> listener) {
GELOGI("Load model begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -397,7 +424,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
Status GeExecutor::UnloadModel(uint32_t model_id) {
GELOGI("unload model %u begin.", model_id);
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}
Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id);
@@ -411,7 +438,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) {
GELOGI("run model begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -428,7 +455,7 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
std::vector<ge::TensorDesc> &output_desc) {
GELOGI("get model desc info begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -436,12 +463,11 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
std::vector<InputOutputDescInfo> output_desc_infos;
std::vector<uint32_t> input_formats;
std::vector<uint32_t> output_formats;
GELOGI("GetInputOutputDescInfo via new ome.");

Status ret =
GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, output_formats);
if (ret != domi::SUCCESS) {
GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret);
GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret);
return TransferDomiErrorCode(ret);
}

@@ -473,7 +499,7 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) {
GELOGI("Begin to get dynamic batch info.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -487,11 +513,49 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get AIPP configuration info for one model input
/// @param [in] model_id: id of the loaded model
/// @param [in] index: index of the model input the AIPP is attached to
/// @param [out] aipp_info: AIPP configuration of that input
/// @return execute result
///
Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
  GELOGI("Begin to GetAIPPInfo.");
  // The executor must be initialized before any model query can be served.
  if (!isInit_) {
    GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return GE_EXEC_NOT_INIT;
  }
  // Delegate the lookup to the graph executor, which owns the loaded models.
  Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "GetAIPPInfo failed.");
    return ret;
  }
  GELOGI("GetAIPPInfo succ.");
  return SUCCESS;
}
///
/// @ingroup ge
/// @brief Get dynamic batch output shape info of a loaded model
/// @param [in] model_id: id of the loaded model
/// @param [out] dynamic_output_shape_info: output shape description strings
/// @return execute result
///
Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) {
  GELOGI("Begin to get dynamic batch output shape info");
  // The executor must be initialized before any model query can be served.
  if (!isInit_) {
    GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return GE_EXEC_NOT_INIT;
  }
  Status ret = GraphExecutor::GetModelAttr(model_id, dynamic_output_shape_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "Get dynamic batch output shape info failed.");
    return ret;
  }

  GELOGI("Get dynamic batch output shape info succ.");
  return SUCCESS;
}

Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<TensorDesc> &output_desc) {
GELOGI("get model desc info for zero copy begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -499,12 +563,11 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge
std::vector<InputOutputDescInfo> output_desc_infos;
std::vector<uint32_t> input_formats;
std::vector<uint32_t> output_formats;
GELOGI("GetInputOutputDescInfoForZeroCopy via new ome.");

Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos,
input_formats, output_formats);
if (ret != domi::SUCCESS) {
GELOGE(ret, "Get DescInfo For ZeroCopy failed. ret = %u", ret);
GELOGE(ret, "Get DescInfo from zero copy failed. ret = %u", ret);
return TransferDomiErrorCode(ret);
}

@@ -521,7 +584,7 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge
GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats);
GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats);

GELOGI("get model desc info for zero copy end.");
GELOGI("get model desc info from zero copy end.");
return ge::SUCCESS;
}

@@ -539,7 +602,7 @@ Status GeExecutor::CommandHandle(const Command &command) {
Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) {
GELOGI("Get max used memory begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -559,13 +622,13 @@ Status GeExecutor::GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size) {
Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_data) {
GELOGI("Load data from file begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

string filePath = RealPath(path.c_str());
if (filePath.empty()) {
GELOGE(ge::FAILED, "filePath is invalid. please check your text file '%s'.", path.c_str());
GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str());
return ge::FAILED;
}
GELOGI("load modelData from file: %s.", path.c_str());
@@ -618,7 +681,7 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat
const std::vector<uint32_t> &output_queue_ids) {
GELOGI("Load model with queue begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}
return GraphLoader::LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids);
@@ -638,7 +701,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel
ge::RunModelData &run_output_data, bool async_mode) {
GELOGI("Execute model begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -674,7 +737,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel
Status GeExecutor::GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size) {
GELOGI("Get memory and weight size from file begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -707,7 +770,7 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size
size_t &weight_size) {
GELOGI("Get memory and weight size from data begin.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "not inited yet!");
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

@@ -741,4 +804,56 @@ Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer
// Release all single-op resources bound to the given rt stream by forwarding
// to the process-wide SingleOpManager; returns its status directly.
Status GeExecutor::ReleaseSingleOpResource(void *stream) {
  return SingleOpManager::GetInstance().ReleaseResource(stream);
}

///
/// @ingroup ge
/// @brief Get the number of dynamic batch shapes of a loaded model
/// @param [in] model_id: id of the loaded model
/// @param [out] shape_count: number of shapes; kStaticBatchInfoSize for a static-shape model
/// @return execute result
///
Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) {
  std::vector<std::vector<int64_t>> batch_info;
  Status ret = GetDynamicBatchInfo(model_id, batch_info);
  if (ret != SUCCESS) {
    // Status is unsigned: log with %u, consistent with the other executor APIs.
    GELOGE(ret, "Calc batch info size failed. ret = %u", ret);
    return ret;
  }
  // An empty batch list means the model has a single static shape.
  shape_count = batch_info.empty() ? kStaticBatchInfoSize : batch_info.size();
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get original (pre-AIPP) input info for one model input
/// @param [in] model_id: id of the loaded model
/// @param [in] index: index of the model input
/// @param [out] orig_input_info: original input description
/// @return execute result
///
Status GeExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) {
  GELOGI("Begin to GetOrigInputInfo.");
  // The executor must be initialized before any model query can be served.
  if (!isInit_) {
    GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return GE_EXEC_NOT_INIT;
  }

  Status ret = GraphExecutor::GetOrigInputInfo(model_id, index, orig_input_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "GetOrigInputInfo failed.");
    return ret;
  }

  GELOGI("GetOrigInputInfo succ.");
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get all AIPP input/output dims for one model input
/// @param [in] model_id: id of the loaded model
/// @param [in] index: index of the model input
/// @param [out] input_dims: dims of all AIPP inputs
/// @param [out] output_dims: dims of all AIPP outputs
/// @return execute result
///
Status GeExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index,
                                             std::vector<InputOutputDims> &input_dims,
                                             std::vector<InputOutputDims> &output_dims) {
  GELOGI("Begin to GetAllAippInputOutputDims.");
  // The executor must be initialized before any model query can be served.
  if (!isInit_) {
    GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return GE_EXEC_NOT_INIT;
  }

  Status ret = GraphExecutor::GetAllAippInputOutputDims(model_id, index, input_dims, output_dims);
  if (ret != SUCCESS) {
    GELOGE(ret, "GetAllAippInputOutputDims failed.");
    return ret;
  }

  GELOGI("GetAllAippInputOutputDims succ.");
  return SUCCESS;
}

} // namespace ge

+ 202
- 0
src/ge/executor/module.mk View File

@@ -0,0 +1,202 @@
LOCAL_PATH := $(call my-dir)

local_ge_executor_src_files := \
ge_executor.cc \
../common/profiling/profiling_manager.cc \
../common/ge/plugin_manager.cc \
../graph/load/graph_loader.cc \
../graph/execute/graph_execute.cc \
../omm/csa_interact.cc \
../graph/manager/graph_manager_utils.cc \
../graph/manager/graph_var_manager.cc \
../graph/manager/graph_mem_allocator.cc \
../graph/manager/graph_caching_allocator.cc \
../graph/manager/trans_var_data_utils.cc \
../graph/manager/util/debug.cc \
../model/ge_model.cc \
../model/ge_root_model.cc \
../graph/load/new_model_manager/davinci_model.cc \
../graph/load/new_model_manager/davinci_model_parser.cc \
../graph/load/new_model_manager/model_manager.cc \
../graph/load/new_model_manager/tbe_handle_store.cc \
../graph/load/new_model_manager/cpu_queue_schedule.cc \
../graph/load/new_model_manager/model_utils.cc \
../graph/load/new_model_manager/aipp_utils.cc \
../graph/load/new_model_manager/data_inputer.cc \
../graph/load/new_model_manager/data_dumper.cc \
../graph/load/new_model_manager/zero_copy_task.cc \
../graph/load/new_model_manager/task_info/task_info.cc \
../graph/load/new_model_manager/task_info/event_record_task_info.cc \
../graph/load/new_model_manager/task_info/event_wait_task_info.cc \
../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
../graph/load/new_model_manager/task_info/kernel_task_info.cc \
../graph/load/new_model_manager/task_info/label_set_task_info.cc \
../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
../graph/load/new_model_manager/task_info/stream_active_task_info.cc \
../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
../graph/load/new_model_manager/task_info/end_graph_task_info.cc \
../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
../graph/load/output/output.cc \
../single_op/single_op_manager.cc \
../single_op/single_op_model.cc \
../single_op/single_op.cc \
../single_op/stream_resource.cc \
../single_op/task/op_task.cc \
../single_op/task/build_task_utils.cc \
../single_op/task/tbe_task_builder.cc \
../single_op/task/aicpu_task_builder.cc \
../hybrid/hybrid_davinci_model_stub.cc\

local_ge_executor_c_include := \
proto/insert_op.proto \
proto/op_mapping_info.proto \
proto/ge_ir.proto \
proto/task.proto \
proto/om.proto \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc \
$(LOCAL_PATH)/../ \
$(TOPDIR)libc_sec/include \
third_party/protobuf/include \
third_party/json/include \

local_ge_executor_shared_library := \
libprotobuf \
libc_sec \
libge_common \
libruntime \
libslog \
libmmpa \
libgraph \
libmsprof \

local_ge_executor_ldflags := -lrt -ldl \


#compile arm device dynamic lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -DDAVINCI_SUPPORT_PROFILING

LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library)
ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
else
LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
endif

include $(BUILD_SHARED_LIBRARY)

#compile x86 host dynamic lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -O2
endif

LOCAL_SRC_FILES := $(local_ge_executor_src_files)

LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

LOCAL_SHARED_LIBRARIES := \
libprotobuf \
libc_sec \
libge_common \
libruntime \
libslog \
libmmpa \
libgraph \
libmsprof \

LOCAL_LDFLAGS += $(local_ge_executor_ldflags)

include $(BUILD_HOST_SHARED_LIBRARY)

#compile for host static lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -O2
endif

LOCAL_SRC_FILES := $(local_ge_executor_src_files)

LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

LOCAL_STATIC_LIBRARIES := \
libge_common \
libgraph \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libruntime \
libslog \
libmmpa \
libmsprof \

LOCAL_LDFLAGS += $(local_ge_executor_ldflags)

include $(BUILD_HOST_STATIC_LIBRARY)

#compile for device static lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -O2
endif

LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)

LOCAL_STATIC_LIBRARIES := \
libge_common \
libgraph \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libruntime \
libslog \
libmmpa \
libmsprof \

ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
else
LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
endif

include $(BUILD_STATIC_LIBRARY)

+ 407
- 0
src/ge/ge_inference.mk View File

@@ -0,0 +1,407 @@
LOCAL_PATH := $(call my-dir)

COMMON_LOCAL_SRC_FILES := \
proto/fusion_model.proto \
proto/optimizer_priority.proto \
graph/manager/trans_var_data_utils.cc \
omm/csa_interact.cc \
common/fp16_t.cc \
common/formats/utils/formats_trans_utils.cc \
common/formats/format_transfers/datatype_transfer.cc \
common/formats/format_transfers/format_transfer_transpose.cc \
common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_fractal_z.cc \
common/formats/format_transfers/format_transfer_fractal_nz.cc \
common/formats/format_transfers/format_transfer_fractal_zz.cc \
common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
common/formats/format_transfers/format_transfer_fracz_nchw.cc \
common/formats/format_transfers/format_transfer_fracz_nhwc.cc \
common/formats/format_transfers/format_transfer_fracz_hwcn.cc \
common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
common/formats/format_transfers/format_transfer_nchw_fz_c04.cc \
common/formats/formats.cc \
common/profiling/profiling_manager.cc \
common/helper/model_cache_helper.cc \
ge_local_engine/engine/host_cpu_engine.cc \


GRAPH_MANAGER_LOCAL_SRC_FILES := \
common/ge/plugin_manager.cc\
init/gelib.cc \
session/inner_session.cc \
session/session_manager.cc \
engine_manager/dnnengine_manager.cc \
opskernel_manager/ops_kernel_manager.cc \
graph/manager/graph_manager.cc \
graph/manager/graph_manager_utils.cc \
graph/manager/graph_context.cc \
graph/preprocess/graph_preprocess.cc \
graph/preprocess/multi_batch_copy_graph.cc \
graph/execute/graph_execute.cc \
graph/load/graph_loader.cc \
graph/optimize/graph_optimize.cc \
graph/optimize/summary_optimize.cc \
graph/build/graph_builder.cc \
graph/partition/engine_place.cc \
graph/partition/graph_partition.cc \
graph/partition/dynamic_shape_partition.cc \
generator/ge_generator.cc \
generator/generator_api.cc \
graph/manager/graph_var_manager.cc \
graph/manager/graph_mem_allocator.cc \
graph/manager/graph_caching_allocator.cc \

BUILER_SRC_FILES := \
ir_build/ge_ir_build.cc \
ir_build/atc_ir_common.cc \

OMG_HOST_SRC_FILES := \
model/ge_model.cc \
model/ge_root_model.cc \
graph/common/transop_util.cc \
graph/passes/pass_manager.cc \
graph/passes/resource_pair_add_control_pass.cc \
graph/passes/resource_pair_remove_control_pass.cc \
graph/passes/pass_utils.cc \
graph/passes/base_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/aicpu_constant_folding_pass.cc \
graph/passes/reshape_remove_pass.cc \
graph/passes/reshape_recovery_pass.cc \
graph/passes/transop_breadth_fusion_pass.cc \
graph/passes/transop_depth_fusion_pass.cc \
graph/passes/transop_nearby_allreduce_fusion_pass.cc \
graph/passes/same_transdata_breadth_fusion_pass.cc \
graph/passes/transop_without_reshape_fusion_pass.cc \
graph/passes/compile_nodes_pass.cc \
graph/passes/variable_prepare_op_pass.cc \
graph/passes/variable_ref_delete_op_pass.cc \
graph/passes/variable_ref_useless_control_out_delete_pass.cc \
graph/passes/subgraph_pass.cc \
graph/passes/data_pass.cc \
graph/passes/net_output_pass.cc \
graph/passes/replace_transshape_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/print_op_pass.cc \
graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/iterator_op_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \
graph/common/omg_util.cc \
graph/common/bcast.cc \
graph/passes/dimension_compute_pass.cc \
graph/passes/dimension_adjust_pass.cc \
graph/passes/get_original_format_pass.cc \
graph/passes/shape_operate_op_remove_pass.cc \
graph/passes/unused_op_remove_pass.cc \
graph/passes/assert_pass.cc \
graph/passes/dropout_pass.cc \
graph/passes/infershape_pass.cc \
graph/passes/unused_const_pass.cc \
graph/passes/isolated_op_remove_pass.cc \
graph/passes/permute_pass.cc \
graph/passes/ctrl_edge_transfer_pass.cc \
host_kernels/broadcast_gradient_args_kernel.cc \
host_kernels/greater_kernel.cc \
host_kernels/gather_v2_kernel.cc \
host_kernels/maximum_kernel.cc \
host_kernels/floormod_kernel.cc \
host_kernels/floordiv_kernel.cc \
host_kernels/range_kernel.cc \
host_kernels/shape_kernel.cc \
host_kernels/size_kernel.cc \
host_kernels/shape_n_kernel.cc \
host_kernels/rank_kernel.cc \
host_kernels/broadcast_args_kernel.cc \
host_kernels/fill_kernel.cc \
host_kernels/empty_kernel.cc \
host_kernels/expanddims_kernel.cc \
host_kernels/reshape_kernel.cc \
host_kernels/squeeze_kernel.cc \
host_kernels/unsqueeze_kernel.cc \
host_kernels/kernel_utils.cc \
host_kernels/cast_kernel.cc \
host_kernels/transdata_kernel.cc \
host_kernels/unpack_kernel.cc \
host_kernels/transpose_kernel.cc \
host_kernels/permute_kernel.cc \
host_kernels/pack_kernel.cc \
host_kernels/concat_v2_kernel.cc \
host_kernels/concat_offset_kernel.cc \
host_kernels/strided_slice_kernel.cc \
host_kernels/ssd_prior_box_kernel.cc \
host_kernels/add_kernel.cc \
host_kernels/sub_kernel.cc \
host_kernels/mul_kernel.cc \
host_kernels/reduce_prod_kernel.cc \
host_kernels/rsqrt_kernel.cc \
host_kernels/slice_kernel.cc \
host_kernels/slice_d_kernel.cc \
host_kernels/dynamic_stitch_kernel.cc \
graph/passes/stop_gradient_pass.cc \
graph/passes/prevent_gradient_pass.cc \
graph/passes/identity_pass.cc \
graph/passes/placeholder_with_default_pass.cc \
graph/passes/snapshot_pass.cc \
graph/passes/guarantee_const_pass.cc \
graph/passes/var_is_initialized_op_pass.cc \
graph/passes/parallel_concat_start_op_pass.cc \
graph/passes/folding_pass.cc \
graph/passes/cast_translate_pass.cc \
graph/passes/prune_pass.cc \
graph/passes/switch_op_pass.cc \
graph/passes/multi_batch_pass.cc \
graph/passes/next_iteration_pass.cc \
graph/passes/control_trigger_pass.cc \
graph/passes/cond_pass.cc \
graph/passes/cond_remove_pass.cc \
graph/passes/for_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/addn_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \
graph/passes/transop_symmetry_elimination_pass.cc \
graph/passes/save_pass.cc \
graph/passes/switch_dead_branch_elimination.cc \
graph/passes/switch_logic_remove_pass.cc \
graph/passes/switch_data_edges_bypass.cc \
graph/passes/merge_pass.cc \
graph/passes/variable_format_pass.cc \
graph/passes/variable_op_pass.cc \
graph/passes/cast_remove_pass.cc \
graph/passes/transpose_transdata_pass.cc \
graph/passes/identify_reference_pass.cc \
graph/passes/hccl_memcpy_pass.cc \
graph/passes/flow_ctrl_pass.cc \
graph/passes/link_gen_mask_nodes_pass.cc \
graph/passes/replace_with_empty_const_pass.cc \
graph/passes/hccl_group_pass.cc \
graph/passes/switch_fusion_pass.cc \
graph/passes/switch_split_pass.cc \

OMG_DEVICE_SRC_FILES := $(OMG_HOST_SRC_FILES)


OME_HOST_SRC_FILES := \
graph/manager/model_manager/event_manager.cc \
graph/manager/util/rt_context_util.cc \
graph/manager/util/variable_accelerate_ctrl.cc \
graph/manager/util/debug.cc \
graph/load/new_model_manager/model_manager.cc \
graph/load/new_model_manager/data_inputer.cc \
graph/load/new_model_manager/davinci_model.cc \
graph/load/new_model_manager/davinci_model_parser.cc \
graph/load/new_model_manager/model_utils.cc \
graph/load/new_model_manager/aipp_utils.cc \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/output/output.cc \
graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \
graph/load/new_model_manager/task_info/event_wait_task_info.cc \
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
graph/load/new_model_manager/task_info/kernel_task_info.cc \
graph/load/new_model_manager/task_info/label_set_task_info.cc \
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
graph/load/new_model_manager/task_info/stream_active_task_info.cc \
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
graph/load/new_model_manager/task_info/end_graph_task_info.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
single_op/task/op_task.cc \
single_op/task/build_task_utils.cc \
single_op/task/tbe_task_builder.cc \
single_op/task/aicpu_task_builder.cc \
single_op/single_op.cc \
single_op/single_op_model.cc \
single_op/stream_resource.cc \
single_op/single_op_manager.cc \
hybrid/hybrid_davinci_model_stub.cc \
# graph/load/new_model_manager/task_info/hccl_task_info.cc

OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES)

COMMON_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/task.proto \
proto/insert_op.proto \
proto/ge_ir.proto \
proto/fwk_adapter.proto \
proto/op_mapping_info.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/function.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/resource_handle.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/types.proto \
proto/tensorflow/versions.proto \
$(LOCAL_PATH) ./ \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/framework/common \
$(TOPDIR)inc/common \
$(TOPDIR)inc/runtime \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

NEW_OMG_HOST_SRC_FILES := \
graph/preprocess/insert_op/util_insert_aipp_op.cc \
graph/preprocess/insert_op/ge_aipp_op.cc \
graph/build/model_builder.cc \
graph/build/task_generator.cc \
graph/build/stream_allocator.cc \
graph/build/logical_stream_allocator.cc \
graph/build/stream_graph_optimizer.cc \
graph/build/run_context.cc \
graph/build/label_allocator.cc \
graph/label/label_maker.cc \
graph/label/if_label_maker.cc \
graph/label/case_label_maker.cc \
graph/label/while_label_maker.cc \
graph/label/partitioned_call_label_maker.cc \

OME_HOST_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES)
OMG_DEVICE_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES)

DEVICE_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/task.proto \
proto/insert_op.proto \
proto/ge_ir.proto \
proto/fwk_adapter.proto \
proto/op_mapping_info.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/function.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/resource_handle.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/types.proto \
proto/tensorflow/versions.proto \
$(LOCAL_PATH) ./ \
$(TOPDIR)inc \
$(TOPDIR)libc_sec/include \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/common/util \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/framework/common \
$(TOPDIR)inc/runtime \
$(TOPDIR)ops/built-in/op_proto/inc \
$(TOPDIR)framework/domi \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

#compiler for host infer
include $(CLEAR_VARS)

LOCAL_MODULE := libge_compiler

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
# from ome_inference.mk
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(GRAPH_MANAGER_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(OMG_HOST_SRC_FILES)
LOCAL_SRC_FILES += $(OME_HOST_SRC_FILES)
LOCAL_SRC_FILES += $(NEW_OME_DEVICE_SRC_FILES)
LOCAL_SRC_FILES += $(BUILER_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_common \
libruntime_compile \
libresource \
liberror_manager \

LOCAL_LDFLAGS := -lrt -ldl


include $(BUILD_HOST_SHARED_LIBRARY)

#compiler for device
include $(CLEAR_VARS)

LOCAL_MODULE := libge_compiler
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION
LOCAL_CFLAGS += -O2
LOCAL_MODULE_CLASS := SHARED_LIBRARIES


LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(GRAPH_MANAGER_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(OMG_DEVICE_SRC_FILES)
LOCAL_SRC_FILES += $(OME_DEVICE_SRC_FILES)
LOCAL_SRC_FILES += $(BUILER_SRC_FILES)


LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libresource \
libruntime_compile \
libge_common \




ifeq ($(device_os),android)
LOCAL_LDFLAGS := -ldl
else
LOCAL_LDFLAGS := -lrt -ldl
endif

LOCAL_CFLAGS += \
-Wall

ifeq ($(device_os),android)
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog
endif
include $(BUILD_SHARED_LIBRARY)

+ 3
- 3
src/ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -124,7 +124,7 @@ Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, vector<G
Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_kernel,
map<std::string, const Tensor> &named_inputs,
map<std::string, Tensor> &named_outputs) {
GELOGD("To run host cpu op: %s", op_desc->GetName().c_str());
GELOGD("Run operation on host cpu, op name: %s", op_desc->GetName().c_str());
Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc);
auto ret = op_kernel.Compute(op, named_inputs, named_outputs);
if (ret != GRAPH_SUCCESS) {
@@ -139,7 +139,7 @@ Status HostCpuEngine::Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs,
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());

GELOGD("To run node by host cpu engine. node name = %s", node->GetName().c_str());
GELOGD("Run node by host cpu engine. node name = %s", node->GetName().c_str());
std::unique_ptr<HostCpuOp> op_kernel;
GE_CHK_STATUS_RET_NOLOG(FindOpKernel(node, op_kernel));

@@ -151,7 +151,7 @@ Status HostCpuEngine::Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs,
GE_CHK_STATUS_RET_NOLOG(PrepareOutputs(op_desc, tmp_outputs, named_outputs));
GE_CHK_STATUS_RET_NOLOG(RunInternal(op_desc, *op_kernel, named_inputs, named_outputs));

GELOGD("Ran node by host cpu engine successfully. name node = %s", node->GetName().c_str());
GELOGD("Run node by host cpu engine successfully. name node = %s", node->GetName().c_str());
outputs.swap(tmp_outputs);
return SUCCESS;
}


+ 59
- 0
src/ge/ge_local_engine/module.mk View File

@@ -0,0 +1,59 @@
LOCAL_PATH := $(call my-dir)


local_lib_src_files := engine/ge_local_engine.cc \
ops_kernel_store/ge_local_ops_kernel_info.cc \
ops_kernel_store/op/op_factory.cc \
ops_kernel_store/op/op.cc \
ops_kernel_store/op/ge_deleted_op.cc \
ops_kernel_store/op/no_op.cc \

local_lib_inc_path := proto/task.proto \
${LOCAL_PATH} \
${TOPDIR}inc \
${TOPDIR}inc/external \
${TOPDIR}inc/external/graph \
$(TOPDIR)libc_sec/include \
${TOPDIR}third_party/protobuf/include \
${TOPDIR}inc/framework \
$(TOPDIR)framework/domi \

#compiler for host
include $(CLEAR_VARS)
LOCAL_MODULE := libge_local_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
libc_sec \
libslog \
libgraph \
libregister \
libruntime

LOCAL_SRC_FILES := $(local_lib_src_files)
LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_SHARED_LIBRARY}

#compiler for atc
include $(CLEAR_VARS)
LOCAL_MODULE := atclib/libge_local_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
libc_sec \
libslog \
libgraph \
libregister \
libruntime_compile

LOCAL_SRC_FILES := $(local_lib_src_files)
LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_SHARED_LIBRARY}

+ 1
- 1
src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc View File

@@ -81,7 +81,7 @@ Status GeLocalOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) {
const string node_name = ge_node.GetName();
const string node_type = ge_node.GetType();
size_t output_size = op_desc->GetOutputsSize();
GELOGD("Calc op[%s:%s] op running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size);
GELOGD("Calc op[%s:%s] running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size);

for (size_t i = 0; i < output_size; ++i) {
GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast<uint32_t>(i));


+ 1
- 1
src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc View File

@@ -24,7 +24,7 @@ namespace ge_local {
NoOp::NoOp(const Node &node, RunContext &run_context) : Op(node, run_context) {}

Status NoOp::Run() {
GELOGI("Node:%s type is %s, no need gen task.", name_.c_str(), type_.c_str());
GELOGI("Node:%s type is %s, no need generate task.", name_.c_str(), type_.c_str());
// Do nothing
return SUCCESS;
}


+ 429
- 0
src/ge/ge_runner.mk View File

@@ -0,0 +1,429 @@
LOCAL_PATH := $(call my-dir)

LIBGE_LOCAL_SRC_FILES := \
proto/fusion_model.proto \
proto/optimizer_priority.proto \
common/formats/format_transfers/datatype_transfer.cc \
common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
common/formats/format_transfers/format_transfer_fractal_nz.cc \
common/formats/format_transfers/format_transfer_fractal_z.cc \
common/formats/format_transfers/format_transfer_fractal_zz.cc \
common/formats/format_transfers/format_transfer_fracz_hwcn.cc \
common/formats/format_transfers/format_transfer_fracz_nchw.cc \
common/formats/format_transfers/format_transfer_fracz_nhwc.cc \
common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_transpose.cc \
common/formats/formats.cc \
common/formats/utils/formats_trans_utils.cc \
common/fp16_t.cc \
common/ge/plugin_manager.cc\
common/helper/model_cache_helper.cc \
common/profiling/profiling_manager.cc \
engine_manager/dnnengine_manager.cc \
ge_local_engine/engine/host_cpu_engine.cc \
generator/ge_generator.cc \
generator/generator_api.cc \
graph/build/graph_builder.cc \
graph/build/label_allocator.cc \
graph/build/logical_stream_allocator.cc \
graph/build/model_builder.cc \
graph/build/run_context.cc \
graph/build/stream_allocator.cc \
graph/build/stream_graph_optimizer.cc \
graph/build/task_generator.cc \
graph/common/bcast.cc \
graph/common/omg_util.cc \
graph/common/transop_util.cc \
graph/execute/graph_execute.cc \
graph/label/case_label_maker.cc \
graph/label/if_label_maker.cc \
graph/label/label_maker.cc \
graph/label/partitioned_call_label_maker.cc \
graph/label/while_label_maker.cc \
graph/load/graph_loader.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/data_inputer.cc \
graph/load/new_model_manager/davinci_model.cc \
graph/load/new_model_manager/davinci_model_parser.cc \
graph/load/new_model_manager/model_manager.cc \
graph/load/new_model_manager/model_utils.cc \
graph/load/new_model_manager/aipp_utils.cc \
graph/load/new_model_manager/task_info/end_graph_task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \
graph/load/new_model_manager/task_info/event_wait_task_info.cc \
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
graph/load/new_model_manager/task_info/hccl_task_info.cc \
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
graph/load/new_model_manager/task_info/kernel_task_info.cc \
graph/load/new_model_manager/task_info/label_set_task_info.cc \
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
graph/load/new_model_manager/task_info/stream_active_task_info.cc \
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/output/output.cc \
graph/manager/graph_context.cc \
graph/manager/graph_manager.cc \
graph/manager/graph_manager_utils.cc \
graph/manager/graph_mem_allocator.cc \
graph/manager/graph_caching_allocator.cc \
graph/manager/graph_var_manager.cc \
graph/manager/model_manager/event_manager.cc \
graph/manager/trans_var_data_utils.cc \
graph/manager/util/debug.cc \
graph/manager/util/hcom_util.cc \
graph/manager/util/rt_context_util.cc \
graph/manager/util/variable_accelerate_ctrl.cc \
graph/optimize/graph_optimize.cc \
graph/optimize/optimizer/allreduce_fusion_pass.cc \
graph/optimize/summary_optimize.cc \
graph/partition/engine_place.cc \
graph/partition/graph_partition.cc \
graph/passes/addn_pass.cc \
graph/passes/aicpu_constant_folding_pass.cc \
graph/passes/assert_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \
graph/partition/dynamic_shape_partition.cc \
graph/passes/base_pass.cc \
graph/passes/cast_remove_pass.cc \
graph/passes/cast_translate_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \
graph/passes/transop_symmetry_elimination_pass.cc \
graph/passes/compile_nodes_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/control_trigger_pass.cc \
graph/passes/dimension_adjust_pass.cc \
graph/passes/dimension_compute_pass.cc \
graph/passes/dropout_pass.cc \
graph/passes/hccl_group_pass.cc \
graph/passes/switch_fusion_pass.cc \
graph/passes/switch_split_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/flow_ctrl_pass.cc \
host_kernels/transpose_kernel.cc \
host_kernels/add_kernel.cc \
host_kernels/broadcast_args_kernel.cc \
host_kernels/broadcast_gradient_args_kernel.cc \
host_kernels/cast_kernel.cc \
host_kernels/concat_offset_kernel.cc \
host_kernels/concat_v2_kernel.cc \
host_kernels/dynamic_stitch_kernel.cc \
host_kernels/empty_kernel.cc \
host_kernels/expanddims_kernel.cc \
host_kernels/fill_kernel.cc \
host_kernels/floordiv_kernel.cc \
host_kernels/floormod_kernel.cc \
host_kernels/gather_v2_kernel.cc \
host_kernels/greater_kernel.cc \
host_kernels/kernel_utils.cc \
host_kernels/maximum_kernel.cc \
host_kernels/mul_kernel.cc \
host_kernels/pack_kernel.cc \
host_kernels/permute_kernel.cc \
host_kernels/range_kernel.cc \
host_kernels/rank_kernel.cc \
host_kernels/reduce_prod_kernel.cc \
host_kernels/reshape_kernel.cc \
host_kernels/rsqrt_kernel.cc \
host_kernels/shape_kernel.cc \
host_kernels/shape_n_kernel.cc \
host_kernels/size_kernel.cc \
host_kernels/slice_d_kernel.cc \
host_kernels/slice_kernel.cc \
host_kernels/squeeze_kernel.cc \
host_kernels/unsqueeze_kernel.cc \
host_kernels/ssd_prior_box_kernel.cc \
host_kernels/strided_slice_kernel.cc \
host_kernels/sub_kernel.cc \
host_kernels/transdata_kernel.cc \
host_kernels/unpack_kernel.cc \
graph/passes/folding_pass.cc \
graph/passes/get_original_format_pass.cc \
graph/passes/guarantee_const_pass.cc \
graph/passes/hccl_memcpy_pass.cc \
graph/passes/identify_reference_pass.cc \
graph/passes/identity_pass.cc \
graph/passes/infershape_pass.cc \
graph/passes/isolated_op_remove_pass.cc \
graph/passes/iterator_op_pass.cc \
graph/passes/link_gen_mask_nodes_pass.cc \
graph/passes/merge_pass.cc \
graph/passes/multi_batch_pass.cc \
graph/passes/net_output_pass.cc \
graph/passes/next_iteration_pass.cc \
graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/pass_manager.cc \
graph/passes/pass_utils.cc \
graph/passes/permute_pass.cc \
graph/passes/placeholder_with_default_pass.cc \
graph/passes/prevent_gradient_pass.cc \
graph/passes/print_op_pass.cc \
graph/passes/prune_pass.cc \
graph/passes/ctrl_edge_transfer_pass.cc \
graph/passes/replace_with_empty_const_pass.cc \
graph/passes/reshape_remove_pass.cc \
graph/passes/reshape_recovery_pass.cc \
graph/passes/resource_pair_add_control_pass.cc \
graph/passes/resource_pair_remove_control_pass.cc \
graph/passes/same_transdata_breadth_fusion_pass.cc \
graph/passes/save_pass.cc \
graph/passes/shape_operate_op_remove_pass.cc \
graph/passes/snapshot_pass.cc \
graph/passes/stop_gradient_pass.cc \
graph/passes/subgraph_pass.cc \
graph/passes/data_pass.cc \
graph/passes/switch_data_edges_bypass.cc \
graph/passes/switch_logic_remove_pass.cc \
graph/passes/switch_op_pass.cc \
graph/passes/switch_dead_branch_elimination.cc \
graph/passes/replace_transshape_pass.cc \
graph/passes/transop_breadth_fusion_pass.cc \
graph/passes/transop_depth_fusion_pass.cc \
graph/passes/transop_nearby_allreduce_fusion_pass.cc \
graph/passes/transop_without_reshape_fusion_pass.cc \
graph/passes/transpose_transdata_pass.cc \
graph/passes/unused_const_pass.cc \
graph/passes/unused_op_remove_pass.cc \
graph/passes/var_is_initialized_op_pass.cc \
graph/passes/parallel_concat_start_op_pass.cc \
graph/passes/cond_pass.cc \
graph/passes/cond_remove_pass.cc \
graph/passes/for_pass.cc \
graph/passes/variable_format_pass.cc \
graph/passes/variable_op_pass.cc \
graph/passes/variable_prepare_op_pass.cc \
graph/passes/variable_ref_delete_op_pass.cc \
graph/passes/variable_ref_useless_control_out_delete_pass.cc \
graph/preprocess/graph_preprocess.cc \
graph/preprocess/insert_op/ge_aipp_op.cc \
graph/preprocess/insert_op/util_insert_aipp_op.cc \
graph/preprocess/multi_batch_copy_graph.cc \
init/gelib.cc \
model/ge_model.cc \
model/ge_root_model.cc \
omm/csa_interact.cc \
opskernel_manager/ops_kernel_manager.cc \
session/inner_session.cc \
session/session_manager.cc \
single_op/single_op.cc \
single_op/single_op_manager.cc \
single_op/single_op_model.cc \
single_op/stream_resource.cc \
single_op/task/build_task_utils.cc \
single_op/task/op_task.cc \
single_op/task/tbe_task_builder.cc \
single_op/task/aicpu_task_builder.cc \
hybrid/common/tensor_value.cc \
hybrid/common/npu_memory_allocator.cc \
hybrid/executor/rt_callback_manager.cc \
hybrid/executor/node_state.cc \
hybrid/executor/node_done_manager.cc \
hybrid/executor/hybrid_profiler.cc \
hybrid/executor/hybrid_model_executor.cc \
hybrid/executor/hybrid_model_async_executor.cc \
hybrid/executor/hybrid_execution_context.cc \
hybrid/executor/worker/task_compile_engine.cc \
hybrid/executor/worker/shape_inference_engine.cc \
hybrid/executor/worker/execution_engine.cc \
hybrid/model/hybrid_model.cc \
hybrid/model/hybrid_model_builder.cc \
hybrid/model/node_item.cc \
hybrid/node_executor/aicore/aicore_node_executor.cc \
hybrid/node_executor/aicore/aicore_op_task.cc \
hybrid/node_executor/aicore/aicore_task_builder.cc \
hybrid/node_executor/aicore/aicore_task_compiler.cc \
hybrid/node_executor/aicpu/aicpu_ext_info.cc \
hybrid/node_executor/aicpu/aicpu_node_executor.cc \
hybrid/node_executor/compiledsubgraph/known_node_executor.cc \
hybrid/node_executor/hostcpu/ge_local_node_executor.cc \
hybrid/node_executor/node_executor.cc \
hybrid/node_executor/task_context.cc \
hybrid/hybrid_davinci_model.cc \
executor/ge_executor.cc \

LIBCLIENT_LOCAL_SRC_FILES := \
proto/ge_api.proto \
client/ge_api.cc \

RUNNER_LOCAL_C_INCLUDES := \
$(LOCAL_PATH) ./ \
$(LOCAL_PATH)/../ \
$(LOCAL_PATH)/../../ \
$(TOPDIR)inc \
$(TOPDIR)inc/common \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/framework/common \
$(TOPDIR)inc/graph \
$(TOPDIR)inc/runtime \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
proto/fwk_adapter.proto \
proto/ge_ir.proto \
proto/insert_op.proto \
proto/om.proto \
proto/op_mapping_info.proto \
proto/task.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/function.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/resource_handle.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/types.proto \
proto/tensorflow/versions.proto \
third_party/json/include \
third_party/opencv/include \
third_party/protobuf/include \



#compiler for GeRunner
include $(CLEAR_VARS)

LOCAL_MODULE := libge_runner

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif


LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_common \
libhccl \
libmsprof \
liberror_manager \


LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \
libresource \

include $(BUILD_HOST_SHARED_LIBRARY)


# add engine_conf.json to host
include $(CLEAR_VARS)

LOCAL_MODULE := engine_conf.json

LOCAL_SRC_FILES := engine_manager/engine_conf.json

LOCAL_MODULE_CLASS := ETC

LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/engine_conf.json
include $(BUILD_HOST_PREBUILT)

# add optimizer_priority.pbtxt to host
include $(CLEAR_VARS)

LOCAL_MODULE := optimizer_priority.pbtxt

LOCAL_SRC_FILES := opskernel_manager/optimizer_priority.pbtxt

LOCAL_MODULE_CLASS := ETC

LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/optimizer_priority.pbtxt
include $(BUILD_HOST_PREBUILT)

#compiler for GeRunner static lib
include $(CLEAR_VARS)

LOCAL_MODULE := libge_runner

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD

LOCAL_CFLAGS += -g -O0


LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \
libmmpa \
libhccl \
libmsprof \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \
libresource \

include $(BUILD_HOST_STATIC_LIBRARY)

#compiler for GeRunner static lib device
include $(CLEAR_VARS)

LOCAL_MODULE := libge_runner

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD

LOCAL_CFLAGS += -g -O0

LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libslog \
libmmpa \
libhccl \
libmsprof \

LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \
libresource \

include $(BUILD_STATIC_LIBRARY)

+ 333
- 0
src/ge/ge_train.mk View File

@@ -0,0 +1,333 @@
LOCAL_PATH := $(call my-dir)

COMMON_LOCAL_SRC_FILES := \
proto/fusion_model.proto \
proto/optimizer_priority.proto \
session/inner_session.cc \
session/session_manager.cc \
common/ge/plugin_manager.cc\
common/fp16_t.cc \
common/formats/utils/formats_trans_utils.cc \
common/formats/format_transfers/datatype_transfer.cc \
common/formats/format_transfers/format_transfer_transpose.cc \
common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_fractal_z.cc \
common/formats/format_transfers/format_transfer_fractal_nz.cc \
common/formats/format_transfers/format_transfer_fractal_zz.cc \
common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \
common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \
common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \
common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \
common/formats/format_transfers/format_transfer_fracz_nchw.cc \
common/formats/format_transfers/format_transfer_fracz_nhwc.cc \
common/formats/format_transfers/format_transfer_fracz_hwcn.cc \
common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \
common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \
common/formats/formats.cc \
init/gelib.cc \
engine_manager/dnnengine_manager.cc \
opskernel_manager/ops_kernel_manager.cc \
graph/manager/graph_manager.cc \
graph/manager/graph_manager_utils.cc \
graph/manager/graph_context.cc \
graph/preprocess/graph_preprocess.cc \
graph/preprocess/multi_batch_copy_graph.cc \
graph/execute/graph_execute.cc \
graph/load/graph_loader.cc \
graph/optimize/graph_optimize.cc \
graph/passes/folding_pass.cc \
graph/optimize/summary_optimize.cc \
graph/build/graph_builder.cc \
graph/partition/engine_place.cc \
graph/partition/graph_partition.cc \
graph/partition/dynamic_shape_partition.cc \
generator/ge_generator.cc \
generator/generator_api.cc \
common/profiling/profiling_manager.cc \
ge_local_engine/engine/host_cpu_engine.cc \
common/helper/model_cache_helper.cc \

OMG_HOST_SRC_FILES := \
model/ge_model.cc \
model/ge_root_model.cc \
graph/common/transop_util.cc \
graph/manager/graph_var_manager.cc \
graph/manager/trans_var_data_utils.cc \
omm/csa_interact.cc \
graph/passes/pass_manager.cc \
graph/passes/pass_utils.cc \
graph/passes/base_pass.cc \
graph/passes/resource_pair_add_control_pass.cc \
graph/passes/resource_pair_remove_control_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/aicpu_constant_folding_pass.cc \
graph/passes/reshape_remove_pass.cc \
graph/passes/reshape_recovery_pass.cc \
graph/passes/transop_breadth_fusion_pass.cc \
graph/passes/transop_depth_fusion_pass.cc \
graph/passes/same_transdata_breadth_fusion_pass.cc \
graph/passes/transop_without_reshape_fusion_pass.cc \
graph/passes/compile_nodes_pass.cc \
graph/passes/transop_nearby_allreduce_fusion_pass.cc \
graph/passes/variable_prepare_op_pass.cc \
graph/passes/variable_ref_delete_op_pass.cc \
graph/passes/variable_ref_useless_control_out_delete_pass.cc \
graph/passes/variable_op_pass.cc \
graph/passes/cast_remove_pass.cc \
graph/passes/replace_transshape_pass.cc \
graph/passes/transpose_transdata_pass.cc \
graph/passes/identify_reference_pass.cc \
graph/passes/variable_format_pass.cc \
graph/passes/subgraph_pass.cc \
graph/passes/data_pass.cc \
graph/passes/net_output_pass.cc \
graph/passes/constant_fuse_same_pass.cc \
graph/passes/print_op_pass.cc \
graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/iterator_op_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \
graph/optimize/optimizer/allreduce_fusion_pass.cc \
graph/common/omg_util.cc \
graph/common/bcast.cc \
graph/passes/dimension_compute_pass.cc \
graph/passes/dimension_adjust_pass.cc \
graph/passes/get_original_format_pass.cc \
graph/passes/shape_operate_op_remove_pass.cc \
graph/passes/unused_op_remove_pass.cc \
graph/passes/assert_pass.cc \
graph/passes/dropout_pass.cc \
graph/passes/infershape_pass.cc \
graph/passes/unused_const_pass.cc \
graph/passes/isolated_op_remove_pass.cc \
graph/passes/permute_pass.cc \
graph/passes/ctrl_edge_transfer_pass.cc \
host_kernels/broadcast_gradient_args_kernel.cc \
host_kernels/greater_kernel.cc \
host_kernels/gather_v2_kernel.cc \
host_kernels/maximum_kernel.cc \
host_kernels/floormod_kernel.cc \
host_kernels/floordiv_kernel.cc \
host_kernels/range_kernel.cc \
host_kernels/shape_kernel.cc \
host_kernels/size_kernel.cc \
host_kernels/shape_n_kernel.cc \
host_kernels/rank_kernel.cc \
host_kernels/broadcast_args_kernel.cc \
host_kernels/fill_kernel.cc \
host_kernels/empty_kernel.cc \
host_kernels/expanddims_kernel.cc \
host_kernels/reshape_kernel.cc \
host_kernels/squeeze_kernel.cc \
host_kernels/kernel_utils.cc \
host_kernels/cast_kernel.cc \
host_kernels/transdata_kernel.cc \
host_kernels/transpose_kernel.cc \
host_kernels/permute_kernel.cc \
host_kernels/pack_kernel.cc \
host_kernels/concat_v2_kernel.cc \
host_kernels/concat_offset_kernel.cc \
host_kernels/strided_slice_kernel.cc \
host_kernels/ssd_prior_box_kernel.cc \
host_kernels/add_kernel.cc \
host_kernels/unpack_kernel.cc \
host_kernels/sub_kernel.cc \
host_kernels/mul_kernel.cc \
host_kernels/reduce_prod_kernel.cc \
host_kernels/rsqrt_kernel.cc \
host_kernels/slice_kernel.cc \
host_kernels/slice_d_kernel.cc \
host_kernels/dynamic_stitch_kernel.cc \
graph/passes/stop_gradient_pass.cc \
graph/passes/prevent_gradient_pass.cc \
graph/passes/identity_pass.cc \
graph/passes/placeholder_with_default_pass.cc \
graph/passes/snapshot_pass.cc \
graph/passes/guarantee_const_pass.cc \
graph/passes/var_is_initialized_op_pass.cc \
graph/passes/parallel_concat_start_op_pass.cc \
graph/passes/cast_translate_pass.cc \
graph/passes/addn_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \
graph/passes/transop_symmetry_elimination_pass.cc \
graph/passes/save_pass.cc \
graph/passes/switch_dead_branch_elimination.cc \
graph/passes/merge_pass.cc \
graph/passes/prune_pass.cc \
graph/passes/flow_ctrl_pass.cc \
graph/passes/control_trigger_pass.cc \
graph/passes/switch_data_edges_bypass.cc \
graph/passes/switch_op_pass.cc \
graph/passes/multi_batch_pass.cc \
graph/passes/switch_logic_remove_pass.cc \
graph/passes/next_iteration_pass.cc \
graph/passes/cond_pass.cc \
graph/passes/cond_remove_pass.cc \
graph/passes/for_pass.cc \
graph/passes/enter_pass.cc \
graph/passes/hccl_memcpy_pass.cc \
graph/passes/link_gen_mask_nodes_pass.cc \
graph/passes/replace_with_empty_const_pass.cc \
graph/passes/hccl_group_pass.cc \

OME_SRC_FILES := \
graph/manager/graph_mem_allocator.cc \
graph/manager/graph_caching_allocator.cc \
graph/manager/model_manager/event_manager.cc \
graph/manager/util/debug.cc \
graph/manager/util/rt_context_util.cc \
graph/manager/util/variable_accelerate_ctrl.cc \
graph/manager/util/hcom_util.cc \
graph/load/new_model_manager/model_manager.cc \
graph/load/new_model_manager/data_inputer.cc \
graph/load/new_model_manager/davinci_model.cc \
graph/load/new_model_manager/davinci_model_parser.cc \
graph/load/new_model_manager/model_utils.cc \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/output/output.cc \
graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \
graph/load/new_model_manager/task_info/event_wait_task_info.cc \
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
graph/load/new_model_manager/task_info/hccl_task_info.cc \
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
graph/load/new_model_manager/task_info/kernel_task_info.cc \
graph/load/new_model_manager/task_info/label_set_task_info.cc \
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
graph/load/new_model_manager/task_info/stream_active_task_info.cc \
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
graph/load/new_model_manager/task_info/end_graph_task_info.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
single_op/task/op_task.cc \
single_op/task/build_task_utils.cc \
single_op/task/tbe_task_builder.cc \
single_op/task/aicpu_task_builder.cc \
single_op/single_op.cc \
single_op/single_op_model.cc \
single_op/stream_resource.cc \
single_op/single_op_manager.cc \
hybrid/hybrid_davinci_model_stub.cc \


COMMON_LOCAL_C_INCLUDES := \
proto/om.proto \
proto/task.proto \
proto/insert_op.proto \
proto/ge_ir.proto \
proto/fwk_adapter.proto \
proto/op_mapping_info.proto \
proto/tensorflow/attr_value.proto \
proto/tensorflow/function.proto \
proto/tensorflow/graph.proto \
proto/tensorflow/node_def.proto \
proto/tensorflow/op_def.proto \
proto/tensorflow/resource_handle.proto \
proto/tensorflow/tensor.proto \
proto/tensorflow/tensor_shape.proto \
proto/tensorflow/types.proto \
proto/tensorflow/versions.proto \
$(LOCAL_PATH) ./ \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/framework/common \
$(TOPDIR)inc/runtime \
$(TOPDIR)libc_sec/include \
$(TOPDIR)ops/built-in/op_proto/inc \
third_party/json/include \
third_party/protobuf/include \
third_party/opencv/include \

NEW_OMG_HOST_SRC_FILES := \
graph/preprocess/insert_op/util_insert_aipp_op.cc \
graph/preprocess/insert_op/ge_aipp_op.cc \
graph/build/model_builder.cc \
graph/build/task_generator.cc \
graph/build/stream_allocator.cc \
graph/build/logical_stream_allocator.cc \
graph/build/stream_graph_optimizer.cc \
graph/build/run_context.cc \
graph/build/label_allocator.cc \
graph/label/label_maker.cc \
graph/label/if_label_maker.cc \
graph/label/case_label_maker.cc \
graph/label/while_label_maker.cc \
graph/label/partitioned_call_label_maker.cc \



#compiler for host train
include $(CLEAR_VARS)

LOCAL_MODULE := libge_train

LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DDAVINCI_CLOUD -DDAVINCI_TRAIN -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING
LOCAL_CFLAGS += -DFMK_SUPPORT_DEBUG
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)

LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SRC_FILES += $(OMG_HOST_SRC_FILES)
LOCAL_SRC_FILES += $(OME_SRC_FILES)
LOCAL_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_common \
libhccl \
libmsprof \


LOCAL_LDFLAGS := -lrt -ldl

LOCAL_SHARED_LIBRARIES += \
libruntime \
libresource \

include $(BUILD_HOST_SHARED_LIBRARY)

# add engine_conf.json to host
include $(CLEAR_VARS)

LOCAL_MODULE := engine_conf.json

LOCAL_SRC_FILES := engine_manager/engine_conf.json

LOCAL_MODULE_CLASS := ETC

LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/engine_conf.json
include $(BUILD_HOST_PREBUILT)

# add optimizer_priority.pbtxt to host
include $(CLEAR_VARS)

LOCAL_MODULE := optimizer_priority.pbtxt

LOCAL_SRC_FILES := opskernel_manager/optimizer_priority.pbtxt

LOCAL_MODULE_CLASS := ETC

LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/optimizer_priority.pbtxt
include $(BUILD_HOST_PREBUILT)

+ 75
- 22
src/ge/generator/ge_generator.cc View File

@@ -22,10 +22,13 @@
#include "common/util.h"
#include "framework/common/debug/ge_log.h"
#include "ge/ge_api.h"
#include "graph/ge_context.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/manager/graph_manager.h"
#include "graph/manager/util/rt_context_util.h"
#include "graph/opsproto_manager.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/type_utils.h"
#include "model/ge_model.h"
#include "init/gelib.h"

@@ -108,7 +111,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi
return FAILED;
}

static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index,
static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTensorDesc &tensor, int32_t index,
bool attr) {
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);
@@ -122,6 +125,17 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const
if (data_op == nullptr) {
return FAILED;
}
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
auto input_desc = op_desc->MutableInputDesc(index);
GE_CHECK_NOTNULL_EXEC(input_desc, return PARAM_INVALID);
ge::Format old_format = input_desc->GetFormat();
if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) {
input_desc->SetFormat(FORMAT_ND);
input_desc->SetOriginFormat(FORMAT_ND);
(void)AttrUtils::SetStr(data_op, "_single_input_format", TypeUtils::FormatToSerialString(old_format));
(void)AttrUtils::SetBool(data_op, "_is_single_op", true);
}

GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail.");
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail.");
@@ -139,10 +153,21 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const
}

static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, const vector<GeTensor> &outputs) {
OpDescPtr op_desc = MakeShared<ge::OpDesc>(NODE_NAME_NET_OUTPUT, NETOUTPUT);
OpDescPtr op_desc = MakeShared<ge::OpDesc>(graph->GetName() + "_" + NODE_NAME_NET_OUTPUT, NETOUTPUT);
if (op_desc == nullptr) {
return FAILED;
}
auto single_op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL_EXEC(single_op_desc, return PARAM_INVALID);
auto output_desc = single_op_desc->MutableOutputDesc(0);
GE_CHECK_NOTNULL_EXEC(output_desc, return PARAM_INVALID);
ge::Format old_format = output_desc->GetFormat();
if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) {
output_desc->SetFormat(FORMAT_ND);
output_desc->SetOriginFormat(FORMAT_ND);
(void)AttrUtils::SetStr(op_desc, "_single_output_format", TypeUtils::FormatToSerialString(old_format));
(void)AttrUtils::SetBool(op_desc, "_is_single_op", true);
}
int32_t count = 0;
for (const auto &out_desc : outputs) {
GeTensorDesc tensor = out_desc.GetTensorDesc();
@@ -187,6 +212,19 @@ static void GetOpsProtoPath(string &opsproto_path) {
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}

static string GetModelNameFromFileName(const string &file_name_prefix) {
int start_position = 0;
// using output as model_name (ignore ".om")
int filename_suffixes = 3;
if (file_name_prefix.find_last_of('/') != string::npos) {
start_position += 1;
}
int end_position = file_name_prefix.length() - filename_suffixes;
string model_name = file_name_prefix.substr(start_position, end_position - start_position);
GELOGI("Get model_name from file, model_name:%s", model_name.c_str());
return model_name;
}

class GeGenerator::Impl {
public:
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models);
@@ -278,24 +316,28 @@ Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) {
}
return ret;
}
GELOGI("GenerateInfershapeGraph success.");
GELOGI("Generate infer shape graph success");
return SUCCESS;
}

Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
ModelBufferData &model, bool is_offline) {
rtContext_t ctx = nullptr;
auto rt = rtCtxGetCurrent(&ctx);
if (rt != RT_ERROR_NONE) {
GELOGW("Current ctx is null.");
} else {
ge::RtContextUtil::GetInstance().SetNormalModeContext(ctx);
}
GraphId graph_id;
GeRootModelPtr ge_root_model = nullptr;
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
// using output as model_name (ignore ".om")
int start_position = file_name_prefix.find_last_of('/') + 1;
int end_position = file_name_prefix.length() - 3;
const string model_name = file_name_prefix.substr(start_position, end_position - start_position);
const string model_name = GetModelNameFromFileName(file_name_prefix);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(model_name.empty(), return PARAM_INVALID, "om name is not valid!");
impl_->is_offline_ = is_offline;
Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model);
if (ret != SUCCESS) {
GELOGE(ret, "Build model failed");
GELOGE(ret, "Build model failed.");
if (impl_->graph_manager_.Finalize() != SUCCESS) {
GELOGE(FAILED, "graph_manager finalize fail.");
}
@@ -316,6 +358,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
}
return ret;
}

if (RtContextUtil::GetInstance().GetNormalModeContext() != nullptr) {
(void)rtCtxSetCurrent(RtContextUtil::GetInstance().GetNormalModeContext());
}

GELOGI("GenerateOfflineModel success.");
return SUCCESS;
}
@@ -325,11 +372,11 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
bool is_offline) {
GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) {
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size:%zu", inputs.size(), op_desc->GetInputsSize());
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize());
return PARAM_INVALID;
}
if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) {
GELOGE(PARAM_INVALID, "Tensor size: %zu, Outputs size:%zu", outputs.size(), op_desc->GetOutputsSize());
GELOGE(PARAM_INVALID, "Tensor size: %zu, Outputs size: %zu", outputs.size(), op_desc->GetOutputsSize());
return PARAM_INVALID;
}

@@ -368,7 +415,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
}
} else {
for (const auto &in_desc : inputs) {
const GeTensorDesc input_desc = in_desc.GetTensorDesc();
GeTensorDesc input_desc = in_desc.GetTensorDesc();
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true));
arg_index++;
}
@@ -382,7 +429,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
// dump ComputeGraph.
compute_graph->Dump();
Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph);
GELOGI("ATC parser success in single op schedule.");
GELOGI("ATC parser success in single op build.");

GraphId graph_id;
GeRootModelPtr ge_root_model = nullptr;
@@ -394,7 +441,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GELOGD("The opType in op_desc_tmp is: %s", op_desc_tmp->GetType().c_str());
GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff));
return SUCCESS;
@@ -411,7 +458,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
*/
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, const string &model_file_name) {
GELOGI("Start to Build Single Op Offline Model.");
GELOGI("Start to build single op offline model.");
ModelBufferData model_buff;
OpEngineType engine_type = ENGINE_SYS;
return BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true);
@@ -430,7 +477,7 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, OpEngineType engine_type,
ModelBufferData &model_buff) {
GELOGI("Start to Build Single Op Online");
GELOGI("Start to build single op online");
return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false);
}

@@ -449,7 +496,7 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr &
model_helper.SetSaveMode(is_offline_);
Status ret = model_helper.SaveToOmModel(model, save_param_, file_name_prefix, model_buff);
if (ret != SUCCESS) {
GELOGE(ret, "Save to Om model failed");
GELOGE(ret, "Save to om model failed");
return ret;
}
return SUCCESS;
@@ -461,16 +508,22 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor>
const std::map<std::string, std::string> options;
Status ret = graph_manager_.AddGraph(id, graph, options);
if (ret != SUCCESS) {
GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph failed, id: %u", id);
GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph fail, graph id: %u", id);
(void)graph_manager_.Finalize();
return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED;
}

GELOGI("models inputs.size()=%zu", inputs.size());
GELOGI("Model inputs size is %zu", inputs.size());
graph_manager_.SetOptionsRunGraphFlag(false);
ret = graph_manager_.BuildGraph(id, inputs, ge_root_model);
struct timeval tv;
if (gettimeofday(&tv, nullptr) != 0) {
GELOGE(INTERNAL_ERROR, "get the time of day failed.");
return INTERNAL_ERROR;
}
uint64_t session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us
ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id);
if (ret != SUCCESS) {
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph failed, id: %u", id);
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id);
return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED;
}

@@ -485,14 +538,14 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &g
const std::map<std::string, std::string> options;
Status ret = graph_manager_.AddGraph(id, graph, options);
if (ret != SUCCESS) {
GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "graphManager add graph failed, id: %u", id);
GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph failed, graph id: %u", id);
(void)graph_manager_.Finalize();
return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED;
}

ret = graph_manager_.GenerateInfershapeGraph(id);
if (ret != SUCCESS) {
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager BuildGraph failed, id: %u", id);
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager generate graph failed");
return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED;
}



+ 41
- 60
src/ge/graph/build/memory/block_mem_assigner.cc View File

@@ -160,10 +160,10 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block) {
parent->child_offset_ += child->block_size_;
child->deleted_block_ = true;
GELOGI(
"Add block stream id:%ld [size:%zu, life time[begin:%zu, end:%zu]] to"
" block[size:%zu, life time[begin:%zu, end:%zu]]",
stream_id_, child->block_size_, child->GetLifeBegin(), child->GetLifeEnd(), parent->block_size_,
parent->GetLifeBegin(), parent->GetLifeEnd());
"Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to"
" block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]",
child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent,
parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd());
}
}

@@ -499,17 +499,17 @@ void BlockMemAssigner::InitReuseFlag() {
bool pre_reuse_flag = true;
bool post_reuse_flag = true;
for (auto &node_index_io : pair.second) {
if (node_index_io.io_type == kIn) {
if (node_index_io.io_type_ == kIn) {
continue;
}

OutDataAnchorPtr out_anchor = node_index_io.node->GetOutDataAnchor(node_index_io.index);
OutDataAnchorPtr out_anchor = node_index_io.node_->GetOutDataAnchor(node_index_io.index_);
if (out_anchor == nullptr) {
continue;
}

bool out_flg = false;
if (node_index_io.node->GetOutDataNodes().empty()) {
if (node_index_io.node_->GetOutDataNodes().empty()) {
out_flg = true;
}
for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) {
@@ -643,7 +643,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
CanReuseByStream(map_iter->second, *reusable_block)) {
GELOGD("Cross stream mem reuse, target stream:%ld, current stream:%ld", reusable_block->stream_id_,
stream_id);
reusable_block->AddNodeTypeIndex({n, mem_type, out_index}, real_size, no_align_size);
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size);
if (mem_type == kOutput) {
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString());
if (iter != anchor_to_symbol_.end()) {
@@ -660,7 +660,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
}
}

auto block = new (std::nothrow) MemoryBlock(block_size, is_reuse_memory);
auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed.");

// Data and netoutput need zero copy block
@@ -688,7 +688,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
auto node_op_desc = n->GetOpDesc();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null.");
MemoryBlock *block = nullptr;
NodeIndexIO node_index_io = NodeIndexIO(n, index, kOut);
NodeIndexIO node_index_io(n, index, kOut);
int64_t size = 0;
auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
if (output_op_desc != nullptr) {
@@ -701,7 +701,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
if (IsSymbolExist(node_index_io)) {
std::string symbol = anchor_to_symbol_[node_index_io.ToString()];
block = symbol_blocks_[symbol];
block->AddNodeTypeIndex({n, kOutput, index}, size, no_align_size);
block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size);
block->ref_count_++;
} else {
int64_t max_size = size;
@@ -749,7 +749,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS,
GELOGI("Get dst_reuse_input_index failed"));
if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) {
block->AddNodeTypeIndex({owner_node, kOutput, i}, block->Size(), block->Size());
block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size());
out_count_reuse_input += 1;
reuse_input = true;
}
@@ -775,31 +775,6 @@ bool IsOutputBlock(const ge::InDataAnchorPtr &in_data_anchor) {
return false;
}

// current node's output uses previous node's output memory
bool IsReferencePreviousNodeOutputMemory(const ge::NodePtr &node, uint32_t output_index) {
  auto desc = node->GetOpDesc();
  if (desc == nullptr) {
    return false;
  }
  // A node can only reference input memory when it carries the reference
  // attribute; GetBool failure leaves the flag false (treated as "not a ref").
  bool ref_attr = false;
  (void)ge::AttrUtils::GetBool(desc, ATTR_NAME_REFERENCE, ref_attr);
  if (!ref_attr) {
    return false;
  }
  // The output reuses an input's memory when the output name matches one of
  // the (non-empty) input names on the same op.
  const string &out_name = desc->GetOutputNameByIndex(output_index);
  for (const auto &in_name : desc->GetAllInputNames()) {
    if (in_name.empty() || out_name != in_name) {
      continue;
    }
    int in_index = desc->GetInputIndexByName(in_name);
    GELOGI("Reference memory:name[%s] output[%s][%u] ref to input[%s][%d] ", desc->GetName().c_str(),
           out_name.c_str(), output_index, in_name.c_str(), in_index);
    return true;
  }
  return false;
}

// atomic out memory will be reassigned
bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool is_atomic,
bool out_node_set_continuous_input) {
@@ -920,58 +895,57 @@ void CheckAndGetOpReuseEnv(const string &env, vector<string> &env_vec, bool &op_
}

Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) {
auto node_op_desc = node->GetOpDesc();
int64_t stream_id = node_op_desc->GetStreamId();
auto op_desc = node->GetOpDesc();
int64_t stream_id = op_desc->GetStreamId();
vector<int64_t> memorys_type;
bool has_mem_type_attr = ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type);
GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", node_op_desc->GetName().c_str(),
node_op_desc->GetOutputsSize(), memorys_type.size());
if (has_mem_type_attr && (memorys_type.size() != node_op_desc->GetOutputsSize())) {
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type);
GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", op_desc->GetName().c_str(),
op_desc->GetOutputsSize(), memorys_type.size());
if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) {
GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]",
node_op_desc->GetName().c_str(), node_op_desc->GetOutputsSize(), memorys_type.size());
op_desc->GetName().c_str(), op_desc->GetOutputsSize(), memorys_type.size());
return INTERNAL_ERROR;
}

is_op_reuse_mem_ = true;
if (op_reuse_env_valid_ == true) {
vector<string>::iterator it_name =
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), node_op_desc->GetName());
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName());
vector<string>::iterator it_type =
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), node_op_desc->GetType());
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetType());
GE_IF_BOOL_EXEC(it_name != op_no_reuse_mem_vec_.end() || it_type != op_no_reuse_mem_vec_.end(),
is_op_reuse_mem_ = false;);
}

bool is_atomic = false;
// If GetBool fail, is_atomic is false.
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic);
(void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic);
// Allocate memory for the current node and release node memory of the same size in the workspace
GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1",
ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_);)
for (uint32_t i = 0; i < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); i++) {
for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) {
int64_t size = 0;
auto output_op_desc = node_op_desc->GetOutputDescPtr(i);
auto output_op_desc = op_desc->GetOutputDescPtr(i);
if (output_op_desc != nullptr) {
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
}
// fusion: other type's size not means malloc HBM memory
bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1;
if (l1_flag) {
GELOGI("fusion: node[%s], output[%s], output memory type [%d]", node_op_desc->GetName().c_str(),
node_op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]);
GELOGI("fusion: node[%s], output[%s], output memory type [%d]", op_desc->GetName().c_str(),
op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]);
size = 0;
}
std::string peer_name;
uint32_t peer_input_index = 0;
bool out_node_set_continuous_input = false;
bool no_need_assign_memory =
((size == 0) || CheckIsZeroMemNodeType(node->GetType()) || IsReferencePreviousNodeOutputMemory(node, i));
bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType()));
if (!no_need_assign_memory) {
out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index);
no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);
}
if (no_need_assign_memory) {
zero_memory_list_.emplace_back(node, kOutput, i);
zero_memory_list_.emplace_back(node, kOutput, i, false);
continue;
}
// atomic can't be reused
@@ -1049,7 +1023,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
workspace_skip_flag = true;
}
if (temp[i] == 0 || workspace_skip_flag) {
zero_memory_list_.emplace_back(n, kWorkspace, static_cast<uint32_t>(i));
zero_memory_list_.emplace_back(n, kWorkspace, static_cast<uint32_t>(i), false);
continue;
}
MemoryBlock *mem_block = ApplyMemory(GetBlockSize(static_cast<size_t>(temp[i]), ranges),
@@ -1067,7 +1041,9 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
(void)mem_block; // Fix warning
}

GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), MergeDynamicBatchBlocks();)
bool merge_dynamic_batch = false;
GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks();)
GE_IF_BOOL_EXEC(!merge_dynamic_batch, ReuseBlocksByLifeTime();)
AssignContinuousBlocks();
ResizeMemoryBlocks();

@@ -1131,7 +1107,8 @@ void MergeBlocks(std::vector<MemoryBlock *> &dest, std::vector<MemoryBlock *> &s
}
}

void BlockMemAssigner::MergeDynamicBatchBlocks() {
bool BlockMemAssigner::MergeDynamicBatchBlocks() {
bool merged = false;
std::map<std::string, std::vector<MemoryBlock *>> dynamic_batch_blocks;
for (auto block : memory_blocks_) {
if (block == nullptr) {
@@ -1160,8 +1137,10 @@ void BlockMemAssigner::MergeDynamicBatchBlocks() {
if (it != it_max) {
GELOGD("MergeDynamicBatch from %s to %s", it->first.c_str(), it_max->first.c_str());
MergeBlocks(it_max->second, it->second);
merged = true;
}
}
return merged;
}

// asending order
@@ -1331,9 +1310,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz
}
GELOGI(
"[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]"
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d].",
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d] isref[%d].",
graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset,
op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block);
op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block,
node_type.ref_input);
}

void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) {
@@ -1528,6 +1508,7 @@ void BlockMemAssigner::FindDependentStreamBetweenGraphs(const NodePtr &pre_node,
bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) ||
(node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) ||
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT);
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) ||
(node_type == HVDCALLBACKBROADCAST) || (node_type == HVDCALLBACKALLREDUCE);
}
} // namespace ge

+ 11
- 8
src/ge/graph/build/memory/block_mem_assigner.h View File

@@ -23,6 +23,7 @@
#include <unordered_set>
#include <utility>
#include <vector>
#include <list>
#include "common/ge_inner_error_codes.h"
#include "common/types.h"
#include "common/util.h"
@@ -36,13 +37,14 @@ const size_t kMaxLifeTime = 0xffffffff;
enum MemoryType { kOutput, kWorkspace };

struct NodeTypeIndex {
NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index)
: node(std::move(node)), mem_type(mem_type), index(index) {}
NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index, bool ref_input = false)
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {}

ge::NodePtr node = nullptr;
MemoryType mem_type = kOutput;
uint32_t index = 0;
size_t life_time_end = kMaxLifeTime;
bool ref_input = false;
const string GetMemType() const {
if (mem_type == kOutput) {
return "output";
@@ -55,9 +57,9 @@ struct NodeTypeIndex {

class MemoryBlock {
public:
explicit MemoryBlock(size_t block_size, bool reuse_mem = true)
explicit MemoryBlock(size_t block_size, int64_t stream_id = 0, bool reuse_mem = true)
: ref_count_(0),
stream_id_(0),
stream_id_(stream_id),
deleted_block_(false),
reuse_mem_(reuse_mem),
input_index_(0),
@@ -81,7 +83,7 @@ class MemoryBlock {
void Init(size_t real_size, MemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) {
real_size_list_.emplace_back(real_size);
no_align_size_list_.emplace_back(no_align_size);
node_type_index_list_.emplace_back(node, type, out_index);
node_type_index_list_.emplace_back(node, type, out_index, false);
}
size_t Size() const { return block_size_; }

@@ -129,6 +131,7 @@ class MemoryBlock {
bool continuous_block_;
bool last_continuous_block_;
bool is_zero_copy_;
std::map<int64_t, size_t> depend_stream_life_;

private:
size_t block_size_;
@@ -287,7 +290,7 @@ class BlockMemAssigner : public MemAssigner {
std::vector<NodeTypeIndex> zero_memory_list_;

// ref mapping
std::map<std::string, std::vector<NodeIndexIO>> symbol_to_anchors_;
std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors_;
std::map<std::string, std::string> anchor_to_symbol_;
std::map<std::string, bool> pre_reuse_flag_;
std::map<std::string, bool> post_reuse_flag_;
@@ -371,10 +374,10 @@ class BlockMemAssigner : public MemAssigner {
///
/// @ingroup GE
/// @brief Merge memory blocks between different batchs
/// @return void
/// @return merge or not
/// @author
///
void MergeDynamicBatchBlocks();
bool MergeDynamicBatchBlocks();

void AssignContinuousBlocks();



+ 98
- 0
src/ge/graph/build/memory/module.mk View File

@@ -0,0 +1,98 @@
LOCAL_PATH := $(call my-dir)

# Sources shared by the host, device and LLT variants of libge_memory.
local_lib_src_files := memory_assigner.cc \
                       graph_mem_assigner.cc \
                       binary_block_mem_assigner.cc \
                       block_mem_assigner.cc \
                       hybrid_mem_assigner.cc \
                       max_block_mem_assigner.cc \
                       var_mem_assign_util.cc \

local_lib_inc_path := ${LOCAL_PATH} \
                      ${TOPDIR}inc \
                      ${TOPDIR}inc/external \
                      ${TOPDIR}inc/external/graph \
                      $(TOPDIR)libc_sec/include \
                      ${TOPDIR}third_party/protobuf/include \
                      ${TOPDIR}inc/framework \
                      $(TOPDIR)framework/domi \

#compiler for host
include $(CLEAR_VARS)
LOCAL_MODULE := libge_memory

LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -O2
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif

LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
                          libc_sec \
                          libslog \
                          libgraph \
                          libge_common \

LOCAL_SRC_FILES := $(local_lib_src_files)

generated_sources_dir := $(call local-generated-sources-dir)
LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH)
LOCAL_C_INCLUDES := $(local_lib_inc_path)
# Expand the variable: the bare name LOCAL_EXPORT_C_INCLUDE_DIRS would be
# passed to the compiler as a literal (nonexistent) include directory.
LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS)

include ${BUILD_HOST_STATIC_LIBRARY}


#compiler for device
include $(CLEAR_VARS)
LOCAL_MODULE := libge_memory

LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY
LOCAL_CFLAGS += -O2
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
                          libc_sec \
                          libslog \
                          libgraph \
                          libge_common \

LOCAL_SRC_FILES := $(local_lib_src_files)

generated_sources_dir := $(call local-generated-sources-dir)
LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH)
LOCAL_C_INCLUDES := $(local_lib_inc_path)
# Expand the variable (see host section above).
LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS)

include ${BUILD_STATIC_LIBRARY}

#compiler for LLT (this section builds the test/LLT static library,
# not a second device library — the previous comment was misleading)
include $(CLEAR_VARS)
LOCAL_MODULE := libge_memory

LOCAL_CFLAGS += -std=c++11
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
                          libc_sec \
                          libslog \
                          libgraph \
                          libge_common \

LOCAL_SRC_FILES := $(local_lib_src_files)

generated_sources_dir := $(call local-generated-sources-dir)
LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH)
LOCAL_C_INCLUDES := $(local_lib_inc_path)
# Expand the variable (see host section above).
LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS)

include ${BUILD_LLT_STATIC_LIBRARY}

+ 32
- 5
src/ge/graph/build/model_builder.cc View File

@@ -18,6 +18,7 @@
#include <iostream>
#include <set>
#include <unordered_map>
#include <securectype.h>
#include "common/ge/ge_util.h"
#include "framework/common/debug/ge_log.h"
#include "graph/anchor.h"
@@ -250,7 +251,7 @@ Status ModelBuilder::SetInputOutputDesc() {
}
// if user set input node format ND, the expected node for data and netoutput format is ND in
// final graph.
if ((domi::GetContext().format == domi::DOMI_TENSOR_ND) &&
if ((domi::GetContext().format == domi::DOMI_TENSOR_ND) && (!node_op_desc->HasAttr("_is_single_op")) &&
((node_op_desc->GetType() == DATA_TYPE) || (node_op_desc->GetType() == NETOUTPUT))) {
GELOGI("The node [%s] format should be set ND.", node_op_desc->GetName().c_str());
auto inputDescsPtr = node_op_desc->GetAllInputsDescPtr();
@@ -521,11 +522,37 @@ Status ModelBuilder::MergeWeights() {
}
if (weight_data.data() != nullptr) {
GE_IF_BOOL_EXEC(base_addr == nullptr, GELOGE(FAILED, "Base addr is nullptr."); return FAILED);
GE_CHK_BOOL_EXEC(
memcpy_s(base_addr + offset, weight_offset_ - offset, weight_data.data(), weight_data.size()) == EOK,
return FAILED, "call memcpy_s failed.");
if (weight_offset_ - offset < weight_data.size()) {
GELOGE(FAILED, "left weight size not enough. left_size:%lu, weight_size:%lu", weight_offset_ - offset,
weight_data.size());
return FAILED;
}
uintptr_t dst_ptr = (uintptr_t)base_addr + offset;
uintptr_t src_ptr = (uintptr_t)weight_data.data();
size_t left_size = weight_data.size();
while (left_size > SECUREC_MEM_MAX_LEN) {
auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast<void *>(src_ptr),
SECUREC_MEM_MAX_LEN);
if (err != EOK) {
GELOGE(FAILED,
"mem copy failed. errret:%u, "
"dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu",
err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN);
return FAILED;
}
left_size -= SECUREC_MEM_MAX_LEN;
dst_ptr = dst_ptr + SECUREC_MEM_MAX_LEN;
src_ptr = src_ptr + SECUREC_MEM_MAX_LEN;
}
auto err = memcpy_s(reinterpret_cast<void *>(dst_ptr), left_size, reinterpret_cast<void *>(src_ptr), left_size);
if (err != EOK) {
GELOGE(FAILED,
"mem copy failed. errret:%u, "
"dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu",
err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN);
return FAILED;
}
}

weight_data.clear();
}



+ 16
- 6
src/ge/graph/build/stream_allocator.cc View File

@@ -683,7 +683,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) {
GELOGE(FAILED, "SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id);
return FAILED;
}
stream_node_num_vec[stream_id]++;
AddNodeNum(cur_node, stream_node_num_vec[stream_id]);
stream_2_nodes_map[stream_id].push_back(cur_node);
// The maximum number of tasks per stream.
int64_t max_node_num_one_stream = GetMaxNodeNumPerStream(cur_node, max_task_count);
@@ -706,7 +706,8 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) {
"It's time to split the stream, split newly-added stream id is %ld",
stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id);
NodePtr pre_node = pre_node_vec[stream_id];
stream_node_num_vec[stream_id] = 1;
stream_node_num_vec[stream_id] = 0;
AddNodeNum(cur_node, stream_node_num_vec[stream_id]);
// try spilt a new stream and move same continuous stream label nodes from this stream
bool not_use_cur = false;
NodePtr not_cur = nullptr;
@@ -720,7 +721,7 @@ Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) {
auto stored_op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(stored_op_desc);
stored_op_desc->SetStreamId(last_stream_id);
stream_node_num_vec[stream_id]++;
AddNodeNum(node, stream_node_num_vec[stream_id]);
}
not_use_cur = true;
not_cur = nodes.front();
@@ -1055,7 +1056,7 @@ Status StreamAllocator::CollectDeactiveStream(const OpDescPtr &op_desc, std::set

// Insert StreamActive Op for Entry Stream.
Status StreamAllocator::InsertActiveEntryStream(const std::vector<uint32_t> &active_streams, int64_t stream_id) {
string node_name = "ActiveEntryStream_" + string(STREAMACTIVE);
string node_name = whole_graph_->GetName() + "_ActiveEntryStream_" + string(STREAMACTIVE);
OpDescPtr op_desc = ge::MakeShared<OpDesc>(node_name, STREAMACTIVE);
if (op_desc == nullptr) {
GELOGE(FAILED, "Failed to new opdesc.");
@@ -1143,7 +1144,7 @@ Status StreamAllocator::InsertSyncEventNodes() {
GE_CHECK_NOTNULL(node->GetInControlAnchor());
GE_CHECK_NOTNULL(node->GetOutControlAnchor());
for (auto &event_id : recv_event_id_list) {
string recv_node_name = "_Recv_" + to_string(event_id);
string recv_node_name = whole_graph_->GetName() + "_Recv_" + to_string(event_id);
OpDescPtr op_desc_ptr = MakeShared<OpDesc>(recv_node_name, RECV);
GE_CHECK_NOTNULL(op_desc_ptr);

@@ -1171,7 +1172,7 @@ Status StreamAllocator::InsertSyncEventNodes() {
GetSendEventIdList(node, send_event_id_list);

for (auto &event_id : send_event_id_list) {
string send_node_name = "_Send_" + to_string(event_id);
string send_node_name = whole_graph_->GetName() + "_Send_" + to_string(event_id);
OpDescPtr op_desc_ptr = MakeShared<OpDesc>(send_node_name, SEND);
GE_CHECK_NOTNULL(op_desc_ptr);

@@ -1291,6 +1292,15 @@ int64_t StreamAllocator::GetMaxNodeNumPerStream(const NodePtr &node, uint32_t ma
return max_node_num_one_stream;
}

// Accumulates into node_num the number of stream tasks contributed by `node`:
// one for the node itself plus one per synchronization event (send and recv)
// attached to it, so stream splitting accounts for event tasks as well.
// NOTE(review): `events` is reused for both lookups — assumes
// GetSendEventIdList/GetRecvEventIdList overwrite (not append to) the output
// vector; confirm against their implementations.
void StreamAllocator::AddNodeNum(const NodePtr &node, int64_t &node_num) {
  node_num++;
  vector<uint32_t> events;
  GetSendEventIdList(node, events);
  node_num += static_cast<int64_t>(events.size());
  GetRecvEventIdList(node, events);
  node_num += static_cast<int64_t>(events.size());
}

// Insert send event id on a node
void StreamAllocator::AddSendEventId(const NodePtr &node, uint32_t event_id) {
node_to_send_events_[node].emplace_back(event_id);


+ 1
- 0
src/ge/graph/build/stream_allocator.h View File

@@ -80,6 +80,7 @@ class StreamAllocator {

Status GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stream_count, uint32_t &max_task_count);
int64_t GetMaxNodeNumPerStream(const NodePtr &node, uint32_t max_node_num_one_stream);
void AddNodeNum(const NodePtr &node, int64_t &node_num);

void AddSendEventId(const NodePtr &node, uint32_t event_id);
void AddRecvEventId(const NodePtr &node, uint32_t event_id);


+ 21
- 15
src/ge/graph/build/task_generator.cc View File

@@ -47,6 +47,7 @@ const char *const kIsOutputVar = "OUTPUT_IS_VAR";
const char *const kProfilingMode = "PROFILING_MODE";
const char *const kProfilingFpPoint = "FP_POINT";
const char *const kProfilingBpPoint = "BP_POINT";
const char *const kOffOptimize = "off_optimize";
const uint32_t kProfilingArStep = 2;
const uint64_t kProfilingFpStartLogid = 1;
const uint64_t kProfilingBpEndLogid = 2;
@@ -83,10 +84,10 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t
}
Status ret = SUCCESS;
if (is_unknown_shape) {
GELOGI("Beign to generate unknown shape task.");
GELOGI("Beign to generate unknown shape task. Graph name is %s.", graph->GetName().c_str());
ret = GenerateUnknownShapeTask(run_context, graph, task_def_list, op_name_map);
} else {
GELOGI("Beign to generate known shape task.");
GELOGI("Beign to generate known shape task. Graph name is %s.", graph->GetName().c_str());
ret = GenerateTask(run_context, graph, task_def_list, op_name_map);
}
GE_DUMP(graph, "GenerateTaskAfter");
@@ -108,7 +109,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t
GELOGE(FAILED, "SetListStr failed.");
return FAILED);

GELOGI("Call GenerateTask Success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(),
GELOGI("Generate task success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(),
op_name_map.size());

// Init and serialize model_task_def
@@ -130,7 +131,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t
return ret;
}

GELOGI("Get TaskInfo success. session_id=%lu", session_id);
GELOGI("Get TaskInfo success. session id is %lu", session_id);
return SUCCESS;
}

@@ -253,7 +254,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed.");
return GE_CLI_GE_NOT_INITIALIZED;
}
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed.");
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed.");
ProfilingPoint profiling_point;
vector<uint32_t> all_reduce_nodes;
GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes));
@@ -263,9 +264,9 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
GE_TIMESTAMP_CALLNUM_START(GenerateTask);
// map store fusion nodes
map<int64_t, std::vector<NodePtr>> fusion_nodes;
string buffer_optimize = "off_optimize";
string buffer_optimize = kOffOptimize;
(void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize);
if (buffer_optimize != "off_optimize") {
if (buffer_optimize != kOffOptimize) {
GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph));
}
std::unordered_set<Node *> fusion_nodes_seen;
@@ -371,7 +372,7 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed.");
return GE_CLI_GE_NOT_INITIALIZED;
}
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed.");
GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed.");
ProfilingPoint profiling_point;
vector<uint32_t> all_reduce_nodes;
GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes));
@@ -381,9 +382,9 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG
GE_TIMESTAMP_CALLNUM_START(GenerateTask);
// map store fusion nodes
map<int64_t, std::vector<NodePtr>> fusion_nodes;
string buffer_optimize = "off_optimize";
string buffer_optimize = kOffOptimize;
(void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize);
if (buffer_optimize != "off_optimize") {
if (buffer_optimize != kOffOptimize) {
GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph));
}
std::unordered_set<Node *> fusion_nodes_seen;
@@ -392,7 +393,11 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG
rtStream_t stream = nullptr;
GE_CHK_RT_RET(rtStreamCreate(&stream, 0));
run_context.stream = stream;
GE_CHK_RT_RET(rtModelBindStream(run_context.model, stream, 0));
if (rtModelBindStream(run_context.model, stream, 0) != RT_ERROR_NONE) {
GELOGE(FAILED, "Call rt api failed.");
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
}
for (auto &node : graph->GetAllNodes()) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
@@ -437,7 +442,7 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG
size_t task_list_size_before = task_def_list.size();
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list));

GELOGI("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(),
GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(),
name.c_str(), type.c_str(), op_id, stream_id);
GE_TIMESTAMP_RESTART(GenerateTask);
auto ret = kernel_info_store->GenerateTask(*node, run_context, task_def_list);
@@ -659,14 +664,15 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) {

Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_single_stream) const {
vector<vector<OpDescPtr>> continuous_op_lists(1);
const set<string> label_op_types({LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX});
const set<string> separator_types(
{LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX, STREAMSWITCH, STREAMSWITCHN});
for (auto &op_desc : ops) {
bool attr_notask = false;
if (ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask) && attr_notask) {
continue;
}
string op_type = op_desc->GetType();
if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || label_op_types.count(op_type) != 0)) {
if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) {
continuous_op_lists.emplace_back(vector<OpDescPtr>());
} else {
continuous_op_lists.back().emplace_back(op_desc);
@@ -727,7 +733,6 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP
fp_op_desc = in_node_desc;
}
}
GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId());
break;
}
}
@@ -736,6 +741,7 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP
GELOGW("not find fp_op_desc.");
return SUCCESS;
}
GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId());
for (auto &node : graph->GetAllNodes()) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);


+ 71
- 0
src/ge/graph/execute/graph_execute.cc View File

@@ -86,6 +86,17 @@ Status GraphExecutor::SetGraphContext(GraphContextPtr graph_context_ptr) {
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Forward the user-specified dynamic batch sizes of a loaded model to ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [in] batch_num: dynamic batch sizes chosen by the user.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->SetDynamicSize(model_id, batch_num);
  if (ret != SUCCESS) {
    // Log the real error code instead of generic FAILED so the log agrees with the
    // returned status (matches the sibling wrappers GetAIPPInfo/GetOrigInputInfo).
    GELOGE(ret, "SetDynamicSize failed");
    return ret;
  }
  return SUCCESS;
}

void GraphExecutor::SetTrainFlag(bool is_train_graph) { train_graph_flag_ = is_train_graph; }

Status GraphExecutor::FreeInOutBuffer() {
@@ -476,7 +487,28 @@ Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::ve
GELOGE(ret, "GetDynamicBatchInfo failed.");
return ret;
}
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Query the currently selected dynamic shape of a loaded model via ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [out] batch_info: receives the current batch sizes.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetCurShape(model_id, batch_info);
  if (ret != SUCCESS) {
    // Log the real error code instead of generic FAILED so log and return value agree.
    GELOGE(ret, "GetCurShape failed");
    return ret;
  }
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Fetch the dynamic output shape attribute strings of a loaded model via ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [out] dynamic_output_shape_info: receives the attribute strings.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetModelAttr(model_id, dynamic_output_shape_info);
  if (ret != SUCCESS) {
    // Log the real error code instead of generic FAILED so log and return value agree.
    GELOGE(ret, "GetModelAttr failed");
    return ret;
  }
  return SUCCESS;
}

@@ -503,4 +535,43 @@ Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vecto

return SUCCESS;
}

///
/// @ingroup ge
/// @brief Query the static AIPP configuration of one model input via ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [in] index: input (Data node) index.
/// @param [out] aipp_info: receives the AIPP configuration.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
  auto manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(manager);
  const Status status = manager->GetAIPPInfo(model_id, index, aipp_info);
  if (status == SUCCESS) {
    return SUCCESS;
  }
  GELOGE(status, "GetAIPPInfo failed.");
  return status;
}

///
/// @ingroup ge
/// @brief Query the origin (pre-AIPP) input description of one model input via ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [in] index: input (Data node) index.
/// @param [out] orig_input_info: receives the origin input description.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) {
  auto manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(manager);
  const Status status = manager->GetOrigInputInfo(model_id, index, orig_input_info);
  if (status == SUCCESS) {
    return SUCCESS;
  }
  GELOGE(status, "GetOrigInputInfo failed.");
  return status;
}

///
/// @ingroup ge
/// @brief Query all AIPP input and output dims of one model input via ModelManager.
/// @param [in] model_id: id of the loaded model.
/// @param [in] index: input (Data node) index.
/// @param [out] input_dims: receives the AIPP input dims.
/// @param [out] output_dims: receives the AIPP output dims.
/// @return SUCCESS or the ModelManager error code.
///
Status GraphExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index,
                                                std::vector<InputOutputDims> &input_dims,
                                                std::vector<InputOutputDims> &output_dims) {
  auto manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(manager);
  const Status status = manager->GetAllAippInputOutputDims(model_id, index, input_dims, output_dims);
  if (status == SUCCESS) {
    return SUCCESS;
  }
  GELOGE(status, "GetAllAippInputOutputDims failed.");
  return status;
}

} // namespace ge

+ 11
- 0
src/ge/graph/execute/graph_execute.h View File

@@ -56,6 +56,8 @@ class GraphExecutor {

Status SetGraphContext(GraphContextPtr graph_context_ptr);

static Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num);

void SetTrainFlag(bool is_train_graph);

const std::vector<InputOutputDescInfo> &GetOutputsDesc() const { return outputs_desc_; }
@@ -71,6 +73,8 @@ class GraphExecutor {
vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats);

static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);

///
/// @ingroup ge
/// @brief Get dynamic batch_info
@@ -80,10 +84,17 @@ class GraphExecutor {
///
static Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);

static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info);

static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info);

static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
std::vector<uint32_t> &output_formats);
static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);

private:
Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data,


+ 1
- 1
src/ge/graph/label/while_label_maker.cc View File

@@ -98,7 +98,7 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) {
return FAILED;
}

NodePtr cond_out_node = cond_graph->FindNode(NODE_NAME_NET_OUTPUT);
NodePtr cond_out_node = cond_graph->FindFirstNodeMatchType(NETOUTPUT);
GE_CHECK_NOTNULL(cond_out_node);
OpDescPtr cond_out_desc = cond_out_node->GetOpDesc();
GE_CHECK_NOTNULL(cond_out_desc);


+ 90
- 0
src/ge/graph/load/new_model_manager/aipp_utils.cc View File

@@ -0,0 +1,90 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/load/new_model_manager/aipp_utils.h"

#include <string>

#include "common/debug/log.h"
#include "common/op/ge_op_utils.h"
#include "framework/common/util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/attr_utils.h"

#include "framework/common/debug/ge_log.h"

namespace ge {
// Copies a scalar field KEY from the protobuf AippOpParams accessor into the flat AippConfigInfo struct.
#define AIPP_CONVERT_TO_AIPP_INFO(KEY) aipp_info.KEY = aipp_params->KEY()

// Copies element INDEX of a repeated protobuf field KEY, but only when the field is non-empty,
// so an absent optional field leaves aipp_info.KEY at its default value.
#define AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(KEY, INDEX) \
do { \
if (aipp_params->KEY##_size() > 0) { \
aipp_info.KEY = aipp_params->KEY(INDEX); \
} \
} while (0)

// Flattens a parsed domi::AippOpParams protobuf into the framework-facing AippConfigInfo.
// Scalar fields are copied directly; for repeated fields only element 0 is taken.
// Fails fast (GE_CHECK_NOTNULL) when aipp_params is null; otherwise always returns SUCCESS.
Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info) {
GE_CHECK_NOTNULL(aipp_params);
// Source image geometry and crop parameters.
AIPP_CONVERT_TO_AIPP_INFO(input_format);
AIPP_CONVERT_TO_AIPP_INFO(src_image_size_w);
AIPP_CONVERT_TO_AIPP_INFO(src_image_size_h);
AIPP_CONVERT_TO_AIPP_INFO(crop);
AIPP_CONVERT_TO_AIPP_INFO(load_start_pos_w);
AIPP_CONVERT_TO_AIPP_INFO(load_start_pos_h);
AIPP_CONVERT_TO_AIPP_INFO(crop_size_w);
AIPP_CONVERT_TO_AIPP_INFO(crop_size_h);
// Resize and padding parameters.
AIPP_CONVERT_TO_AIPP_INFO(resize);
AIPP_CONVERT_TO_AIPP_INFO(resize_output_w);
AIPP_CONVERT_TO_AIPP_INFO(resize_output_h);
AIPP_CONVERT_TO_AIPP_INFO(padding);
AIPP_CONVERT_TO_AIPP_INFO(left_padding_size);
AIPP_CONVERT_TO_AIPP_INFO(right_padding_size);
AIPP_CONVERT_TO_AIPP_INFO(top_padding_size);
AIPP_CONVERT_TO_AIPP_INFO(bottom_padding_size);
// Channel-handling switches.
AIPP_CONVERT_TO_AIPP_INFO(csc_switch);
AIPP_CONVERT_TO_AIPP_INFO(rbuv_swap_switch);
AIPP_CONVERT_TO_AIPP_INFO(ax_swap_switch);
AIPP_CONVERT_TO_AIPP_INFO(single_line_mode);
// Color-space conversion matrix and bias vectors (repeated fields: take element 0).
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c2, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c2, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c2, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_2, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_2, 0);
// Per-channel mean / min values used for normalization.
AIPP_CONVERT_TO_AIPP_INFO(mean_chn_0);
AIPP_CONVERT_TO_AIPP_INFO(mean_chn_1);
AIPP_CONVERT_TO_AIPP_INFO(mean_chn_2);
AIPP_CONVERT_TO_AIPP_INFO(mean_chn_3);
AIPP_CONVERT_TO_AIPP_INFO(min_chn_0);
AIPP_CONVERT_TO_AIPP_INFO(min_chn_1);
AIPP_CONVERT_TO_AIPP_INFO(min_chn_2);
AIPP_CONVERT_TO_AIPP_INFO(min_chn_3);
// Per-channel reciprocal-of-variance values (repeated fields: take element 0).
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_0, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_1, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_2, 0);
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_3, 0);
return SUCCESS;
}
} // namespace ge

+ 48
- 0
src/ge/graph/load/new_model_manager/aipp_utils.h View File

@@ -0,0 +1,48 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_

#include <vector>

#include "common/ge_inner_error_codes.h"
#include "common/ge_types.h"
#include "graph/op_desc.h"
#include "proto/insert_op.pb.h"

using std::vector;

namespace ge {
const uint32_t kAippOriginInputIndex = 0;
const uint32_t kAippInfoNum = 6;
const uint32_t kAippInfoFormat = 0;
const uint32_t kAippInfoDataType = 1;
const uint32_t kAippInfoTensorName = 2;
const uint32_t kAippInfoTensorSize = 3;
const uint32_t kAippInfoDimNum = 4;
const uint32_t kAippInfoShape = 5;

// Stateless helper for converting AIPP (AI Pre-Processing) protobuf parameters
// into the framework-facing AippConfigInfo structure.
class AippUtils {
public:
AippUtils() = default;
~AippUtils() = default;

// Copies every static AIPP field (and element 0 of each repeated field) from
// aipp_params into aipp_info; fails fast if aipp_params is null.
static Status ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info);
};
} // namespace ge

#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_

+ 3
- 17
src/ge/graph/load/new_model_manager/data_dumper.cc View File

@@ -35,7 +35,6 @@
namespace {
const uint32_t kAicpuLoadFlag = 1;
const uint32_t kAicpuUnloadFlag = 0;
const uint32_t kTimeBufferLen = 80;
const char *const kDumpOutput = "output";
const char *const kDumpInput = "input";
const char *const kDumpAll = "all";
@@ -190,18 +189,6 @@ static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uin
}
}

static std::string GetCurrentTime() {
std::time_t now = std::time(nullptr);
std::tm *ptm = std::localtime(&now);
if (ptm == nullptr) {
return "";
}
char buffer[kTimeBufferLen] = {0};
// format: 20171122042550
std::strftime(buffer, kTimeBufferLen, "%Y%m%d%H%M%S", ptm);
return std::string(buffer);
}

Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
GELOGI("Start dump output");
if (inner_dump_info.is_task) {
@@ -384,10 +371,9 @@ Status DataDumper::LoadDumpInfo() {
}

aicpu::dump::OpMappingInfo op_mapping_info;
std::string time_now = GetCurrentTime();
GELOGI("Time is %s now", time_now.c_str());
op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + time_now + "/" +
std::to_string(device_id_) + "/");

auto dump_path = PropertiesManager::Instance().GetDumpOutputPath();
op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/");
op_mapping_info.set_model_name(model_name_);
op_mapping_info.set_model_id(model_id_);
op_mapping_info.set_flag(kAicpuLoadFlag);


+ 265
- 10
src/ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -80,6 +80,7 @@ const uint32_t kOutputNum = 1;
const uint32_t kTrueBranchStreamNum = 1;
const uint32_t kThreadNum = 16;
const uint32_t kAddrLen = sizeof(void *);
const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel";
const int kDecimal = 10;
const int kBytes = 8;
const uint32_t kDataMemAlignSizeCompare = 64;
@@ -579,6 +580,14 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
auto ret = DoTaskSink();
GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink");

/// In zero copy model, if a aicpu operator is connected to the first or last layer, before model execution,
/// the aicpu opertor needs to destroy history record, and update operator memory address.
/// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel().
if (MarkSpecifiedAicpuKernel() != SUCCESS) {
GELOGE(FAILED, "Mark model with specified aicpu operators failed.");
return FAILED;
}

// collect profiling for ge
if (ProfilingManager::Instance().ProfilingOn()) {
std::vector<ComputeGraphDescInfo> compute_graph_desc_info;
@@ -595,6 +604,82 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size

///
/// @ingroup ge
/// @brief Travel all nodes and determine if destruction is required.
/// @return bool
///
// Scans the whole compute graph for operators implemented on AI_CPU that are directly
// connected to a Data (model input) node or to the NetOutput node. Such models must be
// marked so their aicpu kernels' history records are destroyed before execution
// (see MarkSpecifiedAicpuKernel()). Returns true as soon as one qualifying operator is found.
bool DavinciModel::IsAicpuKernelConnectSpecifiedLayer() {
Graph graph = ge_model_->GetGraph();
ComputeGraphPtr compute_graph = GraphUtils::GetComputeGraph(graph);
auto all_nodes = compute_graph->GetAllNodes();
for (auto &node : all_nodes) {
GE_IF_BOOL_EXEC(node == nullptr, continue);
OpDescPtr op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, continue);

// Only operators whose imply type is AI_CPU are of interest; skip everything else.
int64_t imply_type = -1;
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, imply_type);
if (imply_type != static_cast<int64_t>(domi::ImplyType::AI_CPU)) {
continue;
}
GELOGD("Current operator imply type is %ld, name is %s.", imply_type, op_desc->GetName().c_str());

// Upstream check: is any producer feeding this node a Data (model input) op?
for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue);
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
auto peer_node = peer_out_data_anchor->GetOwnerNode();
GE_IF_BOOL_EXEC(peer_node == nullptr, continue);
auto peer_op_desc = peer_node->GetOpDesc();
GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
if (IsDataOp(peer_op_desc->GetType())) {
GELOGI("Mark specified aicpu operator connected to data.");
return true;
}
}
// Downstream check: does any consumer of this node's outputs belong to NetOutput?
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue);
auto peer_in_data_anchors = out_data_anchor->GetPeerInDataAnchors();
for (auto &peer_in_data_anchor : peer_in_data_anchors) {
GE_IF_BOOL_EXEC(peer_in_data_anchor == nullptr, continue);
auto peer_node = peer_in_data_anchor->GetOwnerNode();
GE_IF_BOOL_EXEC(peer_node == nullptr, continue);
auto peer_op_desc = peer_node->GetOpDesc();
GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
if (peer_op_desc->GetType() == NETOUTPUT) {
GELOGI("Mark specified aicpu operator connected to netoutput.");
return true;
}
}
}
}

// No aicpu operator touches the first or last layer.
return false;
}
///
/// @ingroup ge
/// @brief mark ge model with specified aicpu operators .
/// @return Status
///
///
/// @ingroup ge
/// @brief Tag the GeModel when it contains aicpu operators adjacent to Data/NetOutput,
/// so that later execution knows those kernels' history records must be destroyed.
/// @return Status (SUCCESS even if setting the attribute fails; only a warning is logged)
///
Status DavinciModel::MarkSpecifiedAicpuKernel() {
  const bool needs_destroy = IsAicpuKernelConnectSpecifiedLayer();
  if (!needs_destroy) {
    // Nothing to mark: no aicpu operator connects to a first or last layer.
    GELOGD("No specified aicpu operator that connects to data or netoutput.");
    return SUCCESS;
  }

  if (!ge::AttrUtils::SetBool(ge_model_, kNeedDestroySpecifiedAicpuKernel, needs_destroy)) {
    GELOGW("Add attr[%s] in ge model failed, and may lead to specified aicpu operators destruction failure.",
           kNeedDestroySpecifiedAicpuKernel);
  }
  GELOGI("Mark ge model success, the model has specified aicpu operators, ge model name: %s.",
         ge_model_->GetName().c_str());
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Travel all nodes and do some init.
/// @param [in] compute_graph: ComputeGraph to load.
/// @return Status
@@ -1002,8 +1087,6 @@ Status DavinciModel::BindInputQueue() {
/// @ingroup ge
/// @brief definiteness queue schedule, bind input queue to task.
/// @param [in] queue_id: input queue id from user.
/// @param [in] addr: Data Op output tensor address.
/// @param [in] size: Data Op output tensor size.
/// @return: 0 for success / others for failed
Status DavinciModel::CpuModelDequeue(uint32_t queue_id) {
GELOGI("Set CpuKernel model dequeue task enter.");
@@ -1266,10 +1349,76 @@ Status DavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batc
}
break;
}
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get AIPP input info
/// @param [in] index
/// @param [out] aipp_info
/// @return execute result
///
// Retrieves the static AIPP configuration bound to the Data node at `index`.
// Returns PARAM_INVALID for an out-of-range index, GE_AIPP_NOT_EXIST when the Data node
// carries no AIPP attribute, and GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY for dynamic-mode AIPP.
Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) {
GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
OpDescPtr data_op = data_op_list_[index];
// A Data node without ATTR_NAME_AIPP has no AIPP configured for this input.
if (!data_op->HasAttr(ATTR_NAME_AIPP)) {
GELOGE(GE_AIPP_NOT_EXIST, "GetAIPPInfo: there is not AIPP related with index %u.", index);
return GE_AIPP_NOT_EXIST;
}

std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams());
GE_CHECK_NOTNULL(aipp_params);

// Decode the named-attrs blob stored on the Data node back into an AippOpParams protobuf.
ge::GeAttrValue::NAMED_ATTRS aipp_attr;
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
"Data node do not contain param aipp!");
GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed");
GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u",
data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank());
// Dynamic AIPP has no fixed configuration to report; querying it is not supported here.
if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) {
GELOGI("GetAIPPInfo, dynamic Aipp is not support to query temporarily.");
return GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY;
}

// Flatten the protobuf into the caller-facing AippConfigInfo struct.
GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info),
"convert aipp params to aipp config info failed");

return SUCCESS;
}

///
/// @ingroup ge
/// @brief Record the user-specified dynamic batch sizes for this model.
/// @param [in] batch_num: dynamic batch sizes chosen by the user (may be empty).
///
void DavinciModel::SetDynamicSize(const std::vector<uint64_t> &batch_num) {
  if (batch_num.empty()) {
    GELOGD("User has not set dynamic data");  // fixed typo: "dynammic" -> "dynamic"
  }
  // Replace any previously recorded sizes in one step instead of clear() + per-element copy.
  batch_size_.assign(batch_num.begin(), batch_num.end());
}

///
/// @ingroup ge
/// @brief Append the currently recorded dynamic batch sizes to batch_info.
/// @param [out] batch_info: receives one entry per recorded batch size.
///
void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info) {
  if (batch_size_.empty()) {
    GELOGD("User does not set dynamic size");
    return;
  }
  // Log once instead of once per element (the original GELOGI sat inside the copy loop),
  // and copy all recorded sizes in a single bulk insert.
  GELOGI("Start to get current shape");
  batch_info.insert(batch_info.end(), batch_size_.begin(), batch_size_.end());
}

///
/// @ingroup ge
/// @brief Collect the dynamic output dims attribute from every NetOutput op of this model.
/// @param [out] dynamic_output_shape_info: receives the attribute strings when present.
///
void DavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
  for (auto &output_op : output_op_list_) {
    if (output_op->GetType() == NETOUTPUT) {
      GELOGI("Start to get dynamic output dims attr");
      const bool got_attr = AttrUtils::GetListStr(output_op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info);
      if (!got_attr) {
        GELOGD("Can not get dynamic output dims attr");
      }
    }
  }
}

Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &input_formats,
@@ -1299,7 +1448,7 @@ Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInf
}

Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) {
for (std::size_t index = 0; index < data_op_list_.size(); ++index) {
for (size_t index = 0; index < data_op_list_.size(); ++index) {
InputOutputDescInfo input;
GE_CHECK_NOTNULL(data_op_list_[index]);
GE_CHECK_NOTNULL(data_op_list_[index]->GetInputDescPtr(0));
@@ -1495,7 +1644,14 @@ Status DavinciModel::SinkModelProfile() {
// Model Header
string name = this->Name();
int32_t name_len = name.size();
reporter_data.deviceId = device_id_;
// phy device id
uint32_t phy_device_id = 0;
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id);
return FAILED;
}
reporter_data.deviceId = phy_device_id;
reporter_data.data = (unsigned char *)&name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.",
@@ -1671,7 +1827,13 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");
// device id
reporter_data.deviceId = device_id_;
uint32_t phy_device_id = 0;
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id);
return FAILED;
}
reporter_data.deviceId = phy_device_id;

// Model Header
string name = this->Name();
@@ -2637,8 +2799,10 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64
}

if (input_size > op_size) {
GELOGE(FAILED, "Input size [%u] can not be bigger than op size [%u]", input_size, op_size);
return false;
GELOGW(
"Input size [%u] is bigger than om size need [%u],"
"MAY cause inference result ERROR, please check model input",
input_size, op_size);
}
bool is_dynamic_aipp = false;
for (const auto &op_desc : data_op_list_) {
@@ -2707,14 +2871,18 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp
///
Status DavinciModel::UpdateIoTaskArgs(const map<uint32_t, pair<int64_t, void *>> &data_info, bool is_input,
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label) {
string input_or_output = "input";
is_input ? input_or_output = "input" : input_or_output = "output";
if (blobs.size() != data_info.size()) {
GELOGE(FAILED, "Blobs not match: blobs=%zu datas=%zu", blobs.size(), data_info.size());
GELOGE(FAILED, "Verify %s data num failed: model requires %zu, but user actually feeds %zu",
input_or_output.c_str(), data_info.size(), blobs.size());
return FAILED;
}

for (const auto &data : data_info) {
if (data.first >= blobs.size()) { // check data index.
GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u", blobs.size(), data_info.size(), data.first);
GELOGE(FAILED, "Verify %s data num failed: can not find No.%zu data, because user only feeds %zu",
input_or_output.c_str(), data.first, blobs.size());
return FAILED;
}
int64_t size = data.second.first; // size of tensor.
@@ -3262,7 +3430,7 @@ void DavinciModel::PushHcclStream(rtStream_t value) {
void DavinciModel::CreateHcclFollowStream(rtStream_t stream, int64_t remain_cap) {
std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_);
capacity_of_stream_.emplace_back(make_pair(stream, remain_cap));
};
}

void DavinciModel::ReuseHcclFollowStream(int64_t remain_cap, int64_t &index) {
std::lock_guard<std::mutex> lock(capacity_of_stream_mutex_);
@@ -3320,4 +3488,91 @@ Status DavinciModel::GetComputeGraphInfo(std::vector<ComputeGraphDescInfo> &comp
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get the origin (pre-AIPP) input description of the Data node at `index`.
/// @param [in] index: index into data_op_list_.
/// @param [out] orig_input_info: receives format / data_type / dim_num of the origin input.
/// @return SUCCESS / PARAM_INVALID / GE_AIPP_NOT_EXIST
///
Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) {
  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
  OpDescPtr data_op = data_op_list_[index];
  if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) {
    GELOGE(GE_AIPP_NOT_EXIST, "GetOrigInputInfo: there is not AIPP related with index %u.", index);
    return GE_AIPP_NOT_EXIST;
  }

  vector<std::string> inputs;
  if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) {
    std::string input = inputs[kAippOriginInputIndex];
    GELOGI("GetOrigInputInfo: origin input str: %s", input.c_str());
    std::vector<std::string> infos = ge::StringUtils::Split(input, ':');
    if (infos.size() != kAippInfoNum) {
      // Bail out before indexing: the original code warned but still read
      // infos[kAippInfoFormat] etc., which is out of bounds for a malformed string.
      GELOGW("origin input str is invalid.");
      return PARAM_INVALID;
    }
    orig_input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]);
    orig_input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]);
    orig_input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal);
  }

  return SUCCESS;
}

// Parses one serialized AIPP tensor description (colon-separated fields:
// format : data_type : tensor_name : tensor_size : dim_num : d0,d1,...) into dims_info.
// Malformed strings (wrong field count) are skipped with a warning.
void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info) {
  GELOGI("ParseAIPPInfo: origin str: %s", in_out_info.c_str());
  std::vector<std::string> infos = ge::StringUtils::Split(in_out_info, ':');
  if (infos.size() != kAippInfoNum) {
    // Bail out before indexing: the original code warned but still read
    // infos[kAippInfoTensorName] etc., which is out of bounds for a malformed string.
    GELOGW("origin input str is invalid.");
    return;
  }
  dims_info.name = infos[kAippInfoTensorName];
  dims_info.size = std::strtol(infos[kAippInfoTensorSize].c_str(), nullptr, kDecimal);
  dims_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal);

  // The shape field is itself a comma-separated dim list; empty segments are ignored.
  std::vector<std::string> dims = ge::StringUtils::Split(infos[kAippInfoShape], ',');
  for (const auto &dim : dims) {
    if (dim.empty()) {
      continue;
    }
    dims_info.dims.emplace_back(std::strtol(dim.c_str(), nullptr, kDecimal));
  }
}

///
/// @ingroup ge
/// @brief Get all AIPP input and output dims recorded on the Data node at `index`.
/// @param [in] index: index into data_op_list_.
/// @param [out] input_dims: parsed dims of each recorded AIPP input.
/// @param [out] output_dims: parsed dims of each recorded AIPP output.
/// @return SUCCESS / PARAM_INVALID / GE_AIPP_NOT_EXIST
///
Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
                                               std::vector<InputOutputDims> &output_dims) {
  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
  OpDescPtr data_op = data_op_list_[index];
  if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) {
    GELOGE(GE_AIPP_NOT_EXIST, "GetAllAippInputOutputDims: there is not AIPP related with index %u.", index);
    return GE_AIPP_NOT_EXIST;
  }

  vector<std::string> inputs;
  if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) {
    // %zu matches inputs.size() (size_t); the original %u was a printf-format mismatch.
    GELOGI("GetAllAippInputOutputDims: Data: %s has %zu related aippInfo.", data_op->GetName().c_str(),
           inputs.size());
    for (const auto &it : inputs) {  // const ref: avoid copying each string per iteration
      InputOutputDims input_info;
      ParseAIPPInfo(it, input_info);
      input_dims.emplace_back(input_info);
      GELOGD("GetAllAippInputOutputDims Aipp origin input dims info: %s", it.c_str());

      ConstGeTensorDescPtr data_input_desc = data_op->GetInputDescPtr(kDataIndex);
      GE_CHECK_NOTNULL(data_input_desc);  // guard: the original dereferenced without a null check
      int64_t data_input_size = 0;        // initialize in case GetSize fails
      (void)TensorUtils::GetSize(*data_input_desc, data_input_size);
      // Format specifiers fixed: %u for uint32_t index, %zu for size_t dim_num, %ld for int64_t size.
      GELOGD(
        "GetAllAippInputOutputDims related Data[%u]: tensor_name is %s, dim_num is %zu, tensor_size: %ld, format: %s, "
        "data_type: %s, shape: %s .",
        index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
        TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),
        TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(),
        formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str());
    }
  }

  vector<std::string> outputs;
  if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) {
    for (const auto &it : outputs) {
      InputOutputDims output_info;
      ParseAIPPInfo(it, output_info);
      output_dims.emplace_back(output_info);
      GELOGD("GetAllAippInputOutputDims Aipp output dims info: %s", it.c_str());
    }
  }

  return SUCCESS;
}

} // namespace ge

+ 28
- 0
src/ge/graph/load/new_model_manager/davinci_model.h View File

@@ -34,6 +34,7 @@
#include "graph/load/new_model_manager/data_dumper.h"
#include "graph/load/new_model_manager/data_inputer.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/new_model_manager/aipp_utils.h"
#include "graph/load/new_model_manager/zero_copy_task.h"
#include "graph/model.h"
#include "graph/node.h"
@@ -294,6 +295,19 @@ class DavinciModel {
///
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info);

void GetCurShape(std::vector<int64_t> &batch_info);

void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);

///
/// @ingroup ge
/// @brief Get AIPP input info
/// @param [in] index
/// @param [out] aipp_info
/// @return execute result
///
Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info);

///
/// @ingroup ge
/// @brief Get model_id.
@@ -407,6 +421,8 @@ class DavinciModel {
void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args,
size_t size, size_t offset);

void SetDynamicSize(const std::vector<uint64_t> &batch_num);

bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; }

void SetProfileTime(ModelProcStage stage, int64_t endTime = 0);
@@ -452,6 +468,10 @@ class DavinciModel {
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr(vector<void *> &io_addrs, uint32_t args_offset);

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);

private:
// memory address of weights
uint8_t *weights_mem_base_;
@@ -560,6 +580,10 @@ class DavinciModel {

void UnbindTaskSinkStream();

bool IsAicpuKernelConnectSpecifiedLayer();

Status MarkSpecifiedAicpuKernel();

///
/// @ingroup ge
/// @brief Travel all nodes and do some init.
@@ -741,6 +765,8 @@ class DavinciModel {
Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs);

void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info);

bool is_model_has_inited_;
uint32_t model_id_;
uint32_t runtime_model_id_;
@@ -856,6 +882,8 @@ class DavinciModel {
void *args_host_ = nullptr;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;

vector<uint64_t> batch_size_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

+ 130
- 4
src/ge/graph/load/new_model_manager/model_manager.cc View File

@@ -22,6 +22,8 @@
#include "common/profiling/profiling_manager.h"
#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "graph/debug/ge_attr_define.h"
#include "framework/common/util.h"
#include "graph/load/new_model_manager/davinci_model.h"
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "model/ge_root_model.h"
@@ -31,6 +33,7 @@ thread_local uint32_t device_count = 0;
namespace {
const int kCmdParSize = 2;
const int kDumpCmdPairSize = 2;
const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel";
} // namespace

std::shared_ptr<ModelManager> ModelManager::GetInstance() {
@@ -39,7 +42,10 @@ std::shared_ptr<ModelManager> ModelManager::GetInstance() {
return instance_ptr;
}

ModelManager::ModelManager() { max_model_id_ = 0; }
// Construct the manager with its id counter and session-id bias both reset.
ModelManager::ModelManager() : max_model_id_(0), session_id_bias_(0) {}

Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) {
STR_FWK_OP_KERNEL param_base = {};
@@ -69,6 +75,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
GE_CHK_RT(rtFree(aicpu_kernel_addr)); return FAILED;)
uint64_t kernel_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(aicpu_kernel_addr));
param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr;
// In the scene of loading once and running many times, the kernel needs to be destroyed many times,
// and cannot be removed from the kernel map.
}
}

@@ -213,6 +221,13 @@ Status ModelManager::SetDevice(int32_t deviceId) const {
return SUCCESS;
}

// Forward the configured dynamic batch sizes to the loaded model instance.
ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) {
  auto model = GetModel(model_id);
  GE_CHECK_NOTNULL(model);
  model->SetDynamicSize(batch_num);
  return SUCCESS;
}

ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const shared_ptr<ge::GeRootModel> &ge_root_model,
const shared_ptr<ModelListener> &listener) {
auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model);
@@ -616,7 +631,7 @@ Status ModelManager::HandleDumpCommand(const Command &command) {
return FAILED;
}
if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') {
dump_path += "/";
dump_path = dump_path + "/" + CurrentTimeInStr() + "/";
}
GELOGI("dump status = %s.", dump_path.c_str());

@@ -647,7 +662,6 @@ Status ModelManager::HandleDumpCommand(const Command &command) {
Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) {
auto hybrid_model = GetHybridModel(model_id);
if (hybrid_model != nullptr) {
// TODO hybrid use dynamic memory allocation
max_size = 0;
return SUCCESS;
}
@@ -694,6 +708,20 @@ Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<st
return davinci_model->GetDynamicBatchInfo(batch_info);
}

// Query the model's current shape; delegates to the DavinciModel instance.
Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) {
  auto model = GetModel(model_id);
  GE_CHECK_NOTNULL(model);
  model->GetCurShape(batch_info);
  return SUCCESS;
}

// Fetch the dynamic-output-shape attribute strings from the loaded model.
Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) {
  auto model = GetModel(model_id);
  GE_CHECK_NOTNULL(model);
  model->GetModelAttr(dynamic_output_shape_info);
  return SUCCESS;
}

Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc,
std::vector<uint32_t> &inputFormats,
@@ -705,6 +733,52 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
}

///
/// @ingroup ge
/// @brief Query the AIPP configuration attached to one model input.
/// @param [in] model_id  id of the loaded model
/// @param [in] index  input index whose AIPP config is requested
/// @param [out] aipp_info  filled with the AIPP configuration
/// @return execute result
///
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
  auto model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", model_id);
  return model->GetAIPPInfo(index, aipp_info);
}

///
/// @ingroup ge
/// @brief Generate a process-unique session id: microsecond wall-clock timestamp * 100 + rotating bias.
/// @param [out] session_id  generated session id
/// @return SUCCESS, or INTERNAL_ERROR when the wall clock cannot be read
///
Status ModelManager::GenSessionId(uint64_t &session_id) {
  // Serialize generation so the bias counter is updated atomically.
  std::lock_guard<std::mutex> lock(session_id_create_mutex_);

  struct timeval tv;
  if (gettimeofday(&tv, nullptr) != 0) {
    GELOGE(INTERNAL_ERROR, "Failed to get current time.");
    return INTERNAL_ERROR;
  }
  // Widen before multiplying: tv_sec * 1000000 evaluated in time_t arithmetic
  // can overflow (UB) on platforms where time_t is 32 bits.
  session_id = static_cast<uint64_t>(tv.tv_sec) * 1000000UL + static_cast<uint64_t>(tv.tv_usec);  // 1000000us

  // Rotate a small bias so ids generated within the same microsecond still differ; max bias is 100.
  session_id_bias_++;
  session_id_bias_ = session_id_bias_ % 100;
  session_id = session_id * 100 + session_id_bias_;

  GELOGD("Generate new session id: %lu.", session_id);
  return SUCCESS;
}

// Stamp a freshly generated session id onto the model's attribute store so each
// load gets a distinct id (see callers in LoadModelOffline / LoadModelWithQ).
// @param [in,out] davinci_model  loaded model whose GeModel attribute is updated
// @param [in] session_id  id to record
// @return SUCCESS (attribute-set failure is tolerated and only logged)
Status ModelManager::UpdateSessionId(std::shared_ptr<DavinciModel> &davinci_model, uint64_t session_id) {
GeModelPtr ge_model_current = davinci_model->GetGeModel();
GE_CHECK_NOTNULL(ge_model_current);
// Best-effort: a failed attribute write must not abort the model load, so warn only.
if (!ge::AttrUtils::SetInt(ge_model_current, ge::MODEL_ATTR_SESSION_ID, static_cast<int64_t>(session_id))) {
GELOGW("Set attr[%s] failed in updating session_id.", MODEL_ATTR_SESSION_ID.c_str());
}

GELOGD("Update session id: %lu.", session_id);
return SUCCESS;
}

Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener,
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID,
@@ -747,6 +821,15 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
}
davinci_model->SetDeviceId(device_id);

/// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail.
/// These session_ids come from the same model, so the values of session_id are the same.
/// Update session_id for infer in load model to avoid the same session_id.
uint64_t new_session_id;
ret = GenSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Generate session_id for infer failed.");
ret = UpdateSessionId(davinci_model, new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Update session_id for infer failed.");

ret = davinci_model->Init(dev_ptr, mem_size, weight_ptr, weight_size);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "DavinciInit failed.");

@@ -805,9 +888,17 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
return ret;
}

/// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail.
/// These session_ids come from the same model, so the values of session_id are the same.
/// Update session_id for infer in load model to avoid the same session_id.
uint64_t new_session_id;
ret = GenSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed.");
ret = UpdateSessionId(davinci_model, new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed.");

GenModelId(&model_id);
davinci_model->SetId(model_id);
davinci_model->SetSessionId(model_id);
ret = davinci_model->SetQueIds(input_queue_ids, output_queue_ids);
if (ret != SUCCESS) {
GELOGE(ret, "set model queue ids failed.");
@@ -840,6 +931,22 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id);

GeModelPtr ge_model_current = davinci_model->GetGeModel();
bool need_destroy_aicpu_kernel = false;
bool result = ge::AttrUtils::GetBool(ge_model_current, kNeedDestroySpecifiedAicpuKernel, need_destroy_aicpu_kernel);
if (result && need_destroy_aicpu_kernel) {
GELOGI("Get attr %s successfully, start to destroy specified aicpu kernel.", kNeedDestroySpecifiedAicpuKernel);

// Zero copy is enabled by default, no need to judge.
uint64_t session_id_davinci = davinci_model->GetSessionId();
uint32_t model_id_davinci = davinci_model->GetModelId();
Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci);
if (status != SUCCESS) {
GELOGW("Destroy specified aicpu kernel failed, session id is %lu, model id is %u.", session_id_davinci,
model_id_davinci);
}
}

Status status = davinci_model->NnExecute(stream, async_mode, input_data, output_data);
if (status == SUCCESS) {
GELOGI("Execute model %u success.", model_id);
@@ -920,4 +1027,23 @@ void ModelManager::GenModelId(uint32_t *id) {
std::lock_guard<std::mutex> lock(map_mutex_);
*id = ++max_model_id_;
}

// Look up the model and delegate the original-input-info query to it.
Status ModelManager::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) {
  auto model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "GetOrigInputInfo failed, invalid model_id is %u.",
                         model_id);
  return model->GetOrigInputInfo(index, orig_input_info);
}

// Look up the model and delegate the AIPP input/output dims query to it.
Status ModelManager::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index,
                                               std::vector<InputOutputDims> &input_dims,
                                               std::vector<InputOutputDims> &output_dims) {
  auto model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID,
                         "GetAllAippInputOutputDims failed, invalid model_id is %u.", model_id);
  return model->GetAllAippInputOutputDims(index, input_dims, output_dims);
}

} // namespace ge

+ 27
- 1
src/ge/graph/load/new_model_manager/model_manager.h View File

@@ -17,6 +17,7 @@
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_

#include <model/ge_root_model.h>
#include <pthread.h>
#include <stdint.h>
#include <algorithm>
@@ -25,7 +26,6 @@
#include <set>
#include <string>
#include <vector>
#include <model/ge_root_model.h>
#include "cce/aicpu_engine_struct.h"
#include "common/ge_inner_error_codes.h"
#include "common/ge_types.h"
@@ -189,6 +189,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);

///
/// @ingroup ge
/// @brief Get AIPP info
/// @param [in] model_id
/// @param [in] index
/// @param [out] aipp_info
/// @return execute result
///
ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);

///
/// @ingroup domi_ome
/// @brief set model input and output size zero copy
/// @param [in] model_id model id
@@ -202,8 +212,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats);

ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info);

ge::Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info);

ge::Status SetDevice(int32_t deviceId) const;

ge::Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num);

///
/// @ingroup domi_ome
/// @brief Get model according to given id
@@ -226,6 +242,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

ge::Status DestroyAicpuSessionForInfer(uint32_t model_id);

ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);

ge::Status GenSessionId(uint64_t &session_id);

ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims,
std::vector<InputOutputDims> &output_dims);

private:
///
/// @ingroup domi_ome
@@ -253,6 +276,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
ge::Status DeleteModel(uint32_t id);

void GenModelId(uint32_t *id);
ge::Status UpdateSessionId(std::shared_ptr<DavinciModel> &davinci_model, uint64_t session_id);

std::map<uint32_t, std::shared_ptr<DavinciModel>> model_map_;
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_;
@@ -260,6 +284,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
uint32_t max_model_id_;
std::mutex map_mutex_;
std::mutex sess_ids_mutex_;
std::mutex session_id_create_mutex_;
uint64_t session_id_bias_;
std::set<uint64_t> sess_ids_;
};
} // namespace ge


+ 1
- 0
src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc View File

@@ -177,6 +177,7 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode
rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
(void)rtStreamDestroy(stream);
return RT_FAILED;
}
GELOGD("hccl_stream addr is=%p", stream);


+ 28
- 5
src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc View File

@@ -67,6 +67,18 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
return FAILED;
}

const auto &ext_info = kernel_ex_def.kernel_ext_info();
if (!ext_info.empty()) {
auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;)
rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;)
}

// 2.1 get loop cond variable for tensor array write
uint64_t step_id_addr = 0;
OpDescPtr step_id_node = davinci_model_->GetVariableOp(NODE_NAME_GLOBAL_STEP);
@@ -77,7 +89,9 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
}
}

auto session_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID;
auto session_id = davinci_model_->GetSessionId();
fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id;

// 2.2 Collect aicpu kernel
uint64_t kernel_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.kernelID;
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS,
@@ -97,8 +111,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr =
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(workspace_base_addr));
fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoNum = 0;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size();
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_);

rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;)
@@ -149,8 +163,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = workspace_base_addr;
fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = input_output_addr;
fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoNum = 0;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size();
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_);

// 4. Create session
GE_CHECK_NOTNULL(ModelManager::GetInstance());
@@ -291,6 +305,15 @@ Status KernelExTaskInfo::Release() {
input_output_addr_ = nullptr;
}
}
if (ext_info_addr_ != nullptr) {
rtError_t rt_ret = rtFree(ext_info_addr_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree ext_info_addr[%p] error, ret: 0x%X", ext_info_addr_, rt_ret);
ret = FAILED;
} else {
ext_info_addr_ = nullptr;
}
}
return ret;
}



+ 2
- 0
src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h View File

@@ -31,6 +31,7 @@ class KernelExTaskInfo : public TaskInfo {
davinci_model_(nullptr),
kernel_buf_(nullptr),
input_output_addr_(nullptr),
ext_info_addr_(nullptr),
dump_args_(nullptr) {}

~KernelExTaskInfo() override {}
@@ -64,6 +65,7 @@ class KernelExTaskInfo : public TaskInfo {
DavinciModel *davinci_model_;
void *kernel_buf_;
void *input_output_addr_;
void *ext_info_addr_;
void *dump_args_;
OpDescPtr op_desc_ = nullptr;
uint32_t args_offset_ = 0;


+ 29
- 0
src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -414,6 +414,7 @@ Status KernelTaskInfo::Release() {
FreeRtMem(&custom_info_.output_descs);
FreeRtMem(&custom_info_.output_addrs);
FreeRtMem(&custom_info_.attr_handle);
FreeRtMem(&aicpu_ext_info_addr_);

if (ctx_.argsOffset != nullptr) {
delete[] ctx_.argsOffset;
@@ -792,6 +793,16 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size());

// malloc device memory for args
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
@@ -823,6 +834,24 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return SUCCESS;
}

// Copy the serialized aicpu ext-info blob to device memory (HBM).
// The device buffer is kept in the member aicpu_ext_info_addr_ and released
// later by Release() via FreeRtMem; on the rtMemcpy failure path the buffer
// therefore stays owned by the member rather than being freed here.
// @param [in] ext_info  serialized ext info from the kernel def; empty means nothing to do
// @return SUCCESS, or FAILED on runtime alloc/copy error
Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
if (ext_info.empty()) {
return SUCCESS;
}
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;
}
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;
}

return SUCCESS;
}

Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_data_addrs,
const std::vector<void *> &output_data_addrs,
const std::vector<::tagCcAICPUTensor> &input_descs,


+ 5
- 0
src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h View File

@@ -100,6 +100,8 @@ class KernelTaskInfo : public TaskInfo {

Status InitAicpuTask(uint32_t op_index, const domi::KernelDef &kernel_def);

Status InitAicpuTaskExtInfo(const std::string &ext_info);

Status StoreInputOutputTensor(const std::vector<void *> &input_data_addrs,
const std::vector<void *> &output_data_addrs,
const std::vector<::tagCcAICPUTensor> &input_descs,
@@ -152,6 +154,9 @@ class KernelTaskInfo : public TaskInfo {
DavinciModel *davinci_model_;
uint32_t args_offset_ = 0;

// aicpu ext_info device mem
void *aicpu_ext_info_addr_ = nullptr;

// For super kernel
uint32_t skt_id_;
std::string stub_func_name_;


+ 5
- 3
src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc View File

@@ -133,10 +133,11 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;)
rt_ret =
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); return FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;)
rt_ret = rtKernelConfigTransArg(hbm_nav_table_addr, sizeof(uint64_t), 0, &hbm_nav_table_addr_pys);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. error: 0x%X", rt_ret);
return FAILED;)
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;)

GELOGD("SKT: hbm_nav_table_addr %p, hbm_nav_table_addr_pys %p", hbm_nav_table_addr, hbm_nav_table_addr_pys);
// Create the necessary metadata for the super kernel
@@ -159,7 +160,8 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;)
rt_ret =
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); return FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;)
// Create the necessary metadata for the super kernel
h = new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim);
}


+ 36
- 31
src/ge/graph/manager/graph_manager.cc View File

@@ -41,20 +41,24 @@
#include "graph/ge_local_context.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/util/rt_context_util.h"
#include "graph/partition/dynamic_shape_partition.h"
#include "graph/passes/addn_pass.h"
#include "graph/passes/atomic_addr_clean_pass.h"
#include "graph/passes/cast_remove_pass.h"
#include "graph/passes/common_subexpression_elimination_pass.h"
#include "graph/passes/compile_nodes_pass.h"
#include "graph/passes/cond_remove_pass.h"
#include "graph/passes/constant_folding_pass.h"
#include "graph/passes/constant_fuse_same_pass.h"
#include "graph/passes/control_trigger_pass.h"
#include "graph/passes/ctrl_edge_transfer_pass.h"
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/identify_reference_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/iterator_op_pass.h"
#include "graph/passes/link_gen_mask_nodes_pass.h"
#include "graph/passes/merge_pass.h"
@@ -63,10 +67,11 @@
#include "graph/passes/permute_pass.h"
#include "graph/passes/prune_pass.h"
#include "graph/passes/replace_with_empty_const_pass.h"
#include "graph/passes/reshape_remove_pass.h"
#include "graph/passes/reshape_recovery_pass.h"
#include "graph/passes/reshape_remove_pass.h"
#include "graph/passes/same_transdata_breadth_fusion_pass.h"
#include "graph/passes/subgraph_pass.h"
#include "graph/passes/switch_data_edges_bypass.h"
#include "graph/passes/switch_dead_branch_elimination.h"
#include "graph/passes/switch_logic_remove_pass.h"
#include "graph/passes/switch_op_pass.h"
@@ -76,14 +81,10 @@
#include "graph/passes/transop_symmetry_elimination_pass.h"
#include "graph/passes/transop_without_reshape_fusion_pass.h"
#include "graph/passes/transpose_transdata_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/variable_ref_useless_control_out_delete_pass.h"
#include "graph/passes/cond_remove_pass.h"
#include "graph/passes/ctrl_edge_transfer_pass.h"
#include "graph/partition/dynamic_shape_partition.h"
#include "graph/utils/tensor_adapter.h"
#include "inc/pass_manager.h"
#include "init/gelib.h"
@@ -369,14 +370,15 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
GM_RUN_AND_DUMP("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph,
session_id);
GM_RUN_AND_DUMP("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph);

GM_RUN_AND_DUMP("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner);
GM_RUN_AND_DUMP("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph);
GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph);
if (IsTailingOptimization()) {
GM_RUN_AND_DUMP("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph);
}
GM_RUN_AND_DUMP("Optimize1", OptimizeStage1, compute_graph);
GM_RUN_AND_DUMP("InferShape2", compute_graph->InferShapeInNeed);
// TODO: to be delete
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip != nullptr) {
PassManager graph_pass;
@@ -423,7 +425,11 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
return ret;
}
}
ret = LoadGraph(ge_root_model, graph_node);
if (!graph_node->IsAsync()) {
ret = LoadGraph(ge_root_model, graph_node);
} else {
ret = LoadGraphAsync(ge_root_model, graph_node);
}
if (ret != SUCCESS) {
GELOGE(ret, "LoadGraph Failed.");
return ret;
@@ -432,7 +438,11 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId());
} else if (!graph_node->GetLoadFlag()) {
GeRootModelPtr ge_root_model_ptr = graph_node->GetGeRootModel();
ret = LoadGraph(ge_root_model_ptr, graph_node);
if (!graph_node->IsAsync()) {
ret = LoadGraph(ge_root_model_ptr, graph_node);
} else {
ret = LoadGraphAsync(ge_root_model_ptr, graph_node);
}
if (ret != SUCCESS) {
GELOGE(ret, "LoadGraph Failed.");
return ret;
@@ -587,7 +597,7 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vector<GeTenso
GELOGI("[RunGraph] start to run graph, graph_id = %u, is_train_graph: %d", graph_id, GetTrainFlag());

if (inputs.empty()) {
GELOGI("[RunGraph] initilize sub graph has no inputs.");
GELOGI("[RunGraph] initialize sub graph has no inputs");
}

// find graph
@@ -689,7 +699,7 @@ Status GraphManager::GenerateInfershapeGraph(GraphId &graph_id) {
}

Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs,
GeRootModelPtr &ge_root_model) {
GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) {
GELOGI("[BuildGraph] start to build graph, graph_id=%u.", graph_id);
if (inputs.empty()) {
GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs");
@@ -712,15 +722,10 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen
GELOGE(GE_GRAPH_ALREADY_RUNNING, "[BuildGraph] graph already running, graph id = %u", graph_node->GetGraphId());
return GE_GRAPH_ALREADY_RUNNING;
}
graph_node->SetAsync(async);
// set graph's run flag
graph_node->SetRunFlag(true);

struct timeval tv;
if (gettimeofday(&tv, nullptr) != 0) {
GELOGE(INTERNAL_ERROR, "get the time of day failed.");
return INTERNAL_ERROR;
}
uint64_t session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us
ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id);
graph_node->SetRunFlag(false);
if (ret != SUCCESS) {
@@ -954,6 +959,9 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti
}
options_.enable_print_op_pass = true;
ret = ParseOption(options, ENABLE_PRINT_OP_PASS, options_.enable_print_op_pass);

options_.is_single_op = false;
ret = ParseOption(options, SINGLE_OP_FLAG, options_.is_single_op);
GE_IF_BOOL_EXEC(ret != SUCCESS,
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.enablePrintOpPass value is invalid, must be 0 or 1.");
return GE_GRAPH_OPTIONS_INVALID);
@@ -1555,6 +1563,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
}
PassManager after_merge_passes;
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass));
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass));
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass",
new (std::nothrow) CommonSubexpressionEliminationPass));
@@ -1579,8 +1589,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
GE_IF_BOOL_EXEC(options == "default" || options == "1", GELOGI("turn on variable accelerator");
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::VariableOpPass",
new (std::nothrow) VariableOpPass(&var_acc_ctrl_))))
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage1_1::TransOpDepthFusionPass", new (std::nothrow) TransOpDepthFusionPass))
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpWithoutReshapeFusionPass",
new (std::nothrow) TransOpWithoutReshapeFusionPass))
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass",
@@ -1660,7 +1668,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
GELOGE(ret, "Run identity remove pass for preprocess failed, ret:%u.", ret);
return ret;
}

return SUCCESS;
}

@@ -1688,10 +1695,6 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass);
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass);
names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass);
HcclGroupPass hccl_group_pass;
if (IsTailingOptimization()) {
names_to_passes.emplace_back("HcclGroupPass", &hccl_group_pass);
}
GE_TIMESTAMP_START(names_to_passes);
ret = GEPass(compute_graph).Run(names_to_passes);
GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses");
@@ -1708,19 +1711,12 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {

PassManager pass_for_control_attr_optimize;
if (options_.train_graph_flag) {
// TODO: to be delete
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip == nullptr) {
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::FlowCtrlPass",
new (std::nothrow) FlowCtrlPass))
}
}
// TODO: to be delete
const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip == nullptr) {
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass",
new (std::nothrow) SubgraphPass));
}

GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass",
new (std::nothrow) MultiBatchPass))
@@ -1739,6 +1735,14 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::AtomicAddrCleanPass",
new (std::nothrow) AtomicAddrCleanPass))

const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION");
if (unknown_shape_skip == nullptr) {
// SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and
// graph-structure. So try not to add new pass after SubgraphPass.
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass",
new (std::nothrow) SubgraphPass));
}

GE_TIMESTAMP_START(pass_for_control_attr_optimize);
ret = pass_for_control_attr_optimize.Run(compute_graph);
GE_TIMESTAMP_END(pass_for_control_attr_optimize, "OptimizeStage2::ControlAttrOptimize");
@@ -1908,6 +1912,7 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G
graph_node->SetRunFlag(false);
return ret;
}
graph_node->SetLoadFlag(true);
ge_root_model->SetModelId(model_id_info.model_id);
graph_node->SetGeRootModel(ge_root_model);
}


+ 2
- 1
src/ge/graph/manager/graph_manager.h View File

@@ -99,7 +99,8 @@ class GraphManager {
/// @param [out] models build result
/// @return Status result of function
///
ge::Status BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, GeRootModelPtr &models);
ge::Status BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, GeRootModelPtr &models,
uint64_t session_id = 0, bool async = false);

///
/// @ingroup ge_graph


+ 1
- 0
src/ge/graph/manager/graph_manager_utils.cc View File

@@ -40,6 +40,7 @@ GraphNode::GraphNode(GraphId graph_id)
compute_graph_(nullptr),
build_flag_(false),
load_flag_(false),
async_(false),
ge_model_(nullptr),
sem_(1) {
graph_run_async_listener_ = MakeShared<RunAsyncListener>();


+ 7
- 1
src/ge/graph/manager/graph_manager_utils.h View File

@@ -152,6 +152,9 @@ class GraphNode {
bool GetRunFlag() const { return run_flag_; }
void SetRunFlag(bool flag) { run_flag_ = flag; }

bool IsAsync() const { return async_; }
void SetAsync(bool flag) { async_ = flag; }

void SetSubGraph(std::vector<SubGraphInfoPtr> &subgraph_ptr_list) { subgraph_ptr_list_ = subgraph_ptr_list; }
const std::vector<SubGraphInfoPtr> &GetAllSubGraph() const { return subgraph_ptr_list_; }

@@ -181,6 +184,7 @@ class GraphNode {
ComputeGraphPtr compute_graph_;
bool build_flag_;
bool load_flag_;
bool async_;
GeModelPtr ge_model_;
GeRootModelPtr ge_root_model_;
BlockingQueue<uint8_t> sem_;
@@ -239,6 +243,7 @@ struct GraphManagerOptions {
bool local_fmk_op_flag;
bool hcom_parallel;
bool enable_print_op_pass;
bool is_single_op;
std::map<std::string, int> stream_max_parallel_num;
std::string output_datatype;
std::string original_model_file;
@@ -247,7 +252,7 @@ struct GraphManagerOptions {
: stream_num(1),
perf_level(domi::GEN_TASK_WITHOUT_FUSION),
encrypt_mode(-1),
framework_type(domi::FMK_TYPE_T),
framework_type(domi::TENSORFLOW),
ek_file(""),
cert_file(""),
hw_key_file(""),
@@ -263,6 +268,7 @@ struct GraphManagerOptions {
local_fmk_op_flag(false),
hcom_parallel(false),
enable_print_op_pass(true),
is_single_op(false),
save_original_model("false") {}
};
} // namespace ge


+ 1
- 1
src/ge/graph/manager/graph_var_manager.cc View File

@@ -301,7 +301,7 @@ Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin
return SUCCESS;
}

int64_t MemResource::GetVarMemSize() const { return var_mem_size_; }
uint64_t MemResource::GetVarMemSize() const { return var_mem_size_; }

void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; };



+ 1
- 1
src/ge/graph/manager/graph_var_manager.h View File

@@ -177,7 +177,7 @@ class MemResource {

Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset);

int64_t GetVarMemSize() const;
uint64_t GetVarMemSize() const;

void UpdateVarMemSize(int64_t mem_size);



+ 5
- 1
src/ge/graph/manager/util/rt_context_util.h View File

@@ -31,6 +31,10 @@ class RtContextUtil {

void AddrtContext(rtContext_t context);

const rtContext_t GetNormalModeContext() const { return before_prerun_ctx_; }

void SetNormalModeContext(rtContext_t context) { before_prerun_ctx_ = context; }

void DestroyrtContexts();

RtContextUtil &operator=(const RtContextUtil &) = delete;
@@ -41,8 +45,8 @@ class RtContextUtil {
~RtContextUtil() {}

std::vector<rtContext_t> rtContexts_;
rtContext_t before_prerun_ctx_ = nullptr;
};
} // namespace ge

#endif // GE_GRAPH_MANAGER_UTIL_RT_CONTEXT_UTIL_H_


+ 4
- 4
src/ge/graph/optimize/graph_optimize.cc View File

@@ -34,7 +34,7 @@ const char *const kAicoreEngine = "AIcoreEngine";

namespace ge {
GraphOptimize::GraphOptimize()
: optimize_type_(domi::FrameworkType::FMK_TYPE_T),
: optimize_type_(domi::FrameworkType::TENSORFLOW),
cal_config_(""),
insert_op_config_(""),
parse_out_node_(""),
@@ -73,7 +73,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) {
src_index_list.emplace_back(peer_out_anchor->GetIdx());
node_op_desc->SetSrcName(src_name_list);
node_op_desc->SetSrcIndex(src_index_list);
GE_IF_BOOL_EXEC(!(node_op_desc->GetType() == NETOUTPUT && domi::GetContext().type == domi::FMK_TYPE_T),
GE_IF_BOOL_EXEC(!(node_op_desc->GetType() == NETOUTPUT && domi::GetContext().type == domi::TENSORFLOW),
ge::NodePtr peer_owner_node = peer_out_anchor->GetOwnerNode();
input_name_list.emplace_back(
peer_owner_node->GetName() +
@@ -260,7 +260,7 @@ Status GraphOptimize::OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_
}

Status GraphOptimize::SetOptions(const ge::GraphManagerOptions &options) {
if (options.framework_type >= static_cast<int32_t>(domi::FrameworkType::FMK_TYPE_RESERVED)) {
if (options.framework_type >= static_cast<int32_t>(domi::FrameworkType::FRAMEWORK_RESERVED)) {
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Optimize Type %d invalid.", options.framework_type);
return GE_GRAPH_OPTIONS_INVALID;
}
@@ -293,7 +293,7 @@ void GraphOptimize::TranFrameOp(ComputeGraphPtr &compute_graph) {
// set - framework_type
// [No need to verify return value]
op->SetType("FrameworkOp");
if (!AttrUtils::SetInt(op, ATTR_NAME_FRAMEWORK_FWK_TYPE, domi::FrameworkType::FMK_TYPE_T)) {
if (!AttrUtils::SetInt(op, ATTR_NAME_FRAMEWORK_FWK_TYPE, domi::FrameworkType::TENSORFLOW)) {
GELOGW("TranFrameOp SetInt ATTR_NAME_FRAMEWORK_FWK_TYPE failed");
}
}


+ 397
- 0
src/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc View File

@@ -0,0 +1,397 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/optimize/optimizer/allreduce_fusion_pass.h"
#include <string>
#include "common/debug/log.h"
#include "framework/common/debug/ge_log.h"
#include "common/types.h"
#include "common/util.h"
#include "graph/anchor.h"
#include "graph/node.h"
#include "graph/op_desc.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/debug/ge_attr_define.h"
#include "hccl/base.h"
#include "hccl/hcom.h"

namespace ge {
///
/// @brief Fuse groups of HCOM allreduce nodes in the graph into fewer, larger allreduce nodes.
///        Collects every HCOMALLREDUCE node whose HCOM_ATTR_FUSION attribute is > 0, asks HCCL
///        (hcom_get_split_strategy) how to split them into segments, then replaces each
///        multi-node segment with a single cloned allreduce node and re-wires all of the
///        removed nodes' data/control edges onto it. Consecutive fused nodes are chained with a
///        control edge to preserve execution order between segments.
/// @param graph compute graph to be modified in place
/// @return SUCCESS on fusion; NOT_CHANGED when there are fewer than two candidates or the
///         strategy yields one segment per gradient; FAILED on HCCL or graph-edit errors
///
Status AllReducePass::Run(ge::ComputeGraphPtr graph) {
  GELOGI("FusionAllReducePass: start");
  std::vector<NodePtr> fusionOps;
  // Per-gradient size/time features fed to HCCL; currently placeholders (all zero).
  std::vector<float> inputGradientSize;
  std::vector<float> inputGradientTime;

  static const float inputGradientSizeTemp = 0.0;
  static const float inputGradientTimeTemp = 0.0;

  // Get all nodes: collect the allreduce candidates in graph traversal order.
  for (auto nodePtr : graph->GetDirectNode()) {
    GE_IF_BOOL_EXEC(nullptr == nodePtr, GELOGW("FusionAllReducePass: null node exists"); continue;);

    ge::OpDescPtr opDescPtr = nodePtr->GetOpDesc();
    GE_IF_BOOL_EXEC(nullptr == opDescPtr,
                    GELOGW("FusionAllReducePass: desc of node %s is null", nodePtr->GetName().c_str());
                    continue;)
    GE_IF_BOOL_EXEC(HCOMALLREDUCE == opDescPtr->GetType(),
                    // the op is allreduce and fusion > 0, then run fusion
                    // (missing HCOM_ATTR_FUSION is treated as the default 1, i.e. fusable)
                    std::int64_t hcom_fusion = 1;
                    GE_IF_BOOL_EXEC(!ge::AttrUtils::GetInt(opDescPtr, HCOM_ATTR_FUSION, hcom_fusion),
                                    GELOGW("FusionAllReducePass: not get hcom_fusion from opDescPtr "
                                           "by HCOM_ATTR_FUSION"));
                    GELOGI("after GetInt, hcom_fusion is :%ld", hcom_fusion); GE_IF_BOOL_EXEC(
                      hcom_fusion > 0, fusionOps.push_back(nodePtr); inputGradientSize.push_back(inputGradientSizeTemp);
                      inputGradientTime.push_back(inputGradientTimeTemp);))
  }
  // The number of allreduce operators must be more than 1 for fusion to make sense.
  GE_IF_BOOL_EXEC(1 >= fusionOps.size(), GELOGW("FusionAllReducePass NOT_CHANGED: the graph has "
                                                "%lu allreduce operator",
                                                fusionOps.size());
                  return NOT_CHANGED;);

  string group = "group";
  u32 gradientNum = fusionOps.size();
  string model_name_str = graph->GetName();
  const char *model_name = model_name_str.c_str();
  model_feature modelFeature{model_name, gradientNum, inputGradientSize.data(), inputGradientTime.data()};

  u32 segmentNum = 0;
  u32 segmentIndex[HCCL_MAX_SEGMENT_NUM] = {};

  // Call HCCL function: hcom_gradient_segment — segmentIndex[i] is the (inclusive) index of the
  // last gradient belonging to segment i.
  GELOGI("FusionAllReducePass: invoking hcom_get_split_strategy");
  GE_IF_BOOL_EXEC(HCCL_SUCCESS != hcom_get_split_strategy(group.c_str(), &modelFeature, HCCL_MAX_SEGMENT_NUM,
                                                          &segmentNum, segmentIndex),
                  GELOGE(FAILED, "FusionAllReducePass FAILED: the graph has %lu allreduce operator", fusionOps.size());
                  return FAILED;)
  GELOGI("FusionAllReducePass: invoke hcom_get_split_strategy successfully");

  // check whether segmentNum is legal or not
  GE_IF_BOOL_EXEC((HCCL_MAX_SEGMENT_NUM < segmentNum || 1 > segmentNum || segmentNum > gradientNum),
                  GELOGE(FAILED,
                         "FusionAllReducePass FAILED: illegal segmentNum=%u, "
                         "HCCL_MAX_SEGMENT_NUM=%u, gradientNum=%u",
                         segmentNum, HCCL_MAX_SEGMENT_NUM, gradientNum);
                  return FAILED;);

  // check whether segmentIndex is legal or not: the last segment must end at the last gradient.
  GE_IF_BOOL_EXEC((segmentIndex[segmentNum - 1] != gradientNum - 1),
                  GELOGE(FAILED,
                         "FusionAllReducePass FAILED: illegal segmentIndex[0]=%u, "
                         "segmentIndex[segmentNum-1]=%u, gradientNum=%u",
                         segmentIndex[0], segmentIndex[(segmentNum)-1], gradientNum);
                  return FAILED;);

  // Segment boundaries must be strictly increasing.
  for (uint32_t i = 0; i < segmentNum - 1; i++) {
    GE_IF_BOOL_EXEC(segmentIndex[i] >= segmentIndex[i + 1], GELOGE(FAILED,
                                                                   "FusionAllReducePass FAILED: illegal "
                                                                   "segmentIndex[%u]=%u, segmentIndex[%u]=%u",
                                                                   i, segmentIndex[i], i + 1, segmentIndex[i + 1]);
                    return FAILED;);
  }

  // check whether fusion is needed or not: one segment per gradient means nothing to fuse.
  GE_IF_BOOL_EXEC(
    segmentNum == gradientNum,
    GELOGE(NOT_CHANGED, "FusionAllReducePass NOT_CHANGED: segmentNum=%u, gradientNum=%u", segmentNum, gradientNum);
    return NOT_CHANGED;)

  // Reused per-segment scratch: anchors already processed, and the edges (peers) detached from
  // the old nodes that must be re-attached to the fused node.
  std::unordered_set<void *> anchorPtrSet;
  std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataAnchor;
  std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataToInControl;
  std::vector<ge::OutControlAnchorPtr> fusionOpPeerOutControlAnchor;
  std::vector<std::pair<int, ge::InDataAnchorPtr>> fusionOpPeerInDataAnchor;
  std::vector<std::pair<int, ge::InControlAnchorPtr>> fusionOpPeerInControlFromOutData;
  std::vector<ge::InControlAnchorPtr> fusionOpPeerInControlAnchor;
  ge::OutControlAnchorPtr previousNewAllreduceOutControlAnchor = nullptr;

  // Traversing the segmentNum: [start, end] indexes fusionOps for the current segment.
  uint32_t start = 0;
  uint32_t end = 0;
  for (uint32_t segmentIdx = 0; segmentIdx < segmentNum; segmentIdx++) {
    end = segmentIndex[segmentIdx];
    // Single-node segment: leave the node untouched.
    GE_IF_BOOL_EXEC(end - start < 1,
                    GELOGI("FusionAllReducePass: segmentIndex[%u]=%u", segmentIdx, segmentIndex[segmentIdx]);
                    start = end + 1; continue;);

    // The fused node's desc is cloned from the segment's first allreduce.
    ge::OpDescPtr originDescPtr = fusionOps[start]->GetOpDesc();
    GE_CHECK_NOTNULL(originDescPtr);
    ge::OpDescPtr newAllreduceDesc = AttrUtils::CloneOpDesc(originDescPtr);
    GE_CHECK_NOTNULL(newAllreduceDesc);

    // Clear the per-segment buffers before reuse.
    anchorPtrSet.clear();
    fusionOpPeerOutDataAnchor.clear();
    fusionOpPeerOutDataToInControl.clear();
    fusionOpPeerOutControlAnchor.clear();
    fusionOpPeerInDataAnchor.clear();
    fusionOpPeerInControlFromOutData.clear();
    fusionOpPeerInControlAnchor.clear();

    // Traversing the Allreduce operators of each group: detach the first node's edges (its
    // outputs map to slot 0 of the fused node), then remove it.
    int outDataAnchorIndex = 0;
    GE_CHK_STATUS_RET(GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[start]),
                      "Get peer outDataAnchor to inDataAnchor failed");

    GE_CHK_STATUS_RET(GetPeerInAnchorToOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData,
                                               fusionOps[start]),
                      "Get peer inDataAnchor and inControlAnchor to outDataAnchor failed");

    GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[start]),
                      "Get peer outDataAnchor to inControlAnchor failed");
    GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[start]),
                      "Get peer outControlAnchor to inControlAnchor failed");
    GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[start]),
                      "Get peer outControlAnchor from inControlAnchor failed");
    GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[start]), "FusionAllReducePass FAILED: remove node %s\n.",
                      fusionOps[start]->GetName().c_str());

    // Remaining nodes of the segment: their input/output descs are appended to the cloned desc
    // (4-arg overload / GetPeerAnchorFromOutData) before each node is removed.
    for (uint32_t idx = start + 1; idx <= end; idx++) {
      GE_CHK_STATUS_RET(
        GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[idx], newAllreduceDesc),
        "Get peer outDataAnchor to inDataAnchor failed");
      GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[idx]),
                        "Get peer outDataAnchor to inControlAnchor failed");
      GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[idx]),
                        "Get peer outControlAnchor to inControlAnchor failed");
      GE_CHK_STATUS_RET(
        GetPeerAnchorFromOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData,
                                 fusionOps[idx], newAllreduceDesc, outDataAnchorIndex),
        "Get peerAnchor from outDataAnchor failed");
      GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[idx]),
                        "Get peer outControlAnchor from inControlAnchor failed");

      // Delete the node
      GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[idx]), "FusionAllReducePass FAILED: remove node %s\n.",
                        fusionOps[idx]->GetName().c_str());
    }

    NodePtr newAllReducePtr = graph->AddNode(newAllreduceDesc);
    GE_CHECK_NOTNULL(newAllReducePtr);
    // Link the inputDataAnchor: recorded upstream outputs connect to inputs 0..n-1 in order.
    for (uint32_t i = 0; i < fusionOpPeerOutDataAnchor.size(); i++) {
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(fusionOpPeerOutDataAnchor[i], newAllReducePtr->GetInDataAnchor(static_cast<int>(i))),
        "FusionAllReducePass FAILED: add input data edge failed");
    }

    // Link the inputControlAnchor
    for (uint32_t i = 0; i < fusionOpPeerOutControlAnchor.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutControlAnchor[i], newAllReducePtr->GetInControlAnchor()),
                        "FusionAllReducePass FAILED: add input control edge failed");
    }

    for (uint32_t i = 0; i < fusionOpPeerOutDataToInControl.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutDataToInControl[i], newAllReducePtr->GetInControlAnchor()),
                        "FusionAllReducePass FAILED: add edge from out data to incontrol "
                        "failed");
    }

    // Link the outputDataAnchor: pair.first is the fused node's output slot recorded at detach time.
    for (uint32_t i = 0; i < fusionOpPeerInDataAnchor.size(); i++) {
      auto peerInDataAnchor = fusionOpPeerInDataAnchor[i].second;
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInDataAnchor[i].first), peerInDataAnchor),
        "FusionAllReducePass FAILED: add output data edge failed");
    }
    for (uint32_t i = 0; i < fusionOpPeerInControlFromOutData.size(); i++) {
      auto peerInControlAnchor = fusionOpPeerInControlFromOutData[i].second;
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInControlFromOutData[i].first),
                            peerInControlAnchor),
        "FusionAllReducePass FAILED: add edge from out data to in control "
        "failed");
    }

    // Link the outputControlAnchor
    for (uint32_t i = 0; i < fusionOpPeerInControlAnchor.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(newAllReducePtr->GetOutControlAnchor(), fusionOpPeerInControlAnchor[i]),
                        "FusionAllReducePass FAILED: add output control edge failed");
    }

    // Link the newAllreduce: chain fused nodes with a control edge to keep segment order.
    if (segmentIdx > 0 && previousNewAllreduceOutControlAnchor != nullptr) {
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(previousNewAllreduceOutControlAnchor, newAllReducePtr->GetInControlAnchor()),
        "FusionAllReducePass FAILED: add input previous control edge failed");
    }

    previousNewAllreduceOutControlAnchor = newAllReducePtr->GetOutControlAnchor();
    start = end + 1;
  }

  return SUCCESS;
}

///
/// @brief Detach every data edge feeding srcNodePtr and remember the upstream out-anchors.
///        Anchors already present in anchorSet are skipped (deduplication across calls).
/// @param anchorSet            set of raw anchor pointers already processed; updated here
/// @param peerOutDataAnchorVec receives the detached upstream out-data anchors, in input order
/// @param srcNodePtr           node whose incoming data edges are removed
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                             vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec,
                                             ge::NodePtr &srcNodePtr) {
  for (auto in_anchor : srcNodePtr->GetAllInDataAnchors()) {
    GE_IF_BOOL_EXEC(in_anchor == nullptr, continue;);
    OutDataAnchorPtr upstream_anchor = in_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(upstream_anchor == nullptr, continue;);
    if (anchorSet.find(upstream_anchor.get()) != anchorSet.end()) {
      continue;  // already recorded for this segment
    }
    anchorSet.insert(upstream_anchor.get());
    peerOutDataAnchorVec.push_back(upstream_anchor);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(upstream_anchor, in_anchor));
  }
  return SUCCESS;
}

///
/// @brief Detach every data and control edge leaving srcNodePtr's out-data anchors, recording
///        each downstream anchor paired with output slot 0 of the (future) fused node.
///        Anchors already present in anchorSet are skipped (deduplication across calls).
/// @param anchorSet                       set of raw anchor pointers already processed; updated here
/// @param fusionOpPeerInDataAnchor        receives (slot 0, downstream in-data anchor) pairs
/// @param fusionOpPeerInControlFromOutData receives (slot 0, downstream in-control anchor) pairs
/// @param srcNodePtr                      node whose outgoing edges are removed
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerInAnchorToOutData(
  std::unordered_set<void *> &anchorSet, std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor,
  std::vector<std::pair<int, ge::InControlAnchorPtr>> &fusionOpPeerInControlFromOutData, ge::NodePtr &srcNodePtr) {
  for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) {
    GE_IF_BOOL_EXEC(outDataAnchor == nullptr, continue;);
    for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) {
      GE_IF_BOOL_EXEC(peerInDataAnchor == nullptr, continue;);
      if (anchorSet.count(peerInDataAnchor.get()) == 0) {
        // Construct the pair in place with the vector's own element type (was a named
        // temporary; the control-flow branch below even used pair<uint32_t, ...>, forcing a
        // silent converting copy on push_back — fixed to pair<int, ...> via emplace_back).
        fusionOpPeerInDataAnchor.emplace_back(0, peerInDataAnchor);
        anchorSet.insert(peerInDataAnchor.get());
        GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor));
      }
    }

    for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) {
      GE_IF_BOOL_EXEC(peerInControlAnchorFromData == nullptr, continue;);
      if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) {
        fusionOpPeerInControlFromOutData.emplace_back(0, peerInControlAnchorFromData);
        anchorSet.insert(peerInControlAnchorFromData.get());
        GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData));
      }
    }
  }
  return SUCCESS;
}

///
/// @brief Detach every data edge feeding srcNodePtr, remember the upstream out-anchors, and
///        mirror each consumed input's tensor desc onto dstOpDescPtr (the fused op under
///        construction). Anchors already present in anchorSet are skipped.
/// @param anchorSet            set of raw anchor pointers already processed; updated here
/// @param peerOutDataAnchorVec receives the detached upstream out-data anchors, in input order
/// @param srcNodePtr           node whose incoming data edges are removed
/// @param dstOpDescPtr         op desc that accumulates the input descs
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                             vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec,
                                             ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr) {
  for (auto in_anchor : srcNodePtr->GetAllInDataAnchors()) {
    GE_IF_BOOL_EXEC(in_anchor == nullptr, continue;);
    OutDataAnchorPtr upstream_anchor = in_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(upstream_anchor == nullptr, continue;);
    if (anchorSet.find(upstream_anchor.get()) != anchorSet.end()) {
      continue;  // already recorded for this segment
    }
    anchorSet.insert(upstream_anchor.get());
    peerOutDataAnchorVec.push_back(upstream_anchor);
    // Carry the original input tensor description over to the fused op.
    if (dstOpDescPtr->AddInputDesc(in_anchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(in_anchor->GetIdx())) !=
        ge::GRAPH_SUCCESS) {
      GELOGW("GetPeerOutDataToInData: AddInputDesc failed");
    }
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(upstream_anchor, in_anchor));
  }
  return SUCCESS;
}

///
/// @brief Detach the data->control edges ending at srcNodePtr's in-control anchor and remember
///        the upstream out-data anchors. Anchors already present in anchorSet are skipped.
/// @param anchorSet                 set of raw anchor pointers already processed; updated here
/// @param peerOutDataToInControlVec receives the detached upstream out-data anchors
/// @param srcNodePtr                node whose incoming data->control edges are removed
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet,
                                                vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec,
                                                ge::NodePtr &srcNodePtr) {
  InControlAnchorPtr ctrl_in_anchor = srcNodePtr->GetInControlAnchor();
  GE_CHECK_NOTNULL(ctrl_in_anchor);
  for (auto data_src_anchor : ctrl_in_anchor->GetPeerOutDataAnchors()) {
    GE_IF_BOOL_EXEC(data_src_anchor == nullptr, continue;);
    if (anchorSet.find(data_src_anchor.get()) != anchorSet.end()) {
      continue;  // already recorded for this segment
    }
    anchorSet.insert(data_src_anchor.get());
    peerOutDataToInControlVec.push_back(data_src_anchor);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(data_src_anchor, ctrl_in_anchor));
  }
  return SUCCESS;
}

///
/// @brief Detach the control edges ending at srcNodePtr's in-control anchor and remember the
///        upstream out-control anchors. Anchors already present in anchorSet are skipped.
/// @param anchorSet                    set of raw anchor pointers already processed; updated here
/// @param peerOutControlToInControlVec receives the detached upstream out-control anchors
/// @param srcNodePtr                   node whose incoming control edges are removed
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet,
                                                   vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec,
                                                   ge::NodePtr &srcNodePtr) {
  InControlAnchorPtr ctrl_in_anchor = srcNodePtr->GetInControlAnchor();
  GE_CHECK_NOTNULL(ctrl_in_anchor);
  for (auto ctrl_src_anchor : ctrl_in_anchor->GetPeerOutControlAnchors()) {
    GE_IF_BOOL_EXEC(ctrl_src_anchor == nullptr, continue;);
    if (anchorSet.find(ctrl_src_anchor.get()) != anchorSet.end()) {
      continue;  // already recorded for this segment
    }
    anchorSet.insert(ctrl_src_anchor.get());
    peerOutControlToInControlVec.push_back(ctrl_src_anchor);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(ctrl_src_anchor, ctrl_in_anchor));
  }
  return SUCCESS;
}

///
/// @brief Detach every data and control edge leaving srcNodePtr's out-data anchors. For each
///        out-data anchor that has at least one peer, the corresponding output desc is copied
///        onto dstOpDescPtr and `index` is advanced; the detached downstream anchors are
///        recorded paired with that slot index for later re-linking to the fused node.
/// @param anchorSet                  set of raw anchor pointers already processed; updated here
/// @param peerInDataFromOutDataVec   receives (slot, downstream in-data anchor) pairs
/// @param peerInControlFromOutDataVec receives (slot, downstream in-control anchor) pairs
/// @param srcNodePtr                 node whose outgoing edges are removed
/// @param dstOpDescPtr               op desc that accumulates the output descs
/// @param index                      running output-slot counter shared across calls for one
///                                   fused node (outDataAnchorIndex in Run). NOTE(review):
///                                   pairs record the value AFTER the increment, while
///                                   GetPeerInAnchorToOutData records slot 0 for the first
///                                   node — confirm the slot mapping is intentional.
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerAnchorFromOutData(
  std::unordered_set<void *> &anchorSet, vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec,
  vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec, ge::NodePtr &srcNodePtr,
  ge::OpDescPtr &dstOpDescPtr, int &index) {
  for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) {
    GE_IF_BOOL_EXEC(outDataAnchor == nullptr, continue;)
    // Only anchors with at least one consumer contribute an output slot to the fused op.
    if (outDataAnchor->GetPeerInDataAnchors().size() > 0 || outDataAnchor->GetPeerInControlAnchors().size() > 0) {
      if (dstOpDescPtr->AddOutputDesc(
            outDataAnchor->GetOwnerNode()->GetOpDesc()->GetOutputDesc(outDataAnchor->GetIdx())) != ge::GRAPH_SUCCESS) {
        GELOGW("GetPeerAnchorFromOutData: AddOutputDesc failed");
      }
      index++;
    }

    // Record and detach the data consumers of this anchor.
    for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) {
      GE_IF_BOOL_EXEC(peerInDataAnchor == nullptr, continue;)
      if (anchorSet.count(peerInDataAnchor.get()) == 0) {
        std::pair<int, ge::InDataAnchorPtr> pairPeerInDataAnchor;
        pairPeerInDataAnchor.first = index;
        pairPeerInDataAnchor.second = peerInDataAnchor;
        peerInDataFromOutDataVec.push_back(pairPeerInDataAnchor);
        anchorSet.insert(peerInDataAnchor.get());
        GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor))
      }
    }

    // Record and detach the control consumers hanging off this data anchor.
    for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) {
      GE_IF_BOOL_EXEC(peerInControlAnchorFromData == nullptr, continue;)
      if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) {
        std::pair<int, ge::InControlAnchorPtr> pairPeerInControlAnchorFromData;
        pairPeerInControlAnchorFromData.first = index;
        pairPeerInControlAnchorFromData.second = peerInControlAnchorFromData;
        peerInControlFromOutDataVec.push_back(pairPeerInControlAnchorFromData);
        anchorSet.insert(peerInControlAnchorFromData.get());
        GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData))
      }
    }
  }
  return SUCCESS;
}

///
/// @brief Detach the control edges leaving srcNodePtr's out-control anchor and remember the
///        downstream in-control anchors. Anchors already present in anchorSet are skipped.
/// @param anchorSet                     set of raw anchor pointers already processed; updated here
/// @param peerInControlFromOutControlVec receives the detached downstream in-control anchors
/// @param srcNodePtr                    node whose outgoing control edges are removed
/// @return SUCCESS, or the status of a failed edge removal
///
Status AllReducePass::GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet,
                                                     vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec,
                                                     ge::NodePtr &srcNodePtr) {
  OutControlAnchorPtr ctrl_out_anchor = srcNodePtr->GetOutControlAnchor();
  GE_CHECK_NOTNULL(ctrl_out_anchor);
  for (auto ctrl_dst_anchor : ctrl_out_anchor->GetPeerInControlAnchors()) {
    GE_IF_BOOL_EXEC(ctrl_dst_anchor == nullptr, continue;)
    if (anchorSet.find(ctrl_dst_anchor.get()) != anchorSet.end()) {
      continue;  // already recorded for this segment
    }
    anchorSet.insert(ctrl_dst_anchor.get());
    peerInControlFromOutControlVec.push_back(ctrl_dst_anchor);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(ctrl_out_anchor, ctrl_dst_anchor))
  }
  return SUCCESS;
}
} // namespace ge

+ 55
- 0
src/ge/graph/optimize/optimizer/allreduce_fusion_pass.h View File

@@ -0,0 +1,55 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_
#define GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_

#include <unordered_set>
#include <utility>
#include <vector>
#include "inc/graph_pass.h"

namespace ge {
///
/// @brief Graph pass that fuses groups of HCOM allreduce nodes into single allreduce nodes,
///        splitting the candidates into segments according to the HCCL split strategy and
///        re-wiring all detached data/control edges onto each fused node.
///
class AllReducePass : public GraphPass {
 public:
  /// Run the fusion on the whole graph. Returns SUCCESS, NOT_CHANGED (nothing to fuse),
  /// or FAILED.
  Status Run(ge::ComputeGraphPtr graph) override;

 private:
  /// Detach srcNodePtr's incoming data edges, recording upstream anchors and copying the
  /// consumed input descs onto dstOpDescPtr (the fused op under construction).
  Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr,
                                ge::OpDescPtr &dstOpDescPtr);
  /// Detach data->control edges ending at srcNodePtr, recording the upstream out-data anchors.
  Status GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet,
                                   vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec, ge::NodePtr &srcNodePtr);
  /// Detach control edges ending at srcNodePtr, recording the upstream out-control anchors.
  Status GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet,
                                      vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec,
                                      ge::NodePtr &srcNodePtr);
  /// Detach srcNodePtr's outgoing edges, recording downstream anchors paired with the fused
  /// node's output slot (advanced via `index`) and copying output descs onto dstOpDescPtr.
  Status GetPeerAnchorFromOutData(std::unordered_set<void *> &anchorSet,
                                  vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec,
                                  vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec,
                                  ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr, int &index);
  /// Detach srcNodePtr's outgoing control edges, recording the downstream in-control anchors.
  Status GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet,
                                        vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec,
                                        ge::NodePtr &srcNodePtr);
  /// Detach srcNodePtr's incoming data edges (first node of a segment; no desc copying).
  Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                std::vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr);
  /// Detach srcNodePtr's outgoing edges (first node of a segment), recording downstream
  /// anchors paired with output slot 0.
  Status GetPeerInAnchorToOutData(std::unordered_set<void *> &anchorSet,
                                  std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor,
                                  std::vector<std::pair<int, ge::InControlAnchorPtr>> &fusionOpPeerInControlFromOutData,
                                  ge::NodePtr &srcNodePtr);
};
} // namespace ge
#endif // GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_

+ 3
- 2
src/ge/graph/partition/dynamic_shape_partition.cc View File

@@ -745,7 +745,8 @@ Status Cluster::BuildPartitionSubgraph() {
}
int64_t parent_node_index = 0;
for (auto anchor : inputs_) {
auto data_op = MakeShared<OpDesc>(std::string("Data_") + std::to_string(parent_node_index), ge::DATA);
auto data_op =
MakeShared<OpDesc>(subgraph_->GetName() + std::string("Data_") + std::to_string(parent_node_index), ge::DATA);
REQUIRE_NOT_NULL(data_op, "Failed new memory for data op.");
auto input_desc = anchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(anchor->GetIdx());
REQUIRE_GRAPH_SUCCESS(data_op->AddInputDesc(input_desc), "Failed add input desc.");
@@ -763,7 +764,7 @@ Status Cluster::BuildPartitionSubgraph() {
if (outputs_.empty() && control_outputs_.empty()) {
return SUCCESS;
}
auto net_output_op = MakeShared<OpDesc>(NODE_NAME_NET_OUTPUT, ge::NETOUTPUT);
auto net_output_op = MakeShared<OpDesc>(subgraph_->GetName() + "_" + NODE_NAME_NET_OUTPUT, ge::NETOUTPUT);
REQUIRE_NOT_NULL(net_output_op, "Failed new memory for netoutput op.");
for (size_t i = 0; i < outputs_.size(); ++i) {
GeTensorDesc input_desc;


+ 12
- 17
src/ge/graph/partition/graph_partition.cc View File

@@ -300,11 +300,9 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
GE_CHECK_NOTNULL(end_graph);
const auto &src_node = out_anchor->GetOwnerNode();
const auto &dst_node = peer_in_anchor->GetOwnerNode();
string engine_end_name;
string engine_pld_name;
// link input -> end
string end_name = kEndType + std::to_string(graph_info_.num_of_pld_end_);
auto end_op_desc = MakeShared<OpDesc>(end_name, END);
auto end_op_desc = MakeShared<OpDesc>(end_graph->GetName() + "_" + end_name, END);
if (end_op_desc == nullptr) {
GELOGE(GRAPH_PARAM_INVALID, "pld_op_desc is nullptr.");
return FAILED;
@@ -318,15 +316,13 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning);
if (is_need_update_desc) {
if (UpdateEndOpDesc(src_node, output_index, end_op_desc) != SUCCESS) {
GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, input index %d, engine name is %s", output_index,
engine_end_name.c_str());
GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, input index %d", output_index);
return FAILED;
}
} else {
GeTensorDesc input_desc;
if (end_op_desc->AddInputDesc(input_desc) != SUCCESS) {
GELOGE(GRAPH_PARAM_INVALID, "AddInputDesc failed, input index %d, engine name is %s", output_index,
engine_end_name.c_str());
GELOGE(GRAPH_PARAM_INVALID, "AddInputDesc failed, input index %d", output_index);
return FAILED;
}
}
@@ -346,11 +342,11 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
}
/// For fe, op id has been set in AddNode,
/// we can take op id of srcNode as the mark of parentId now
auto const &src_node_opdesc = src_node->GetOpDesc();
const auto &src_node_opdesc = src_node->GetOpDesc();
GE_CHECK_NOTNULL(src_node_opdesc);
int64_t node_id = src_node_opdesc->GetId();
const string pld_name = kPlaceHolderType + std::to_string(graph_info_.num_of_pld_end_);
auto pld_op_desc = MakeShared<OpDesc>(pld_name, PLACEHOLDER);
auto pld_op_desc = MakeShared<OpDesc>(pld_graph->GetName() + "_" + pld_name, PLACEHOLDER);
if (pld_op_desc == nullptr) {
GELOGE(GRAPH_PARAM_INVALID, "pld_op_desc is nullptr.");
return FAILED;
@@ -370,15 +366,13 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
is_need_update_desc = (input_index >= 0) && (graph_info_.mode_ == kPartitioning);
if (is_need_update_desc) {
if (UpdatePldOpDesc(dst_node, input_index, pld_op_desc) != SUCCESS) {
GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, output index %d, engine name is %s", input_index,
engine_pld_name.c_str());
GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, output index %d", input_index);
return FAILED;
}
} else {
GeTensorDesc output_desc;
if (pld_op_desc->AddOutputDesc(output_desc) != SUCCESS) {
GELOGE(GRAPH_PARAM_INVALID, "AddOutputDesc failed, input index %d, engine name is %s", input_index,
engine_pld_name.c_str());
GELOGE(GRAPH_PARAM_INVALID, "AddOutputDesc failed, input index %d", input_index);
return FAILED;
}
}
@@ -399,8 +393,8 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr
return FAILED;
}
graph_info_.index_2_end_[graph_info_.num_of_pld_end_] = new_end_node;
graph_info_.pld_2_end_[new_pld_node] = new_end_node;
graph_info_.end_2_pld_[new_end_node] = new_pld_node;
graph_info_.pld_2_end_[new_pld_node] = new_end_node;
return SUCCESS;
}

@@ -591,7 +585,8 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vector<ge::SubGraphInfoPtr
sgi->SetOutputContext(graph_info_.output_name_);
AddEndPldInformationToSubGraphInfo(sgi);
GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s",
engine_name.c_str(), sub_graph->GetName().c_str(), sgi->GetStreamLabel().c_str());
engine_name.c_str(), sub_graph->GetName().c_str(),
sgi->GetStreamLabel().empty() ? "null" : sgi->GetStreamLabel().c_str());
output_subgraphs.push_back(sgi);
}
}
@@ -896,8 +891,8 @@ Status ge::GraphPartitioner::AddPlaceHolderEnd(const AnchorPtr &out_anchor, cons
return FAILED;
}
// nodes in original graph
auto src_node = out_anchor->GetOwnerNode();
auto dst_node = in_anchor->GetOwnerNode();
const auto &src_node = out_anchor->GetOwnerNode();
const auto &dst_node = in_anchor->GetOwnerNode();
if ((src_node == nullptr) || (dst_node == nullptr)) {
GELOGE(GE_GRAPH_PARAM_NULLPTR, "src_node or dst_node is null.");
return FAILED;


+ 2
- 2
src/ge/graph/passes/aicpu_constant_folding_pass.cc View File

@@ -323,7 +323,7 @@ Status AicpuConstantFoldingPass::LaunchSingleOpRunTask(const NodePtr &node, cons
aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoNum = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;
std::string task_info;
Status ret = kernel_info->GenSingleOpRunTask(node, aicpu_task, task_info);
if (ret != SUCCESS) {
@@ -378,7 +378,7 @@ Status AicpuConstantFoldingPass::LaunchMemCopyTask(const vector<uint64_t> &data_
aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoNum = 0;
aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0;
std::string task_info;
Status ret = kernel_info->GenMemCopyTask(data_infos.size(), aicpu_task, task_info);
if (ret != SUCCESS) {


+ 5
- 2
src/ge/graph/passes/atomic_addr_clean_pass.cc View File

@@ -172,9 +172,12 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) {
if (!session_graph_id.empty()) {
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id);
}
// Only flush subgraph name
string node_name = (graph->GetParentGraph() != nullptr)
? (graph->GetName() + "_" + op_desc->GetName() + session_graph_id)
: (op_desc->GetName() + session_graph_id);

string name = op_desc->GetName() + session_graph_id;
op_desc->SetName(name);
op_desc->SetName(node_name);
GELOGI("Create cleanAddr op:%s.", op_desc->GetName().c_str());
// To avoid same name between graphs, set session graph id to this node
NodePtr clean_addr_node = graph->AddNodeFront(op_desc);


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save