Compare commits

...

7 Commits
master ... r0.6

Author SHA1 Message Date
  mindspore-ci-bot 885af56694 !212 fix securec download links due to mistakes made by openeuler community 4 years ago
  yanghaoran 9f8bdb838e fix securec download links due to mistakes made by openeuler community 4 years ago
  mindspore-ci-bot 2cb83c8f4d !52 Revert "Op debug feature" 4 years ago
  yanghaoran 20f86e636b Revert "Op debug feature" 4 years ago
  mindspore-ci-bot efd823cc18 !51 runpackage sync C75B050 for r0.6 4 years ago
  wuweikang ca11480c34 runpackage sync C75B050 4 years ago
  lujiale db2ea7a6ff update RELEASE.md. 4 years ago
100 changed files with 3350 additions and 1229 deletions
Split View
  1. +21
    -0
      RELEASE.md
  2. +2
    -2
      cmake/external_libs/securec.cmake
  3. +7
    -0
      inc/common/opskernel/ge_task_info.h
  4. +18
    -7
      inc/common/util/error_manager/error_manager.h
  5. +2
    -0
      inc/common/util/platform_info.h
  6. +14
    -0
      inc/common/util/platform_info_def.h
  7. +9
    -5
      inc/external/ge/ge_api_types.h
  8. +1
    -0
      inc/external/graph/operator_reg.h
  9. +1
    -1
      inc/framework/common/debug/log.h
  10. +21
    -2
      inc/framework/common/ge_inner_error_codes.h
  11. +13
    -0
      inc/framework/common/types.h
  12. +55
    -9
      inc/framework/executor/ge_executor.h
  13. +0
    -5
      inc/framework/ge_runtime/model_runner.h
  14. +56
    -0
      inc/framework/memory/memory_api.h
  15. +0
    -4
      inc/framework/omg/omg.h
  16. +1
    -0
      inc/framework/omg/omg_inner_types.h
  17. +110
    -0
      inc/framework/omg/parser/model_parser.h
  18. +92
    -0
      inc/framework/omg/parser/op_parser.h
  19. +31
    -0
      inc/framework/omg/parser/parser_api.h
  20. +138
    -0
      inc/framework/omg/parser/parser_factory.h
  21. +43
    -0
      inc/framework/omg/parser/parser_inner_ctx.h
  22. +74
    -0
      inc/framework/omg/parser/weights_parser.h
  23. +4
    -1
      inc/graph/compute_graph.h
  24. +27
    -0
      inc/graph/debug/ge_attr_define.h
  25. +3
    -0
      inc/graph/detail/model_serialize_imp.h
  26. +2
    -4
      inc/graph/op_desc.h
  27. +48
    -25
      src/common/graph/compute_graph.cc
  28. +29
    -0
      src/common/graph/ge_attr_define.cc
  29. +9
    -17
      src/common/graph/ge_attr_value.cc
  30. +2
    -0
      src/common/graph/graph.mk
  31. +95
    -24
      src/common/graph/model_serialize.cc
  32. +16
    -18
      src/common/graph/node.cc
  33. +39
    -102
      src/common/graph/op_desc.cc
  34. +35
    -0
      src/common/graph/ref_relation.cc
  35. +50
    -26
      src/common/graph/shape_refiner.cc
  36. +5
    -5
      src/common/graph/utils/ge_ir_utils.h
  37. +27
    -6
      src/common/graph/utils/node_utils.cc
  38. +23
    -2
      src/ge/CMakeLists.txt
  39. +1
    -0
      src/ge/common/CMakeLists.txt
  40. +0
    -1
      src/ge/common/formats/utils/formats_trans_utils.h
  41. +5
    -2
      src/ge/common/ge/tbe_plugin_manager.cc
  42. +1
    -1
      src/ge/common/ge/tbe_plugin_manager.h
  43. +1
    -0
      src/ge/common/ge_common.mk
  44. +28
    -23
      src/ge/common/helper/model_helper.cc
  45. +17
    -14
      src/ge/common/helper/om_file_helper.cc
  46. +16
    -13
      src/ge/common/model_parser/base.cc
  47. +483
    -0
      src/ge/common/model_parser/graph_parser_util.cc
  48. +68
    -0
      src/ge/common/model_parser/graph_parser_util.h
  49. +41
    -14
      src/ge/common/profiling/profiling_manager.cc
  50. +5
    -0
      src/ge/common/profiling/profiling_manager.h
  51. +3
    -0
      src/ge/common/types.cc
  52. +1
    -1
      src/ge/common/util.cc
  53. +12
    -10
      src/ge/engine_manager/dnnengine_manager.cc
  54. +1
    -0
      src/ge/executor/CMakeLists.txt
  55. +237
    -71
      src/ge/executor/ge_executor.cc
  56. +5
    -0
      src/ge/executor/module.mk
  57. +7
    -0
      src/ge/ge_inference.mk
  58. +1
    -2
      src/ge/ge_local_engine/engine/host_cpu_engine.h
  59. +1
    -0
      src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc
  60. +7
    -0
      src/ge/ge_runner.mk
  61. +0
    -19
      src/ge/ge_runtime/model_runner.cc
  62. +4
    -6
      src/ge/ge_runtime/runtime_model.cc
  63. +0
    -2
      src/ge/ge_runtime/runtime_model.h
  64. +45
    -21
      src/ge/generator/ge_generator.cc
  65. +16
    -23
      src/ge/graph/build/graph_builder.cc
  66. +0
    -1
      src/ge/graph/build/graph_builder.h
  67. +164
    -45
      src/ge/graph/build/memory/block_mem_assigner.cc
  68. +8
    -0
      src/ge/graph/build/memory/block_mem_assigner.h
  69. +193
    -79
      src/ge/graph/build/memory/graph_mem_assigner.cc
  70. +16
    -5
      src/ge/graph/build/memory/graph_mem_assigner.h
  71. +26
    -0
      src/ge/graph/build/model_builder.cc
  72. +39
    -0
      src/ge/graph/build/stream_allocator.cc
  73. +1
    -0
      src/ge/graph/build/stream_allocator.h
  74. +27
    -7
      src/ge/graph/execute/graph_execute.cc
  75. +14
    -3
      src/ge/graph/execute/graph_execute.h
  76. +1
    -2
      src/ge/graph/label/while_label_maker.cc
  77. +8
    -8
      src/ge/graph/load/graph_loader.cc
  78. +46
    -54
      src/ge/graph/load/new_model_manager/cpu_queue_schedule.cc
  79. +2
    -1
      src/ge/graph/load/new_model_manager/cpu_queue_schedule.h
  80. +26
    -24
      src/ge/graph/load/new_model_manager/data_dumper.cc
  81. +364
    -282
      src/ge/graph/load/new_model_manager/davinci_model.cc
  82. +34
    -30
      src/ge/graph/load/new_model_manager/davinci_model.h
  83. +82
    -62
      src/ge/graph/load/new_model_manager/model_manager.cc
  84. +13
    -3
      src/ge/graph/load/new_model_manager/model_manager.h
  85. +3
    -0
      src/ge/graph/load/new_model_manager/model_utils.cc
  86. +4
    -4
      src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc
  87. +1
    -1
      src/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc
  88. +2
    -2
      src/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc
  89. +1
    -1
      src/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc
  90. +1
    -1
      src/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc
  91. +15
    -16
      src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc
  92. +28
    -23
      src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
  93. +76
    -68
      src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  94. +1
    -0
      src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h
  95. +1
    -1
      src/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc
  96. +1
    -1
      src/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc
  97. +2
    -2
      src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
  98. +16
    -11
      src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc
  99. +3
    -1
      src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h
  100. +2
    -3
      src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc

+ 21
- 0
RELEASE.md View File

@@ -1,3 +1,24 @@
# Release 0.6.0-beta

## Major Features and Improvements
- GE supports function control operators such as If/Case/While/For.
- In a single operator call scenario, GE supports recording the correspondence between operators and tasks for performance commissioning.
- GE supports a new operator overflow positioning solution.

## Bugfixes
- Fix the problem that the aclmdlGetCurOutputDims interface failed to query output Dims in dynamic batch scenarios.
- Fix the problem that the operator compilation options (advanced and advanced) cannot be selected.
- Fix the problem that zero copy function cannot be performed in the scene of converging conditional operators after Data operators.
- Fix the problem that the empty graph cannot be handled.



## Thanks to our Contributors
Thanks goes to these wonderful people:
wangcong, weiyang, yanghaoran, xutianchun, shibeiji, zhouchao, tanghuikang, zhoulili, liujunzhu, zhengyuanhua, taoxiangdong
Contributions of any kind are welcome!


# Release 0.5.0-beta

## Major Features and Improvements


+ 2
- 2
cmake/external_libs/securec.cmake View File

@@ -1,7 +1,7 @@
graphengine_add_pkg(securec
VER 1.1.10
URL https://gitee.com/openeuler/bounds_checking_function/repository/archive/v1.1.10.tar.gz
MD5 0782dd2351fde6920d31a599b23d8c91
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
MD5 193f0ca5246c1dd84920db34d2d8249f
LIBS c_sec
PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001
CMAKE_OPTION " "


+ 7
- 0
inc/common/opskernel/ge_task_info.h View File

@@ -63,5 +63,12 @@ struct HcomOpertion {
int32_t root;
};

// Address mapping for a single HCOM remote-memory access: describes a region of
// `length` bytes shared between a host-side address and a device HBM address on
// the given remote rank.
struct HcomRemoteAccessAddrInfo {
uint32_t remotetRankID;  // ID of the remote rank (NOTE(review): "remotet" looks like a typo for "remote", but the field name is part of the public ABI)
uint64_t remoteAddr;  // host embedding table address
uint64_t localAddr;   // device HBM address
uint64_t length;      // memory length in bytes
};

} // namespace ge
#endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_

+ 18
- 7
inc/common/util/error_manager/error_manager.h View File

@@ -31,27 +31,37 @@ class ErrorManager {

///
/// @brief init
/// @param [in] path current so path
/// @param [in] path: current so path
/// @return int 0(success) -1(fail)
///
int Init(std::string path);

///
/// @brief Report error message
/// @param [in] errCode error code
/// @param [in] mapArgs parameter map
/// @param [in] error_code: error code
/// @param [in] args_map: parameter map
/// @return int 0(success) -1(fail)
///
int ReportErrMessage(std::string error_code, const std::map<std::string, std::string> &args_map);

///
/// @brief output error message
/// @param [in] handle print handle
/// @param [in] handle: print handle
/// @return int 0(success) -1(fail)
///
int OutputErrMessage(int handle);

///
/// @brief output message
/// @param [in] handle: print handle
/// @return int 0(success) -1(fail)
///
int OutputMessage(int handle);

///
/// @brief Report error message
/// @param [in] vector parameter key, vector parameter value
/// @param [in] key: vector parameter key
/// @param [in] value: vector parameter value
///
void ATCReportErrMessage(std::string error_code, const std::vector<std::string> &key = {},
const std::vector<std::string> &value = {});
@@ -60,7 +70,7 @@ class ErrorManager {
struct ErrorInfo {
std::string error_id;
std::string error_message;
std::vector<std::string> arglist;
std::vector<std::string> arg_list;
};

ErrorManager() {}
@@ -77,7 +87,8 @@ class ErrorManager {

bool is_init_ = false;
std::map<std::string, ErrorInfo> error_map_;
std::vector<std::string> error_message_evc_;
std::vector<std::string> error_messages_;
std::vector<std::string> warning_messages_;
};

#endif // ERROR_MANAGER_H_

+ 2
- 0
inc/common/util/platform_info.h View File

@@ -82,6 +82,8 @@ class PlatformInfoManager {

void ParseVectorCoreMemoryRates(map<string, string> &vectorCoreMemoryRatesMap, PlatformInfo &platformInfoTemp);

void ParseCPUCache(map<string, string> &CPUCacheMap, PlatformInfo &platformInfoTemp);

void ParseVectorCoreintrinsicDtypeMap(map<string, string> &vectorCoreintrinsicDtypeMap,
PlatformInfo &platformInfoTemp);



+ 14
- 0
inc/common/util/platform_info_def.h View File

@@ -73,6 +73,8 @@ typedef struct tagAiCoreSpec {

typedef struct tagAiCoreMemoryRates {
double ddrRate;
double ddrReadRate;
double ddrWriteRate;
double l2Rate;
double l2ReadRate;
double l2WriteRate;
@@ -86,6 +88,7 @@ typedef struct tagAiCoreMemoryRates {
} AiCoreMemoryRates;

typedef struct tagVectorCoreSpec {
double vecFreq;
uint64_t vecCalcSize;
uint64_t smaskBuffer;
uint64_t ubSize;
@@ -94,10 +97,15 @@ typedef struct tagVectorCoreSpec {
uint64_t ubbankNum;
uint64_t ubburstInOneBlock;
uint64_t ubbankGroupNum;
uint64_t vectorRegSize;
uint64_t predicateRegSize;
uint64_t addressRegSize;
} VectorCoreSpec;

typedef struct tagVectorCoreMemoryRates {
double ddrRate;
double ddrReadRate;
double ddrWriteRate;
double l2Rate;
double l2ReadRate;
double l2WriteRate;
@@ -105,6 +113,11 @@ typedef struct tagVectorCoreMemoryRates {
double ubToDdrRate;
} VectorCoreMemoryRates;

// CPU cache attributes parsed from the platform-info configuration
// (filled in by PlatformInfoManager::ParseCPUCache).
typedef struct tagCPUCache {
uint32_t AICPUSyncBySW;  // non-zero: AI CPU cache is kept coherent by software — TODO confirm semantics
uint32_t TSCPUSyncBySW;  // non-zero: TS CPU cache is kept coherent by software — TODO confirm semantics
} CPUCache;

typedef struct tagPlatformInfo {
StrInfo strInfo;
SoCInfo socInfo;
@@ -113,6 +126,7 @@ typedef struct tagPlatformInfo {
map<string, vector<string>> aiCoreIntrinsicDtypeMap;
VectorCoreSpec vectorCoreSpec;
VectorCoreMemoryRates vectorCoreMemoryRates;
CPUCache cpucache;
map<string, vector<string>> vectorCoreIntrinsicDtypeMap;
} PlatformInfo;



+ 9
- 5
inc/external/ge/ge_api_types.h View File

@@ -46,7 +46,6 @@ const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep";
const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode";
const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug";
const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode";
const char *const OPTION_EXEC_OP_DEBUG_LEVEL = "ge.exec.opDebugLevel";
const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild";
const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath";
const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses";
@@ -174,6 +173,9 @@ const char *const kDynamicBatchSize = "ge.dynamicBatchSize";
// configure whether to use dynamic image size
const char *const kDynamicImageSize = "ge.dynamicImageSize";

// Configure whether to use dynamic dims
const char *const kDynamicDims = "ge.dynamicDims";

// Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y,
// example: GA|RL, support configure multiple, split by |
const std::string AUTO_TUNE_MODE = "ge.autoTuneMode";
@@ -269,6 +271,7 @@ static const char *const INPUT_SHAPE = "input_shape";
static const char *const OP_NAME_MAP = "op_name_map";
static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize;
static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize;
static const char *const DYNAMIC_DIMS = kDynamicDims;
static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str();
static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str();
static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY;
@@ -291,10 +294,11 @@ static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c

// for interface: aclgrphBuildModel
const std::set<std::string> ir_builder_suppported_options = {
INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, DYNAMIC_BATCH_SIZE,
DYNAMIC_IMAGE_SIZE, INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY,
AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, INPUT_FP16_NODES,
LOG_LEVEL};
INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP,
DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE, DYNAMIC_DIMS,
INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY,
AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES,
INPUT_FP16_NODES, LOG_LEVEL};
// for interface: aclgrphBuildInitialize
const std::set<std::string> global_options = {CORE_TYPE,
SOC_VERSION,


+ 1
- 0
inc/external/graph/operator_reg.h View File

@@ -343,6 +343,7 @@ class OpReg {
auto x_type = op.GetInputDesc(in_name).GetDataType(); \
TensorDesc op_output_desc = op.GetOutputDesc(out_name); \
op_output_desc.SetShape(ge::Shape(x_shape)); \
op_output_desc.SetOriginShape(ge::Shape(x_shape)); \
op_output_desc.SetDataType(x_type); \
return op.UpdateOutputDesc(out_name, op_output_desc); \
}


+ 1
- 1
inc/framework/common/debug/log.h View File

@@ -232,7 +232,7 @@
rtError_t _rt_ret = (expr); \
if (_rt_ret != RT_ERROR_NONE) { \
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \
return ge::RT_FAILED; \
return RT_ERROR_TO_GE_STATUS(_rt_ret); \
} \
} while (0);



+ 21
- 2
inc/framework/common/ge_inner_error_codes.h View File

@@ -280,8 +280,25 @@ GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_REDUCE_SCATTER_FAILED, 47, "call hccl hcom r

// Executor module error code definition
GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized.");
GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 2, "GE AIPP is not exist.");
GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 3, "GE Dynamic AIPP is not support to query temporarily.");
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_PATH_INVALID, 2, "Model file path is invalid.");
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_KEY_PATH_INVALID, 3, "Key file path of model is invalid.");
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_ID_INVALID, 4, "Model id is invalid.");
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_DATA_SIZE_INVALID, 5, "Data size of model is invalid.");
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_WEIGHT_SIZE_INVALID, 6, "Weight size of model is invalid.");
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_PARTITION_NUM_INVALID, 7, "Partition number of model is invalid.");
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_QUEUE_ID_INVALID, 8, "Queue id of model is invalid.");
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, 9, "Model does not support encryption.");
GE_ERRORNO_EXECUTOR(GE_EXEC_READ_MODEL_FILE_FAILED, 10, "Failed to read model file.");
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_MODEL_REPEATED, 11, "The model is loaded repeatedly.");
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_MODEL_PARTITION_FAILED, 12, "Failed to load model partition.");
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, 13, "Failed to load weight partition.");
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_TASK_PARTITION_FAILED, 14, "Failed to load task partition.");
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, 15, "Failed to load kernel partition.");
GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, 16, "Failed to allocate feature map memory.");
GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, 17, "Failed to allocate weight memory.");
GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_VAR_MEM_FAILED, 18, "Failed to allocate variable memory.");
GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 19, "GE AIPP is not exist.");
GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 20, "GE Dynamic AIPP is not support to query temporarily.");

// Generator module error code definition
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed.");
@@ -289,6 +306,8 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, 2, "Graph mana
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph manager build graph failed.");
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed.");
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed.");

#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast<Status>(RT_ERROR)
} // namespace ge

#endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_

+ 13
- 0
inc/framework/common/types.h View File

@@ -339,6 +339,7 @@ REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext");
REGISTER_OPTYPE_DECLARE(INITDATA, "InitData");
REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape")
REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity");
REGISTER_OPTYPE_DECLARE(BITCAST, "Bitcast");

// ANN dedicated operator
REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean");
@@ -432,6 +433,8 @@ REGISTER_OPTYPE_DECLARE(HCOMALLREDUCE, "HcomAllReduce");
REGISTER_OPTYPE_DECLARE(HCOMREDUCESCATTER, "HcomReduceScatter");
REGISTER_OPTYPE_DECLARE(HCOMSEND, "HcomSend");
REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive");
REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead");
REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite");

REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign");
REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp");
@@ -559,6 +562,16 @@ enum ModelCheckType {
};

///
/// @brief dynamic input type
///
enum DynamicInputType {
FIXED = 0, // default mode
DYNAMIC_BATCH = 1,
DYNAMIC_IMAGE = 2,
DYNAMIC_DIMS = 3
};

///
/// @brief magic number of the model file
///
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FILE_MAGIC_NUM;


+ 55
- 9
inc/framework/executor/ge_executor.h View File

@@ -26,23 +26,26 @@
#include "common/ge_types.h"
#include "common/types.h"
#include "graph/tensor.h"
#include "graph/ge_tensor.h"
#include "runtime/base.h"

namespace ge {
class ModelListenerAdapter;

class SingleOp;
class DynamicSingleOp;

struct RunModelData {
uint32_t index; // Data index
uint32_t modelId;
std::vector<DataBuffer> blobs; // All input/output data buffer
uint32_t timestamp; // Data creation time
uint32_t timeout; // Processing timeout
uint64_t request_id = 0; // Request ID
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0
std::vector<DataBuffer> blobs; // All input/output data buffer
uint32_t timestamp; // Data creation time
uint32_t timeout; // Processing timeout
uint64_t request_id = 0; // Request ID
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty
};

class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
@@ -87,16 +90,52 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
///
ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height,
uint64_t image_width);

///
/// @ingroup ge
/// @brief Set dynamic dims info
/// @param [in] model_id: model id allocate from manager
/// @param [in] dynamic_input_addr: dynamic input addr created by user
/// @param [in] length: length of dynamic input addr
/// @param [in] dynamic_dim_num: number of dynamic dimension
/// @param [in] dynamic_dims: array of dynamic dimensions
/// @return execute result
///
ge::Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
const std::vector<uint64_t> &dynamic_dims);

///
/// @ingroup ge
/// @brief Get current dynamic dims info by combined dims
/// @param [in] model_id: model id allocate from manager
/// @param [in] combined_dims: array of combined dimensions
/// @param [out] cur_dynamic_dims: current dynamic dims
/// @return execute result
///
ge::Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &combined_dims,
std::vector<uint64_t> &cur_dynamic_dims);

///
/// @ingroup ge
/// @brief Get dynamic batch_info
/// @param [in] model_id
/// @param [out] batch_info
/// @param [out] dynamic_type
/// @return execute result
///
ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
int32_t &dynamic_type);

///
/// @ingroup ge
/// @brief Get combined dynamic dims info
/// @param [in] model_id
/// @param [out] batch_info
/// @return execute result
///
ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);

ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info);
ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);

///
/// @ingroup ge
@@ -209,6 +248,13 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
std::vector<DataBuffer> &outputs);

static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op);

static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &outputs);

static ge::Status ReleaseSingleOpResource(void *stream);

ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count);


+ 0
- 5
inc/framework/ge_runtime/model_runner.h View File

@@ -35,9 +35,6 @@ class ModelRunner {

bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id,
std::shared_ptr<DavinciModel> davinci_model, std::shared_ptr<ModelListener> listener);

bool DistributeTask(uint32_t model_id);

bool LoadModelComplete(uint32_t model_id);

const std::vector<uint32_t> &GetTaskIdList(uint32_t model_id) const;
@@ -46,8 +43,6 @@ class ModelRunner {

const std::map<std::string, std::shared_ptr<RuntimeInfo>> &GetRuntimeInfoMap(uint32_t model_id) const;

void *GetModelHandle(uint32_t model_id) const;

bool UnloadModel(uint32_t model_id);

bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data);


+ 56
- 0
inc/framework/memory/memory_api.h View File

@@ -0,0 +1,56 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_
#define INC_FRAMEWORK_MEMORY_MEMORY_API_H_

#include <string>
#include <vector>

#include "ge/ge_api_error_codes.h"
#include "runtime/mem.h"

namespace ge {
// Storage backing for GE-managed memory.
enum MemStorageType {
HBM = 0,    // device high-bandwidth memory
RDMA_HBM,   // HBM registered for RDMA access
};

// Address/size pair describing one host-resident variable.
struct HostVarInfo {
uint64_t base_addr;  // base address of the variable
uint64_t var_size;   // size of the variable in bytes
};

///
/// \brief Initialize the RDMA memory pool.
/// \param size [in] rdma pool memory size to be allocated.
/// \param mem_type [in] memory type for rdma pool.
/// \return Status result of function
Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM);

///
/// \brief Register host variable regions for RDMA remote access.
/// \param var_info [in] host variable addr infos.
/// \param mem_type [in] memory type for rdma pool.
/// \return Status result of function
Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t mem_type = RT_MEMORY_HBM);

///
/// \brief Look up the base address and size of a host variable by name.
/// \param var_name [in] name of the host variable.
/// \param base_addr [out] base address of the host variable.
/// \param var_size [out] memory size of the host variable.
/// \return Status result of function
Status GetVarBaseAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size);
} // namespace ge
#endif // INC_FRAMEWORK_MEMORY_MEMORY_API_H_

+ 0
- 4
inc/framework/omg/omg.h View File

@@ -96,10 +96,6 @@ Status CheckCustomAiCpuOpLib();

Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file);

Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format);

Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info);

void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
std::vector<std::string> &output_nodes_name);



+ 1
- 0
inc/framework/omg/omg_inner_types.h View File

@@ -120,6 +120,7 @@ struct OmgContext {
bool is_dynamic_input = false;
std::string dynamic_batch_size;
std::string dynamic_image_size;
std::string dynamic_dims;
};
} // namespace ge



+ 110
- 0
inc/framework/omg/parser/model_parser.h View File

@@ -0,0 +1,110 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_
#define INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_

#include <google/protobuf/message.h>
#include "framework/common/types.h"
#include "framework/omg/omg_inner_types.h"
#include "graph/attr_value.h"
#include "graph/compute_graph.h"
#include "graph/ge_tensor.h"
#include "graph/graph.h"
#include "graph/op_desc.h"
#include "graph/operator.h"
#include "graph/range_vistor.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"

using Status = domi::Status;

namespace domi {
// Callback that fetches the serialized proto of a named subgraph from the root
// proto — presumably used when parsing nested function/subgraph definitions;
// TODO(review): confirm against concrete parser implementations.
using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>(
const google::protobuf::Message *root_proto, const std::string &graph)>;
/**
 * @ingroup domi_omg
 * @brief Abstract interface for parsing a framework model (from file, memory
 *        or an in-memory proto) into a GE graph.
 */
class ModelParser {
public:
ModelParser() {}

virtual ~ModelParser() {}

/**
* @ingroup domi_omg
* @brief Analyze network model data
* @param [in] file Network model file path
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status Parse(const char *file, ge::Graph &graph) = 0;

/**
* @ingroup domi_omg
* @brief Parse relevant data from memory and save it to graph
* @param [in] data Model file data held in memory
* @param [in] size Size of the in-memory model data, in bytes
* @param [in|out] graph A graph for saving the model information after analysis
* @return SUCCESS
* @return FAILED
*/
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0;

/**
* @ingroup domi_omg
* @brief Analyze network model data
* @param [in] proto network model
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0;

/**
* @ingroup domi_omg
* @brief Analyze callback model data in subgraph
* @param [in] proto network model
* @param [in] callback callback of subgraph
* @param [in|out] graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback,
ge::ComputeGraphPtr &graph) = 0;
/**
* @ingroup domi_omg
* @brief Convert model files to JSON format
* @param [in] model_file Model file path to be converted
* @param [out] json_file Converted JSON file path
* @return SUCCESS
* @return Others failed
*/
virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; }

/*
* @ingroup domi_omg
* @brief Convert network data type
* @param [in] type Data type to be converted
* @return ge::DataType
*/
virtual ge::DataType ConvertToGeDataType(const uint32_t type) = 0;

/**
* @ingroup domi_omg
* @brief Parse all graphs contained in the root proto into a root compute graph
*        (exact contract defined by the concrete parser implementations)
* @param [in] root_proto Root network model proto
* @param [in|out] root_graph Save the network information after analysis
* @return SUCCESS
* @return Others failed
*/
virtual Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0;
};
} // namespace domi

#endif // INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_

+ 92
- 0
inc/framework/omg/parser/op_parser.h View File

@@ -0,0 +1,92 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_
#define INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_

#include <google/protobuf/text_format.h>
#include "common/types.h"
#include "omg/omg_inner_types.h"
#include "proto/om.pb.h"
#include "graph/ge_tensor.h"
#include "graph/op_desc.h"
#include "graph/utils/op_desc_utils.h"

using google::protobuf::Message;
using Status = domi::Status;

namespace ge {
/**
 * @ingroup domi_omg
 * @brief Used to analyze operator information
 *
 */
class OpParser {
 public:
  /**
   * @ingroup domi_omg
   * @brief Deconstructor
   */
  virtual ~OpParser() {}

  /**
   * @ingroup domi_omg
   * @brief Analytic operator parameters
   * @param [in] op_src Parameter data to be resolved
   * @param [out] op_desc Parsed parameter data, stored into the op descriptor
   * @return SUCCESS
   * @return FAILED
   */
  virtual Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0;

  /**
   * @ingroup domi_omg
   * @brief Analytic operator parameters
   * @param [in] op_src Parameter data to be resolved
   * @param [out] op_dest Operator parameter data
   * @return SUCCESS
   * @return FAILED
   */
  virtual Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0;

  /**
   * @ingroup domi_omg
   * @brief Analytic operator weight information
   * @param [in] op_src Weight data to be resolved
   * @param [out] node Node receiving the weight data after analysis
   * @return SUCCESS
   * @return FAILED
   */
  virtual Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0;

  /**
   * @ingroup domi_omg
   * @brief Get the format information according to the parameters in the operator
   * @param [in] op_src Parameter data to be resolved
   * @param [out] format Output the parsed format
   * @return SUCCESS
   * @return FAILED
   */
  virtual Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) {
    (void)op_src;
    // Indicates that the op does not provide a value for format
    format = domi::DOMI_TENSOR_RESERVED;
    return domi::SUCCESS;
  }
};
} // namespace ge

#endif // INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_

+ 31
- 0
inc/framework/omg/parser/parser_api.h View File

@@ -0,0 +1,31 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_
#define INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_

#include <iostream>
#include <map>
#include <string>
#include "ge/ge_api_error_codes.h"

namespace ge {
// Initialize the parser with (key, value) options; presumably must be called
// before any parse operation — confirm with the implementation.
// Returns SUCCESS on success, an error code otherwise.
Status ParserInitialize(const std::map<std::string, std::string>& options);
// Finalize the parser and release all resources acquired by ParserInitialize.
Status ParserFinalize();
}  // namespace ge
#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_

+ 138
- 0
inc/framework/omg/parser/parser_factory.h View File

@@ -0,0 +1,138 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_
#define INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_

#include <map>
#include <memory>
#include <mutex>
#include <string>
#include "framework/common/types.h"
#include "framework/omg/omg_inner_types.h"

using Status = domi::Status;

namespace domi {
class WeightsParser;
class ModelParser;

typedef std::shared_ptr<ModelParser> (*MODEL_PARSER_CREATOR_FUN)(void);

// Create modelparser for different frameworks
class ModelParserFactory {
 public:
  // Singleton accessor (thread-safety of the lazy init is not visible here —
  // confirm in the implementation).
  static ModelParserFactory *Instance();

  /**
   * @ingroup domi_omg
   * @brief Create a modelparser based on the type entered
   * @param [in] type Framework type
   * @return Created modelparser
   */
  std::shared_ptr<ModelParser> CreateModelParser(const domi::FrameworkType type);

  /**
   * @ingroup domi_omg
   * @brief Register create function
   * @param [in] type Framework type
   * @param [in] fun ModelParser's create function
   */
  void RegisterCreator(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun);

 protected:
  ModelParserFactory() {}
  ~ModelParserFactory();

 private:
  // Maps each framework type to the creator function registered for it.
  std::map<domi::FrameworkType, MODEL_PARSER_CREATOR_FUN> creator_map_;
};  // end class ModelParserFactory

// Helper whose constructor registers a ModelParser creator with the factory.
// Instantiated as a global by REGISTER_MODEL_PARSER_CREATOR so registration
// happens during static initialization.
// NOTE(review): "Registerar" (sic) is kept — renaming would break existing users.
class ModelParserRegisterar {
 public:
  ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun) {
    ModelParserFactory::Instance()->RegisterCreator(type, fun);
  }
  ~ModelParserRegisterar() {}
};

// Registration macros for model parsers.
// Expands to a factory function Creator_<type>_Model_Parser() that creates a
// <clazz> instance (returning nullptr instead of propagating bad_alloc etc.),
// plus a global ModelParserRegisterar whose constructor registers that
// function with ModelParserFactory during static initialization.
#define REGISTER_MODEL_PARSER_CREATOR(type, clazz)               \
  std::shared_ptr<ModelParser> Creator_##type##_Model_Parser() { \
    std::shared_ptr<clazz> ptr = nullptr;                        \
    try {                                                        \
      /* qualified: the macro must not depend on a               \
         using-declaration at its expansion site */              \
      ptr = std::make_shared<clazz>();                           \
    } catch (...) {                                              \
      ptr = nullptr;                                             \
    }                                                            \
    return std::shared_ptr<ModelParser>(ptr);                    \
  }                                                              \
  ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser)

typedef std::shared_ptr<WeightsParser> (*WEIGHTS_PARSER_CREATOR_FUN)(void);

// Create weightsparser for different frameworks
class WeightsParserFactory {
 public:
  // Singleton accessor (thread-safety of the lazy init is not visible here —
  // confirm in the implementation).
  static WeightsParserFactory *Instance();

  /**
   * @ingroup domi_omg
   * @brief Create weightsparser based on the type entered
   * @param [in] type Framework type
   * @return Created weightsparser
   */
  std::shared_ptr<WeightsParser> CreateWeightsParser(const domi::FrameworkType type);

  /**
   * @ingroup domi_omg
   * @brief Register create function
   * @param [in] type Framework type
   * @param [in] fun WeightsParser's create function
   */
  void RegisterCreator(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun);

 protected:
  WeightsParserFactory() {}
  ~WeightsParserFactory();

 private:
  // Maps each framework type to the creator function registered for it.
  std::map<domi::FrameworkType, WEIGHTS_PARSER_CREATOR_FUN> creator_map_;
};  // end class WeightsParserFactory

// Helper whose constructor registers a WeightsParser creator with the factory.
// Instantiated as a global by REGISTER_WEIGHTS_PARSER_CREATOR so registration
// happens during static initialization.
class WeightsParserRegisterar {
 public:
  WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun) {
    WeightsParserFactory::Instance()->RegisterCreator(type, fun);
  }
  ~WeightsParserRegisterar() {}
};

// Register macro of weight resolver.
// Expands to a factory function Creator_<type>_Weights_Parser() that creates a
// <clazz> instance (returning nullptr instead of propagating bad_alloc etc.),
// plus a global WeightsParserRegisterar whose constructor registers that
// function with WeightsParserFactory during static initialization.
#define REGISTER_WEIGHTS_PARSER_CREATOR(type, clazz)               \
  std::shared_ptr<WeightsParser> Creator_##type##_Weights_Parser() { \
    std::shared_ptr<clazz> ptr = nullptr;                          \
    try {                                                          \
      /* qualified: the macro must not depend on a                 \
         using-declaration at its expansion site */                \
      ptr = std::make_shared<clazz>();                             \
    } catch (...) {                                                \
      ptr = nullptr;                                               \
    }                                                              \
    return std::shared_ptr<WeightsParser>(ptr);                    \
  }                                                                \
  WeightsParserRegisterar g_##type##_Weights_Parser_Creator(type, Creator_##type##_Weights_Parser)
}; // namespace domi

#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_

+ 43
- 0
inc/framework/omg/parser/parser_inner_ctx.h View File

@@ -0,0 +1,43 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_
#define INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "external/register/register_fmk_types.h"
#include "external/register/register_types.h"
#include "framework/omg/omg_inner_types.h"

namespace ge {
// Context shared by parser components during model parsing.
struct ParserContext {
  // User-specified input dimensions, keyed by input (tensor) name.
  std::unordered_map<std::string, std::vector<int64_t>> input_dims;
  // Tensor format of the network; defaults to ND (no particular layout).
  // (The stray empty statement `;` that followed this initializer was removed.)
  domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND;
  // Run mode; defaults to pre-check only.
  RunMode run_mode = ONLY_PRE_CHECK;
  std::string custom_proto_path;  // save caffe custom proto path, used by caffe parse
  std::string caffe_proto_path;   // save caffe proto path, used by caffe parse
};

ParserContext &GetParserContext();
} // namespace ge

#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_

+ 74
- 0
inc/framework/omg/parser/weights_parser.h View File

@@ -0,0 +1,74 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_
#define INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_

#include "graph/graph.h"
#include "graph/attr_value.h"
#include "graph/compute_graph.h"
#include "graph/ge_tensor.h"
#include "graph/op_desc.h"
#include "graph/operator.h"
#include "graph/range_vistor.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"

namespace domi {
/**
 * @ingroup domi_omg
 * @brief Weight information resolver
 *
 */
class WeightsParser {
 public:
  /**
   * @ingroup domi_omg
   * @brief Constructor
   */
  WeightsParser() {}

  /**
   * @ingroup domi_omg
   * @brief Deconstructor
   */
  virtual ~WeightsParser() {}

  /**
   * @ingroup domi_omg
   * @brief Analyze weight data
   * @param [in] file Path of weight file after training
   * @param [in|out] graph Graph for saving weight information after analysis
   * @return SUCCESS
   * @return Others failed
   */
  virtual Status Parse(const char *file, ge::Graph &graph) = 0;

  /**
   * @ingroup domi_omg
   * @brief Parse relevant data from memory and save it to graph
   * @param [in] input Model file memory data
   * @param [in] length Size in bytes of the buffer pointed to by input
   * @param [in|out] graph A graph for saving the model information after analysis
   * @return SUCCESS
   * @return FAILED
   */
  virtual Status ParseFromMemory(const char *input, uint32_t length, ge::ComputeGraphPtr &graph) = 0;
};
} // namespace domi

#endif // INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_

+ 4
- 1
inc/graph/compute_graph.h View File

@@ -87,11 +87,14 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A
// AddNode with NodePtr
NodePtr AddNode(NodePtr node);
NodePtr AddNode(OpDescPtr op);
NodePtr AddNode(OpDescPtr op, int64_t id); // for unserialize.
NodePtr AddNode(OpDescPtr op, int64_t id); // for unserialize
NodePtr AddNodeFront(NodePtr node);
NodePtr AddNodeFront(const OpDescPtr &op);
NodePtr AddInputNode(NodePtr node);
NodePtr AddOutputNode(NodePtr node);
// insert node with specific pre_node
NodePtr AddNodeAfter(OpDescPtr &op, const NodePtr &pre_node);
NodePtr AddNodeAfter(NodePtr node, const NodePtr &pre_node);

graphStatus RemoveNode(const NodePtr &node);
graphStatus RemoveInputNode(const NodePtr &node);


+ 27
- 0
inc/graph/debug/ge_attr_define.h View File

@@ -185,6 +185,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE;

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_INPUT;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_OUTPUT;

// to be deleted
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION;
@@ -934,12 +937,14 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PRED_VALUE;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_NUM;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_LABEL;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_BATCH;

// Control flow
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_SWITCH_COND;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ACTIVE_STREAM_LIST;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCHN_PRED_VALUE;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_DYNAMIC_DIMS;

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG;
@@ -983,6 +988,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NEE
// For mutil-batch
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERT_BY_MBATCH;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_TYPE;

// For inserted op
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERTED_BY_GE;
@@ -1022,6 +1028,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_ADDR;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_VALID_SIZE;

// for unregistered op
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNREGST_OPPATH;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNREGST_ATTRLIST;

// op overflow dump
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE;
@@ -1075,8 +1085,25 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX;

// atc user def dtype&format
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_DATATYPE;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_FORMAT;

// for fusion op plugin
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE;

// graph partition for aicpu
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_END_REAR_NODE_ENGINE_NAME;

// input and output memory type
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_VARIABLE_PLACEMENT;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INPUT_MEMORY_TYPE;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OUTPUT_MEMORY_TYPE;

// input_output_offset
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_BASIC_OFFSET;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET;
} // namespace ge

#endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_

+ 3
- 0
inc/graph/detail/model_serialize_imp.h View File

@@ -67,6 +67,9 @@ class ModelSerializeImp {
bool HandleNodeNameRef();

bool UnserializeOpDesc(OpDescPtr &opDesc, proto::OpDef &opDefProto);
void AttrDefToOpDesc(OpDescPtr &op_desc, std::vector<string> &key_in, std::vector<string> &key_out,
std::vector<uint32_t> &value_in, std::vector<uint32_t> &value_out, std::vector<string> &opt);
void OpDescToAttrDef(const ConstOpDescPtr &op_desc, proto::OpDef *op_def_proto);

bool UnserializeNode(ComputeGraphPtr &graph, proto::OpDef &opDefProto);



+ 2
- 4
inc/graph/op_desc.h View File

@@ -159,10 +159,6 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder {

std::map<string, uint32_t> GetAllInputName() const;

void SetAllInputName(const std::map<string, uint32_t> &input_name_idx);

std::vector<string> GetAllOptionalInputName() const;

std::map<string, uint32_t> GetAllOutputName();

bool UpdateInputName(std::map<string, uint32_t> inputNameIdx);
@@ -300,6 +296,8 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder {
std::map<std::string, SubgraphType> subgraph_ir_names_to_type_;

vector<GeTensorDescPtr> inputs_desc_{};
map<string, uint32_t> input_name_idx_{};
std::unordered_set<string> optional_input_names_{};
vector<GeTensorDescPtr> outputs_desc_{};
map<string, uint32_t> output_name_idx_{};
std::function<graphStatus(Operator &)> infer_func_ = nullptr;


+ 48
- 25
src/common/graph/compute_graph.cc View File

@@ -62,18 +62,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string ComputeGraph::GetName() co
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void ComputeGraph::SetName(const string &name) { name_ = name; }

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY size_t ComputeGraph::GetAllNodesSize() const {
size_t s = nodes_.size();
for (const auto &sub_graph : sub_graph_) {
s += sub_graph->GetAllNodesSize();
}
return s;
return GetAllNodes().size();
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraph::Vistor<NodePtr> ComputeGraph::GetAllNodes() const {
if (sub_graph_.empty()) {
return Vistor<NodePtr>(shared_from_this(), nodes_);
}

std::vector<std::shared_ptr<ComputeGraph>> subgraphs;
return AllGraphNodes(subgraphs);
}
@@ -277,7 +269,7 @@ NodePtr ComputeGraph::AddNodeFront(NodePtr node) {

NodePtr ComputeGraph::AddNodeFront(const OpDescPtr &op) {
if (op == nullptr) {
GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null.");
GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null.");
return nullptr;
}
op->SetId(nodes_.size());
@@ -287,9 +279,38 @@ NodePtr ComputeGraph::AddNodeFront(const OpDescPtr &op) {
return AddNodeFront(node_ptr);
}

NodePtr ComputeGraph::AddNodeAfter(NodePtr node, const NodePtr &pre_node) {
if (node == nullptr || node->GetOpDesc() == nullptr || pre_node == nullptr) {
GELOGE(GRAPH_FAILED, "The node ptr or op desc should not be null.");
return nullptr;
}
node->GetOpDesc()->SetId(nodes_.size());
auto node_iter = std::find(nodes_.begin(), nodes_.end(), pre_node);
if (node_iter != nodes_.end()) {
nodes_.insert(node_iter + 1, node);
} else {
GELOGE(GRAPH_FAILED, "Cannot find pre_node in nodes_.");
return nullptr;
}

return node;
}

NodePtr ComputeGraph::AddNodeAfter(OpDescPtr &op, const NodePtr &pre_node) {
if (op == nullptr) {
GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null.");
return nullptr;
}
op->SetId(nodes_.size());
NodePtr node_ptr = shared_ptr<Node>(new (std::nothrow) Node(op, shared_from_this()));
GE_IF_BOOL_EXEC(node_ptr == nullptr, GELOGE(GRAPH_FAILED, "node_ptr is NULL!!!"); return nullptr);
GE_IF_BOOL_EXEC(node_ptr->Init() != GRAPH_SUCCESS, GELOGE(GRAPH_FAILED, "node init failed."); return nullptr);
return AddNodeAfter(node_ptr, pre_node);
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(NodePtr node) {
if (node == nullptr || node->GetOpDesc() == nullptr) {
GELOGE(GRAPH_FAILED, "The node ptr should be not null.");
GELOGE(GRAPH_FAILED, "The node ptr should not be null.");
return nullptr;
}
node->GetOpDesc()->SetId((int64_t)GetDirectNodesSize());
@@ -299,7 +320,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(Nod

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(OpDescPtr op) {
if (op == nullptr) {
GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null.");
GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null.");
return nullptr;
}
op->SetId(GetDirectNodesSize());
@@ -311,7 +332,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(OpD

NodePtr ComputeGraph::AddNode(OpDescPtr op, int64_t id) { // for unserialize.
if (op == nullptr) {
GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null.");
GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null.");
return nullptr;
}
op->SetId(id);
@@ -324,7 +345,7 @@ NodePtr ComputeGraph::AddNode(OpDescPtr op, int64_t id) { // for unserialize.

NodePtr ComputeGraph::AddInputNode(NodePtr node) {
if (node == nullptr) {
GELOGE(GRAPH_FAILED, "The node ptr should be not null.");
GELOGE(GRAPH_FAILED, "The node ptr should not be null.");
return nullptr;
}
input_nodes_.push_back(node);
@@ -336,7 +357,7 @@ NodePtr ComputeGraph::AddInputNode(NodePtr node) {

NodePtr ComputeGraph::AddOutputNode(NodePtr node) {
if (node == nullptr || node->GetOpDesc() == nullptr) {
GELOGE(GRAPH_FAILED, "The node ptr or opdesc should be not null.");
GELOGE(GRAPH_FAILED, "The node ptr or opdesc should not be null.");
return nullptr;
}

@@ -372,7 +393,7 @@ graphStatus ComputeGraph::RemoveConstInput(const NodePtr &node) {
if (out_anchor->GetOwnerNode()->GetType() == CONSTANT || out_anchor->GetOwnerNode()->GetType() == CONSTANTOP) {
GE_CHK_BOOL_RET_STATUS(GraphUtils::RemoveEdge(out_anchor, in_anchor) == GRAPH_SUCCESS, GRAPH_FAILED,
"Remove edge from const op failed.");
if (out_anchor->GetOwnerNode()->GetOutDataNodes().size() == 0) {
if (out_anchor->GetOwnerNode()->GetOutNodes().size() == 0) {
GELOGI("Remove const op %s.", out_anchor->GetOwnerNode()->GetName().c_str());
auto iter = find(nodes_.begin(), nodes_.end(), out_anchor->GetOwnerNode());
if (iter != nodes_.end()) {
@@ -386,7 +407,7 @@ graphStatus ComputeGraph::RemoveConstInput(const NodePtr &node) {

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::RemoveNode(const NodePtr &node) {
if (node == nullptr) {
GELOGE(GRAPH_FAILED, "The node ptr should be not null.");
GELOGE(GRAPH_FAILED, "The node ptr should not be null.");
return GRAPH_FAILED;
}

@@ -415,7 +436,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::RemoveN
// Used in sub_graph scenes
graphStatus ComputeGraph::RemoveInputNode(const NodePtr &node) {
if (node == nullptr) {
GELOGE(GRAPH_FAILED, "The node ptr should be not null.");
GELOGE(GRAPH_FAILED, "The node ptr should not be null.");
return GRAPH_FAILED;
}

@@ -430,7 +451,7 @@ graphStatus ComputeGraph::RemoveInputNode(const NodePtr &node) {
// Used in sub_graph scenes
graphStatus ComputeGraph::RemoveOutputNode(const NodePtr &node) {
if (node == nullptr) {
GELOGE(GRAPH_FAILED, "The node ptr should be not null.");
GELOGE(GRAPH_FAILED, "The node ptr should not be null.");
return GRAPH_FAILED;
}

@@ -451,7 +472,7 @@ graphStatus ComputeGraph::RemoveOutputNode(const NodePtr &node) {

std::shared_ptr<ComputeGraph> ComputeGraph::AddSubGraph(std::shared_ptr<ComputeGraph> sub_graph) {
if (sub_graph == nullptr) {
GELOGE(GRAPH_FAILED, "The graph ptr should be not null.");
GELOGE(GRAPH_FAILED, "The graph ptr should not be null.");
return nullptr;
}
sub_graph_.push_back(sub_graph);
@@ -461,7 +482,7 @@ std::shared_ptr<ComputeGraph> ComputeGraph::AddSubGraph(std::shared_ptr<ComputeG

graphStatus ComputeGraph::RemoveSubGraph(const std::shared_ptr<ComputeGraph> &sub_graph) {
if (sub_graph == nullptr) {
GELOGE(GRAPH_FAILED, "The graph ptr should be not null.");
GELOGE(GRAPH_FAILED, "The graph ptr should not be null.");
return GRAPH_FAILED;
}

@@ -500,8 +521,7 @@ ComputeGraph::AddSubgraph(const std::string &name, const std::shared_ptr<Compute
return GRAPH_PARAM_INVALID;
}
if (!this->parent_graph_.expired()) {
GE_LOGE("The subgraphs can only be added to the root graph");
return GRAPH_PARAM_INVALID;
GELOGW("The subgraphs should only be added to the root graph");
}
if (name != subgraph->GetName()) {
GELOGW("The subgraph name %s is different with input %s", subgraph->GetName().c_str(), name.c_str());
@@ -653,7 +673,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::InsertE
GELOGW("node or OpDescPtr is nullptr.");
continue;
}
GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "The node should be not null."); return GRAPH_FAILED);
GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "The node should not be null."); return GRAPH_FAILED);
if (node->GetOpDesc()->GetType() == RECV) {
auto iter = find(node_vec.begin(), node_vec.end(), node);
if (iter == node_vec.end()) {
@@ -799,7 +819,8 @@ graphStatus ComputeGraph::CollectBreadthOutNode(const NodePtr &node, std::map<No
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::TopologicalSorting() {
auto ret = TopologicalSortingGraph();
if (ret != SUCCESS) {
GELOGE(ret, "Sub graph partition Failed");
GraphUtils::DumpGEGraphToOnnx(*this, "black_box");
GELOGE(ret, "Graph [%s] topological sort failed, saved to file black_box", name_.c_str());
return ret;
}

@@ -1117,9 +1138,11 @@ graphStatus ComputeGraph::RemoveExtraOutEdge(const NodePtr &node) {
}

graphStatus ComputeGraph::Verify() {
bool is_unknown_graph = GetGraphUnknownFlag();
for (const auto &node_ptr : GetAllNodes()) {
GE_CHECK_NOTNULL(node_ptr);
GE_CHECK_NOTNULL(node_ptr->GetOpDesc());
GE_IF_BOOL_EXEC(is_unknown_graph, continue);
GE_CHK_BOOL_EXEC(node_ptr->GetOpDesc()->CommonVerify() == GRAPH_SUCCESS, return GRAPH_FAILED,
"Verifying %s failed.", node_ptr->GetName().c_str());
}


+ 29
- 0
src/common/graph/ge_attr_define.cc View File

@@ -158,6 +158,10 @@ const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size";
const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims";
const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size";

// Identify node connecting to input and output
const std::string ATTR_NAME_NODE_CONNECT_INPUT = "_is_connected_to_data";
const std::string ATTR_NAME_NODE_CONNECT_OUTPUT = "_is_connected_to_netoutput";

// To be deleted
const std::string ATTR_TO_BE_DELETED = "to_be_deleted";
const std::string PERMUTE_RESHAPE_FUSION = "permute_reshape_fusion";
@@ -905,6 +909,7 @@ const std::string ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE = "is_end_of_inputmem_l
const std::string ATTR_NAME_PRED_VALUE = "_pred_value";
const std::string ATTR_NAME_BATCH_NUM = "_batch_num";
const std::string ATTR_NAME_BATCH_LABEL = "_batch_label";
const std::string ATTR_NAME_COMBINED_BATCH = "_combined_batch";

// Control flow
const std::string ATTR_NAME_STREAM_SWITCH_COND = "switch_condition";
@@ -914,6 +919,7 @@ const std::string ATTR_NAME_SWITCHN_PRED_VALUE = "switch_pred_value";
const std::string ATTR_NAME_ITERATORS_PER_LOOP = "iterations_per_loop";
const std::string ATTR_NAME_FLOW_CTRL_NODE_FLAG = "is_flow_ctrl_node";
const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE = "subgraph_first_active";
const std::string ATTR_NAME_COMBINED_DYNAMIC_DIMS = "combined_dynamic_dims";

const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL = "_switch_branch_node_label";
const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG = "_switch_true_branch_flag";
@@ -983,6 +989,8 @@ const std::string ATTR_INSERT_BY_MBATCH = "mbatch-inserted-node";

const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS = "_mbatch_origin_input_dims";

const std::string ATTR_DYNAMIC_TYPE = "mbatch_dynamic_type";

// For inserted op
const std::string ATTR_INSERTED_BY_GE = "_inserted_by_ge";

@@ -1021,6 +1029,10 @@ const std::string ATTR_NAME_VALID_OUTPUT_SHAPE_LIST_LIST = "_valid_output_shape_
const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST = "_input_offset_list_list";
const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_output_offset_list_list";

// for unregistered op
const std::string ATTR_NAME_UNREGST_OPPATH = "_unregst_oppath";
const std::string ATTR_NAME_UNREGST_ATTRLIST = "_unregst_attrlist";

// used for Horovod
const std::string ATTR_INTER_EVENT_IDENTIFY = "event_id";
const std::string ATTR_HOROVOD_ATTR_REDUCE_TYPE = "reduce_op";
@@ -1032,6 +1044,23 @@ const std::string ATTR_NAME_HCCL_FUSED_FLAG = "_hccl_fused_node";
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr";
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index";

// atc user def dtype&format
const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type";
const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format";

// for fusion op plugin
const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE = "_fusionop_original_type";

// graph partition for aicpu
const std::string ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME = "pld_front_node_engine_name";
const std::string ATTR_NAME_END_REAR_NODE_ENGINE_NAME = "end_rear_node_engine_name";

// input and output memory type
const std::string ATTR_VARIABLE_PLACEMENT = "_variable_placement";
const std::string ATTR_INPUT_MEMORY_TYPE = "_input_memory_type";
const std::string ATTR_OUTPUT_MEMORY_TYPE = "_output_memory_type";

// input_output_offset
const std::string ATTR_ZERO_COPY_BASIC_OFFSET = "_zero_copy_basic_offset";
const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET = "_zero_copy_relative_offset";
} // namespace ge

+ 9
- 17
src/common/graph/ge_attr_value.cc View File

@@ -1216,27 +1216,16 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CloneOpDesc(
GE_CHK_BOOL_EXEC(imp.UnserializeOpDesc(op_desc, *op_def), return op_desc, "op_desc unserialize failed");
op_desc->extAttrs_ = org_op_desc->extAttrs_;

if (op_desc->HasAttr("_input_name_idx_key")) {
if (op_desc->DelAttr("_input_name_idx_key") != SUCCESS) {
GELOGE(GRAPH_FAILED, "DelAttr _input_name_idx_key failed.");
}
// This function may be called by some passes of fusion engine, in this condition, do not need these attribute
if (!op_desc->input_name_idx_.empty()) {
op_desc->input_name_idx_.clear();
}

if (op_desc->HasAttr("_input_name_idx_value")) {
if (op_desc->DelAttr("_input_name_idx_value") != SUCCESS) {
GELOGE(GRAPH_FAILED, "DelAttr _input_name_idx_value failed.");
}
}

if (op_desc->HasAttr("_opt_input")) {
if (op_desc->DelAttr("_opt_input") != SUCCESS) {
GELOGE(GRAPH_FAILED, "DelAttr _opt_input failed.");
}
}

if (!op_desc->output_name_idx_.empty()) {
op_desc->output_name_idx_.clear();
}
if (!op_desc->optional_input_names_.empty()) {
op_desc->optional_input_names_.clear();
}

return op_desc;
}
@@ -1260,6 +1249,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CopyOpDesc(c

op_desc->extAttrs_ = org_op_desc->extAttrs_;

op_desc->input_name_idx_.insert(org_op_desc->input_name_idx_.begin(), org_op_desc->input_name_idx_.end());
op_desc->optional_input_names_.insert(org_op_desc->optional_input_names_.begin(),
org_op_desc->optional_input_names_.end());
op_desc->output_name_idx_.insert(org_op_desc->output_name_idx_.begin(), org_op_desc->output_name_idx_.end());

op_desc->infer_func_ = org_op_desc->infer_func_;


+ 2
- 0
src/common/graph/graph.mk View File

@@ -124,6 +124,7 @@ LOCAL_SRC_FILES := \
../../out/graph/lib64/stub/operator.cc \
../../out/graph/lib64/stub/operator_factory.cc \
../../out/graph/lib64/stub/tensor.cc \
../../out/graph/lib64/stub/inference_context.cc \


LOCAL_SHARED_LIBRARIES :=
@@ -201,6 +202,7 @@ LOCAL_SRC_FILES := \
../../out/graph/lib64/stub/operator.cc \
../../out/graph/lib64/stub/operator_factory.cc \
../../out/graph/lib64/stub/tensor.cc \
../../out/graph/lib64/stub/inference_context.cc \


LOCAL_SHARED_LIBRARIES :=


+ 95
- 24
src/common/graph/model_serialize.cc View File

@@ -128,21 +128,42 @@ bool ModelSerializeImp::SerializeOpDesc(const ConstOpDescPtr &op_desc, proto::Op
for (const std::string &name : op_desc->GetSubgraphInstanceNames()) {
op_def_proto->add_subgraph_name(name);
}
if (!op_desc->output_name_idx_.empty()) {
proto::AttrDef key;
proto::AttrDef value;
for (auto &item : op_desc->output_name_idx_) {
key.mutable_list()->add_s(item.first);
value.mutable_list()->add_i(item.second);
}
auto op_desc_attr = op_def_proto->mutable_attr();
op_desc_attr->insert({"_output_name_key", key});
op_desc_attr->insert({"_output_name_value", value});
}
OpDescToAttrDef(op_desc, op_def_proto);
}
return true;
}

void ModelSerializeImp::OpDescToAttrDef(const ConstOpDescPtr &op_desc, proto::OpDef *op_def_proto) {
proto::AttrDef key_in;
proto::AttrDef value_in;
auto op_desc_attr = op_def_proto->mutable_attr();
if (!op_desc->input_name_idx_.empty()) {
for (auto &item : op_desc->input_name_idx_) {
key_in.mutable_list()->add_s(item.first);
value_in.mutable_list()->add_i(item.second);
}
op_desc_attr->insert({"_input_name_key", key_in});
op_desc_attr->insert({"_input_name_value", value_in});
}
proto::AttrDef key_out;
proto::AttrDef value_out;
if (!op_desc->output_name_idx_.empty()) {
for (auto &item : op_desc->output_name_idx_) {
key_out.mutable_list()->add_s(item.first);
value_out.mutable_list()->add_i(item.second);
}
op_desc_attr->insert({"_output_name_key", key_out});
op_desc_attr->insert({"_output_name_value", value_out});
}
proto::AttrDef opt_input;
if (!op_desc->optional_input_names_.empty()) {
for (auto &item : op_desc->optional_input_names_) {
opt_input.mutable_list()->add_s(item);
}
op_desc_attr->insert({"_opt_input", opt_input});
}
}

bool ModelSerializeImp::SerializeNode(const NodePtr &node, proto::OpDef *op_def_proto, bool is_dump) {
if (node == nullptr || op_def_proto == nullptr) {
GELOGE(GRAPH_FAILED, "Input Para Node Invalid");
@@ -236,13 +257,70 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ModelSerializeImp::Unseriali
}
}

// Restores op_desc's private name->index maps and optional-input name set from
// the key/value vectors extracted out of the serialized reserved attributes.
// A key vector whose size differs from its value vector is logged and skipped
// (best-effort: the other maps are still restored).
// Fix: the input-branch warning previously logged key_out.size() instead of
// key_in.size(), misreporting the mismatched input-key count.
void ModelSerializeImp::AttrDefToOpDesc(OpDescPtr &op_desc, std::vector<string> &key_in, std::vector<string> &key_out,
                                        std::vector<uint32_t> &value_in, std::vector<uint32_t> &value_out,
                                        std::vector<string> &opt_input) {
  if (!key_in.empty()) {
    if (key_in.size() != value_in.size()) {
      GELOGW("Key and value vector size is different. key_size: %zu, value_size: %zu.", key_in.size(),
             value_in.size());
    } else {
      for (uint32_t i = 0; i < key_in.size(); ++i) {
        op_desc->input_name_idx_.insert(std::pair<string, uint32_t>(key_in.at(i), value_in.at(i)));
      }
    }
  }
  if (!key_out.empty()) {
    if (key_out.size() != value_out.size()) {
      GELOGW("Key and value vector size is different. key_size: %zu, value_size: %zu.", key_out.size(),
             value_out.size());
    } else {
      for (uint32_t i = 0; i < key_out.size(); ++i) {
        op_desc->output_name_idx_.insert(std::pair<string, uint32_t>(key_out.at(i), value_out.at(i)));
      }
    }
  }
  if (!opt_input.empty()) {
    for (const auto &name : opt_input) {
      op_desc->optional_input_names_.insert(name);
    }
  }
}

bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_def_proto) {
std::vector<string> key;
std::vector<uint32_t> value;
std::vector<string> opt_input;
std::vector<string> key_in;
std::vector<uint32_t> value_in;
if (op_def_proto.attr().count("_opt_input") > 0) {
auto &name_list = op_def_proto.attr().at("_opt_input").list();
for (const auto &item_s : name_list.s()) {
opt_input.push_back(item_s);
}
auto op_desc_attr = op_def_proto.mutable_attr();
op_desc_attr->erase("_opt_input");
}
if (op_def_proto.attr().count("_input_name_key") > 0) {
auto &output_name_key_list = op_def_proto.attr().at("_input_name_key").list();
for (const auto &item_s : output_name_key_list.s()) {
key_in.push_back(item_s);
}
auto op_desc_attr = op_def_proto.mutable_attr();
op_desc_attr->erase("_input_name_key");
}
if (op_def_proto.attr().count("_input_name_value") > 0) {
auto &input_name_value_list = op_def_proto.attr().at("_input_name_value").list();
for (const auto &item_i : input_name_value_list.i()) {
value_in.push_back(static_cast<uint32_t>(item_i));
}
auto op_desc_attr = op_def_proto.mutable_attr();
op_desc_attr->erase("_input_name_value");
}
std::vector<string> key_out;
std::vector<uint32_t> value_out;
if (op_def_proto.attr().count("_output_name_key") > 0) {
auto &output_name_key_list = op_def_proto.attr().at("_output_name_key").list();
for (const auto &item_s : output_name_key_list.s()) {
key.push_back(item_s);
key_out.push_back(item_s);
}
auto op_desc_attr = op_def_proto.mutable_attr();
op_desc_attr->erase("_output_name_key");
@@ -250,7 +328,7 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d
if (op_def_proto.attr().count("_output_name_value") > 0) {
auto &output_name_value_list = op_def_proto.attr().at("_output_name_value").list();
for (const auto &item_i : output_name_value_list.i()) {
value.push_back(static_cast<uint32_t>(item_i));
value_out.push_back(static_cast<uint32_t>(item_i));
}
auto op_desc_attr = op_def_proto.mutable_attr();
op_desc_attr->erase("_output_name_value");
@@ -281,15 +359,8 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d
op_desc->SetSubgraphInstanceName(graph_index++, name);
}

if (key.size() != 0) {
if (key.size() != value.size()) {
GELOGE(GRAPH_FAILED, "twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size());
} else {
for (uint32_t i = 0; i < key.size(); ++i) {
op_desc->output_name_idx_.insert(std::pair<string, uint32_t>(key.at(i), value.at(i)));
}
}
}
// insert name index by key and value
AttrDefToOpDesc(op_desc, key_in, key_out, value_in, value_out, opt_input);

return true;
}


+ 16
- 18
src/common/graph/node.cc View File

@@ -449,9 +449,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InDataAnchorPtr Node::GetInDataAn
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AnchorPtr Node::GetInAnchor(int idx) const {
// Idx can't be less than -1 or >= in_data_anchors_.size(), -1 means index of control anchor_
if (idx < -1 || idx >= static_cast<int>(in_data_anchors_.size())) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E19019", {"opname", "index", "anchorname", "optype"},
{GetName().c_str(), std::to_string(idx), "in_anchor", GetType().c_str()});
GELOGW("Op[%s] doesn't have index[%d]'s in_anchor which optype is %s.", GetName().c_str(), idx, GetType().c_str());
return nullptr;
} else {
@@ -743,26 +740,27 @@ graphStatus Node::Verify() const {
const string aipp_data_type = "AippData";
const string const_type = "Const";
const string variable_type = "Variable";
bool is_unknown_graph = GetOwnerComputeGraph()->GetGraphUnknownFlag();
GE_CHK_BOOL_EXEC(op_ != nullptr, return GRAPH_FAILED, "original OpDesc is nullptr");

for (const auto &in_anchor_ptr : GetAllInDataAnchors()) {
if (in_anchor_ptr == nullptr) {
GELOGW("in anchor ptr is null");
continue;
}
bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type ||
op_->GetType() == const_type || op_->GetType() == variable_type ||
op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0;
if (!valid_anchor) {
ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"name", "index"},
{GetName(), std::to_string(in_anchor_ptr->GetIdx())});
GELOGE(GRAPH_FAILED, "operator %s's input %d is not linked.", GetName().c_str(), in_anchor_ptr->GetIdx());
return GRAPH_FAILED;
if (!is_unknown_graph) {
for (const auto &in_anchor_ptr : GetAllInDataAnchors()) {
GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue);
bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type ||
op_->GetType() == const_type || op_->GetType() == variable_type ||
op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0;
if (!valid_anchor) {
ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"},
{GetName(), std::to_string(in_anchor_ptr->GetIdx())});
GELOGE(GRAPH_FAILED, "operator %s's input %d is not linked.", GetName().c_str(), in_anchor_ptr->GetIdx());
return GRAPH_FAILED;
}
}
}

string frameworkop_type = "FrameworkOp";
if (op_->GetType() != frameworkop_type) {
bool need_update_name = op_->GetType() != frameworkop_type && !is_unknown_graph;
if (need_update_name) {
auto node_op = ge::OperatorFactoryImpl::CreateOperator("node_op", op_->GetType());
if (node_op.IsEmpty()) {
GELOGW("get op from OperatorFactory fail. opType: %s", op_->GetType().c_str());
@@ -782,7 +780,7 @@ graphStatus Node::Verify() const {
}
node_op.BreakConnect();
}
GE_IF_BOOL_EXEC(is_unknown_graph, return GRAPH_SUCCESS;);
if (op_->CommonVerify() == GRAPH_SUCCESS) {
Operator op_proxy = ge::OpDescUtils::CreateOperatorFromNode(shared_from_this());
auto verify_func = op_->GetVerifyFunc();


+ 39
- 102
src/common/graph/op_desc.cc View File

@@ -64,12 +64,6 @@ const std::string ATTR_NAME_IS_INPUT_CONST = "is_input_const";

const std::string ATTR_NAME_OP_INFER_DEPENDS = "_op_infer_depends";

const std::string ATTR_NAME_OPT_INPUT = "_opt_input";

const std::string ATTR_NAME_INPUT_NAME_IDX_KEY = "_input_name_idx_key";

const std::string ATTR_NAME_INPUT_NAME_IDX_VALUE = "_input_name_idx_value";

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDesc::OpDesc() {
op_def_.InitDefault();
if (op_def_.GetProtoMsg() != nullptr) {
@@ -211,8 +205,7 @@ graphStatus OpDesc::AddInputDesc(uint32_t index, const ge::GeTensorDesc &input_d
}

graphStatus OpDesc::AddInputDesc(const string &name, const ge::GeTensorDesc &input_desc) {
auto input_name_idx = GetAllInputName();
if (input_name_idx.find(name) != input_name_idx.end()) {
if (input_name_idx_.find(name) != input_name_idx_.end()) {
GELOGI("input %s is exist, update it", name.c_str());
graphStatus ret = UpdateInputDesc(name, input_desc);
return ret;
@@ -224,17 +217,15 @@ graphStatus OpDesc::AddInputDesc(const string &name, const ge::GeTensorDesc &inp
return GRAPH_FAILED;
}
inputs_desc_.push_back(in_desc);
(void)input_name_idx.insert(make_pair(name, index));
SetAllInputName(input_name_idx);
(void)input_name_idx_.insert(make_pair(name, index));
return GRAPH_SUCCESS;
}
}

graphStatus OpDesc::AddInputDescMiddle(const string &name, const unsigned int num, size_t index) {
auto input_name_idx = GetAllInputName();
for (unsigned int i = 0; i < num; i++) {
string input_name = name + std::to_string(i);
GE_CHK_BOOL_RET_STATUS((input_name_idx.find(input_name) == input_name_idx.end()), GRAPH_FAILED,
GE_CHK_BOOL_RET_STATUS((input_name_idx_.find(input_name) == input_name_idx_.end()), GRAPH_FAILED,
"Add input tensor_desc is existed. name[%s]", input_name.c_str());

std::shared_ptr<GeTensorDesc> in_desc = ComGraphMakeShared<GeTensorDesc>(GeTensorDesc());
@@ -251,24 +242,22 @@ graphStatus OpDesc::AddInputDescMiddle(const string &name, const unsigned int nu
(void)inputs_desc_.insert(inputs_desc_.begin() + index + i, in_desc);

// Update index in input_name_idx
for (auto it = input_name_idx.begin(); it != input_name_idx.end(); ++it) {
for (auto it = input_name_idx_.begin(); it != input_name_idx_.end(); ++it) {
if (it->second >= (index + i)) {
it->second += 1;
}
}

(void)input_name_idx.insert(make_pair(input_name, i + index));
(void)input_name_idx_.insert(make_pair(input_name, i + index));
}
SetAllInputName(input_name_idx);

return GRAPH_SUCCESS;
}

graphStatus OpDesc::AddInputDescForward(const string &name, const unsigned int num) {
auto input_name_idx = GetAllInputName();
for (unsigned int i = 0; i < num; i++) {
string input_name = name + std::to_string(i);
GE_CHK_BOOL_RET_STATUS((input_name_idx.find(input_name) == input_name_idx.end()), GRAPH_FAILED,
GE_CHK_BOOL_RET_STATUS((input_name_idx_.find(input_name) == input_name_idx_.end()), GRAPH_FAILED,
"Add input tensor_desc is existed. name[%s]", input_name.c_str());

std::shared_ptr<GeTensorDesc> in_desc = ComGraphMakeShared<GeTensorDesc>(GeTensorDesc());
@@ -279,13 +268,12 @@ graphStatus OpDesc::AddInputDescForward(const string &name, const unsigned int n
(void)inputs_desc_.insert(inputs_desc_.begin(), in_desc);

// Update index in input_name_idx
for (auto it = input_name_idx.begin(); it != input_name_idx.end(); ++it) {
for (auto it = input_name_idx_.begin(); it != input_name_idx_.end(); ++it) {
it->second += 1;
}

(void)input_name_idx.insert(make_pair(input_name, 0));
(void)input_name_idx_.insert(make_pair(input_name, 0));
}
SetAllInputName(input_name_idx);

return GRAPH_SUCCESS;
}
@@ -316,19 +304,10 @@ graphStatus OpDesc::AddOutputDescForward(const string &name, const unsigned int

graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorDesc &input_desc) {
if (OpDesc::AddInputDesc(name, input_desc) == GRAPH_FAILED) return GRAPH_FAILED;
vector<string> optional_input_names;
(void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names);
optional_input_names.push_back(name);
(void)AttrUtils::SetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names);
(void)optional_input_names_.insert(name);
return GRAPH_SUCCESS;
}

std::vector<string> OpDesc::GetAllOptionalInputName() const {
vector<string> optional_input_names;
(void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names);
return optional_input_names;
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) {
GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. index[%u]", index);
@@ -343,12 +322,11 @@ OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) {
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescMembersAreEqual(const OpDesc &r_op_desc) const {
return (
IsEqual(this->GetAllInputName(), r_op_desc.GetAllInputName(), "OpDesc.GetAllInputName()") &&
IsEqual(this->output_name_idx_, r_op_desc.output_name_idx_, "OpDesc.output_name_idx_") &&
IsEqual(this->GetAllOptionalInputName(), r_op_desc.GetAllOptionalInputName(), "OpDesc.GetAllOptionalInputName()") &&
IsEqual(this->engine_name_, r_op_desc.engine_name_, "OpDesc.engine_name_") &&
IsEqual(this->op_kernel_lib_name_, r_op_desc.op_kernel_lib_name_, "OpDesc.op_kernel_lib_name_"));
return (IsEqual(this->input_name_idx_, r_op_desc.input_name_idx_, "OpDesc.input_name_idx_") &&
IsEqual(this->output_name_idx_, r_op_desc.output_name_idx_, "OpDesc.output_name_idx_") &&
IsEqual(this->optional_input_names_, r_op_desc.optional_input_names_, "OpDesc.optional_input_names_") &&
IsEqual(this->engine_name_, r_op_desc.engine_name_, "OpDesc.engine_name_") &&
IsEqual(this->op_kernel_lib_name_, r_op_desc.op_kernel_lib_name_, "OpDesc.op_kernel_lib_name_"));
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescAttrsAreEqual(const OpDesc &r_op_desc) const {
@@ -422,9 +400,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::operator==(const OpD
}

graphStatus OpDesc::UpdateInputDesc(const string &name, const ge::GeTensorDesc &tensor_Desc) {
auto input_name_idx = GetAllInputName();
auto it = input_name_idx.find(name);
if (it == input_name_idx.end()) {
auto it = input_name_idx_.find(name);
if (it == input_name_idx_.end()) {
GELOGW("Cann't find the input desc. name[%s]", name.c_str());
return GRAPH_FAILED;
}
@@ -444,9 +421,8 @@ graphStatus OpDesc::UpdateInputDesc(const string &name, const ge::GeTensorDesc &
}

bool OpDesc::InputIsSet(const string &name) const {
auto input_name_idx = GetAllInputName();
auto it = input_name_idx.find(name);
if (it != input_name_idx.end()) {
auto it = input_name_idx_.find(name);
if (it != input_name_idx_.end()) {
GE_IF_BOOL_EXEC(it->second >= inputs_desc_.size(), GELOGE(GRAPH_FAILED, "it->second is invalid."); return false);
auto tensor_desc = inputs_desc_[it->second];
GE_IF_BOOL_EXEC(tensor_desc == nullptr, GELOGE(GRAPH_FAILED, "tensor_desc is null."); return false);
@@ -464,9 +440,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc OpDesc::GetInputDesc
}

GeTensorDesc OpDesc::GetInputDesc(const string &name) const {
auto input_name_idx = GetAllInputName();
auto it = input_name_idx.find(name);
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), GeTensorDesc());
auto it = input_name_idx_.find(name);
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), GeTensorDesc());
GE_CHK_BOOL_RET_STATUS_NOLOG(it->second < inputs_desc_.size(), GeTensorDesc());
return *(inputs_desc_[it->second].get());
}
@@ -476,7 +451,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDescPtr OpDesc::MutableIn
if (inputs_desc_[index] == nullptr) {
return nullptr;
}
GE_CHK_BOOL_RET_STATUS(inputs_desc_[index]->IsValid() == GRAPH_SUCCESS, nullptr, "input desc is invalid");
if (inputs_desc_[index]->IsValid() != GRAPH_SUCCESS) {
GELOGW("input desc is invalid");
return nullptr;
}
return inputs_desc_[index];
}

@@ -491,12 +469,11 @@ GeTensorDescPtr OpDesc::MutableInputDesc(const string &name) const {
}

GE_FUNC_HOST_VISIBILITY OpDesc::Vistor<string> OpDesc::GetAllInputNames() const {
auto input_name_idx = GetAllInputName();
vector<string> names;
if (input_name_idx.empty()) {
if (input_name_idx_.empty()) {
return OpDesc::Vistor<string>(shared_from_this(), names);
}
for (std::pair<string, uint32_t> input : input_name_idx) {
for (std::pair<string, uint32_t> input : input_name_idx_) {
names.push_back(input.first);
}
return OpDesc::Vistor<string>(shared_from_this(), names);
@@ -672,9 +649,8 @@ OpDesc::GetInputDescPtrDfault(uint32_t index) const {
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ConstGeTensorDescPtr OpDesc::GetInputDescPtr(const string &name) const {
auto input_name_idx = GetAllInputName();
auto it = input_name_idx.find(name);
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), shared_ptr<const GeTensorDesc>());
auto it = input_name_idx_.find(name);
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), shared_ptr<const GeTensorDesc>());
return inputs_desc_[it->second];
}

@@ -708,45 +684,12 @@ graphStatus OpDesc::AddDynamicOutputDesc(const string &name, const unsigned int
}

bool OpDesc::IsOptionalInput(const string &name) const {
vector<string> optional_input_names;
(void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names);
for (auto &item : optional_input_names) {
if (item == name) {
return true;
}
}
return false;
return optional_input_names_.find(name) != optional_input_names_.end();
}

bool OpDesc::IsOptionalInput(uint32_t index) const { return IsOptionalInput(GetInputNameByIndex(index)); }

std::map<string, uint32_t> OpDesc::GetAllInputName() const {
std::map<string, uint32_t> input_name_idx;
std::vector<string> key;
std::vector<uint32_t> value;
(void)AttrUtils::GetListStr(this, ATTR_NAME_INPUT_NAME_IDX_KEY, key);
(void)AttrUtils::GetListInt(this, ATTR_NAME_INPUT_NAME_IDX_VALUE, value);

if (key.size() != value.size()) {
GE_LOGE("twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size());
} else {
for (uint32_t i = 0; i < key.size(); ++i) {
input_name_idx.insert(std::pair<string, uint32_t>(key.at(i), value.at(i)));
}
}
return input_name_idx;
}

void OpDesc::SetAllInputName(const std::map<string, uint32_t> &input_name_idx) {
std::vector<string> key;
std::vector<uint32_t> value;
for (auto &item : input_name_idx) {
key.emplace_back(item.first);
value.emplace_back(item.second);
}
(void)AttrUtils::SetListStr(this, ATTR_NAME_INPUT_NAME_IDX_KEY, key);
(void)AttrUtils::SetListInt(this, ATTR_NAME_INPUT_NAME_IDX_VALUE, value);
}
std::map<string, uint32_t> OpDesc::GetAllInputName() const { return input_name_idx_; }

std::map<string, uint32_t> OpDesc::GetAllOutputName() { return output_name_idx_; }

@@ -757,7 +700,6 @@ bool OpDesc::UpdateInputName(std::map<string, uint32_t> input_name_idx) {
auto factory_map_size = input_name_idx.size();
// It indicates that some inputs have no optionalname.
// The redundant optionalname of factory needs to be deleted and then assigned
auto all_input_name_idx = GetAllInputName();
if (input_map_size < factory_map_size) {
GELOGI("UpdateInputName org inputname map size: %zu, factory inputname map size: %zu", input_map_size,
factory_map_size);
@@ -770,18 +712,17 @@ bool OpDesc::UpdateInputName(std::map<string, uint32_t> input_name_idx) {
}
if (input_name_idx.size() == input_map_size) {
GELOGI("UpdateInputName");
all_input_name_idx = input_name_idx;
input_name_idx_ = input_name_idx;
} else {
ret = false;
GELOGW("after UpdateInputName factoryName map size : %zu", input_name_idx.size());
}
} else if (input_map_size == factory_map_size) {
all_input_name_idx = input_name_idx;
input_name_idx_ = input_name_idx;
} else {
ret = false;
GELOGW("org inputname map size: %zu, factory inputname map size: %zu", input_map_size, factory_map_size);
}
SetAllInputName(all_input_name_idx);
return ret;
}

@@ -924,21 +865,19 @@ graphStatus OpDesc::CommonVerify() const {
}

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetInputNameByIndex(uint32_t index) const {
auto input_name_idx = GetAllInputName();
auto it = input_name_idx.begin();
for (; it != input_name_idx.end(); ++it) {
auto it = input_name_idx_.begin();
for (; it != input_name_idx_.end(); ++it) {
if (it->second == index) {
break;
}
}
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), "");
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), "");
return it->first;
}

int OpDesc::GetInputIndexByName(const string &name) const {
auto input_name_idx = GetAllInputName();
auto it_find = input_name_idx.find(name);
GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != input_name_idx.end(), -1);
auto it_find = input_name_idx_.find(name);
GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != input_name_idx_.end(), -1);
return static_cast<int>(it_find->second);
}

@@ -1231,12 +1170,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector<bool> OpDesc::GetIsInputCo

GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDesc::RestoreInputNameIdx(const string &name,
const int &index) {
auto input_name_idx = GetAllInputName();
if (input_name_idx.find(name) != input_name_idx.end()) {
if (input_name_idx_.find(name) != input_name_idx_.end()) {
GELOGI("Restore input name index is existed. name[%s]", name.c_str());
}
(void)input_name_idx.insert(make_pair(name, index));
SetAllInputName(input_name_idx);
(void)input_name_idx_.insert(make_pair(name, index));
return GRAPH_SUCCESS;
}



+ 35
- 0
src/common/graph/ref_relation.cc View File

@@ -170,6 +170,7 @@ graphStatus RefRelations::Impl::BuildRefRelationsForWhile(
// data_nodes has been sorted
// for while, input num must be same as output num
auto input_num = root_node->GetAllInDataAnchorsSize();
NodePtr netoutput = nullptr;

size_t ref_i = 0;
while (ref_i < input_num) {
@@ -212,10 +213,44 @@ graphStatus RefRelations::Impl::BuildRefRelationsForWhile(
cell_netoutput_in.in_out = NODE_IN;
cell_netoutput_in.in_out_idx = ele.second;
ref_i_all_refs.emplace_back(cell_netoutput_in);
netoutput = ele.first;
}
node_refs.emplace_back(ref_i_all_refs);
ref_i++;
}
/* There exists a scene like the following, meaning that the 0'th and 1'th
 * tensors of data0, data1 and netoutput should share the same address.
* Data0 Data1
* \/
* /\
* netoutput
*/
if (netoutput == nullptr) {
return GRAPH_SUCCESS;
}
for (const auto &in_anchor : netoutput->GetAllInDataAnchors()) {
auto peer_out_data_anchor = in_anchor->GetPeerOutAnchor();
if (peer_out_data_anchor == nullptr) {
continue;
}
auto peer_out_data_node = peer_out_data_anchor->GetOwnerNode();
if (peer_out_data_node == nullptr || peer_out_data_node->GetOpDesc() == nullptr) {
GELOGW("Node[%s]\'s peer_out_data_node or peer_out_data_node desc is null", (netoutput->GetName()).c_str());
continue;
}
if (peer_out_data_node->GetType() != DATA) {
continue;
}
auto in_data_anchor_idx = in_anchor->GetIdx();
auto net_in_desc = netoutput->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_data_anchor_idx));
int ref_d;
int ref_n;
(void)AttrUtils::GetInt(peer_out_data_node->GetOpDesc(), kRefIndex, ref_d);
(void)AttrUtils::GetInt(net_in_desc, kRefIndex, ref_n);

node_refs[ref_d].insert(node_refs[ref_d].end(), node_refs[ref_n].begin(), node_refs[ref_n].end());
node_refs[ref_n].insert(node_refs[ref_n].end(), node_refs[ref_d].begin(), node_refs[ref_d].end());
}

return GRAPH_SUCCESS;
}


+ 50
- 26
src/common/graph/shape_refiner.cc View File

@@ -49,10 +49,6 @@ graphStatus ReverseBrushWhileBodySubGraph(const ConstNodePtr &node) {
}

for (const auto &node_sub : sub_graph_body->GetAllNodes()) {
if (node_sub->GetInDataNodes().size() == 0) {
continue;
}

for (size_t i = 0; i < node_sub->GetAllInDataAnchorsSize(); i++) {
auto input_desc = node_sub->GetOpDesc()->MutableInputDesc(i);
(void)input_desc->SetUnknownDimNumShape();
@@ -303,11 +299,11 @@ graphStatus UpdateParentNodeOutTensor(const ConstNodePtr &node) {
}
} // namespace
void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase) {
if (node == nullptr) {
GELOGE(GRAPH_FAILED, "node is null");
if (!IsLogEnable(GE, DLOG_DEBUG)) {
return;
}
if (!IsLogEnable(GE, DLOG_DEBUG)) {
if (node == nullptr) {
GELOGE(GRAPH_FAILED, "node is null");
return;
}
ge::OpDescPtr op_desc = node->GetOpDesc();
@@ -325,6 +321,18 @@ void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::str
TypeUtils::FormatToSerialString(input_desc->GetFormat()) + " ";
}
str += input_desc_str;

input_desc_str = "input origin shape: ";
for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) {
input_desc_str += "[";
for (int64_t dim : input_desc->GetOriginShape().GetDims()) {
input_desc_str += std::to_string(dim) + " ";
}
input_desc_str += "]";
input_desc_str += ":" + TypeUtils::DataTypeToSerialString(input_desc->GetOriginDataType()) + ":" +
TypeUtils::FormatToSerialString(input_desc->GetOriginFormat()) + " ";
}
str += input_desc_str;
}

if (op_desc->GetAllOutputsDescSize() != 0) {
@@ -342,6 +350,21 @@ void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::str
TypeUtils::FormatToSerialString(output_desc->GetFormat()) + " ";
}
str += output_desc_str;

output_desc_str = "output origin shape: ";
for (const auto &output_desc : op_desc->GetAllOutputsDescPtr()) {
if (output_desc == nullptr) {
continue;
}
output_desc_str += "[";
for (int64_t dim : output_desc->GetOriginShape().GetDims()) {
output_desc_str += std::to_string(dim) + " ";
}
output_desc_str += "]";
output_desc_str += ":" + TypeUtils::DataTypeToSerialString(output_desc->GetOriginDataType()) + ":" +
TypeUtils::FormatToSerialString(output_desc->GetOriginFormat()) + " ";
}
str += output_desc_str;
}
GELOGD("Shape dump [%s], Node name: [%s]. %s", phase.c_str(), node->GetName().c_str(), str.c_str());
}
@@ -362,7 +385,6 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator &
return ret;
}
}

// Get infer func and execute
ret = op_desc->CallInferFunc(op);
if (ret == GRAPH_PARAM_INVALID) {
@@ -479,19 +501,20 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh
GELOGE(GRAPH_FAILED, "Verifying %s failed.", node->GetName().c_str());
return GRAPH_FAILED;
}
PrintInOutTensorShape(node, "before_infershape");
Operator op = OpDescUtils::CreateOperatorFromNode(node);

auto inference_context = CreateInferenceContext(context_map, node);
if (inference_context == nullptr) {
GELOGE(GRAPH_FAILED, "inference context is null");
return GRAPH_FAILED;
bool is_unknown_graph = node->GetOwnerComputeGraph()->GetGraphUnknownFlag();
if (!is_unknown_graph) {
auto inference_context = CreateInferenceContext(context_map, node);
if (inference_context == nullptr) {
GELOGE(GRAPH_FAILED, "inference context is null");
return GRAPH_FAILED;
}
GELOGD("create context for node:%s, marks %zu", node->GetName().c_str(), inference_context->GetMarks().size());
op.SetInferenceContext(inference_context);
}

GELOGD("create context for node:%s, marks %zu", node->GetName().c_str(), inference_context->GetMarks().size());

PrintInOutTensorShape(node, "before_infershape");

Operator op = OpDescUtils::CreateOperatorFromNode(node);
op.SetInferenceContext(inference_context);
graphStatus status = InferShapeAndType(node, op, before_subgraph);
if (status == GRAPH_PARAM_INVALID || status == GRAPH_SUCCESS) {
(void)ge::NodeUtils::UpdatePeerNodeInputDesc(node);
@@ -499,16 +522,17 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh
GELOGE(GRAPH_FAILED, "%s call infer function failed.", node->GetName().c_str());
return GRAPH_FAILED;
}

auto ctx_after_infer = op.GetInferenceContext();
if (ctx_after_infer != nullptr) {
GELOGD("[%s] after infershape. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size());
if (!ctx_after_infer->GetOutputHandleShapesAndTypes().empty() || !ctx_after_infer->GetMarks().empty()) {
GELOGD("[%s] set inference context after. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size());
(void)context_map.emplace(node, ctx_after_infer);
if (!is_unknown_graph) {
auto ctx_after_infer = op.GetInferenceContext();
if (ctx_after_infer != nullptr) {
GELOGD("[%s] after infershape. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size());
if (!ctx_after_infer->GetOutputHandleShapesAndTypes().empty() || !ctx_after_infer->GetMarks().empty()) {
GELOGD("[%s] set inference context after. mark:%zu", node->GetName().c_str(),
ctx_after_infer->GetMarks().size());
(void)context_map.emplace(node, ctx_after_infer);
}
}
}

PrintInOutTensorShape(node, "after_infershape");

return GRAPH_SUCCESS;


+ 5
- 5
src/common/graph/utils/ge_ir_utils.h View File

@@ -1,18 +1,18 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
*/

#ifndef COMMON_GRAPH_UTILS_GE_IR_UTILS_H_
#define COMMON_GRAPH_UTILS_GE_IR_UTILS_H_


+ 27
- 6
src/common/graph/utils/node_utils.cc View File

@@ -295,14 +295,16 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer
if (op_desc == nullptr) {
return GRAPH_FAILED;
}
bool is_unknown_graph = node_ptr->GetOwnerComputeGraph()->GetGraphUnknownFlag();
if (is_unknown_graph) {
return GRAPH_SUCCESS;
}
for (const auto &out_anchor : node_ptr->GetAllOutDataAnchors()) {
auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx());
ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast<uint32_t>(output_tensor->GetShape().GetDims().size()));
bool is_unknown_graph = node_ptr->GetOwnerComputeGraph()->GetGraphUnknownFlag();
if (!is_unknown_graph) {
output_tensor->SetOriginShape(output_tensor->GetShape());
output_tensor->SetOriginDataType(output_tensor->GetDataType());
}
output_tensor->SetOriginShape(output_tensor->GetShape());
output_tensor->SetOriginDataType(output_tensor->GetDataType());

GELOGD("node name is %s, origin shape is %ld, origin format is %s, origin data type is %s",
node_ptr->GetName().c_str(), output_tensor->GetOriginShape().GetShapeSize(),
TypeUtils::FormatToSerialString(output_tensor->GetOriginFormat()).c_str(),
@@ -321,8 +323,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer
GELOGI("Peer input opdesc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d",
peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), output_tensor->GetShape().GetDimNum(),
output_tensor->GetDataType(), output_tensor->GetOriginDataType());
peer_input_desc->SetShape(output_tensor->GetShape());
peer_input_desc->SetOriginShape(output_tensor->GetOriginShape());
peer_input_desc->SetShape(output_tensor->GetShape());
peer_input_desc->SetDataType(output_tensor->GetDataType());
peer_input_desc->SetOriginDataType(output_tensor->GetOriginDataType());
std::vector<std::pair<int64_t, int64_t>> shape_range;
@@ -337,6 +339,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer
}
return GRAPH_SUCCESS;
}

bool NodeUtils::IsInNodesEmpty(const Node &node) {
for (const auto &in_anchor : node.in_data_anchors_) {
if (in_anchor != nullptr) {
@@ -446,6 +449,7 @@ std::string NodeUtils::GetNodeType(const Node &node) {
(void)AttrUtils::GetStr(node.GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type);
return type;
}

ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) {
auto op_desc = node.GetOpDesc();
if (op_desc == nullptr) {
@@ -498,6 +502,14 @@ bool NodeUtils::IsSubgraphInput(const NodePtr &node) {
return false;
}
if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) {
bool is_unknown_shape = false;
(void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape);
if (is_unknown_shape) return false;
}

if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) &&
kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 &&
kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) {
return false;
}

@@ -519,7 +531,16 @@ bool NodeUtils::IsSubgraphOutput(const NodePtr &node) {
if (parent_op_desc == nullptr) {
return false;
}

if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) {
bool is_unknown_shape = false;
(void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape);
if (is_unknown_shape) return false;
}

if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) &&
kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 &&
kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) {
return false;
}



+ 23
- 2
src/ge/CMakeLists.txt View File

@@ -95,7 +95,18 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/new_model_manager/task_info/task_info.cc"
"graph/manager/*.cc"
"graph/manager/graph_caching_allocator.cc"
"graph/manager/graph_context.cc"
"graph/manager/graph_manager.cc"
"graph/manager/graph_manager_utils.cc"
"graph/manager/graph_mem_allocator.cc"
"graph/manager/graph_var_manager.cc"
"graph/manager/model_manager/event_manager.cc"
"graph/manager/trans_var_data_utils.cc"
"graph/manager/util/debug.cc"
"graph/manager/util/hcom_util.cc"
"graph/manager/util/rt_context_util.cc"
"graph/manager/util/variable_accelerate_ctrl.cc"
"graph/manager/model_manager/event_manager.cc"
"graph/manager/util/debug.cc"
"graph/manager/util/hcom_util.cc"
@@ -240,7 +251,17 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/new_model_manager/task_info/task_info.cc"
"graph/manager/*.cc"
"graph/manager/graph_caching_allocator.cc"
"graph/manager/graph_context.cc"
"graph/manager/graph_manager.cc"
"graph/manager/graph_manager_utils.cc"
"graph/manager/graph_mem_allocator.cc"
"graph/manager/graph_var_manager.cc"
"graph/manager/model_manager/event_manager.cc"
"graph/manager/trans_var_data_utils.cc"
"graph/manager/util/debug.cc"
"graph/manager/util/rt_context_util.cc"
"graph/manager/util/variable_accelerate_ctrl.cc"
"graph/manager/model_manager/event_manager.cc"
"graph/manager/util/debug.cc"
"graph/manager/util/rt_context_util.cc"


+ 1
- 0
src/ge/common/CMakeLists.txt View File

@@ -54,6 +54,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"helper/om_file_helper.cc"
"math/fp16_math.cc"
"model_parser/base.cc"
# "model_parser/graph_parser_util.cc"
"model_saver.cc"
"op/attr_value_util.cc"
"op/ge_op_utils.cc"


+ 0
- 1
src/ge/common/formats/utils/formats_trans_utils.h View File

@@ -21,7 +21,6 @@
#include <sstream>
#include <string>
#include <vector>

#include "external/graph/types.h"
#include "graph/ge_tensor.h"



+ 5
- 2
src/ge/common/ge/tbe_plugin_manager.cc View File

@@ -182,7 +182,7 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) {
}

void TBEPluginManager::LoadCustomOpLib() {
LoadPluginSo();
LoadPluginSo(options_);

std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
GELOGI("The size of registration_datas is: %zu", registration_datas.size());
@@ -193,10 +193,13 @@ void TBEPluginManager::LoadCustomOpLib() {
}
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo() {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo(
const std::map<string, string> &options) {
vector<string> file_list;
string caffe_parser_path;
std::string plugin_path;

options_ = options;
GetCustomOpPath(plugin_path);

// Whether there are files in the plugin so path


+ 1
- 1
src/ge/common/ge/tbe_plugin_manager.h View File

@@ -48,7 +48,7 @@ class TBEPluginManager {

static void InitPreparation(const std::map<string, string> &options);

void LoadPluginSo();
void LoadPluginSo(const std::map<string, string> &options);

private:
TBEPluginManager() = default;


+ 1
- 0
src/ge/common/ge_common.mk View File

@@ -36,6 +36,7 @@ GE_COMMON_LOCAL_SRC_FILES := \
properties_manager.cc \
types.cc\
model_parser/base.cc \
model_parser/graph_parser_util.cc \
tbe_kernel_store.cc \
op/attr_value_util.cc \
op/ge_op_utils.cc \


+ 28
- 23
src/ge/common/helper/model_helper.cc View File

@@ -91,9 +91,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
}
auto ge_model_weight = ge_model->GetWeight();
GELOGI("WEIGHTS_DATA size is %zu , %p", ge_model_weight.GetSize(), ge_model_weight.GetData());
if (SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, ge_model_weight.GetData(),
ge_model_weight.GetSize()) != SUCCESS) {
GELOGW("Add weight partition failed"); // weight is not necessary
// weight is not necessary
if (ge_model_weight.GetSize() > 0) {
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA,
ge_model_weight.GetData(), ge_model_weight.GetSize()),
"Add weight partition failed");
}

TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore();
@@ -239,45 +241,48 @@ ModelHelper::SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::strin

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(const ge::ModelData &model_data) {
if (model_data.model_data == nullptr || model_data.model_len == 0) {
GELOGE(FAILED, "Model_data is nullptr, or model_data_size is 0");
return FAILED;
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0");
return GE_EXEC_MODEL_DATA_SIZE_INVALID;
}

if (is_assign_model_) {
GELOGE(FAILED, "Model helper has already loaded!");
return FAILED;
GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!");
return GE_EXEC_LOAD_MODEL_REPEATED;
}

if (ReleaseLocalModelData() != SUCCESS) {
GELOGE(FAILED, "ReleaseLocalModelData failed.");
return FAILED;
GELOGE(INTERNAL_ERROR, "ReleaseLocalModelData failed.");
return INTERNAL_ERROR;
}

Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_);
if (ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_) != SUCCESS) {
GELOGE(FAILED, "Parse model content failed!");
return FAILED;
GELOGE(status, "Parse model content failed!");
return status;
}

file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data);

OmFileLoadHelper om_load_helper;
if (om_load_helper.Init(model_addr_tmp_, model_len_tmp_) != SUCCESS) {
GELOGE(FAILED, "Om_load_helper init failed");
status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_);
if (status != SUCCESS) {
GELOGE(status, "Om_load_helper init failed");
model_addr_tmp_ = nullptr;
return FAILED;
return status;
}
auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_addr_tmp_);
if (partition_table->num == kOriginalOmPartitionNum) {
model_addr_tmp_ = nullptr;
GELOGE(FAILED, "om model is error,please use executable om model");
return FAILED;
GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "om model is error,please use executable om model");
return GE_EXEC_MODEL_PARTITION_NUM_INVALID;
}
// Encrypt model need to del temp model/no encrypt model don't need to del model
model_addr_tmp_ = nullptr;

if (GenerateGeModel(om_load_helper) != SUCCESS) {
GELOGE(FAILED, "GenerateGeModel failed");
return FAILED;
status = GenerateGeModel(om_load_helper);
if (status != SUCCESS) {
GELOGE(status, "GenerateGeModel failed");
return status;
}

is_assign_model_ = true;
@@ -289,19 +294,19 @@ Status ModelHelper::GenerateGeModel(OmFileLoadHelper &om_load_helper) {
GE_CHECK_NOTNULL(model_);
Status ret = LoadModelData(om_load_helper);
if (ret != SUCCESS) {
return ret;
return GE_EXEC_LOAD_MODEL_PARTITION_FAILED;
}
ret = LoadWeights(om_load_helper);
if (ret != SUCCESS) {
return ret;
return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED;
}
ret = LoadTask(om_load_helper);
if (ret != SUCCESS) {
return ret;
return GE_EXEC_LOAD_TASK_PARTITION_FAILED;
}
ret = LoadTBEKernelStore(om_load_helper);
if (ret != SUCCESS) {
return ret;
return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED;
}
return SUCCESS;
}


+ 17
- 14
src/ge/common/helper/om_file_helper.cc View File

@@ -41,8 +41,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(c

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(uint8_t *model_data,
const uint32_t model_data_size) {
if (LoadModelPartitionTable(model_data, model_data_size) != SUCCESS) {
return FAILED;
Status status = LoadModelPartitionTable(model_data, model_data_size);
if (status != SUCCESS) {
return status;
}
is_inited_ = true;
return SUCCESS;
@@ -66,7 +67,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod
}

if (!found) {
if (type != ModelPartitionType::TBE_KERNELS) {
if (type != ModelPartitionType::TBE_KERNELS && type != ModelPartitionType::WEIGHTS_DATA) {
GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast<int>(type));
return FAILED;
}
@@ -83,7 +84,9 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const {

// Model length too small
if (model.model_len < (sizeof(ModelFileHeader) + sizeof(ModelPartitionTable))) {
GELOGE(PARAM_INVALID, "Invalid model. length < sizeof(ModelFileHeader) + sizeof(ModelPartitionTable).");
GELOGE(PARAM_INVALID,
"Invalid model. length[%u] < sizeof(ModelFileHeader)[%zu] + sizeof(ModelPartitionTable)[%zu].",
model.model_len, sizeof(ModelFileHeader), sizeof(ModelPartitionTable));
return PARAM_INVALID;
}

@@ -93,9 +96,9 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const {
if ((model_header->length != model.model_len - sizeof(ModelFileHeader)) ||
(MODEL_FILE_MAGIC_NUM != model_header->magic)) {
GELOGE(PARAM_INVALID,
"Invalid model. file_header->length(%u) + sizeof(ModelFileHeader)(%zu) != model->model_len(%u) || "
"MODEL_FILE_MAGIC_NUM != file_header->magic",
model_header->length, sizeof(ModelFileHeader), model.model_len);
"Invalid model. file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || "
"MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]",
model_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, model_header->magic);
return PARAM_INVALID;
}
return SUCCESS;
@@ -112,16 +115,16 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
// Original model partition include graph-info
if ((partition_table->num != PARTITION_SIZE) && (partition_table->num != (PARTITION_SIZE - 1)) &&
(partition_table->num != 1)) {
GELOGE(PARAM_INVALID, "Invalid partition_table->num:%u", partition_table->num);
return PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "Invalid partition_table->num:%u", partition_table->num);
return GE_EXEC_MODEL_PARTITION_NUM_INVALID;
}
size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
GELOGI("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
partition_table->num, sizeof(ModelFileHeader), mem_offset);
if (model_data_size <= mem_offset) {
GELOGE(PARAM_INVALID, "invalid model data, partition_table->num:%u, model data size %u", partition_table->num,
model_data_size);
return PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
partition_table->num, model_data_size);
return GE_EXEC_MODEL_DATA_SIZE_INVALID;
}
for (uint32_t i = 0; i < partition_table->num; i++) {
ModelPartition partition;
@@ -131,9 +134,9 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
context_.partition_datas_.push_back(partition);

if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) {
GELOGE(PARAM_INVALID, "The partition size %zu is greater than the model data size %u.",
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.",
partition.size + mem_offset, model_data_size);
return PARAM_INVALID;
return GE_EXEC_MODEL_DATA_SIZE_INVALID;
}
mem_offset += partition.size;
GELOGI("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size);


+ 16
- 13
src/ge/common/model_parser/base.cc View File

@@ -35,15 +35,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro
ge::ModelData &model_data) {
std::string real_path = RealPath(model_path);
if (real_path.empty()) {
GELOGE(PARAM_INVALID, "Model file path '%s' is invalid", model_path);
return PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path);
return GE_EXEC_MODEL_PATH_INVALID;
}

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(model_path) == -1, return FAILED, "File size not valid.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(model_path) == -1, return GE_EXEC_READ_MODEL_FILE_FAILED,
"File size not valid.");

std::ifstream fs(real_path.c_str(), std::ifstream::binary);

GE_CHK_BOOL_RET_STATUS(fs.is_open(), FAILED, "Open file failed! path:%s", model_path);
GE_CHK_BOOL_RET_STATUS(fs.is_open(), GE_EXEC_READ_MODEL_FILE_FAILED, "Open file failed! path:%s", model_path);

// get length of file:
(void)fs.seekg(0, std::ifstream::end);
@@ -55,7 +56,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro

char *data = new (std::nothrow) char[len];
if (data == nullptr) {
GELOGE(MEMALLOC_FAILED, "Load model From file failed, bad memory allocation occur. (need:%ld)", len);
GELOGE(MEMALLOC_FAILED, "Load model From file failed, bad memory allocation occur. (need:%u)", len);
return MEMALLOC_FAILED;
}

@@ -79,31 +80,33 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::ParseMo
GE_CHECK_NOTNULL(model.model_data);

// Model length too small
GE_CHK_BOOL_RET_STATUS(model.model_len >= sizeof(ModelFileHeader), PARAM_INVALID,
"Invalid model. length < sizeof(ModelFileHeader).");
GE_CHK_BOOL_RET_STATUS(model.model_len >= sizeof(ModelFileHeader), GE_EXEC_MODEL_DATA_SIZE_INVALID,
"Invalid model. Model data size %u must be greater than or equal to %zu.", model.model_len,
sizeof(ModelFileHeader));
// Get file header
auto file_header = reinterpret_cast<ModelFileHeader *>(model.model_data);
// Determine whether the file length and magic number match
GE_CHK_BOOL_RET_STATUS(
file_header->length == model.model_len - sizeof(ModelFileHeader) && file_header->magic == MODEL_FILE_MAGIC_NUM,
PARAM_INVALID,
"Invalid model. file_header->length + sizeof(ModelFileHeader) != model->model_len || MODEL_FILE_MAGIC_NUM != "
"file_header->magic");
GE_EXEC_MODEL_DATA_SIZE_INVALID,
"Invalid model. file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || "
"MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]",
file_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, file_header->magic);

Status res = SUCCESS;

// Get data address
uint8_t *data = reinterpret_cast<uint8_t *>(model.model_data) + sizeof(ModelFileHeader);
if (file_header->is_encrypt == ModelEncryptType::UNENCRYPTED) { // Unencrypted model
GE_CHK_BOOL_RET_STATUS(model.key.empty(), PARAM_INVALID,
GE_CHK_BOOL_RET_STATUS(model.key.empty(), GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION,
"Invalid param. model is unencrypted, but key is not empty.");

model_data = data;
model_len = file_header->length;
GELOGI("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader));
} else {
GELOGE(PARAM_INVALID, "Invalid model. ModelEncryptType not supported.");
res = PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, "Invalid model. ModelEncryptType not supported.");
res = GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION;
}

return res;


+ 483
- 0
src/ge/common/model_parser/graph_parser_util.cc View File

@@ -0,0 +1,483 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph_parser_util.h"
#include <memory>
#include "common/auth/file_saver.h"
#include "common/convert/pb2json.h"
#include "common/debug/log.h"
#include "common/debug/memory_dumper.h"
#include "common/model_parser/base.h"
#include "common/model_saver.h"
#include "common/properties_manager.h"
#include "common/string_util.h"
#include "common/types.h"
#include "common/util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/omg/parser/parser_inner_ctx.h"
#include "graph/compute_graph.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/optimize/common/params.h"
#include "graph/utils/type_utils.h"
#include "omg/omg_inner_types.h"
#include "omg/parser/model_parser.h"
#include "omg/parser/parser_factory.h"
#include "omg/parser/weights_parser.h"
#include "parser/common/pre_checker.h"
#include "proto/ge_ir.pb.h"
#include "register/op_registry.h"
#include "external/register/register_types.h"

namespace ge {
namespace {
// The function is incomplete. Currently, only l2_optimize, off_optimize is supported.
// NOTE(review): the note above looks copied from elsewhere (l2_optimize /
// off_optimize are not referenced in this file) — confirm it still applies.
// Sample and reason fragments used to build E10002-style error messages for
// malformed --input_shape values.
const char *const kInputShapeSample1 = "\"input_name1:n1,c1,h1,w1\"";
const char *const kInputShapeSample2 = "\"input_name1:1,3,224,224\"";
const char *const kSplitError1 = "size not equal to 2 split by \":\"";
const char *const kEmptyError = "can not be empty";
const char *const kFloatNumError = "exist float number";
const char *const kDigitError = "is not digit";

using std::string;
using std::vector;

// Split one "name:dims" entry of --input_shape into {"name", "dims"}.
// The split happens at the LAST ':' so op names containing ':' stay intact.
// Returns an empty vector when no ':' separator is present at all.
vector<string> SplitInputShape(const std::string &input_shape) {
  vector<string> result;
  const size_t sep = input_shape.rfind(":");
  if (sep == std::string::npos) {
    return result;
  }
  result.emplace_back(input_shape.substr(0, sep));
  // substr clamps the length argument, so over-asking past the end is safe.
  result.emplace_back(input_shape.substr(sep + 1, input_shape.size() - sep));
  return result;
}

// Mapping from the user-facing --output_type strings to GE data types.
// Only these three types are accepted; anything else is reported as E10042.
static std::map<std::string, ge::DataType> output_type_str_to_datatype = {
    {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}};

// Validate that a boolean-valued command line option is literally "true" or
// "false". On any other value, report ATC error E10033 and log, then return
// false.
static bool CheckInputTrueOrFalse(const std::string &s, const std::string &atc_param) {
  if (s != "true" && s != "false") {
    ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"parameter", "value"}, {atc_param, s});
    GELOGE(PARAM_INVALID, "Input parameter[--%s]'s value[%s] must be true or false.", atc_param.c_str(), s.c_str());
    return false;
  }
  return true;
}

// Return true when every character of |str| is a decimal digit; log and
// return false at the first non-digit. Note: an empty string passes this
// check (callers such as StringToInt rely on stoi to reject it).
bool CheckDigitStr(std::string &str) {
  for (auto it = str.begin(); it != str.end(); ++it) {
    if (isdigit(*it) == 0) {
      GELOGE(domi::FAILED, "value[%s] is not positive integer", str.c_str());
      return false;
    }
  }
  return true;
}

/// Convert a decimal digit string into an int32_t.
/// @param str   in: candidate digit string (also validated via CheckDigitStr)
/// @param value out: parsed integer; only meaningful when SUCCESS is returned
/// @return SUCCESS, or PARAM_INVALID on non-digit input / invalid argument
///         (reported as E10014) or out-of-range overflow (reported as E10013)
Status StringToInt(std::string &str, int32_t &value) {
  try {
    if (!CheckDigitStr(str)) {
      GELOGE(PARAM_INVALID, "Invalid of digit string: %s ", str.c_str());
      ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str});
      return PARAM_INVALID;
    }
    value = stoi(str);
  } catch (std::invalid_argument &) {
    // Raised by stoi e.g. for an empty string (which CheckDigitStr lets through).
    GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch invalid_argument.", str.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str});
    return PARAM_INVALID;
  } catch (std::out_of_range &) {
    // Raised by stoi when the value does not fit in int.
    GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch out_of_range.", str.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"output_type", str});
    return PARAM_INVALID;
  }
  return SUCCESS;
}

// Verify that every "name:index" entry collected while parsing --output_type
// also appears in the user-supplied --out_nodes list. Reports E10059 and
// returns domi::FAILED on the first mismatch.
Status VerifyOutputTypeAndOutNodes(std::vector<std::string> &out_type_vec) {
  std::vector<std::pair<std::string, int32_t>> user_out_nodes = domi::GetContext().user_out_nodes;
  std::set<std::string> out_nodes_info;
  // out_nodes set should include output_type and output_format
  for (const auto &out_node : user_out_nodes) {
    out_nodes_info.emplace(out_node.first + ":" + to_string(out_node.second));
  }
  for (const auto &out_type : out_type_vec) {
    if (out_nodes_info.count(out_type) == 0) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10059", {"value"}, {out_type});
      GELOGE(domi::FAILED, "Can not find this node (%s) in out_nodes.", out_type.c_str());
      return domi::FAILED;
    }
  }
  return domi::SUCCESS;
}

/// Parse the --output_type option. Two accepted forms:
///   1) a bare data type ("FP32"/"FP16"/"UINT8") applying to all out nodes —
///      only validated here, nothing is recorded in the out-maps;
///   2) a ';'-separated list of "opname:index:dtype" triples, collected into
///      the two output maps keyed by op name.
/// @param out_type_index_map out: op name -> list of output indices
/// @param out_type_dt_map    out: op name -> list of data types, kept
///                           parallel to the index list for the same op
/// @return domi::SUCCESS, or domi::FAILED (details reported via ErrorManager)
Status ParseOutputType(const std::string &output_type, std::map<std::string, vector<uint32_t>> &out_type_index_map,
                       std::map<std::string, vector<ge::DataType>> &out_type_dt_map) {
  if (output_type.find(':') == std::string::npos) {
    // Form 1: a single datatype that applies to every out node.
    GELOGI("output_type is not multiple nodes, means all out nodes");
    auto it = output_type_str_to_datatype.find(output_type);
    if (it == output_type_str_to_datatype.end()) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10042", {"value"}, {output_type});
      GELOGE(ge::PARAM_INVALID, "Invalid value for --output_type[%s], only support DT_FLOAT, DT_FLOAT16, DT_UINT8!!",
             output_type.c_str());
      return domi::FAILED;
    }
    return domi::SUCCESS;
  }
  // Form 2: per-node "opname:index:dtype" triples.
  std::vector<std::string> out_type_vec;  // "name:index" keys for the final cross-check
  vector<string> nodes_v = StringUtils::Split(output_type, ';');
  for (const string &node : nodes_v) {
    vector<string> node_index_type_v = StringUtils::Split(node, ':');
    if (node_index_type_v.size() != 3) {  // The size must be 3.
      ErrorManager::GetInstance().ATCReportErrMessage("E10058", {"value"}, {node});
      GELOGE(PARAM_INVALID,
             "The param of output_type is invalid, the correct format is [opname:index:dtype],"
             "while the actual input is %s.",
             node.c_str());
      return domi::FAILED;
    }
    ge::DataType tmp_dt;
    std::string node_name = StringUtils::Trim(node_index_type_v[0]);
    std::string index_str = StringUtils::Trim(node_index_type_v[1]);
    int32_t index;
    if (StringToInt(index_str, index) != SUCCESS) {
      GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s.", index_str.c_str());
      return domi::FAILED;
    }
    std::string dt_value = StringUtils::Trim(node_index_type_v[2]);
    auto it = output_type_str_to_datatype.find(dt_value);
    if (it == output_type_str_to_datatype.end()) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10042", {"value"}, {dt_value});
      GELOGE(ge::PARAM_INVALID, "output_type [%s] is invalid.", dt_value.c_str());
      return domi::FAILED;
    } else {
      tmp_dt = it->second;
    }
    out_type_vec.push_back(node_name + ":" + index_str);
    // Append index and dtype to the per-node lists, creating them on first use.
    auto it_index = out_type_index_map.find(node_name);
    if (it_index == out_type_index_map.end()) {
      vector<uint32_t> tmp_vec;
      tmp_vec.push_back(index);
      out_type_index_map.emplace(node_name, tmp_vec);
    } else {
      it_index->second.push_back(index);
    }

    auto it_dt = out_type_dt_map.find(node_name);
    if (it_dt == out_type_dt_map.end()) {
      vector<ge::DataType> tmp_vec;
      tmp_vec.push_back(tmp_dt);
      out_type_dt_map.emplace(node_name, tmp_vec);
    } else {
      it_dt->second.push_back(tmp_dt);
    }
  }
  // Every "name:index" mentioned in --output_type must also be in --out_nodes.
  return VerifyOutputTypeAndOutNodes(out_type_vec);
}

/// Validate one user-specified out node: it must not be an input DATA op and
/// its output index must lie within [0, GetOutputsSize()).
/// @param op_desc op description of the candidate out node (caller checks null)
/// @param index   user-supplied output index for that node
/// @return domi::SUCCESS, or domi::FAILED (reported as E10068 / E10060)
Status CheckOutNode(ge::OpDescPtr op_desc, int32_t index) {
  if (op_desc->GetType() == DATA) {
    GELOGE(domi::FAILED, "out_nodes [%s] can not be set input data, please check", op_desc->GetName().c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10068", {"parameter", "value", "reason"},
                                                    {"out_nodes", op_desc->GetName(), "it can not be set input data"});
    return domi::FAILED;
  }

  // NOTE(review): GetOutputsSize() presumably returns an unsigned size type;
  // narrowing into int32_t here — confirm output counts can never overflow.
  int32_t out_size = op_desc->GetOutputsSize();
  if (index < 0 || index >= out_size) {
    GELOGE(domi::FAILED,
           "out_node [%s] output index:%d must be smaller "
           "than node output size:%d and can not be negative!",
           op_desc->GetName().c_str(), index, out_size);
    std::string fail_reason = "output index:" + to_string(index) +
                              " must be smaller than output size:" + to_string(out_size) + " and can not be negative!";
    ErrorManager::GetInstance().ATCReportErrMessage("E10060", {"parameter", "value", "reason"},
                                                    {"out_nodes", op_desc->GetName(), fail_reason});
    return domi::FAILED;
  }
  return domi::SUCCESS;
}

/// Collect the effective output tensors contributed by leaf node |node|.
/// For an ordinary node, every output anchor of the node itself is recorded;
/// for a NETOUTPUT node, the recorded outputs are the peer (producer) nodes
/// feeding its input anchors.
/// @param output_nodes_info out: (node, output index) pairs, appended to
/// @param output_nodes_name out: node names, kept parallel to the pairs above
/// @return SUCCESS, or domi::FAILED when an op desc / peer anchor is missing
Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
                     std::vector<std::string> &output_nodes_name) {
  ge::OpDescPtr tmpDescPtr = node->GetOpDesc();
  if (tmpDescPtr == nullptr) {
    GELOGE(domi::FAILED, "Get outnode op desc fail.");
    return domi::FAILED;
  }
  size_t size = tmpDescPtr->GetOutputsSize();
  if (node->GetType() != NETOUTPUT) {
    for (size_t index = 0; index < size; ++index) {
      // size_t index is narrowed into the pair's int32_t slot here.
      output_nodes_info.push_back(std::make_pair(node, index));
      output_nodes_name.push_back(node->GetName());
    }
  } else {
    // NETOUTPUT: walk each in-anchor back to the real producing node/anchor.
    const auto in_anchors = node->GetAllInDataAnchors();
    for (auto in_anchor : in_anchors) {
      auto out_anchor = in_anchor->GetPeerOutAnchor();
      if (out_anchor == nullptr) {
        GELOGE(domi::FAILED, "Get leaf node op desc fail.");
        return domi::FAILED;
      }
      auto out_node = out_anchor->GetOwnerNode();
      output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx()));
      output_nodes_name.push_back(out_node->GetName());
    }
  }
  return SUCCESS;
}
} // namespace

/// Parse --is_output_adjust_hw_layout: a comma-separated list of true/false
/// flags, one per output node. "true" records DOMI_TENSOR_NC1HWC0 and "false"
/// records DOMI_TENSOR_ND into domi::GetContext().output_formats (cleared
/// first). An empty input is a no-op.
/// @return SUCCESS, or PARAM_INVALID on any value other than true/false
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputFp16NodesFormat(const string &is_output_fp16) {
  if (is_output_fp16.empty()) {
    return SUCCESS;
  }

  vector<domiTensorFormat_t> &output_formats = domi::GetContext().output_formats;
  output_formats.clear();
  vector<string> node_format_vec = StringUtils::Split(is_output_fp16, ',');
  for (auto &is_fp16 : node_format_vec) {
    // NOTE(review): if StringUtils::Trim returns the trimmed string instead of
    // trimming in place, this result is silently discarded — confirm Trim's
    // contract (elsewhere in this file its return value IS used).
    StringUtils::Trim(is_fp16);
    if (!CheckInputTrueOrFalse(is_fp16, "is_output_adjust_hw_layout")) {
      GELOGE(PARAM_INVALID, "Invalid Param, is_output_adjust_hw_layout only support true/false: but is [%s]",
             is_output_fp16.c_str());
      return PARAM_INVALID;
    }
    if (is_fp16 == "false") {
      output_formats.push_back(DOMI_TENSOR_ND);
    } else if (is_fp16 == "true") {
      output_formats.push_back(domi::DOMI_TENSOR_NC1HWC0);
    }
  }
  return SUCCESS;
}

/// Record the graph's output nodes from the user's --out_nodes /
/// --output_type / --is_output_adjust_hw_layout settings (read from
/// domi::GetContext()). When the user named no out nodes, every leaf node
/// (has data inputs, no data outputs) is used instead. Results are stored
/// via compute_graph->SetGraphOutNodesInfo and GetContext().net_out_nodes.
/// @param output_type raw --output_type string; parsed per node when set
/// @param output      currently unused by this function
/// @return domi::SUCCESS or domi::FAILED
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph,
                                                                          const std::string &output_type,
                                                                          const std::string &output) {
  ge::ComputeGraphPtr compute_graph = ge::GraphUtils::GetComputeGraph(graph);
  GE_CHECK_NOTNULL(compute_graph);

  std::vector<std::pair<std::string, int32_t>> user_out_nodes = domi::GetContext().user_out_nodes;
  std::vector<domiTensorFormat_t> output_formats = domi::GetContext().output_formats;
  std::vector<std::pair<ge::NodePtr, int32_t>> output_nodes_info;
  std::vector<std::string> output_nodes_name;

  std::map<std::string, vector<uint32_t>> out_type_index_map;
  std::map<std::string, vector<ge::DataType>> out_type_dt_map;
  if (!output_type.empty()) {
    if (ParseOutputType(output_type, out_type_index_map, out_type_dt_map) != SUCCESS) {
      GELOGE(domi::FAILED, "Parse output_type failed.");
      return domi::FAILED;
    }
  }

  // User declared outputs
  for (uint32_t i = 0; i < user_out_nodes.size(); ++i) {
    ge::NodePtr out_node = compute_graph->FindNode(user_out_nodes[i].first);
    if (out_node == nullptr) {
      GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str());
      return domi::FAILED;
    }
    auto op_desc = out_node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    if (CheckOutNode(op_desc, user_out_nodes[i].second) != SUCCESS) {
      GELOGE(domi::FAILED, "Check out node (%s) fail.", user_out_nodes[i].first.c_str());
      return domi::FAILED;
    }
    // output_formats is positional: the i-th flag applies to the i-th out node.
    if (i < output_formats.size()) {
      if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) {
        GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str());
        if (!ge::AttrUtils::SetBool(op_desc, "output_set_fp16_nc1hwc0", true)) {
          GELOGW("The output node [%s] set NC1HWC0 failed", user_out_nodes[i].first.c_str());
        }
      }
    }
    // Attach the per-node dtype/index lists parsed from --output_type, if any.
    auto it_index = out_type_index_map.find(user_out_nodes[i].first);
    auto it_dt = out_type_dt_map.find(user_out_nodes[i].first);
    if ((it_index != out_type_index_map.end()) && (it_dt != out_type_dt_map.end())) {
      GELOGI("The output node [%s] need to be set output_type", user_out_nodes[i].first.c_str());
      (void)ge::AttrUtils::SetListDataType(op_desc, "_output_dt_list", it_dt->second);
      (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second);
    }
    output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second));
    output_nodes_name.push_back(out_node->GetName());
  }
  // default output node (leaf)
  if (user_out_nodes.empty()) {
    for (ge::NodePtr node : compute_graph->GetDirectNode()) {
      if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) {
        Status ret = GetOutputLeaf(node, output_nodes_info, output_nodes_name);
        GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail.");
      }
    }
  }
  compute_graph->SetGraphOutNodesInfo(output_nodes_info);
  domi::GetContext().net_out_nodes = output_nodes_name;
  return domi::SUCCESS;
}

/// Parse the --input_shape option: ';'-separated "name:d1,d2,..." entries.
/// Each dimension must be a decimal integer; unless is_dynamic_input is true,
/// every dimension must be strictly positive (so -1 dynamic dims are only
/// allowed for dynamic input). All failures are reported via ErrorManager
/// and make the function return false.
/// @param shape_map      out: name -> dims; emplace means a duplicate name
///                       keeps its FIRST parsed shape
/// @param user_shape_map out: (name, dims) in input order, duplicates kept
/// @return true when every entry parsed cleanly, false otherwise
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ParseInputShape(
    const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map,
    vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) {
  vector<string> shape_vec = StringUtils::Split(input_shape, ';');
  const int DEFAULT_SHAPE_PAIR_SIZE = 2;
  for (const auto &shape : shape_vec) {
    vector<string> shape_pair_vec = SplitInputShape(shape);
    if (shape_pair_vec.size() != DEFAULT_SHAPE_PAIR_SIZE) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                      {shape, kSplitError1, kInputShapeSample1});
      GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
             shape.c_str(), kSplitError1, kInputShapeSample1);
      return false;
    }
    if (shape_pair_vec[1].empty()) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                      {shape, kEmptyError, kInputShapeSample1});
      GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
             shape.c_str(), kEmptyError, kInputShapeSample1);
      return false;
    }

    vector<string> shape_value_strs = StringUtils::Split(shape_pair_vec[1], ',');
    vector<int64_t> shape_values;
    for (auto &shape_value_str : shape_value_strs) {
      // stoul: The method may throw an exception: invalid_argument/out_of_range
      // Reject floats explicitly: stol would silently truncate "1.5" to 1.
      if (std::string::npos != shape_value_str.find('.')) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                        {shape, kFloatNumError, kInputShapeSample2});
        GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
               shape.c_str(), kFloatNumError, kInputShapeSample2);
        return false;
      }

      long left_result = 0;
      try {
        left_result = stol(StringUtils::Trim(shape_value_str));
        if (!shape_value_str.empty() && (shape_value_str.front() == '-')) {
          // The value maybe dynamic shape [-1], need substr it and verify isdigit.
          shape_value_str = shape_value_str.substr(1);
        }
        // After stripping an optional leading '-', every remaining character
        // must be a digit; this rejects suffixes stol would have ignored.
        for (char c : shape_value_str) {
          if (!isdigit(c)) {
            ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                            {shape, kDigitError, kInputShapeSample2});
            GELOGE(PARAM_INVALID, "--input_shape's shape value[%s] is not digit", shape_value_str.c_str());
            return false;
          }
        }
      } catch (const std::out_of_range &) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        GELOGW("Input parameter[--input_shape]’s value[%s] cause out of range execption!", shape_value_str.c_str());
        return false;
      } catch (const std::invalid_argument &) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        GELOGW("Input parameter[--input_shape]’s value[%s] cause invalid argument!", shape_value_str.c_str());
        return false;
      } catch (...) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10015", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        GELOGW("Input parameter[--input_shape]’s value[%s] cause unkown execption!", shape_value_str.c_str());
        return false;
      }
      int64_t result = left_result;
      // - 1 is not currently supported
      if (!is_dynamic_input && result <= 0) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10011", {"shape", "result"}, {shape, std::to_string(result)});
        GELOGW(
            "Input parameter[--input_shape]’s shape value[%s] is invalid, "
            "expect positive integer, but value is %ld.",
            shape.c_str(), result);
        return false;
      }
      shape_values.push_back(result);
    }

    shape_map.emplace(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values));
    user_shape_map.push_back(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values));
  }

  return true;
}

///
/// @brief Parse the "--out_nodes" option ("name:index;name:index;...") into
///        the global omg context: out_nodes_map collects every requested
///        output index per node name, user_out_nodes keeps the pairs in the
///        order the user supplied them.
/// @param [in] out_nodes raw option value; an empty string is a no-op
/// @return SUCCESS, or PARAM_INVALID with an ATC error message reported
///
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputNodes(const string &out_nodes) {
  if (out_nodes.empty()) {
    return SUCCESS;
  }
  try {
    // Start from a clean slate so repeated parsing does not accumulate.
    domi::GetContext().out_nodes_map.clear();
    domi::GetContext().user_out_nodes.clear();

    for (const string &node_item : StringUtils::Split(out_nodes, ';')) {
      vector<string> name_and_index = StringUtils::Split(node_item, ':');
      // Every entry must be exactly "node_name:index".
      if (name_and_index.size() != 2) {
        ErrorManager::GetInstance().ATCReportErrMessage(
          "E10068", {"parameter", "value", "reason"},
          {"out_nodes", node_item, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""});
        GELOGE(PARAM_INVALID,
               "The input format of --out_nodes is invalid, the correct format is "
               "\"node_name1:0;node_name1:1;node_name2:0\", while the actual input is %s.",
               node_item.c_str());
        return PARAM_INVALID;
      }
      // stoi: The method may throw an exception: invalid_argument/out_of_range
      if (!CheckDigitStr(name_and_index[1])) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes});
        GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str());
        return PARAM_INVALID;
      }
      const int32_t output_index = stoi(StringUtils::Trim(name_and_index[1]));
      auto map_iter = domi::GetContext().out_nodes_map.find(name_and_index[0]);
      if (map_iter == domi::GetContext().out_nodes_map.end()) {
        domi::GetContext().out_nodes_map.emplace(name_and_index[0], std::vector<int32_t>{output_index});
      } else {
        map_iter->second.emplace_back(output_index);
      }
      domi::GetContext().user_out_nodes.emplace_back(name_and_index[0], output_index);
    }
  } catch (std::invalid_argument &) {
    GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes});
    return PARAM_INVALID;
  } catch (std::out_of_range &) {
    GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"out_nodes", out_nodes});
    return PARAM_INVALID;
  }

  return SUCCESS;
}

///
/// @brief Load the op-name mapping configuration file (if provided) into the
///        global omg context. A null or empty path is treated as "no config"
///        and succeeds without side effects.
/// @param [in] op_conf path to the op_conf configuration file, may be null
/// @return SUCCESS, or FAILED when the file cannot be parsed
///
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOpConf(const char *op_conf) {
  const bool has_conf = (op_conf != nullptr) && (*op_conf != '\0');
  if (!has_conf) {
    return SUCCESS;
  }
  // Key/value pairs in the configuration file are divided by ":".
  PropertiesManager::Instance().SetPropertyDelimiter(OP_CONF_DELIMITER);
  // Parsing the op_conf configuration item file
  if (!PropertiesManager::Instance().Init(op_conf)) {
    GELOGE(FAILED, "op_name_map init failed!");
    return FAILED;
  }
  // Propagate the parsed map into the ATC global context.
  domi::GetContext().op_conf_map = PropertiesManager::Instance().GetPropertyMap();
  return SUCCESS;
}
} // namespace ge

+ 68
- 0
src/ge/common/model_parser/graph_parser_util.h View File

@@ -0,0 +1,68 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_COMMON_GRAPH_PARSER_UTIL_H_
#define GE_COMMON_GRAPH_PARSER_UTIL_H_

#include <google/protobuf/message.h>
#include <string>
#include <unordered_map>
#include <vector>
#include "framework/common/types.h"
#include "framework/omg/omg_inner_types.h"
#include "framework/omg/parser/parser_inner_ctx.h"
#include "proto/ge_ir.pb.h"
#include "proto/om.pb.h"

#include "graph/compute_graph.h"
#include "graph/graph.h"
#include "graph/model.h"
#include "runtime/kernel.h"

using domi::Status;
using std::pair;
using std::string;
using std::unordered_map;
using std::vector;

namespace ge {
/**
* @ingroup domi_omg
* @brief init omg context
* @return void
*/
Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format);

Status ParseOutputFp16NodesFormat(const string &is_output_fp16);

Status ParseOutputNodes(const string &out_nodes);

bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map,
vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input);

Status ParseOpConf(const char *op_conf);
} // namespace ge

namespace domi {
/**
* @ingroup domi_omg
* @brief get omg context
* @return reference of OmgContext
*/
ge::OmgContext &GetContext();
} // namespace domi

#endif // GE_COMMON_GRAPH_PARSER_UTIL_H_

+ 41
- 14
src/ge/common/profiling/profiling_manager.cc View File

@@ -16,15 +16,12 @@

#include "common/profiling/profiling_manager.h"

#include <nlohmann/json.hpp>
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "framework/common/string_util.h"
#include "graph/ge_context.h"
#include "runtime/base.h"

using Json = nlohmann::json;

namespace {
const char *const kJobID = "jobID";
const char *const kDeviceID = "deviceID";
@@ -35,6 +32,7 @@ const char *const kEvents = "events";
const char *const kAiCoreEvents = "ai_core_events";
const char *const kName = "name";
const char *const kTraceID = "traceId";
const char *const kProfDir = "resultPath";
const size_t kReportMaxLen = 2048;
} // namespace

@@ -100,6 +98,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
Json start_prof_conf = Json::parse(config);
Json &prof_conf = start_prof_conf[kStartCfg][0];
job_id_ = prof_conf[kJobID];
auto iter = prof_conf.find(kProfDir);
if (iter != prof_conf.end()) {
prof_dir_ = prof_conf[kProfDir];
}
Json &device_id = prof_conf[kDeviceID];
if (device_id.size() != 0) {
vector<int32_t>().swap(device_id_);
@@ -126,23 +128,36 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
}
}

GELOGI("Profiling json config from acl:%s", config.c_str());
Json &features = prof_conf[kFeatures];
if (ParseFeaturesFromAclCfg(features) != SUCCESS) {
GELOGE(FAILED, "Parse feature from acl cfg failed.");
return FAILED;
}
is_profiling_ = true;
} catch (...) {
GELOGE(FAILED, "Json conf is not invalid !");
return ge::PARAM_INVALID;
}
#endif
return ge::SUCCESS;
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg(
const Json &features) {
#ifdef DAVINCI_SUPPORT_PROFILING
try {
for (size_t i = 0; i < features.size(); ++i) {
Json &feature = features[i];
const Json &feature = features[i];
if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) {
continue;
}

const std::string &name = feature[kName];
if (name == "op_trace") {
GELOGI("Op trace config from acl");
Json &conf = feature[kConf];
Json &events = conf[0][kEvents];
const Json &conf = feature[kConf];
const Json &events = conf[0][kEvents];
const std::string &ai_core_events = events[0][kAiCoreEvents];
GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str());
is_op_trace_ = true;
// op trace get conf
ProfMgrConf prof_mgr_conf;
int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf);
if (result != 0) {
@@ -154,10 +169,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_);
} else if (name == "task_trace") {
is_op_trace_ = false;
if (feature.find(kConf) != feature.end()) {
const Json &conf = feature[kConf];
std::stringstream task_trace_conf;
task_trace_conf << conf;
task_trace_conf_ = task_trace_conf.str();
}
GELOGI("Task trace config from acl");
} else if (name == "system_trace") {
is_op_trace_ = false;
Json &conf = feature[kConf];
const Json &conf = feature[kConf];
std::stringstream system_trace_conf;
system_trace_conf << conf;
system_trace_conf_ = system_trace_conf.str();
@@ -165,10 +186,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
}
profiling_opts_.push_back(name);
}

is_profiling_ = true;
} catch (...) {
GELOGE(FAILED, "Json conf is not invalid !");
GELOGE(ge::PARAM_INVALID, "Json conf feature is not invalid !");
return ge::PARAM_INVALID;
}
#endif
@@ -235,6 +254,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St
p_device[kDeviceID] = std::to_string(device_id);
p_device[kJobID] = job_id_;
p_device[kTraceID] = std::to_string(GetContext().TraceId());
if (!prof_dir_.empty()) {
p_device[kProfDir] = prof_dir_;
GELOGI("Prof dir: %s.", prof_dir_.c_str());
}

Json features;
if (is_op_trace_) {
@@ -258,6 +281,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St
Json f;
if (profiling_opts_[i] == "system_trace") {
f[kConf] = nlohmann::json::parse(system_trace_conf_);
} else if (profiling_opts_[i] == "task_trace") {
if (!task_trace_conf_.empty()) {
f[kConf] = nlohmann::json::parse(task_trace_conf_);
}
}
f[kName] = profiling_opts_[i];
features[i] = f;


+ 5
- 0
src/ge/common/profiling/profiling_manager.h View File

@@ -17,6 +17,7 @@
#ifndef GE_COMMON_PROFILING_PROFILING_MANAGER_H_
#define GE_COMMON_PROFILING_PROFILING_MANAGER_H_

#include <nlohmann/json.hpp>
#include <map>
#include <string>
#include <vector>
@@ -30,6 +31,7 @@
using std::map;
using std::string;
using std::vector;
using Json = nlohmann::json;

namespace ge {
const std::string GE_PROFILING_MODULE = "Framework";
@@ -84,11 +86,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
void PluginUnInit(const std::string &module) const;

private:
ge::Status ParseFeaturesFromAclCfg(const Json &feature);
bool is_profiling_ = false;
bool is_op_trace_ = false;
bool is_load_ = false;
int32_t op_trace_iter_num_ = 0;
string job_id_;
string prof_dir_;
vector<int32_t> device_id_;
vector<string> op_trace_conf_;
vector<string> profiling_opts_;
@@ -96,6 +100,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
string recv_profiling_config_;
string send_profiling_config_;
string system_trace_conf_;
string task_trace_conf_;
const ProfilingEngineImpl engine_;
};
} // namespace ge


+ 3
- 0
src/ge/common/types.cc View File

@@ -292,6 +292,7 @@ REGISTER_OPTYPE_DEFINE(BASICLSTMCELL, "BasicLSTMCell");
REGISTER_OPTYPE_DEFINE(GETNEXT, "GetNext");
REGISTER_OPTYPE_DEFINE(INITDATA, "InitData");
REGISTER_OPTYPE_DEFINE(REFIDENTITY, "RefIdentity");
REGISTER_OPTYPE_DEFINE(BITCAST, "Bitcast");

/***************Ann special operator*************************/
REGISTER_OPTYPE_DEFINE(ANN_MEAN, "AnnMean");
@@ -382,6 +383,8 @@ REGISTER_OPTYPE_DEFINE(HCOMALLREDUCE, "HcomAllReduce");
REGISTER_OPTYPE_DEFINE(HCOMREDUCESCATTER, "HcomReduceScatter");
REGISTER_OPTYPE_DEFINE(HCOMSEND, "HcomSend");
REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive");
REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead");
REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite");

REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign");
REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp");


+ 1
- 1
src/ge/common/util.cc View File

@@ -363,7 +363,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const
std::map<std::string, std::string> args_map;
if (file_path.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {atc_param});
GELOGW("Input parameter's value is empty.");
GELOGW("Input parameter %s is empty.", file_path.c_str());
return false;
}
std::string real_path = RealPath(file_path.c_str());


+ 12
- 10
src/ge/engine_manager/dnnengine_manager.cc View File

@@ -181,13 +181,12 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) {
GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str());
return "";
}
string ge_core_type;
std::string ge_core_type;
Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type);
if (ret != SUCCESS) {
GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE");
}
string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine;
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE"));
std::string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine;
GELOGD("engine type will exclude: %s", exclude_core_Type.c_str());

std::map<std::string, std::string> unsupported_reasons;
for (const auto &it : op_infos) {
if (it.engine == exclude_core_Type) {
@@ -204,7 +203,7 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) {
checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time;
op_desc->SetOpEngineName(it.engine);
op_desc->SetOpKernelLibName(kernel_name);
GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s into op_desc %s", kernel_name.c_str(),
GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s to op_desc %s", kernel_name.c_str(),
it.engine.c_str(), op_desc->GetName().c_str());
return it.engine;
} else {
@@ -222,6 +221,9 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) {
unsupported_reasons.emplace(kernel_name, unsupported_reason);
GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s",
kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str());
if (!op_desc->HasAttr("_is_ge_op")) {
ErrorManager::GetInstance().ATCReportErrMessage("W11001", {"opname"}, {op_desc->GetName()});
}
}
} else {
GELOGW(
@@ -371,7 +373,7 @@ Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std:
}

Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle handle) {
GELOGI("Begin to read json file");
GELOGD("Begin to read json file");
if (file_path.empty()) {
GELOGE(FAILED, "Json path %s is not valid", file_path.c_str());
return FAILED;
@@ -406,12 +408,12 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h
return FAILED;
}
ifs.close();
GELOGI("Read json file success");
GELOGD("Read json file success");
return SUCCESS;
}

Status DNNEngineManager::CheckJsonFile() {
GELOGI("Begin to check json file");
GELOGD("Begin to check json file");
for (auto &it : engines_map_) {
std::string engine_name = it.first;
int count = 0;
@@ -431,7 +433,7 @@ Status DNNEngineManager::CheckJsonFile() {
return FAILED;
}
}
GELOGI("Check json file success");
GELOGD("Check json file success");
return SUCCESS;
}
} // namespace ge

+ 1
- 0
src/ge/executor/CMakeLists.txt View File

@@ -60,6 +60,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR}
"../graph/load/new_model_manager/task_info/task_info.cc"
"../graph/load/new_model_manager/tbe_handle_store.cc"
"../graph/load/new_model_manager/zero_copy_task.cc"
"../graph/load/new_model_manager/zero_copy_offset.cc"
"../graph/manager/graph_caching_allocator.cc"
"../graph/manager/graph_manager_utils.cc"
"../graph/manager/graph_mem_allocator.cc"


+ 237
- 71
src/ge/executor/ge_executor.cc View File

@@ -36,6 +36,9 @@
#include "mmpa/mmpa_api.h"
#include "single_op/single_op_manager.h"

using std::string;
using std::vector;

namespace {
const size_t kDynamicBatchSizeVecSize = 1;
const size_t kStaticBatchInfoSize = 1;
@@ -102,20 +105,36 @@ void SetDynamicInputDataFlag(const ge::RunModelData &input_data, const std::vect
ge::InputData &inputs) {
inputs.is_dynamic_batch = true;
std::string batch_label;
size_t match_idx = 0;
for (size_t i = 0; i < batch_info.size(); ++i) {
if (batch_info[i].size() == kDynamicBatchSizeVecSize &&
batch_info[i][0] == static_cast<int64_t>(input_data.dynamic_batch_size)) {
batch_label = kBatchLabel + std::to_string(i);
inputs.batch_label = batch_label;
// dynamic_dims
if (input_data.dynamic_dims.size() != 0) {
bool is_match = true;
for (size_t j = 0; j < static_cast<size_t>(input_data.dynamic_dims.size()); ++j) {
if (static_cast<uint64_t>(batch_info[i][j]) != input_data.dynamic_dims[j]) {
is_match = false;
break;
}
}
if (is_match) {
match_idx = i;
break;
}
// dynamic_batch_size
} else if (batch_info[i].size() == kDynamicBatchSizeVecSize &&
batch_info[i][0] == static_cast<int64_t>(input_data.dynamic_batch_size)) {
match_idx = i;
break;
// dynamic_image_size
} else if (batch_info[i].size() == kDynamicImageSizeVecSize &&
batch_info[i][0] == static_cast<int64_t>(input_data.dynamic_image_height) &&
batch_info[i][1] == static_cast<int64_t>(input_data.dynamic_image_width)) {
batch_label = kBatchLabel + std::to_string(i);
inputs.batch_label = batch_label;
match_idx = i;
break;
}
}
batch_label = kBatchLabel + std::to_string(match_idx);
inputs.batch_label = batch_label;
GELOGI("current batch label:%s", batch_label.c_str());
}

@@ -225,39 +244,41 @@ Status GeExecutor::Finalize() {
Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
uint64_t batch_size) {
if (dynamic_input_addr == nullptr) {
GELOGE(FAILED, "Dynamic input addr is nullptr!");
return FAILED;
GELOGE(PARAM_INVALID, "Dynamic input addr is nullptr!");
return PARAM_INVALID;
}

uint64_t size = sizeof(uint64_t);
if (length < size) {
GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, size);
return FAILED;
GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, size);
return PARAM_INVALID;
}

// Verify whether the input dynamic batch matches the model gear
std::vector<std::vector<int64_t>> batch_info;
std::vector<uint64_t> batch_num{batch_size};
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info);
int32_t dynamic_type = static_cast<int32_t>(FIXED);
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get dynamic input info failed.");
return FAILED;
GELOGE(ret, "Get dynamic input info failed.");
return ret;
}

if (!IsDynamicBatchSizeMatchModel(batch_size, batch_info)) {
GELOGE(FAILED, "The current dynamic input does not match the gear of the model.");
return FAILED;
GELOGE(PARAM_INVALID, "The current dynamic input does not match the gear of the model.");
return PARAM_INVALID;
}

ret = GraphExecutor::SetDynamicSize(model_id, batch_num);
ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast<int32_t>(DYNAMIC_BATCH));
if (ret != SUCCESS) {
GELOGE(FAILED, "Set dynamic size failed");
return FAILED;
GELOGE(ret, "Set dynamic size failed");
return ret;
}
// memcpy dynamic_batch_size from host to device
if (rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) {
GELOGE(FAILED, "memcpy dynamic batch input data failed!");
return FAILED;
rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "memcpy dynamic batch input data failed! ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
return SUCCESS;
}
@@ -265,40 +286,42 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad
Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
uint64_t image_height, uint64_t image_width) {
if (dynamic_input_addr == nullptr) {
GELOGE(FAILED, "Dynamic input addr is nullptr!");
return FAILED;
GELOGE(PARAM_INVALID, "Dynamic input addr is nullptr!");
return PARAM_INVALID;
}

uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint64_t);
if (length < dynamic_input_size) {
GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size);
return FAILED;
GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size);
return PARAM_INVALID;
}

// Verify whether the input dynamic resolution matches the model gear
std::vector<std::vector<int64_t>> batch_info;
std::vector<uint64_t> batch_num{image_height, image_width};
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info);
int32_t dynamic_type = static_cast<int32_t>(FIXED);
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get dynamic input info failed.");
return FAILED;
GELOGE(ret, "Get dynamic input info failed.");
return ret;
}

if (!IsDynamicImageSizeMatchModel(image_height, image_width, batch_info)) {
GELOGE(FAILED, "The current dynamic input does not match the gear of the model.");
return FAILED;
GELOGE(PARAM_INVALID, "The current dynamic input does not match the gear of the model.");
return PARAM_INVALID;
}

ret = GraphExecutor::SetDynamicSize(model_id, batch_num);
ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast<int32_t>(DYNAMIC_IMAGE));
if (ret != SUCCESS) {
GELOGE(FAILED, "Set dynamic size failed");
return FAILED;
GELOGE(ret, "Set dynamic size failed");
return ret;
}
// Memcpy dynamic resolution height from host to device
if (rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) !=
RT_ERROR_NONE) {
GELOGE(FAILED, "memcpy dynamic resolution input data failed!");
return FAILED;
rtError_t rt_ret =
rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "memcpy dynamic resolution input data failed! ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

uint64_t remain_size = length - sizeof(uint64_t);
@@ -311,16 +334,109 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad
return SUCCESS;
}

Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) {
Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length,
const vector<uint64_t> &dynamic_dims) {
if (dynamic_input_addr == nullptr) {
GELOGE(FAILED, "Dynamic input addr is nullptr!");
return FAILED;
}

Status ret = GraphExecutor::SetDynamicSize(model_id, dynamic_dims, static_cast<int32_t>(DYNAMIC_DIMS));
if (ret != SUCCESS) {
GELOGE(FAILED, "Set dynamic size failed");
return FAILED;
}

vector<uint64_t> cur_dynamic_dims;
if (GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims) != SUCCESS) {
GELOGE(FAILED, "GetCurDynamicDims failed.");
return FAILED;
}

size_t dynamic_dim_num = cur_dynamic_dims.size();
uint64_t dynamic_input_size = static_cast<uint64_t>(dynamic_dim_num * sizeof(uint64_t));
if (length < dynamic_input_size) {
GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size);
return FAILED;
}

for (uint32_t i = 0; i < dynamic_dim_num; ++i) {
// Memcpy dynamic dim[i] from host to device
if (rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(dynamic_input_addr) + sizeof(uint64_t) * i),
length - sizeof(uint64_t) * i, &cur_dynamic_dims[i], sizeof(uint64_t),
RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) {
GELOGE(FAILED, "memcpy dynamic resolution input data failed!");
return FAILED;
}
}
return SUCCESS;
}

Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> &combined_dims,
vector<uint64_t> &cur_dynamic_dims) {
vector<vector<int64_t>> combined_batch;
if (GraphExecutor::GetCombinedDynamicDims(model_id, combined_batch) != SUCCESS) {
GELOGE(FAILED, "Get combined dynamic dims info failed.");
return FAILED;
}
if (combined_batch.empty()) {
GELOGE(FAILED, "Combined dynamic dims is empty.");
return FAILED;
}

if (combined_dims.size() != combined_batch[0].size()) {
GELOGE(FAILED, "Input dynamic dims's dimension size[%zu] is different from model[%zu].", combined_dims.size(),
combined_batch[0].size());
return FAILED;
}
bool matched = false;
size_t idx = 0;
for (size_t i = 0; i < combined_batch.size(); i++) {
bool is_match = true;
for (size_t j = 0; j < combined_dims.size(); j++) {
if (combined_dims[j] != static_cast<uint64_t>(combined_batch[i][j])) {
is_match = false;
break;
}
}
if (is_match) {
idx = i;
matched = true;
break;
}
}

if (!matched) {
GELOGE(FAILED, "Input dynamic dims can not match model.");
return FAILED;
}

// batch_info save the dynamic info of combined_dims
vector<vector<int64_t>> batch_info;
int32_t dynamic_type = static_cast<int32_t>(FIXED);
if (GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type) != SUCCESS) {
GELOGE(FAILED, "Get dynamic input info failed.");
return FAILED;
}

cur_dynamic_dims.clear();
for (size_t i = 0; i < batch_info[idx].size(); i++) {
cur_dynamic_dims.emplace_back(static_cast<uint64_t>(batch_info[idx][i]));
}

return SUCCESS;
}

Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
GELOGI("Begin to get current shape");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}
Status ret = GraphExecutor::GetCurShape(model_id, batch_info);
Status ret = GraphExecutor::GetCurShape(model_id, batch_info, dynamic_type);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get current shape failed");
return FAILED;
GELOGE(ret, "Get current shape failed");
return ret;
}
return SUCCESS;
}
@@ -330,12 +446,12 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add
const kAippDynamicPara &aippParms) {
GELOGI("Enter to SetDynamicAippData.");
if (dynamic_input_addr == nullptr) {
GELOGE(FAILED, "Dynamic aipp input addr is nullptr!");
return FAILED;
GELOGE(PARAM_INVALID, "Dynamic aipp input addr is nullptr!");
return PARAM_INVALID;
}
if (aippBatchPara.empty()) {
GELOGE(FAILED, "aippBatchPara is empty.");
return FAILED;
GELOGE(PARAM_INVALID, "aippBatchPara is empty.");
return PARAM_INVALID;
}
uint64_t batch_num = aippBatchPara.size();
uint64_t real_aippParms_size = sizeof(kAippDynamicPara) - sizeof(kAippDynamicBatchPara);
@@ -345,24 +461,25 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add
"batch num is %lu, struct_len is %lu",
model_id, length, batch_num, struct_len);
if (struct_len > length) {
GELOGE(FAILED, "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length);
return FAILED;
GELOGE(PARAM_INVALID, "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length);
return PARAM_INVALID;
}
// Memcpy real kAippDynamicBatchPara from host to device
if (rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE) !=
RT_ERROR_NONE) {
GELOGE(FAILED, "memcpy real_aippParms_size failed!");
return FAILED;
rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "memcpy real_aippParms_size failed! ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
uint64_t remain_len = length - real_aippParms_size;
uint8_t *aipp_batch_para_dev = reinterpret_cast<uint8_t *>(dynamic_input_addr) + real_aippParms_size;

for (uint64_t i = 0; i < batch_num; ++i) {
if (rtMemcpy(reinterpret_cast<void *>(aipp_batch_para_dev + i * sizeof(kAippDynamicBatchPara)),
(remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]), sizeof(kAippDynamicBatchPara),
RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) {
GELOGE(FAILED, "memcpy kAippDynamicBatchPara input data failed!");
return FAILED;
rt_ret = rtMemcpy(reinterpret_cast<void *>(aipp_batch_para_dev + i * sizeof(kAippDynamicBatchPara)),
(remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]),
sizeof(kAippDynamicBatchPara), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "memcpy kAippDynamicBatchPara input data failed! ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}
return SUCCESS;
@@ -429,7 +546,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
}
Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed.");
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id);
return FAILED;
}
return GraphLoader::UnloadModel(model_id);
@@ -468,17 +585,19 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
output_formats, new_model_desc);
if (ret != domi::SUCCESS) {
GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret);
return TransferDomiErrorCode(ret);
return ret;
}

if (input_formats.size() != input_desc_infos.size()) {
GELOGE(ge::FAILED, "input_formats.size() != input_desc_infos.size().");
return ge::FAILED;
GELOGE(ge::PARAM_INVALID, "input_formats size %zu is not equal to input_desc_infos size %zu.", input_formats.size(),
input_desc_infos.size());
return ge::PARAM_INVALID;
}

if (output_formats.size() != output_desc_infos.size()) {
GELOGE(ge::FAILED, "output_formats.size() != output_desc_infos.size().");
return ge::FAILED;
GELOGE(ge::PARAM_INVALID, "output_formats size %zu is not equal to output_desc_infos size %zu.",
output_formats.size(), output_desc_infos.size());
return ge::PARAM_INVALID;
}

// Transfer data to TensorDesc
@@ -494,16 +613,18 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
/// @brief Get dynamic batch_info
/// @param [in] model_id
/// @param [out] batch_info
/// @param [out] dynamic_type
/// @return execute result
///
Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) {
Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
int32_t &dynamic_type) {
GELOGI("Begin to get dynamic batch info.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info);
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
if (ret != SUCCESS) {
GELOGE(ret, "GetDynamicBatchInfo failed.");
return ret;
@@ -515,6 +636,30 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto

///
/// @ingroup ge
/// @brief Get combined dynamic dims info
/// @param [in] model_id
/// @param [out] batch_info
/// @return execute result
///
Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64_t>> &batch_info) {
GELOGI("Begin to get combined dynamic dims info.");
if (!isInit_) {
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return GE_EXEC_NOT_INIT;
}

Status ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info);
if (ret != SUCCESS) {
GELOGE(ret, "GetCombinedDynamicDims failed.");
return ret;
}

GELOGI("Get combined dynamic dims succ.");
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get AIPP input format
/// @param [in] model_id
/// @param [in] index
@@ -628,8 +773,8 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da

string filePath = RealPath(path.c_str());
if (filePath.empty()) {
GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str());
return ge::FAILED;
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "File path is invalid. please check your text file '%s'.", path.c_str());
return GE_EXEC_MODEL_PATH_INVALID;
}
GELOGI("load modelData from file: %s.", path.c_str());
std::string key_path;
@@ -710,12 +855,20 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel
GetDomiOutputData(run_output_data, output_data);

if ((run_input_data.dynamic_batch_size != 0) || (run_input_data.dynamic_image_width != 0) ||
(run_input_data.dynamic_image_height != 0)) {
(run_input_data.dynamic_image_height != 0) || (run_input_data.dynamic_dims.size() != 0)) {
std::vector<std::vector<int64_t>> batch_info;
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info);
int32_t dynamic_type = static_cast<int32_t>(FIXED);
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get dynamic input info failed.");
return FAILED;
GELOGE(ret, "Get dynamic input info failed.");
return ret;
}
if (dynamic_type == static_cast<int32_t>(DYNAMIC_DIMS)) {
ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Get dynamic input info failed.");
return FAILED;
}
}
if (!batch_info.empty()) {
SetDynamicInputDataFlag(run_input_data, batch_info, input_data);
@@ -790,6 +943,11 @@ Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelDa
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op);
}

Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op) {
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op);
}

Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,
std::vector<DataBuffer> &outputs) {
if (executor == nullptr) {
@@ -800,13 +958,21 @@ Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer
return executor->ExecuteAsync(inputs, outputs);
}

ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector<GeTensorDesc> &input_desc,
const vector<DataBuffer> &inputs, vector<GeTensorDesc> &output_desc,
vector<DataBuffer> &outputs) {
GE_CHECK_NOTNULL(executor);
return executor->ExecuteAsync(input_desc, inputs, output_desc, outputs);
}

Status GeExecutor::ReleaseSingleOpResource(void *stream) {
return SingleOpManager::GetInstance().ReleaseResource(stream);
}

Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) {
std::vector<std::vector<int64_t>> batch_info;
Status ret = GetDynamicBatchInfo(model_id, batch_info);
int32_t dynamic_type = static_cast<int32_t>(FIXED);
Status ret = GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
if (ret != SUCCESS) {
GELOGE(ret, "Calc batch info size failed. ret = %d", ret);
return ret;


+ 5
- 0
src/ge/executor/module.mk View File

@@ -26,6 +26,7 @@ local_ge_executor_src_files := \
../graph/load/new_model_manager/data_inputer.cc \
../graph/load/new_model_manager/data_dumper.cc \
../graph/load/new_model_manager/zero_copy_task.cc \
../graph/load/new_model_manager/zero_copy_offset.cc \
../graph/load/new_model_manager/task_info/task_info.cc \
../graph/load/new_model_manager/task_info/event_record_task_info.cc \
../graph/load/new_model_manager/task_info/event_wait_task_info.cc \
@@ -79,6 +80,7 @@ local_ge_executor_shared_library := \
libslog \
libmmpa \
libgraph \
libregister \
libmsprof \

local_ge_executor_ldflags := -lrt -ldl \
@@ -128,6 +130,7 @@ LOCAL_SHARED_LIBRARIES := \
libslog \
libmmpa \
libgraph \
libregister \
libmsprof \

LOCAL_LDFLAGS += $(local_ge_executor_ldflags)
@@ -153,6 +156,7 @@ LOCAL_C_INCLUDES := $(local_ge_executor_c_include)
LOCAL_STATIC_LIBRARIES := \
libge_common \
libgraph \
libregister \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \
@@ -184,6 +188,7 @@ LOCAL_C_INCLUDES := $(local_ge_executor_c_include)
LOCAL_STATIC_LIBRARIES := \
libge_common \
libgraph \
libregister \
libprotobuf \

LOCAL_SHARED_LIBRARIES := \


+ 7
- 0
src/ge/ge_inference.mk View File

@@ -70,6 +70,7 @@ OMG_HOST_SRC_FILES := \
graph/passes/resource_pair_remove_control_pass.cc \
graph/passes/pass_utils.cc \
graph/passes/base_pass.cc \
graph/passes/bitcast_pass.cc \
graph/passes/constant_folding_pass.cc \
graph/passes/aicpu_constant_folding_pass.cc \
graph/passes/reshape_remove_pass.cc \
@@ -91,8 +92,10 @@ OMG_HOST_SRC_FILES := \
graph/passes/print_op_pass.cc \
graph/passes/no_use_reshape_remove_pass.cc \
graph/passes/iterator_op_pass.cc \
graph/passes/input_output_connection_identify_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \
graph/passes/mark_same_addr_pass.cc \
graph/passes/mark_graph_unknown_status_pass.cc \
graph/common/omg_util.cc \
graph/common/bcast.cc \
graph/passes/dimension_compute_pass.cc \
@@ -107,6 +110,7 @@ OMG_HOST_SRC_FILES := \
graph/passes/isolated_op_remove_pass.cc \
graph/passes/permute_pass.cc \
graph/passes/ctrl_edge_transfer_pass.cc \
graph/passes/end_of_sequence_add_control_pass.cc \
host_kernels/broadcast_gradient_args_kernel.cc \
host_kernels/greater_kernel.cc \
host_kernels/gather_v2_kernel.cc \
@@ -185,6 +189,8 @@ OMG_HOST_SRC_FILES := \
graph/passes/hccl_group_pass.cc \
graph/passes/switch_fusion_pass.cc \
graph/passes/switch_split_pass.cc \
graph/passes/memcpy_addr_async_pass.cc \
graph/passes/set_input_output_offset_pass.cc \

OMG_DEVICE_SRC_FILES := $(OMG_HOST_SRC_FILES)

@@ -203,6 +209,7 @@ OME_HOST_SRC_FILES := \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/cpu_queue_schedule.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/new_model_manager/zero_copy_offset.cc \
graph/load/new_model_manager/data_dumper.cc \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/task_info/event_record_task_info.cc \


+ 1
- 2
src/ge/ge_local_engine/engine/host_cpu_engine.h View File

@@ -1,5 +1,5 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_
#define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_



+ 1
- 0
src/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc View File

@@ -61,5 +61,6 @@ REGISTER_OP_CREATOR(SwitchN, GeDeletedOp);
REGISTER_OP_CREATOR(RefMerge, GeDeletedOp);
REGISTER_OP_CREATOR(RefSwitch, GeDeletedOp);
REGISTER_OP_CREATOR(TransShape, GeDeletedOp);
REGISTER_OP_CREATOR(Bitcast, GeDeletedOp);
} // namespace ge_local
} // namespace ge

+ 7
- 0
src/ge/ge_runner.mk View File

@@ -78,6 +78,7 @@ LIBGE_LOCAL_SRC_FILES := \
graph/load/new_model_manager/task_info/task_info.cc \
graph/load/new_model_manager/tbe_handle_store.cc \
graph/load/new_model_manager/zero_copy_task.cc \
graph/load/new_model_manager/zero_copy_offset.cc \
graph/manager/graph_context.cc \
graph/manager/graph_manager.cc \
graph/manager/graph_manager_utils.cc \
@@ -98,10 +99,13 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/addn_pass.cc \
graph/passes/aicpu_constant_folding_pass.cc \
graph/passes/assert_pass.cc \
graph/passes/input_output_connection_identify_pass.cc \
graph/passes/atomic_addr_clean_pass.cc \
graph/passes/mark_same_addr_pass.cc \
graph/passes/mark_graph_unknown_status_pass.cc \
graph/partition/dynamic_shape_partition.cc \
graph/passes/base_pass.cc \
graph/passes/bitcast_pass.cc \
graph/passes/cast_remove_pass.cc \
graph/passes/cast_translate_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \
@@ -214,6 +218,9 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/variable_prepare_op_pass.cc \
graph/passes/variable_ref_delete_op_pass.cc \
graph/passes/variable_ref_useless_control_out_delete_pass.cc \
graph/passes/end_of_sequence_add_control_pass.cc \
graph/passes/memcpy_addr_async_pass.cc \
graph/passes/set_input_output_offset_pass.cc \
graph/preprocess/graph_preprocess.cc \
graph/preprocess/insert_op/ge_aipp_op.cc \
graph/preprocess/insert_op/util_insert_aipp_op.cc \


+ 0
- 19
src/ge/ge_runtime/model_runner.cc View File

@@ -49,15 +49,6 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint
return true;
}

bool ModelRunner::DistributeTask(uint32_t model_id) {
auto model_iter = runtime_models_.find(model_id);
if (model_iter == runtime_models_.end()) {
GELOGE(PARAM_INVALID, "Model id %u not found.", model_id);
return false;
}
return model_iter->second->DistributeTask();
}

bool ModelRunner::LoadModelComplete(uint32_t model_id) {
auto model_iter = runtime_models_.find(model_id);
if (model_iter == runtime_models_.end()) {
@@ -100,16 +91,6 @@ const std::map<std::string, std::shared_ptr<RuntimeInfo>> &ModelRunner::GetRunti
return model_iter->second->GetRuntimeInfoMap();
}

void *ModelRunner::GetModelHandle(uint32_t model_id) const {
auto model_iter = runtime_models_.find(model_id);
if (model_iter == runtime_models_.end()) {
GELOGW("Model id %u not found.", model_id);
return nullptr;
}

return model_iter->second->GetModelHandle();
}

bool ModelRunner::UnloadModel(uint32_t model_id) {
auto iter = runtime_models_.find(model_id);
if (iter != runtime_models_.end()) {


+ 4
- 6
src/ge/ge_runtime/runtime_model.cc View File

@@ -283,16 +283,14 @@ bool RuntimeModel::Load(uint32_t device_id, uint64_t session_id, std::shared_ptr
}

GenerateTask(device_id, session_id, davinci_model);
return status;
}

bool RuntimeModel::DistributeTask() {
bool status = LoadTask();
status = LoadTask();
if (!status) {
GELOGE(FAILED, "DistributeTask failed");
return false;
return status;
}
return true;

return status;
}

bool RuntimeModel::Run() {


+ 0
- 2
src/ge/ge_runtime/runtime_model.h View File

@@ -35,12 +35,10 @@ class RuntimeModel {
~RuntimeModel();

bool Load(uint32_t device_id, uint64_t session_id, std::shared_ptr<DavinciModel> &davinci_model);
bool DistributeTask();
bool LoadComplete();
const std::vector<uint32_t> &GetTaskIdList() const;
const std::vector<uint32_t> &GetStreamIdList() const;
const std::map<std::string, std::shared_ptr<RuntimeInfo>> &GetRuntimeInfoMap() const { return runtime_info_map_; }
rtModel_t GetModelHandle() const { return rt_model_handle_; }
bool Run();
bool CopyInputData(const InputData &input_data);
bool GetInputOutputDescInfo(bool zero_copy, std::vector<InputOutputDescInfo> *input_desc,


+ 45
- 21
src/ge/generator/ge_generator.cc View File

@@ -23,15 +23,15 @@
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/ge_log.h"
#include "ge/ge_api.h"
#include "graph/ge_context.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/manager/graph_manager.h"
#include "graph/manager/util/rt_context_util.h"
#include "graph/opsproto_manager.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/type_utils.h"
#include "model/ge_model.h"
#include "init/gelib.h"
#include "model/ge_model.h"

using std::map;
using std::string;
@@ -46,6 +46,16 @@ const char *const kFileNameSuffix = "online";

std::map<ge::OpEngineType, std::string> engine_type_map{
{ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}};

bool ContainsDynamicInpus(const ge::OpDesc &op_desc) {
for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) {
if (tensor_desc->MutableShape().IsUnknownShape()) {
GELOGI("Contains unknown shape input. set is_dynamic_input to true.");
return true;
}
}
return false;
}
} // namespace

namespace ge {
@@ -55,6 +65,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi
GELOGI("CheckEngineType: use default engine.");
return SUCCESS;
}

// get op engine name
string op_engine_name;
auto iter = engine_type_map.find(engine_type);
@@ -65,6 +76,12 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi
GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast<int>(engine_type));
return FAILED;
}

if (op_desc->HasAttr(ATTR_NAME_UNREGST_OPPATH)) {
op_desc->SetOpEngineName(op_engine_name);
op_desc->SetOpKernelLibName(op_engine_name);
return SUCCESS;
}
// set op engine name and opkernelLib. when engine support
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) {
@@ -195,18 +212,19 @@ static void GetOpsProtoPath(string &opsproto_path) {

class GeGenerator::Impl {
public:
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models);
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models);

Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model);

Status SaveParams(GeModelPtr &ge_model, const string &type, const map<string, GeAttrValue> &attrs,
const vector<GeTensor> &inputs, const vector<GeTensor> &outputs);

Status GenerateInfershapeGraph(const Graph &graph, GraphId &graph_id);
Status GenerateInfershapeGraph(const Graph &graph);

GraphManager graph_manager_;
SaveParam save_param_;
bool is_offline_ = true;
bool is_singleop_unregistered_ = false;

private:
static std::string Trim(const std::string &str);
@@ -280,10 +298,9 @@ Status GeGenerator::GenerateOnlineModel(const Graph &graph, const vector<GeTenso
}

Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) {
GraphId graph_id;
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);

Status ret = impl_->GenerateInfershapeGraph(graph, graph_id);
Status ret = impl_->GenerateInfershapeGraph(graph);
if (ret != SUCCESS) {
GELOGE(ret, "Dump infershape json failed");
if (impl_->graph_manager_.Finalize() != SUCCESS) {
@@ -422,11 +439,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
} else {
ge::RtContextUtil::GetInstance().SetNormalModeContext(ctx);
}
GraphId graph_id;
GeRootModelPtr ge_root_model = nullptr;
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model);
Status ret = impl_->BuildModel(graph, inputs, ge_root_model);
if (ret != SUCCESS) {
GELOGE(ret, "Build model failed.");
if (impl_->graph_manager_.Finalize() != SUCCESS) {
@@ -478,6 +495,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
return PARAM_INVALID;
}

domi::GetContext().is_dynamic_input = ContainsDynamicInpus(*op_desc);

if (op_desc->HasAttr(ATTR_NAME_UNREGST_OPPATH)) {
impl_->is_singleop_unregistered_ = true;
}

// 0. Save original attributes.
OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc);
GE_CHECK_NOTNULL(op_desc_tmp);
@@ -494,9 +517,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
// 2. Create ComputeGraph.
string name = ge::CurrentTimeInStr() + "_" + model_file_name;
ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(name);
if (compute_graph == nullptr) {
return INTERNAL_ERROR;
}
GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR);

// 3. Add Node to ComputeGraph.
@@ -529,16 +549,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph);
GELOGI("ATC parser success in single op build.");

GraphId graph_id;
GeRootModelPtr ge_root_model = nullptr;
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, graph_id, ge_root_model));
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model));
map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs();
GE_CHECK_NOTNULL(ge_root_model);
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
if (name_to_ge_model.empty()) {
GELOGE(PARAM_INVALID, "GetSubgraphInstanceNameToModel is empty.");
return PARAM_INVALID;
}
GeModelPtr &ge_model = name_to_ge_model.begin()->second;
GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str());
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff));
@@ -608,7 +631,7 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr &
return SUCCESS;
}

Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id,
Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs,
GeRootModelPtr &ge_root_model) {
static GraphId id = 0;
const std::map<std::string, std::string> options;
@@ -627,19 +650,22 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor>
return INTERNAL_ERROR;
}
uint64_t session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us
ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id);
if (is_singleop_unregistered_) {
ret = graph_manager_.BuildGraphForUnregisteredOp(id, inputs, ge_root_model, session_id);
} else {
ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id);
}

if (ret != SUCCESS) {
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id);
return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED;
}

graph_id = id;
id += 1;

return SUCCESS;
}

Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &graph_id) {
Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph) {
static GraphId id = 0;
const std::map<std::string, std::string> options;
Status ret = graph_manager_.AddGraph(id, graph, options);
@@ -654,8 +680,6 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &g
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager generate graph failed");
return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED;
}

graph_id = id;
id += 1;

return SUCCESS;


+ 16
- 23
src/ge/graph/build/graph_builder.cc View File

@@ -37,21 +37,6 @@ const int32_t kInvalidPerfLevel = -1;
namespace ge {
GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {}

Status GraphBuilder::MarkGraph(ComputeGraphPtr &graph) {
GE_CHECK_NOTNULL(graph);
bool is_unknown_shape = false;
for (const auto &node : graph->GetDirectNode()) {
GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape),
"Get node[%s] shape status failed!", node->GetName().c_str());
if (is_unknown_shape) {
break;
}
}
graph->SetGraphUnknownFlag(is_unknown_shape);
GELOGD("mark graph [%s] unknown status success! value is %d", graph->GetName().c_str(), is_unknown_shape);
return SUCCESS;
}

void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) {
stream_max_parallel_num_ = options.stream_max_parallel_num;
hcom_parallel_ = options.hcom_parallel;
@@ -277,14 +262,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
uint64_t session_id) {
GELOGI("Start to build BuildForDynamicShape for dynamic shape.");
// mark unknown shape attr
for (auto &sub_graph : comp_graph->GetAllSubgraphs()) {
auto status = MarkGraph(sub_graph);
if (status != SUCCESS) {
GELOGE(FAILED, "mark graph failed!");
return status;
}
}
// Update Root Graph Data size
for (auto &node : comp_graph->GetDirectNode()) {
auto op_desc = node->GetOpDesc();
@@ -297,11 +274,22 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
}
//
for (auto &sub_graph : comp_graph->GetAllSubgraphs()) {
// exclude functional subgraph in known subgraph
if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
continue;
}
if (sub_graph->GetGraphUnknownFlag()) {
// unknown shape build flow
GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),
"Build for unknown shape graph failed.");
} else {
// reset functional subgraph parent graph as known subgraph
for (const auto &node : sub_graph->GetDirectNode()) {
for (const auto &sub_graph_name : node->GetOpDesc()->GetSubgraphInstanceNames()) {
auto sub_sub_graph = comp_graph->GetSubgraph(sub_graph_name);
GE_CHK_STATUS_RET(sub_graph->AddSubgraph(sub_sub_graph), "Failed add subgraph to known graph.");
}
}
// known shape build flow
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id),
"Build for known shape graph failed.");
@@ -450,6 +438,11 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc)
GELOGI("Begin to calc dynamic shape graph data[%s] size.", op_desc->GetName().c_str());
// data op only has one output anchor
ge::GeTensorDesc output_desc = op_desc->GetOutputDesc(0);
if (output_desc.MutableShape().IsUnknownShape()) {
GELOGI("No need to update dynamic shape graph data output size for unknown shape data.");
return SUCCESS;
}

int64_t output_size = 0;
if (ge::TensorUtils::GetSize(output_desc, output_size) != SUCCESS) {
GELOGW("Get size failed!");


+ 0
- 1
src/ge/graph/build/graph_builder.h View File

@@ -67,7 +67,6 @@ class GraphBuilder {
GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID);
Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr,
uint64_t session_id = INVALID_SESSION_ID);
Status MarkGraph(ComputeGraphPtr &graph);
int build_mode_;

std::map<std::string, int> stream_max_parallel_num_;


+ 164
- 45
src/ge/graph/build/memory/block_mem_assigner.cc View File

@@ -55,6 +55,13 @@ using std::unordered_map;
using std::unordered_set;
using std::vector;

void AlignMemOffset(size_t &mem_align_size) {
if (mem_align_size <= 0) {
return;
}
mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
}

void MemoryBlock::SetHeadOffset(size_t offset) {
head_offset_ = offset;
size_t child_offset = head_offset_;
@@ -92,7 +99,7 @@ void MemoryBlock::Resize() {
} else {
size_t block_size = (child_block_size > *iter) ? child_block_size : *iter;
if ((block_size > 0) && (block_size % MEM_ALIGN_SIZE != 0)) {
block_size = (block_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
AlignMemOffset(block_size);
}
block_size_ = block_size;
if (last_continuous_block_) {
@@ -101,6 +108,20 @@ void MemoryBlock::Resize() {
}
}

size_t MemoryBlock::AlignSize() const {
size_t align_block_size = 0;
auto iter = std::max_element(real_size_list_.begin(), real_size_list_.end());
if (iter == real_size_list_.end()) {
GELOGW("real_size_list_ is empty");
} else {
align_block_size = *iter;
if ((align_block_size > 0) && (align_block_size % MEM_ALIGN_SIZE != 0)) {
AlignMemOffset(align_block_size);
}
}
return align_block_size;
}

bool MemoryBlock::IsSameLabel(std::string &first_batch_label) {
if (node_type_index_list_.empty()) {
return false;
@@ -133,31 +154,69 @@ bool MemoryBlock::IsSameLabel(std::string &first_batch_label) {
}

bool CanNotLifeReuse(MemoryBlock *block) {
if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_ || block->continuous_block_) {
if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_) {
return true;
}
return false;
}

void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) {
// continuous memory case:only real_size is maximum can be reused and only one continuous memory in one block
auto it_block = std::max_element(std::begin(block->NoAlignSizeList()), std::end(block->NoAlignSizeList()));
auto it_this = std::max_element(std::begin(NoAlignSizeList()), std::end(NoAlignSizeList()));
if (it_block != std::end(block->NoAlignSizeList()) && it_this != std::end(NoAlignSizeList())) {
if ((continuous_block_ && block->continuous_block_) || (continuous_block_ && (*it_this < *it_block)) ||
(block->continuous_block_ && (*it_this > *it_block))) {
GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", *it_this,
continuous_block_, *it_block, block->continuous_block_);
return;
}
}

MemoryBlock *parent = nullptr;
MemoryBlock *child = nullptr;
// merge small block to large block
if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) {
if ((block->child_offset_ + AlignSize()) <= *it_block) {
parent = block;
child = this;
}
}
if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) {
parent->child_blocks_.emplace_back(child);
parent->child_offset_ += child->AlignSize();
child->deleted_block_ = true;
GELOGI(
"Add continuous block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to"
" block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]",
child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent,
parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd());
}
}

void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) {
if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) {
return;
}
if (block->continuous_block_) {
AddContinuousLifeReuseBlock(block, total_node_depend_stream_life);
return;
}
MemoryBlock *parent = nullptr;
MemoryBlock *child = nullptr;
// merge small block to large block
if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) {
if ((child_offset_ + block->block_size_) <= block_size_) {
if ((child_offset_ + block->AlignSize()) <= AlignSize()) {
parent = this;
child = block;
} else if ((block->child_offset_ + block_size_) <= block->block_size_) {
} else if ((block->child_offset_ + AlignSize()) <= block->AlignSize()) {
parent = block;
child = this;
}
}
if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) {
parent->child_blocks_.emplace_back(child);
parent->child_offset_ += child->block_size_;
parent->child_offset_ += child->AlignSize();
child->deleted_block_ = true;
GELOGI(
"Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to"
@@ -431,7 +490,7 @@ size_t GetBlockSize(size_t size, const vector<int64_t> &ranges) {
}

GELOGW("Memory needed size:%zu is beyond the biggest block in memory ranges.", size);
return 0;
return size;
}

bool IsDirectOutputNode(const NodePtr &node, int idx) {
@@ -465,34 +524,8 @@ void ReduceReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t
}

bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const MemoryBlock &reusable_block,
size_t block_size, size_t real_size, bool continuous, int64_t atomic_addr_clean_id) {
size_t block_size, size_t real_size, bool continuous) {
bool can_reuse = false;

// If node is before atomic_addr_clean node, the continus memory can't be reused.
if (!reusable_block.NodeTypeIndexList().empty()) {
auto node = reusable_block.NodeTypeIndexList()[0].node;
if (node != nullptr) {
auto op_desc = node->GetOpDesc();
if (op_desc != nullptr) {
if ((op_desc->GetId() < atomic_addr_clean_id) && continuous) {
return false;
}
}
}
}

// continuous memory case:only real_size is maximum can be reused and only one continuous memory in one block
if (continuous || reusable_block.continuous_block_) {
auto it =
std::max_element(std::begin(reusable_block.NoAlignSizeList()), std::end(reusable_block.NoAlignSizeList()));
if (it != std::end(reusable_block.NoAlignSizeList())) {
GE_IF_BOOL_EXEC((continuous && reusable_block.continuous_block_) || (continuous && (real_size < *it)) ||
(reusable_block.continuous_block_ && (real_size > *it)),
GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d",
real_size, continuous, *it, reusable_block.continuous_block_);
return false;);
}
}
if (reusable_block.Size() == block_size) {
can_reuse = true;
} else {
@@ -683,6 +716,34 @@ void BlockMemAssigner::PrintSymbolMap() {
}
}

bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) {
if (n == nullptr) {
GELOGE(FAILED, "Node is null.");
return false;
}

// Get the continuous output type of the node, default is false
bool is_output_continuous = false;
auto node_desc = n->GetOpDesc();
if (node_desc == nullptr) {
GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str());
return false;
}

// If GetBool fail, is_output_continuous is false.
(void)ge::AttrUtils::GetBool(node_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous);
if (is_output_continuous) {
if (n->GetOwnerComputeGraph() != nullptr) {
string graph_name = n->GetOwnerComputeGraph()->GetName();
GELOGI("%s name[%s] set continuous, output size[%u].", graph_name.c_str(), n->GetName().c_str(),
n->GetAllOutDataAnchorsSize());
return true;
}
}

return false;
}

MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size,
MemoryType mem_type, const NodePtr &n, uint32_t out_index,
const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem,
@@ -699,7 +760,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) &&
reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index));
auto stream_id = node_op_desc->GetStreamId();
if (is_reuse_memory) {
if (is_reuse_memory && !continuous) {
for (auto it = reusable_blocks_[stream_id].begin(); it != reusable_blocks_[stream_id].end(); ++it) {
MemoryBlock *reusable_block = *it;
if (!IsPostReuse(reusable_block)) {
@@ -709,8 +770,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
}

// A node can reuse blocks of the same stream and preorder streams
auto id = GetAtomicAddrCleanId();
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous, id)) {
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) {
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size);
if (mem_type == kOutput) {
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString());
@@ -750,6 +810,47 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
return block;
}

/// @brief Allocate one continuous memory block that covers every output of node `n`.
/// The summed (aligned) size of all outputs is applied as a single block via
/// ApplyMemory on output 0; outputs 1..N-1 are pushed into zero_memory_list_ so
/// they later receive offsets inside that same block instead of blocks of their own.
/// @param n               node whose outputs must be laid out contiguously (e.g. for HCCL)
/// @param ranges          candidate block sizes used by GetBlockSize to round the request
/// @param is_op_reuse_mem whether the op is allowed to reuse freed blocks
/// @return the continuous block, or nullptr on any failure (null node/desc,
///         missing output desc, or tensor-size query failure)
MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges,
const bool is_op_reuse_mem) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null.");
auto node_op_desc = n->GetOpDesc();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null.");
MemoryBlock *block = nullptr;
int64_t total_size = 0;
// Accumulate the aligned size of every output; a missing desc or failed size
// query aborts the whole continuous assignment (all-or-nothing).
for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) {
auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
if (output_op_desc == nullptr) {
return nullptr;
}
int64_t size = 0;
if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) {
GELOGI("Get size failed");
return nullptr;
}
// AlignMemOffset rounds align_size up in place (alignment value not visible here).
size_t align_size = static_cast<size_t>(size);
AlignMemOffset(align_size);
total_size += align_size;

// only apply total size in first block
if (index != 0) {
zero_memory_list_.emplace_back(n, kOutput, index);
}
}

auto block_size = GetBlockSize(total_size, ranges);
GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", node_op_desc->GetName().c_str(), total_size,
block_size);

// Apply the whole region against output index 0; last arg `true` marks the
// request as continuous so ApplyMemory skips reuse of existing blocks.
vector<bool> workspace_reuse_flag;
block = ApplyMemory(block_size, total_size, total_size, kOutput, n, 0, workspace_reuse_flag, is_op_reuse_mem, true);
if (block != nullptr) {
// hccl task need align header and tail
block->first_continuous_block_ = true;
block->last_continuous_block_ = true;
}
return block;
}

MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges,
const bool is_op_reuse_mem, const bool continuous) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null.");
@@ -991,6 +1092,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
// Allocate memory for the current node and release node memory of the same size in the workspace
GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1",
ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id]);)
if (IsContinuousOutput(node)) {
(void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_);
return SUCCESS;
}
for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) {
int64_t size = 0;
auto output_op_desc = op_desc->GetOutputDescPtr(i);
@@ -1017,7 +1122,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
continue;
}
// atomic can't be reused
if (is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic) {
bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic;
if (need_change) {
is_op_reuse_mem_ = false;
}
MemoryBlock *mem_block = ApplyOutMemory(node, i, ranges, is_op_reuse_mem_, out_node_set_continuous_input);
@@ -1225,10 +1331,12 @@ static bool CompareBlockIndex(MemoryBlock *left, MemoryBlock *right) {
/// @param [in] input blocks need continuous
/// @param [out] blocks after continuous order
/// @param [in/out] blocks ordered
/// @param [in] input or output
///
void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks,
const std::map<MemoryBlock *, uint32_t> block_map,
std::vector<MemoryBlock *> &dest_blocks, std::vector<MemoryBlock *> &continuous_blocks) {
std::vector<MemoryBlock *> &dest_blocks, std::vector<MemoryBlock *> &continuous_blocks,
const std::string &type) {
for (auto &memory_block : org_blocks) {
if (memory_block == nullptr || memory_block->deleted_block_) {
continue;
@@ -1245,7 +1353,7 @@ void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks,
for (auto &memory_block : continuous_blocks) {
GE_IF_BOOL_EXEC(memory_block == nullptr, continue);

GELOGI("Block continuous input index:%d", memory_block->input_index_);
GELOGI("Block continuous %s index:%d", type.c_str(), memory_block->input_index_);
count++;
if (count == 1) {
memory_block->first_continuous_block_ = true;
@@ -1280,7 +1388,7 @@ void BlockMemAssigner::AssignContinuousBlocks() {
continuous_block_map.size(), continuous_blocks.size());
continue;
}
ReAssignContinuousBlocks(memory_blocks_, continuous_block_map, dest_memory_blocks, continuous_blocks);
ReAssignContinuousBlocks(memory_blocks_, continuous_block_map, dest_memory_blocks, continuous_blocks, "input");
memory_blocks_.swap(dest_memory_blocks);
}
}
@@ -1292,14 +1400,25 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) {
}
for (size_t i = 0; i < memory_blocks_.size(); ++i) {
auto parent = memory_blocks_[i];
if (parent == nullptr || parent->deleted_block_) {
if (parent == nullptr || parent->deleted_block_ || parent->continuous_block_) {
continue;
}
if (parent->reuse_mem_ && !IsPostReuse(parent)) {
parent->reuse_mem_ = false;
}
for (size_t j = i + 1; j < memory_blocks_.size(); ++j) {
parent->AddLifeReuseBlock(memory_blocks_[j], total_node_depend_stream_life_);
auto child = memory_blocks_[j];
if (child == nullptr) {
continue;
}
// If node is before atomic_addr_clean node, the continus memory can't be reused.
if (!parent->NodeTypeIndexList().empty() && child->continuous_block_) {
auto node = parent->NodeTypeIndexList()[0].node;
if (node == nullptr || node->GetOpDesc() == nullptr || (node->GetOpDesc()->GetId() < GetAtomicAddrCleanId())) {
continue;
}
}
parent->AddLifeReuseBlock(child, total_node_depend_stream_life_);
}
}
}
@@ -1450,8 +1569,8 @@ Status BlockMemAssigner::Assign() {

bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) ||
(node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) ||
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) ||
(node_type == HVDCALLBACKBROADCAST) || (node_type == HVDCALLBACKALLREDUCE);
(node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || (node_type == ASSIGNADD) ||
(node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) ||
(node_type == HVDCALLBACKBROADCAST);
}
} // namespace ge

+ 8
- 0
src/ge/graph/build/memory/block_mem_assigner.h View File

@@ -90,6 +90,8 @@ class MemoryBlock {
}
size_t Size() const { return block_size_; }

size_t AlignSize() const;

void SetHeadOffset(size_t offset);

void SetTailOffset(size_t offset);
@@ -118,6 +120,8 @@ class MemoryBlock {

bool IsSameLabel(std::string &first_batch_label);

void AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life);

void AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &node_depend_stream_life);

void SetLifeTimeEnd(size_t time);
@@ -362,6 +366,10 @@ class BlockMemAssigner : public MemAssigner {
///
void ReuseBlocksByLifeTime(size_t range_size);

bool IsContinuousOutput(const NodePtr &n);

MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem);

std::unordered_map<int64_t, std::vector<MemoryBlock *>> reusable_blocks_;

std::map<std::string, uint64_t> reusable_block_counts_;


+ 193
- 79
src/ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -293,7 +293,8 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
} else if (is_loop_graph) {
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start));
} else {
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, mem_clean_start, mem_clean_size), "SetAtomicCleanAttr failed.");
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}),
"SetAtomicCleanAttr failed.");
}
}
}
@@ -441,35 +442,33 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node
GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
vector<int64_t> output_list = out_op_desc->GetOutputOffset();

if (out_op_desc->GetOutputsSize() > output_list.size()) {
if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
out_op_desc->GetOutputsSize(), output_list.size());
return ge::FAILED;
}

memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE;
size_t mem_offset = output_list[0];
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_;
size_t pre_mem_offset = memory_offset_[0].mem_offset_;

output_list[out_data_anchor->GetIdx()] = mem_offset;
int64_t tensor_desc_size = 0;
if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) !=
ge::SUCCESS) {
GELOGE(FAILED, "GetSize failed.");
return FAILED;
}
memory_offset_[0].mem_offset_ += tensor_desc_size;

AlignMemOffset(MEM_ALIGN_SIZE);
mem_offset += tensor_desc_size;
if (mem_offset <= 0) {
return FAILED;
}
mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
GELOGI(
"[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
"[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
"real_size[%ld].",
node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
pre_mem_offset, out_op_desc->GetStreamId(), (memory_offset_[0].mem_offset_ - pre_mem_offset), tensor_desc_size);
output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size);
}

out_op_desc->SetOutputOffset(output_list);
memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE;
return ge::SUCCESS;
}

@@ -809,14 +808,12 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt
}

Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
if (compute_graph_ == nullptr) {
GELOGE(ge::PARAM_INVALID, "Graph must not be null.");
return ge::PARAM_INVALID;
}
GE_CHECK_NOTNULL(compute_graph_);
// Atomic op memory start addr
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_);

vector<NodePtr> connect_netoutput_nodes;
for (auto &node : compute_graph_->GetAllNodes()) {
auto node_op_desc = node->GetOpDesc();
if (node_op_desc == nullptr) {
@@ -839,36 +836,20 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
return ge::PARAM_INVALID;
}

// Atomic op memory start addr of loop graph
int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);

// Reassign atomic node output memory
Status ret = AssignAtomicOutputMemory(node);
if (ret != SUCCESS) {
GELOGE(ret, "Assign atomic output memory failed, node is %s.", node_op_desc->GetName().c_str());
return ret;
vector<int> is_connect_netoutput;
// If GetBool fail, attr is_connect_netoutput is an empty vector.
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput);
if (!is_connect_netoutput.empty()) {
connect_netoutput_nodes.emplace_back(node);
continue;
}

// Check atomic workspace
map<string, map<int64_t, int64_t>> sub_node_workspace_info;
sub_node_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, sub_node_workspace_info);
if (!sub_node_workspace_info.empty()) {
bool is_fusion_node = false;
// If GetBool fail, is_fusion_node is false.
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);

if (is_fusion_node) {
// Assign fusion atomic node workspace memory
ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, sub_node_workspace_info);
} else {
// Assign single ordinary atomic node workspace memory, not include fusion node
ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, sub_node_workspace_info);
}

if (ret != SUCCESS) {
GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str());
return ret;
}
// Atomic op memory start addr of loop graph
int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
vector<int64_t> mem_offset_end;
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
return FAILED;
}

/// In networks with loop op, atomic op uses atomic_addr_clean op independently,
@@ -883,13 +864,80 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
// Set the address attr of atomic clean operator
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
if (atomic_mem_size != 0) {
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, atomic_mem_start, atomic_mem_size), "SetAtomicCleanAttr failed.");
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}),
"SetAtomicCleanAttr failed.");
}
}

if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) {
GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
return FAILED;
}

return SUCCESS;
}

/// @brief Assign memory for an atomic node: first its outputs, then (if the node
/// carries EXT_ATTR_ATOMIC_WORKSPACE_INFO) its atomic workspace, dispatching to
/// the fusion or ordinary workspace path based on ATOMIC_ATTR_IS_FUSION_NODE.
/// @param node            atomic node to assign
/// @param mem_offset_end  out: end offsets recorded after each assigned region
///                        (filled by the callees; used later to derive per-region sizes)
/// @return SUCCESS, or the failing callee's status
/// NOTE(review): node->GetOpDesc() is dereferenced without a null check here —
/// callers appear to guarantee a valid op desc; confirm at call sites.
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
vector<int64_t> &mem_offset_end) {
auto node_op_desc = node->GetOpDesc();
// Assign atomic node output memory
Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
if (ret != SUCCESS) {
GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str());
return ret;
}

// Check and assign atomic node workspace memory
map<string, map<int64_t, int64_t>> atomic_workspace_info;
atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
if (!atomic_workspace_info.empty()) {
bool is_fusion_node = false;
// If GetBool fail, is_fusion_node is false.
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);

if (is_fusion_node) {
// Assign fusion atomic node workspace memory
ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
} else {
// Assign single ordinary atomic node workspace memory, not include fusion node
ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
}
if (ret != SUCCESS) {
GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str());
return ret;
}
}

return SUCCESS;
}

/// @brief Assign atomic output/workspace memory for nodes that connect directly
/// to the net output. Each such node gets its own region starting at the current
/// global mem_offset_, and then its own independent atomic-clean attributes
/// (these nodes do not share the graph-wide atomic_addr_clean region).
/// @param connect_netoutput_nodes nodes previously collected via
///        ATTR_NAME_NODE_CONNECT_OUTPUT in ReAssignAtomicMemory
/// @return SUCCESS, or FAILED if assignment or attr setting fails;
///         nodes with a null op desc are skipped with a warning
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
for (auto &node : connect_netoutput_nodes) {
GE_CHECK_NOTNULL(node);
if (node->GetOpDesc() == nullptr) {
GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
continue;
}

// Atomic memory start addr
int64_t original_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
vector<int64_t> mem_offset_end;
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
return FAILED;
}

// All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately.
if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) {
GELOGE(FAILED, "Failed to set atomic attr separately.");
return FAILED;
}
}
return SUCCESS;
}

Status GraphMemoryAssigner::AssignReferenceMemory() {
for (auto &node : compute_graph_->GetDirectNode()) {
// Get the reference type of the node, default is false
@@ -971,9 +1019,10 @@ bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
return true;
}

Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) {
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
auto op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
mem_offset_end.clear();
GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());

vector<int64_t> atomic_output_index;
@@ -996,24 +1045,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) {

// If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here
bool is_assigned_mem = false;
if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index);
return ge::PARAM_INVALID;
}
auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
GE_CHECK_NOTNULL(out_data_anchor);
auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
for (auto &input_anchor : input_anchors) {
auto output_node = input_anchor->GetOwnerNode();

/// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address
/// has been assigned
vector<int64_t> atomic_input_index;
(void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
is_assigned_mem = true;
break;
}
if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str());
return ge::FAILED;
}

// If you have already assigned an atomic address, skip it, and you don't need to reassign it.
@@ -1038,6 +1072,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) {

memory_offset_[0].mem_offset_ += size;
AlignMemOffset(MEM_ALIGN_SIZE);
mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
}

op_desc->SetOutputOffset(output_list);
@@ -1045,8 +1080,33 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) {
return ge::SUCCESS;
}

/// @brief Determine whether the atomic memory of output `output_index` has
/// already been assigned by a downstream consumer. A peer input op whose
/// ATOMIC_ATTR_INPUT_INDEX list starts with kAllInputAddrIsAtomic (-1) signals
/// that the cascade op clears the atomic address itself, so no separate
/// assignment is needed here.
/// @param node           producer node
/// @param output_index   output anchor index to check (validated against anchor count)
/// @param is_mem_assigned out: set to true when any peer declares the address
///        assigned; left untouched otherwise (callers initialize it to false)
/// @return SUCCESS, or PARAM_INVALID for an out-of-range output index
Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
bool &is_mem_assigned) {
if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index);
return ge::PARAM_INVALID;
}
auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
GE_CHECK_NOTNULL(out_data_anchor);
auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
for (auto &input_anchor : input_anchors) {
auto output_node = input_anchor->GetOwnerNode();

/// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address
/// has been assigned
vector<int64_t> atomic_input_index;
(void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
is_mem_assigned = true;
break;
}
}
return SUCCESS;
}

Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
map<string, map<int64_t, int64_t>> &workspace_info) {
map<string, map<int64_t, int64_t>> &workspace_info,
vector<int64_t> &mem_offset_end) {
GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
vector<int64_t> workspace_vector = op_desc->GetWorkspace();

@@ -1078,6 +1138,7 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc
op_desc->GetStreamId(), workspace_size, workspace_size);

memory_offset_[0].mem_offset_ += workspace_size;
mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
}
}
op_desc->SetWorkspace(workspace_vector);
@@ -1086,7 +1147,8 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc
}

Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
map<string, map<int64_t, int64_t>> &workspace_info) {
map<string, map<int64_t, int64_t>> &workspace_info,
vector<int64_t> &mem_offset_end) {
GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
map<string, map<int64_t, int64_t>> sub_node_workspace_offset;

@@ -1108,6 +1170,7 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt
op_desc->GetStreamId(), workspace_size, workspace_size);

memory_offset_[0].mem_offset_ += workspace_size;
mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
index_offset.insert(std::make_pair(workspace_index, workspace_offset));
}
sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
@@ -1287,6 +1350,47 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
return SUCCESS;
}

/// @brief Push per-region clean addresses to the ATOMICADDRCLEAN op that
/// control-precedes `node`. The start/end offset lists are converted into
/// parallel (start, size) vectors and handed to SetAtomicCleanAttr on each
/// qualifying control predecessor.
/// @param node             atomic node whose memory was just assigned
/// @param atomic_mem_start offset where this node's atomic region begins
/// @param mem_offset_end   end offset recorded after each sub-region; region i
///                         spans [start_i, mem_offset_end[i]) with
///                         start_{i+1} = mem_offset_end[i]
/// @return SUCCESS, or FAILED if SetAtomicCleanAttr fails
Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
const vector<int64_t> &mem_offset_end) {
GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);

// Parsing offset and size vectors
vector<int64_t> memory_offset_start;
vector<int64_t> memory_offset_size;
memory_offset_start.emplace_back(atomic_mem_start);
for (size_t i = 0; i < mem_offset_end.size(); ++i) {
memory_offset_start.emplace_back(mem_offset_end[i]);
// Number 1 means element index
auto size = memory_offset_start[i + 1] - memory_offset_start[i];
memory_offset_size.emplace_back(size);
}
// The last appended start is one past the final region; drop it so the two
// vectors stay the same length.
memory_offset_start.pop_back();

const auto &in_control_anchor = node->GetInControlAnchor();
if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
// Walk control predecessors and set the attr only on ATOMICADDRCLEAN ops.
for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
if (peer_out_control_anchor == nullptr) {
continue;
}
auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
auto peer_out_node_desc = peer_out_node->GetOpDesc();
if (peer_out_node_desc == nullptr) {
continue;
}

GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(),
peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) {
GELOGE(FAILED, "Set atomic clean attr failed.");
return FAILED;
}
}
}
}
return SUCCESS;
}

Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) {
// set the address attr of atomic clean operator for loop graph
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
@@ -1308,7 +1412,7 @@ Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int6
peer_out_node_desc->GetType().c_str());

if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, atomic_mem_start, atomic_mem_size),
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}),
GELOGE(FAILED, "SetAtomicCleanAttr failed.");
return FAILED);
}
@@ -1317,8 +1421,8 @@ Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int6
return SUCCESS;
}

ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t atomic_mem_start,
int64_t atomic_mem_size) {
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector<int64_t> &atomic_mem_start,
const vector<int64_t> &atomic_mem_size) {
for (ge::NodePtr &node : compute_graph_->GetAllNodes()) {
auto node_op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
@@ -1327,15 +1431,15 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t ato
((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) {
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
workspace_vector.emplace_back(atomic_mem_start);
workspace_byte_vector.emplace_back(atomic_mem_size);
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
node_op_desc->SetWorkspace(workspace_vector);
node_op_desc->SetWorkspaceBytes(workspace_byte_vector);

std::vector<int64_t> mem_start_vector;
// If GetListInt fail, mem_start_vector is empty.
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
mem_start_vector.emplace_back(atomic_mem_start);
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
GELOGE(FAILED, "SetListInt failed.");
return FAILED);
@@ -1343,16 +1447,26 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t ato
std::vector<int64_t> mem_size_vector;
// If GetListInt fail, mem_size_vector is empty.
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
mem_size_vector.emplace_back(atomic_mem_size);
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
GELOGE(FAILED, "SetListInt failed.");
return FAILED);

GELOGI(
"[IMAS]SetAtomicCleanAttr : Set %s name[%s] output[%d] offset to [%ld] streamid[%ld] size[%ld] "
"realsize[%ld].",
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), 0, atomic_mem_start,
node->GetOpDesc()->GetStreamId(), atomic_mem_size, atomic_mem_size);
std::stringstream ss;
for (auto iter : atomic_mem_start) {
ss << iter << " ";
}
string atomic_mem_start_str = ss.str();
ss.clear();
ss.str("");
for (auto iter : atomic_mem_size) {
ss << iter << " ";
}
string atomic_mem_size_str = ss.str();

GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
}
}
return SUCCESS;


+ 16
- 5
src/ge/graph/build/memory/graph_mem_assigner.h View File

@@ -147,22 +147,33 @@ class GraphMemoryAssigner {
///
bool CheckInputIsSupportAtomic(const ge::NodePtr &node);

ge::Status AssignAtomicOutputMemory(const ge::NodePtr &node);
ge::Status GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index, bool &is_mem_assigned);

ge::Status AssignAtomicOutputMemory(const ge::NodePtr &node, std::vector<int64_t> &mem_offset_end);

ge::Status AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info);
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info,
std::vector<int64_t> &mem_offset_end);

ge::Status AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info);
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info,
std::vector<int64_t> &mem_offset_end);

ge::Status AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, std::vector<int64_t> &mem_offset_end);

ge::Status AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes);

ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
const std::vector<int64_t> &mem_offset_end);
///
/// @brief set loop graph atomic attr
/// @param node
/// @param node, atomic memory assignment start offset
/// @param atomic_mem_start: atomic op memory start address
///
ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start);

ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, int64_t atomic_mem_start, int64_t atomic_mem_size);
ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector<int64_t> &atomic_mem_start,
const std::vector<int64_t> &atomic_mem_size);

void AlignMemOffset(const int64_t &mem_align_size);



+ 26
- 0
src/ge/graph/build/model_builder.cc View File

@@ -42,10 +42,12 @@
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "graph/passes/memcpy_addr_async_pass.h"
#include "init/gelib.h"
#include "memory/memory_assigner.h"
#include "omg/version.h"
#include "register/op_registry.h"
#include "graph/passes/set_input_output_offset_pass.h"

using std::map;
using std::set;
@@ -668,12 +670,36 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) {
GE_CHK_STATUS_RET(label_allocator.AssignFunctionalLabels(label_num_), "Assign label failed.");
GE_TIMESTAMP_END(AssignFunctionalLabels, "ModelBuilder::AssignFunctionalLabels");

// Add memcpy_addr_async node.
rtFeatureType_t feature_type = FEATURE_TYPE_MEMCPY;
int32_t feature_info = MEMCPY_INFO_SUPPORT_ZEROCOPY;
int64_t value = 0;
rtError_t rt_ret = rtGetRtCapability(feature_type, feature_info, &value);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtGetRtCapability failed.");
return RT_FAILED;
} else {
if (value == RT_CAPABILITY_SUPPORT) {
GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode);
MemcpyAddrAsyncPass memcpy_addr;
GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed.");
GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run.");
} else {
GELOGW("rtGetRtCapability not support memcpy_addr_async.");
}
}

GE_TIMESTAMP_START(AssignMemory);
MemoryAssigner mem_assigner(compute_graph_);
GE_CHK_STATUS_RET(mem_assigner.AssignMemory(is_loop_graph_, mem_offset_, zero_copy_mem_size_),
"Assign Memory Failed!");
GE_TIMESTAMP_END(AssignMemory, "GraphBuilder::AssignMemory");

GE_TIMESTAMP_START(SetInputOutputOffset);
SetInputOutputOffsetPass input_output_offset;
GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed.");
GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run.");

// Compile single op in graph build stage
GE_TIMESTAMP_START(CompileSingleOp);
GE_CHK_STATUS_RET(CompileSingleOp(), "ATC builder CompileSingleOp() return fail.");


+ 39
- 0
src/ge/graph/build/stream_allocator.cc View File

@@ -612,6 +612,33 @@ bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr
AttrUtils::HasAttr(activate_stream_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE)) {
return false;
}

///
/// stream_0 --> stream_2 --> stream_3 --> stream_4
/// /\ |
/// | \/
/// | stream_1 --> stream_5 --> stream_6 --> stream_7
/// | /\ | |
/// | | \/ |
/// | |---------- stream_8 |
/// | |
/// |-----------------------------------------------------------|
///
/// Exit1(S7) Exit2(S7) Exit3(S7)
/// \ / |
/// AddN(S1) NextIteration(S7)
/// | |
/// NextIteration(S1) /
/// | /
/// | /
/// StreamActive(S7)
///
/// Event between Exit1/Exit2 and AddN should not be optimized
///
if (IsActiveAfterNextIteration(activate_stream_node)) {
continue;
}

visited_nodes.insert(activate_stream_node);
// nodes in stream link to streamActivate no need to add event before activated node
for (const auto &pre_activate_stream_node : activate_stream_node->GetInNodes()) {
@@ -639,6 +666,18 @@ bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr
return false;
}

/// @brief Return true when a StreamActive node is triggered exclusively by
/// NEXTITERATION / REFNEXTITERATION control predecessors. Such activations
/// happen after the next-iteration edge fires, so events into the activated
/// stream must not be optimized away (see the stream diagram at the call site).
/// @param active_node_ptr candidate StreamActive node
/// @return false for a null node, a node with no control inputs, or any
///         non-NextIteration control input; true otherwise
bool StreamAllocator::IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const {
if ((active_node_ptr == nullptr) || active_node_ptr->GetInControlNodes().empty()) {
return false;
}
for (const auto &in_node : active_node_ptr->GetInControlNodes()) {
if ((in_node->GetType() != NEXTITERATION) && (in_node->GetType() != REFNEXTITERATION)) {
return false;
}
}
return true;
}

// Split the stream according to the maximum number of nodes in the stream.
Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) {
if (enable_single_stream_ || stream_num_ == 0) {


+ 1
- 0
src/ge/graph/build/stream_allocator.h View File

@@ -55,6 +55,7 @@ class StreamAllocator {
Status OptimizeByStreamActivate();
// Determine if the successor node of RecvNode is directly or indirectly activated by the SendNode precursor node
bool IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr, const NodePtr &recv_node_ptr) const;
bool IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const;

Status SplitStreams(std::vector<std::set<int64_t>> &split_streams);
bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const;


+ 27
- 7
src/ge/graph/execute/graph_execute.cc View File

@@ -86,10 +86,10 @@ Status GraphExecutor::SetGraphContext(GraphContextPtr graph_context_ptr) {
return SUCCESS;
}

Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) {
Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, int32_t dynamic_type) {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->SetDynamicSize(model_id, batch_num);
Status ret = model_manager->SetDynamicSize(model_id, batch_num, dynamic_type);
if (ret != SUCCESS) {
GELOGE(FAILED, "SetDynamicSize failed");
return ret;
@@ -486,12 +486,14 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<Inp
/// @brief Get dynamic batch_info
/// @param [in] model_id
/// @param [out] batch_info
/// @param [out] dynamic_type
/// @return execute result
///
Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) {
Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
int32_t &dynamic_type) {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->GetDynamicBatchInfo(model_id, batch_info);
Status ret = model_manager->GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
if (ret != SUCCESS) {
GELOGE(ret, "GetDynamicBatchInfo failed.");
return ret;
@@ -499,12 +501,30 @@ Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::ve
return SUCCESS;
}

Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) {
///
/// @ingroup ge
/// @brief Get combined dynamic dims info
/// @param [in] model_id
/// @param [out] batch_info
/// @return execute result
///
Status GraphExecutor::GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->GetCombinedDynamicDims(model_id, batch_info);
if (ret != SUCCESS) {
GELOGE(ret, "GetCombinedDynamicDims failed.");
return ret;
}
return SUCCESS;
}

Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->GetCurShape(model_id, batch_info);
Status ret = model_manager->GetCurShape(model_id, batch_info, dynamic_type);
if (ret != SUCCESS) {
GELOGE(FAILED, "GetCurShape failed");
GELOGE(ret, "GetCurShape failed");
return ret;
}
return SUCCESS;


+ 14
- 3
src/ge/graph/execute/graph_execute.h View File

@@ -56,7 +56,7 @@ class GraphExecutor {

Status SetGraphContext(GraphContextPtr graph_context_ptr);

static Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num);
static Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, int32_t dynamic_type);

void SetTrainFlag(bool is_train_graph);

@@ -80,11 +80,22 @@ class GraphExecutor {
/// @brief Get dynamic batch_info
/// @param [in] model_id
/// @param [out] batch_info
/// @param [out] dynamic_type
/// @return execute result
///
static Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
static Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
int32_t &dynamic_type);

static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info);
///
/// @ingroup ge
/// @brief Get combined dynamic dims info
/// @param [in] model_id
/// @param [out] batch_info
/// @return execute result
///
static Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);

static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);

static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info);



+ 1
- 2
src/ge/graph/label/while_label_maker.cc View File

@@ -104,12 +104,11 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) {
GE_CHECK_NOTNULL(cond_out_desc);

GeTensorDesc pred_desc = cond_out_desc->GetInputDesc(kCondOutputIndex);
GeTensorDesc cond_desc(GeShape(pred_desc.GetShape().GetDims()), pred_desc.GetFormat(), DT_INT32);

// false ==> 0 ==> switch_labels[0] ==> body_leave_index
// true ==> 1 ==> switch_labels[1] ==> body_enter_name
const std::vector<uint32_t> switch_labels = {body_leave_index, body_enter_index};
NodePtr switch_node = AddLabelSwitchLeave(cond_graph, cond_leave_name, cond_desc, switch_labels);
NodePtr switch_node = AddLabelSwitchLeave(cond_graph, cond_leave_name, pred_desc, switch_labels);
if (switch_node == nullptr) {
GELOGE(INTERNAL_ERROR, "Subgraph: %s add label switch failed.", cond_graph->GetName().c_str());
return FAILED;


+ 8
- 8
src/ge/graph/load/graph_loader.cc View File

@@ -36,20 +36,20 @@ GraphLoader::~GraphLoader() = default;
Status GraphLoader::UnloadModel(uint32_t model_id) {
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
GELOGI("UnLoad model begin, model_id:%u.", model_id);
GELOGI("UnLoad model begin, model id:%u.", model_id);

Status ret = model_manager->Stop(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "UnloadModel: Stop failed.");
GELOGE(ret, "UnloadModel: Stop failed. model id:%u", model_id);
}

ret = model_manager->Unload(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "UnloadModel: Unload failed.");
GELOGE(ret, "UnloadModel: Unload failed. model id:%u", model_id);
CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_UNLOAD);
return ret;
}
GELOGI("UnLoad model success, model_id:%u.", model_id);
GELOGI("UnLoad model success, model id:%u.", model_id);
return SUCCESS;
}

@@ -123,14 +123,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
Status ret;
try {
if (!CheckInputPathValid(path)) {
GELOGE(PARAM_INVALID, "model path is invalid: %s", path.c_str());
return PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return GE_EXEC_MODEL_PATH_INVALID;
}

GELOGI("Load model begin, model path is: %s", path.c_str());
if (!key_path.empty() && !CheckInputPathValid(key_path)) {
GELOGE(PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return GE_EXEC_MODEL_KEY_PATH_INVALID;
}

ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);


+ 46
- 54
src/ge/graph/load/new_model_manager/cpu_queue_schedule.cc View File

@@ -16,6 +16,7 @@

#include "graph/load/new_model_manager/cpu_queue_schedule.h"
#include "common/debug/ge_log.h"
#include "common/debug/log.h"

namespace {
const uint32_t kCoreDim = 1; // for rtCpuKernelLaunch
@@ -58,7 +59,7 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) {
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}
in_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(MbufQueueInfo);
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
@@ -69,7 +70,7 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) {
status = rtMemcpy(args_, args_size_, &queue_info, sizeof(MbufQueueInfo), RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

return SUCCESS;
@@ -84,7 +85,7 @@ Status CpuTaskModelDequeue::Distribute() {
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelDequeue, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelDequeue failed, status: 0x%X", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

GELOGI("Cpu kernel launch model dequeue task success.");
@@ -98,24 +99,24 @@ Status CpuTaskModelDequeue::Distribute() {
/// @param [in] outside_addrs: model input/output memory addr
/// @return: 0 for success / others for failed
///
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list,
std::map<const void *, std::vector<void *>> &outside_addrs) {
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) {
if ((args_ != nullptr) || (args_size_ > 0)) {
GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
return FAILED;
}

args_size_ = sizeof(AddrMapInfo);
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_FAILED;
}
GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM));
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)

AddrMapInfo addr_map_info;
for (const auto &addrs : outside_addrs) {
addr_map_info.addr_num += addrs.second.size();
for (auto &addrs : outside_addrs) {
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
for (const auto &virtual_args_addr : virtual_args_addrs) {
addr_map_info.addr_num += virtual_args_addr.second.size();
}
}
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);

@@ -123,38 +124,31 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list,
size_t index = 0;
vector<uint64_t> src_addrs;
vector<uint64_t> dst_addrs;
for (const auto &addrs : outside_addrs) {
for (size_t i = 0; i < addrs.second.size(); ++i) {
src_addrs.push_back(mbuf_list.at(index));
dst_addrs.push_back(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(addrs.second.at(i))));
for (auto &addrs : outside_addrs) {
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
for (const auto &virtual_args_addr : virtual_args_addrs) {
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) {
src_addrs.push_back(mbuf_list.at(index));
dst_addrs.push_back(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
}
}
index++;
}

// malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs
status = rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_FAILED;
}
status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(),
src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_FAILED;
}
GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));
rtError_t status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(),
src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status);
return RT_ERROR_TO_GE_STATUS(status);)

status = rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_FAILED;
}
GE_CHK_RT_RET(rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));
status = rtMemcpy(dst_addr_, dst_addrs.size() * sizeof(uint64_t), dst_addrs.data(),
dst_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_FAILED;
}
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status);
return RT_ERROR_TO_GE_STATUS(status);)

// src_addr_list is init to src_addr, which is the point to src_addrs
if (!src_addrs.empty() && !dst_addrs.empty()) {
@@ -164,10 +158,8 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list,
}

status = rtMemcpy(args_, args_size_, &addr_map_info, sizeof(AddrMapInfo), RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_FAILED;
}
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status);
return RT_ERROR_TO_GE_STATUS(status);)
return SUCCESS;
}

@@ -180,7 +172,7 @@ Status CpuTaskZeroCopy::Distribute() {
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskZeroCopy, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ZeroCopy failed, status: 0x%X", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

GELOGI("Cpu kernel launch zero copy task success.");
@@ -225,7 +217,7 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}
out_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(PrepareOutputInfo);
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
@@ -239,7 +231,7 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb
status = rtMemcpy(args_, args_size_, &prepare, sizeof(PrepareOutputInfo), RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

return SUCCESS;
@@ -254,7 +246,7 @@ Status CpuTaskPrepareOutput::Distribute() {
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskPrepareOutput, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch PrepareOutput failed, status: 0x%X", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

GELOGI("Cpu kernel launch prepare output task success.");
@@ -279,7 +271,7 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) {
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)

@@ -289,7 +281,7 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) {
status = rtMemcpy(args_, args_size_, &queue_info, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

return SUCCESS;
@@ -304,7 +296,7 @@ Status CpuTaskModelEnqueue::Distribute() {
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelEnqueue, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelEnqueue failed, status: 0x%X", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

GELOGI("Cpu kernel launch model enqueue task success.");
@@ -336,7 +328,7 @@ Status CpuTaskActiveEntry::Distribute() {
rtError_t ret = rtStreamActive(active_stream_, stream_);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt StreamActive failed, ret: 0x%X", ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(ret);
}

GELOGI("Cpu kernel launch active entry task success.");
@@ -359,14 +351,14 @@ Status CpuTaskWaitEndGraph::Init(uint32_t model_id) {
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)

status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

return SUCCESS;
@@ -381,7 +373,7 @@ Status CpuTaskWaitEndGraph::Distribute() {
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch WaitEndGraph failed, status: 0x%X", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

GELOGI("Cpu kernel launch wait end task success.");
@@ -404,14 +396,14 @@ Status CpuTaskModelRepeat::Init(uint32_t model_id) {
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)

status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

return SUCCESS;
@@ -426,7 +418,7 @@ Status CpuTaskModelRepeat::Distribute() {
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelRepeat, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelRepeat failed, status: 0x%x", status);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(status);
}

GELOGI("Cpu kernel launch repeat task success.");


+ 2
- 1
src/ge/graph/load/new_model_manager/cpu_queue_schedule.h View File

@@ -22,6 +22,7 @@

#include "common/ge_inner_error_codes.h"
#include "graph/load/new_model_manager/task_info/task_info.h"
#include "graph/load/new_model_manager/zero_copy_offset.h"
#include "runtime/kernel.h"

namespace ge {
@@ -93,7 +94,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo {
~CpuTaskZeroCopy() override;

Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, std::vector<void *>> &outside_addrs);
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs);

Status Distribute() override;



+ 26
- 24
src/ge/graph/load/new_model_manager/data_dumper.cc View File

@@ -487,8 +487,8 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in
size_t proto_size = op_mapping_info.ByteSizeLong();
bool ret = op_mapping_info.SerializeToString(&proto_str);
if (!ret || proto_size == 0) {
GELOGE(FAILED, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
return FAILED;
GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
return PARAM_INVALID;
}

if (dev_mem_load_ != nullptr) {
@@ -499,20 +499,20 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in
rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "load dump information.", proto_size)

rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

load_flag_ = true;
@@ -525,8 +525,8 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_
size_t proto_size = op_mapping_info.ByteSizeLong();
bool ret = op_mapping_info.SerializeToString(&proto_str);
if (!ret || proto_size == 0) {
GELOGE(FAILED, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
return FAILED;
GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
return PARAM_INVALID;
}

if (dev_mem_unload_ != nullptr) {
@@ -537,20 +537,20 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_
rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "unload dump information.", proto_size)

rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
load_flag_ = false;
GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size);
@@ -588,18 +588,20 @@ Status DataDumper::LoadDumpInfo() {
task.mutable_op()->set_op_type(op_desc->GetType());

if (dump_properties_.GetDumpMode() == kDumpOutput) {
if (DumpOutput(op_iter, task) != SUCCESS) {
GELOGE(FAILED, "Dump output failed");
return FAILED;
Status ret = DumpOutput(op_iter, task);
if (ret != SUCCESS) {
GELOGE(ret, "Dump output failed");
return ret;
}
op_mapping_info.mutable_task()->Add(std::move(task));
continue;
}
if (dump_properties_.GetDumpMode() == kDumpInput) {
if (op_iter.is_task) {
if (DumpInput(op_iter, task) != SUCCESS) {
GELOGE(FAILED, "Dump input failed");
return FAILED;
Status ret = DumpInput(op_iter, task);
if (ret != SUCCESS) {
GELOGE(ret, "Dump input failed");
return ret;
}
}
op_mapping_info.mutable_task()->Add(std::move(task));
@@ -608,14 +610,14 @@ Status DataDumper::LoadDumpInfo() {
if (dump_properties_.GetDumpMode() == kDumpAll) {
auto ret = DumpOutput(op_iter, task);
if (ret != SUCCESS) {
GELOGE(FAILED, "Dump output failed when in dumping all");
return FAILED;
GELOGE(ret, "Dump output failed when in dumping all");
return ret;
}
if (op_iter.is_task) {
ret = DumpInput(op_iter, task);
if (ret != SUCCESS) {
GELOGE(FAILED, "Dump input failed when in dumping all");
return FAILED;
GELOGE(ret, "Dump input failed when in dumping all");
return ret;
}
}
op_mapping_info.mutable_task()->Add(std::move(task));
@@ -630,8 +632,8 @@ Status DataDumper::LoadDumpInfo() {
if (!op_list_.empty() || is_op_debug_) {
auto ret = ExecuteLoadDumpInfo(op_mapping_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Execute load dump info failed");
return FAILED;
GELOGE(ret, "Execute load dump info failed");
return ret;
}
}
return SUCCESS;
@@ -702,8 +704,8 @@ Status DataDumper::UnloadDumpInfo() {
}
auto ret = ExecuteUnLoadDumpInfo(op_mapping_info);
if (ret != SUCCESS) {
GELOGE(FAILED, "Execute unload dump info failed");
return FAILED;
GELOGE(ret, "Execute unload dump info failed");
return ret;
}
return SUCCESS;
}


+ 364
- 282
src/ge/graph/load/new_model_manager/davinci_model.cc
File diff suppressed because it is too large
View File


+ 34
- 30
src/ge/graph/load/new_model_manager/davinci_model.h View File

@@ -28,14 +28,15 @@
#include "common/helper/model_helper.h"
#include "common/helper/om_file_helper.h"
#include "common/opskernel/ge_task_info.h"
#include "common/types.h"
#include "common/properties_manager.h"
#include "common/types.h"
#include "framework/common/util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/aipp_utils.h"
#include "graph/load/new_model_manager/data_dumper.h"
#include "graph/load/new_model_manager/data_inputer.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/new_model_manager/aipp_utils.h"
#include "graph/load/new_model_manager/zero_copy_offset.h"
#include "graph/load/new_model_manager/zero_copy_task.h"
#include "graph/model.h"
#include "graph/node.h"
@@ -285,11 +286,20 @@ class DavinciModel {
/// @ingroup ge
/// @brief Get dynamic batch_info
/// @param [out] batch_info
/// @param [out] dynamic_type
/// @return execute result
///
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info) const;
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const;

///
/// @ingroup ge
/// @brief Get combined dynamic dims info
/// @param [out] batch_info
/// @return None
///
void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const;

void GetCurShape(std::vector<int64_t> &batch_info);
void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type);

void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);

@@ -416,7 +426,7 @@ class DavinciModel {
void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args,
size_t size, size_t offset);

void SetDynamicSize(const std::vector<uint64_t> &batch_num);
void SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type);

bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; }

@@ -456,6 +466,9 @@ class DavinciModel {
void *cur_args = static_cast<char *>(args_) + offset;
return cur_args;
}
void SetTotalIOAddrs(vector<void *> &io_addrs) {
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
}
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
int64_t GetFixedAddrsSize(string tensor_name);
void *GetCurrentFixedAddr(int64_t offset) const {
@@ -474,7 +487,8 @@ class DavinciModel {
Status MallocKnownArgs();
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr(vector<void *> &io_addrs, uint32_t args_offset);
Status UpdateKnownZeroCopyAddr();
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
@@ -513,22 +527,6 @@ class DavinciModel {

///
/// @ingroup ge
/// @brief Save Data address info for ZeroCopy.
/// @param [in] const std::vector<void *> &outside_addrs
/// @return None.
///
void SetInputOutsideAddr(const std::vector<void *> &outside_addrs);

///
/// @ingroup ge
/// @brief Save NetOutput address info for ZeroCopy.
/// @param [in] const std::vector<void *> &outside_addrs
/// @return None.
///
void SetOutputOutsideAddr(const std::vector<void *> &outside_addrs);

///
/// @ingroup ge
/// @brief Copy Check input size and model op size.
/// @param [in] const int64_t &input_size: input size.
/// @param [in] const int64_t &op_size: model op size.
@@ -564,7 +562,7 @@ class DavinciModel {
/// @param [in] batch_label: batch label for multi-batch scenes
/// @return SUCCESS handle successfully / others handle failed
///
Status UpdateIoTaskArgs(const map<uint32_t, pair<int64_t, void *>> &data_info, bool is_input,
Status UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label);

Status CopyInputData(const InputData &input_data, bool device_data = false);
@@ -706,8 +704,7 @@ class DavinciModel {
///
Status BindInputQueue();

Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list,
std::map<const void *, std::vector<void *>> &outside_addrs);
Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs);

///
/// @ingroup ge
@@ -816,8 +813,12 @@ class DavinciModel {

vector<OpDescPtr> variable_op_list_;

std::map<uint32_t, std::pair<int64_t, void *>> input_data_info_; // Virtual address from Data output.
std::map<uint32_t, std::pair<int64_t, void *>> output_data_info_; // Virtual address from NetOutput input.
std::map<uint32_t, ZeroCopyOffset> new_input_data_info_;
std::map<uint32_t, ZeroCopyOffset> new_output_data_info_;
std::map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
std::map<const void *, ZeroCopyOffset> new_output_outside_addrs_;

std::vector<void *> real_virtual_addrs_;

// output op: save cce op actual needed memory size
vector<int64_t> output_memory_size_list_;
@@ -849,9 +850,7 @@ class DavinciModel {
std::mutex outside_addrs_mutex_;
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
std::set<const void *> copy_only_addrs_; // Address need copy to original place.
// {node_addr, {addr_in_task_args}}
std::map<const void *, std::vector<void *>> input_outside_addrs_; // Key is virtual address from Data.
std::map<const void *, std::vector<void *>> output_outside_addrs_; // Key is virtual address from NetOutput.

// {op_id, batch_label}
std::map<int64_t, std::string> zero_copy_op_id_batch_label_;
// {batch_label, addrs}
@@ -920,8 +919,13 @@ class DavinciModel {
int64_t total_fixed_addr_size_ = 0;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;
vector<void *> total_io_addrs_;
vector<void *> orig_total_io_addrs_;
bool base_addr_not_changed_ = false;

vector<vector<int64_t>> batch_info_;
std::vector<std::vector<int64_t>> combined_batch_info_;
int32_t dynamic_type_ = 0;

vector<uint64_t> batch_size_;
// key: input tensor name, generally rts op;


+ 82
- 62
src/ge/graph/load/new_model_manager/model_manager.cc View File

@@ -70,11 +70,11 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
auto kernel_size = sizeof(uint64_t) * (v_aicpu_kernel.size());
rtError_t rt_ret = rtMalloc(&aicpu_kernel_addr, kernel_size, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
return RT_FAILED;)
return RT_ERROR_TO_GE_STATUS(rt_ret);)

rt_ret = rtMemcpy(aicpu_kernel_addr, kernel_size, v_aicpu_kernel.data(), kernel_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(aicpu_kernel_addr)); return FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(aicpu_kernel_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);)
uint64_t kernel_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(aicpu_kernel_addr));
param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr;
// In the scene of loading once and running many times, the kernel needs to be destroyed many times,
@@ -84,64 +84,64 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u

rtError_t rt_ret = rtMalloc(&(devicebase), sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "malloc device memory failed.");
GELOGE(RT_FAILED, "malloc device memory failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
return FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret =
rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), &param_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "memory copy to device failed.");
GELOGE(RT_FAILED, "memory copy to device failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
GE_CHK_RT(rtFree(devicebase));
return FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rtStream_t stream = nullptr;
rt_ret = rtStreamCreate(&stream, 0);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "create stream failed.");
GELOGE(RT_FAILED, "create stream failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
GE_CHK_RT(rtFree(devicebase));
return FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtKernelLaunchEx(devicebase, sizeof(STR_FWK_OP_KERNEL), 0, stream);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtKernelLaunchEx failed.");
GELOGE(RT_FAILED, "rtKernelLaunchEx failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
GE_CHK_RT(rtFree(devicebase));
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtStreamSynchronize(stream);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtStreamSynchronize failed.");
GELOGE(RT_FAILED, "rtStreamSynchronize failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
GE_CHK_RT(rtFree(devicebase));
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
if (aicpu_kernel_addr != nullptr) {
rt_ret = rtFree(aicpu_kernel_addr);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "free memory failed.");
GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret);
GE_CHK_RT(rtFree(devicebase));
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}
rt_ret = rtFree(devicebase);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "free memory failed.");
GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret);
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtStreamDestroy(stream);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtStreamDestroy failed.");
return FAILED;
GELOGE(RT_FAILED, "rtStreamDestroy failed. ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
return SUCCESS;
}
@@ -168,8 +168,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
std::lock_guard<std::mutex> lock(map_mutex_);
auto it = model_map_.find(model_id);
if (it == model_map_.end()) {
GELOGE(PARAM_INVALID, "model id %u does not exists.", model_id);
return PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
return GE_EXEC_MODEL_ID_INVALID;
}
uint64_t session_id = it->second->GetSessionId();
GELOGI("Destroy aicpu session for infer, session id is %u.", session_id);
@@ -223,10 +223,11 @@ Status ModelManager::SetDevice(int32_t deviceId) const {
return SUCCESS;
}

ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) {
ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num,
int32_t dynamic_type) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
davinci_model->SetDynamicSize(batch_num);
davinci_model->SetDynamicSize(batch_num, dynamic_type);
return SUCCESS;
}

@@ -332,8 +333,8 @@ Status ModelManager::DeleteModel(uint32_t id) {
} else if (hybrid_model_it != hybrid_model_map_.end()) {
(void)hybrid_model_map_.erase(hybrid_model_it);
} else {
GELOGE(PARAM_INVALID, "model id %u does not exists.", id);
return PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return GE_EXEC_MODEL_ID_INVALID;
}

return SUCCESS;
@@ -386,7 +387,7 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d

std::shared_ptr<DavinciModel> model = GetModel(model_id);

GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid Model ID %u in InputData! ", model_id);
GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! ", model_id);

GE_IF_BOOL_EXEC(model->GetDataInputTid() == 0, model->SetDataInputTid(mmGetTid()));

@@ -422,7 +423,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT
for (size_t i = 0; i < inputs.size(); ++i) {
DataBuffer data;
data.data = inputs[i].data;
data.length = static_cast<uint32_t>(inputs[i].length);
data.length = inputs[i].length;
input_data.blobs.push_back(data);
}

@@ -442,7 +443,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT
return SUCCESS;
}

GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid Model ID %u in InputData! ", model_id);
GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! ", model_id);

DataInputer *inputer = model->GetDataInputer();
GE_CHECK_NOTNULL(inputer);
@@ -472,7 +473,7 @@ Status ModelManager::Start(uint32_t model_id) {

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);

GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to start! ", model_id);

Status status = davinci_model->ModelRunStart();
if (status == SUCCESS) {
@@ -499,7 +500,7 @@ Status ModelManager::Stop(uint32_t model_id) {
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to stop!", model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to stop!", model_id);

Status status = davinci_model->ModelRunStop();
if (status == SUCCESS) {
@@ -661,7 +662,7 @@ Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_siz
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid Model ID %u !",
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid model id %u!",
model_id);

max_size = davinci_model->TotalMemSize();
@@ -671,8 +672,8 @@ Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_siz
Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
vector<InputOutputDescInfo> &output_desc) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID,
"GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!",
model_id);

return davinci_model->GetInputOutputDescInfo(input_desc, output_desc);
}
@@ -682,8 +683,8 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats,
bool new_model_desc) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID,
"GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);

davinci_model->SetModelDescVersion(new_model_desc);

@@ -697,18 +698,35 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
/// @param [out] batch_info
/// @return execute result
///
Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) {
Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
int32_t &dynamic_type) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
"GetDynamicBatchInfo failed, Invalid model id %u!", model_id);

return davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type);
}

///
/// @ingroup ge
/// @brief Get combined dynamic dims info
/// @param [in] model_id
/// @param [out] batch_info
/// @return execute result
///
Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vector<int64_t>> &batch_info) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetDynamicBatchInfo Failed, Invalid Model ID %u !",
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetCombinedDynamicDims Failed, Invalid Model ID %u!",
model_id);

return davinci_model->GetDynamicBatchInfo(batch_info);
davinci_model->GetCombinedDynamicDims(batch_info);
return SUCCESS;
}

Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) {
Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
davinci_model->GetCurShape(batch_info);
davinci_model->GetCurShape(batch_info, dynamic_type);
return SUCCESS;
}

@@ -724,8 +742,8 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID,
"GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!",
model_id);

return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
}
@@ -767,8 +785,8 @@ Status ModelManager::GenSessionId(uint64_t &session_id) {

Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener,
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID,
"input key file path is not valid, %s", strerror(errno));
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, GE_EXEC_MODEL_KEY_PATH_INVALID,
"input key file path %s is invalid, %s", model.key.c_str(), strerror(errno));
GenModelId(&model_id);

shared_ptr<DavinciModel> davinci_model = nullptr;
@@ -786,11 +804,11 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
try {
davinci_model = std::make_shared<DavinciModel>(model.priority, listener);
} catch (std::bad_alloc &) {
GELOGE(FAILED, "Make shared failed");
return FAILED;
GELOGE(MEMALLOC_FAILED, "Make shared failed");
return MEMALLOC_FAILED;
} catch (...) {
GELOGE(FAILED, "Make shared failed since other exception raise");
return FAILED;
GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise");
return INTERNAL_ERROR;
}
ret = davinci_model->Assign(ge_model);
if (ret != SUCCESS) {
@@ -803,7 +821,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
return FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
davinci_model->SetDeviceId(device_id);
davinci_model->SetOmName(model.om_name);
@@ -851,8 +869,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data,
const std::vector<uint32_t> &input_queue_ids,
const std::vector<uint32_t> &output_queue_ids) {
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0, PARAM_INVALID,
"input key file path is not valid, %s", strerror(errno));
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0,
GE_EXEC_MODEL_KEY_PATH_INVALID, "input key file path %s is not valid, %s",
model_data.key.c_str(), strerror(errno));

ModelHelper model_helper;
Status ret = model_helper.LoadModel(model_data);
@@ -863,8 +882,8 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d

shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(model_data.priority, nullptr);
if (davinci_model == nullptr) {
GELOGE(FAILED, "create model failed.");
return FAILED;
GELOGE(MEMALLOC_FAILED, "create model failed.");
return MEMALLOC_FAILED;
}

ret = davinci_model->Assign(model_helper.GetGeModel());
@@ -916,7 +935,7 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
OutputData &output_data) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id);

if (davinci_model->NeedDestroyAicpuKernel()) {
GELOGI("Start to destroy specified aicpu kernel.");
@@ -973,29 +992,30 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me

auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_data);
if (partition_table->num == 1) {
GELOGE(FAILED, "om model is error,please use executable om model");
return FAILED;
GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "om model is error,please use executable om model");
return GE_EXEC_MODEL_PARTITION_NUM_INVALID;
}
ModelPartition task_partition;
if (om_file_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition) != SUCCESS) {
GELOGE(FAILED, "get task model partition failed.");
return FAILED;
GELOGE(GE_EXEC_LOAD_TASK_PARTITION_FAILED, "get task model partition failed.");
return GE_EXEC_LOAD_TASK_PARTITION_FAILED;
}

std::shared_ptr<domi::ModelTaskDef> model_task_def = MakeShared<domi::ModelTaskDef>();
if (model_task_def == nullptr) {
return FAILED;
return MEMALLOC_FAILED;
}
if (task_partition.size != 0) {
if (!ReadProtoFromArray(task_partition.data, static_cast<int>(task_partition.size), model_task_def.get())) {
GELOGE(FAILED, "ReadProtoFromArray failed.");
return FAILED;
GELOGE(GE_EXEC_LOAD_TASK_PARTITION_FAILED, "ReadProtoFromArray failed.");
return GE_EXEC_LOAD_TASK_PARTITION_FAILED;
}
}

ModelPartition partition_weight;
ret = om_file_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition_weight);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Get weight partition failed. ret = %u", ret);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED,
"Get weight partition failed. ret = %u", ret);

mem_size = model_task_def->memory_size();
weight_size = partition_weight.size;


+ 13
- 3
src/ge/graph/load/new_model_manager/model_manager.h View File

@@ -187,9 +187,19 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
/// @brief Get dynamic batch_info
/// @param [in] model_id
/// @param [out] batch_info
/// @param [out] dynamic_type
/// @return execute result
///
ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);
ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
int32_t &dynamic_type);
///
/// @ingroup ge
/// @brief Get combined dynamic dims info
/// @param [in] model_id
/// @param [out] batch_info
/// @return execute result
///
ge::Status GetCombinedDynamicDims(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info);

///
/// @ingroup ge
@@ -215,13 +225,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats);

ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info);
ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type);

ge::Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info);

ge::Status SetDevice(int32_t deviceId) const;

ge::Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num);
ge::Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, int32_t dynamic_type);

///
/// @ingroup domi_ome


+ 3
- 0
src/ge/graph/load/new_model_manager/model_utils.cc View File

@@ -56,6 +56,7 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) {
if (tensor_size) {
v_input_size.push_back(tensor_size);
}
GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size);
continue;
}

@@ -64,6 +65,8 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) {
GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i);
continue);

GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size);

v_input_size.push_back(tensor_size);
}



+ 4
- 4
src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc View File

@@ -34,7 +34,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList());
if (ret != SUCCESS) {
GELOGE(ret, "SetStream fail, stream_id:%u", task_def.stream_id());
return FAILED;
return ret;
}

model_ = davinci_model->GetRtModelHandle();
@@ -53,14 +53,14 @@ Status EndGraphTaskInfo::Distribute() {
rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtEndGraphEx failed, ret: 0x%x", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
} else {
GELOGI("Start to call rtEndGraph");
rtError_t rt_ret = rtEndGraph(model_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtEndGraph failed, ret: 0x%x", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

@@ -69,7 +69,7 @@ Status EndGraphTaskInfo::Distribute() {
rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
task_id_ = task_id;
stream_id_ = stream_id;


+ 1
- 1
src/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc View File

@@ -49,7 +49,7 @@ Status EventRecordTaskInfo::Distribute() {
rtError_t rt_ret = rtEventRecord(event_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

return SUCCESS;


+ 2
- 2
src/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc View File

@@ -51,13 +51,13 @@ Status EventWaitTaskInfo::Distribute() {
rtError_t rt_ret = rtStreamWaitEvent(stream_, event_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtEventReset(event_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

return SUCCESS;


+ 1
- 1
src/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc View File

@@ -40,7 +40,7 @@ Status FusionStartTaskInfo::Distribute() {
rtError_t rt_ret = rtKernelFusionStart(stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

GELOGI("FusionStartTaskInfo Distribute Success.");


+ 1
- 1
src/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc View File

@@ -40,7 +40,7 @@ Status FusionStopTaskInfo::Distribute() {
rtError_t rt_ret = rtKernelFusionEnd(stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

GELOGI("FusionStopTaskInfo Distribute Success.");


+ 15
- 16
src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc View File

@@ -73,24 +73,24 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m
// Only in Horovod scenario should get the inputName and GeShape
ret = HcomOmeUtil::GetHorovodInputs(op_desc_, kernel_hccl_infos_);
if (ret != SUCCESS) {
GELOGE(FAILED, "davinci_model: GetHorovodInputs fail! domi error: %u", ret);
return FAILED;
GELOGE(ret, "davinci_model: GetHorovodInputs fail! domi error: %u", ret);
return ret;
}
Status dmrt = HcomOmeUtil::GetHcclDataType(op_desc_, kernel_hccl_infos_);
if (dmrt != SUCCESS) {
GELOGE(FAILED, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt);
return FAILED;
GELOGE(dmrt, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt);
return dmrt;
}
dmrt = HcomOmeUtil::GetHcclCount(op_desc_, kernel_hccl_infos_);
if (dmrt != SUCCESS) {
GELOGE(FAILED, "davinci_model: GetHcomCount fail! domi error: %u", dmrt);
return FAILED;
GELOGE(dmrt, "davinci_model: GetHcomCount fail! domi error: %u", dmrt);
return dmrt;
}
// Only HCOMBROADCAST and HVDCALLBACKBROADCAST need to get the rootId
dmrt = HcomOmeUtil::GetAllRootId(op_desc_, kernel_hccl_infos_);
if (dmrt != SUCCESS) {
GELOGE(FAILED, "davinci_model: Get rootId fail! domi error: %u", dmrt);
return FAILED;
GELOGE(dmrt, "davinci_model: Get rootId fail! domi error: %u", dmrt);
return dmrt;
}

// GE's new process: hccl declares the number of streams required, creates a stream by GE, and sends it to hccl
@@ -138,8 +138,8 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM
uint32_t max_task_count;
ret = rtGetMaxStreamAndTask(RT_NORMAL_STREAM, &max_stream_count, &max_task_count);
if (ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Get max stream and task count by rts failed.");
return FAILED;
GELOGE(RT_FAILED, "Get max stream and task count by rts failed.");
return RT_ERROR_TO_GE_STATUS(ret);
}
max_node_of_hccl_stream_ = max_task_count / kMaxTaskOfStream;
}
@@ -153,8 +153,8 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM
ReuseStream(created_stream_num, davinci_model);
ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model);
if (ret != SUCCESS) {
GELOGE(FAILED, "Create hccl stream failed.");
return FAILED;
GELOGE(RT_FAILED, "Create hccl stream failed.");
return RT_ERROR_TO_GE_STATUS(ret);
}
}
GELOGI("Initialize hccl slave stream success, hcclStreamNum =%ld", hccl_stream_num);
@@ -179,14 +179,14 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode
rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
// Create slave stream, inactive by default, activated by hccl
rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
(void)rtStreamDestroy(stream);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("hccl_stream addr is=%p", stream);
int64_t remain_cap = max_node_of_hccl_stream_ - 1;
@@ -250,8 +250,7 @@ Status HcclTaskInfo::UpdateArgs() {
io_addrs.insert(io_addrs.end(), output_data_addrs_.begin(), output_data_addrs_.end());
io_addrs.insert(io_addrs.end(), workspace_data_addrs_.begin(), workspace_data_addrs_.end());

GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_),
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str());
davinci_model_->SetTotalIOAddrs(io_addrs);

GELOGI("HcclTaskInfo::UpdateArgs success.");
return SUCCESS;


+ 28
- 23
src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc View File

@@ -72,11 +72,11 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;)
return RT_ERROR_TO_GE_STATUS(rt_ret);)
rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;)
return RT_ERROR_TO_GE_STATUS(rt_ret);)
}

GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc_->GetName().c_str(),
@@ -113,7 +113,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_output_addr));
void *workspace_base_addr = nullptr;
rtError_t rt_ret = rtMalloc(&workspace_base_addr, kernel_ex_def.task_info_size(), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error, ret: Ox%X", rt_ret); return FAILED;);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: Ox%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret););
rt_ret = rtMemcpy(workspace_base_addr, kernel_ex_def.task_info_size(), kernel_ex_def.task_info().data(),
kernel_ex_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE);
fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr =
@@ -123,20 +124,23 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_);

rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

rt_ret = rtMemcpy(kernel_buf_, kernel_buf_size_, static_cast<void *>(&fwk_op_kernel), kernel_buf_size_,
RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

GELOGI("KernelExTaskInfo knonw node Init Success.");
return SUCCESS;
}

// 3. Set workspaceaddr, inputOutputDataAddr
if (CopyTaskInfo(kernel_ex_def, rts_param, op_desc) != SUCCESS) {
GELOGE(FAILED, "copy task info to workspace failed.");
return FAILED;
Status ge_ret = CopyTaskInfo(kernel_ex_def, rts_param, op_desc);
if (ge_ret != SUCCESS) {
GELOGE(ge_ret, "copy task info to workspace failed.");
return ge_ret;
}

const vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc);
@@ -155,11 +159,12 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
auto addrs_size = sizeof(uint64_t) * (io_addrs.size());
if (addrs_size > 0) {
rtError_t rt_ret = rtMalloc(&input_output_addr_, addrs_size, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); return RT_FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

rt_ret = rtMemcpy(input_output_addr_, addrs_size, io_addrs.data(), addrs_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
return FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
@@ -177,11 +182,13 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin

// 4. Return result
rtError_t rt_ret = rtMalloc(&kernel_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

rt_ret = rtMemcpy(kernel_buf_, sizeof(STR_FWK_OP_KERNEL), static_cast<void *>(&fwk_op_kernel),
sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;)
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), input_output_addr_, addrs_size, 0);

@@ -254,9 +261,7 @@ Status KernelExTaskInfo::UpdateArgs() {
}
}
}
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_),
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str());

davinci_model_->SetTotalIOAddrs(io_addrs);
GELOGI("KernelExTaskInfo::UpdateArgs success.");
return SUCCESS;
}
@@ -286,8 +291,8 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const
rtError_t rt_ret = rtMemcpy(workspace_data_addrs[0], kernel_def.task_info_size(), kernel_def.task_info().data(),
kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "rtMemcpy error: 0x%X", rt_ret);
return FAILED;
GELOGE(RT_FAILED, "rtMemcpy error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

return SUCCESS;
@@ -298,7 +303,7 @@ Status KernelExTaskInfo::Distribute() {
rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

if (davinci_model_ == nullptr) {
@@ -311,7 +316,7 @@ Status KernelExTaskInfo::Distribute() {
rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
task_id_ = task_id;
stream_id_ = stream_id;
@@ -326,7 +331,7 @@ Status KernelExTaskInfo::Release() {
rtError_t rt_ret = rtFree(kernel_buf_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree error, ret: 0x%X", rt_ret);
ret = FAILED;
ret = RT_ERROR_TO_GE_STATUS(rt_ret);
} else {
kernel_buf_ = nullptr;
}
@@ -335,7 +340,7 @@ Status KernelExTaskInfo::Release() {
rtError_t rt_ret = rtFree(input_output_addr_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree error, ret: 0x%X", rt_ret);
ret = FAILED;
ret = RT_ERROR_TO_GE_STATUS(rt_ret);
} else {
input_output_addr_ = nullptr;
}
@@ -344,7 +349,7 @@ Status KernelExTaskInfo::Release() {
rtError_t rt_ret = rtFree(ext_info_addr_);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("rtFree ext_info_addr[%p] error, ret: 0x%X", ext_info_addr_, rt_ret);
ret = FAILED;
ret = RT_ERROR_TO_GE_STATUS(rt_ret);
} else {
ext_info_addr_ = nullptr;
}


+ 76
- 68
src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -99,13 +99,13 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s",
kernel_def.stub_func().c_str());
return RT_FAILED;);
return RT_ERROR_TO_GE_STATUS(rt_ret););
} else if (kernel_type_ != cce::ccKernelType::AI_CPU) {
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key);
return RT_FAILED;);
return RT_ERROR_TO_GE_STATUS(rt_ret););
}

if (context.origin_op_index_size() > CC_FUSION_OP_MAX) {
@@ -232,7 +232,7 @@ Status KernelTaskInfo::SuperKernelLaunch() {
skt_info_.last_dump_flag);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "SuperKernelLaunch: Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
call_save_dump_ = true;
GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed");
@@ -241,21 +241,24 @@ Status KernelTaskInfo::SuperKernelLaunch() {
// Create super kernel factory
skt::SuperKernelFactory *factory = &skt::SuperKernelFactory::GetInstance();
// Init super kernel factory
if (factory->Init() != SUCCESS) {
GELOGE(RT_FAILED, "SuperKernelLaunch: SuperKernelFactory init failed");
return RT_FAILED;
Status ge_ret = factory->Init();
if (ge_ret != SUCCESS) {
GELOGE(ge_ret, "SuperKernelLaunch: SuperKernelFactory init failed");
return ge_ret;
}
// Call the fuse API
std::unique_ptr<skt::SuperKernel> superKernel = nullptr;
if (factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel) != SUCCESS) {
GELOGE(RT_FAILED, "SuperKernelLaunch: fuse call failed");
return RT_FAILED;
ge_ret = factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel);
if (ge_ret != SUCCESS) {
GELOGE(ge_ret, "SuperKernelLaunch: fuse call failed");
return ge_ret;
}
// Launch a super kernel
skt_dump_flag_ = GetDumpFlag();
if (superKernel->Launch(skt_info_.last_stream, skt_dump_flag_) != SUCCESS) {
GELOGE(RT_FAILED, "SuperKernelLaunch: launch failed");
return RT_FAILED;
ge_ret = superKernel->Launch(skt_info_.last_stream, skt_dump_flag_);
if (ge_ret != SUCCESS) {
GELOGE(ge_ret, "SuperKernelLaunch: launch failed");
return ge_ret;
}
GELOGI("SuperKernelLaunch: success[skt_kernel_list size[%zu] skt_arg_list[%zu]]", skt_kernel_list.size(),
skt_arg_list.size());
@@ -276,9 +279,9 @@ Status KernelTaskInfo::SaveSuperKernelInfo() {
skt_info_.last_dump_flag = dump_flag_;
skt_info_.dump_flag_list.push_back(dump_flag_);
skt_info_.op_desc_list.push_back(op_desc_);
skt_info_.dump_args_list.push_back(reinterpret_cast<uintptr_t>(dump_args_));
skt_info_.dump_args_list.push_back(reinterpret_cast<uintptr_t>(skt_dump_args_));
skt_info_.last_group_key = group_key_;
skt_info_.last_dump_args = reinterpret_cast<uintptr_t>(dump_args_);
skt_info_.last_dump_args = reinterpret_cast<uintptr_t>(skt_dump_args_);
skt_info_.last_op = op_desc_;
// last node in a stream, just launch
if (IsMarkedLastNode()) {
@@ -345,15 +348,15 @@ Status KernelTaskInfo::SuperKernelDistribute() {
// 1.launch before
ret = SuperKernelLaunch();
if (ret != SUCCESS) {
GELOGE(FAILED, "Call SuperKernelLaunch failed!");
return FAILED;
GELOGE(ret, "Call SuperKernelLaunch failed!");
return ret;
}
// 2.launch current
rtError_t rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_,
static_cast<rtSmDesc_t *>(sm_desc_), stream_, dump_flag_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return FAILED;
return rt_ret;
}
call_save_dump_ = true;
UpdateTaskId();
@@ -361,8 +364,8 @@ Status KernelTaskInfo::SuperKernelDistribute() {
} else {
ret = SaveSuperKernelInfo();
if (ret != SUCCESS) {
GELOGE(FAILED, "Call SuperKernelLaunch failed!");
return FAILED;
GELOGE(ret, "Call SuperKernelLaunch failed!");
return ret;
}
GELOGI("Save Current task [block_dim:%u, size:%zu].", block_dim_, skt_info_.kernel_list.size());
}
@@ -403,7 +406,7 @@ Status KernelTaskInfo::Distribute() {
}
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
// set for task_id_
UpdateTaskId();
@@ -448,9 +451,7 @@ Status KernelTaskInfo::UpdateArgs() {
}
}

GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_),
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str());

davinci_model_->SetTotalIOAddrs(io_addrs);
GELOGI("KernelTaskInfo::UpdateArgs success.");
return SUCCESS;
}
@@ -459,26 +460,31 @@ Status KernelTaskInfo::Release() {
if (davinci_model_ != nullptr && davinci_model_->IsKnownNode()) {
return SUCCESS;
}
FreeRtMem(&args_);
FreeRtMem(&superkernel_device_args_addr_);
FreeRtMem(&superkernel_dev_nav_table_);
FreeRtMem(&flowtable_);
FreeRtMem(&custom_info_.input_descs);
FreeRtMem(&custom_info_.input_addrs);
FreeRtMem(&custom_info_.output_descs);
FreeRtMem(&custom_info_.output_addrs);
FreeRtMem(&custom_info_.attr_handle);
FreeRtMem(&aicpu_ext_info_addr_);
rtContext_t ctx = nullptr;
rtError_t ret = rtCtxGetCurrent(&ctx);

if (ret == RT_ERROR_NONE) {
FreeRtMem(&args_);
FreeRtMem(&superkernel_device_args_addr_);
FreeRtMem(&superkernel_dev_nav_table_);
FreeRtMem(&flowtable_);
FreeRtMem(&custom_info_.input_descs);
FreeRtMem(&custom_info_.input_addrs);
FreeRtMem(&custom_info_.output_descs);
FreeRtMem(&custom_info_.output_addrs);
FreeRtMem(&custom_info_.attr_handle);
FreeRtMem(&aicpu_ext_info_addr_);
}

if (ctx_.argsOffset != nullptr) {
delete[] ctx_.argsOffset;
ctx_.argsOffset = nullptr;
}

rtError_t ret = (sm_desc_ != nullptr) ? rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE;
ret = (sm_desc_ != nullptr) ? rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE;
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", static_cast<int>(ret));
return FAILED;
return RT_ERROR_TO_GE_STATUS(ret);
}
sm_desc_ = nullptr;

@@ -508,13 +514,13 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
rtError_t rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

return SUCCESS;
@@ -591,14 +597,14 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

// copy orign args
rt_ret = rtMemcpy(args_, args_size_, kernel_def.args().data(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
vector<uint8_t> args_info(args_size_);
errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_);
@@ -617,7 +623,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
kAddrLen * tensor_device_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(),
kAddrLen * tensor_device_addrs.size());
@@ -625,16 +631,17 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}
skt_dump_args_ = static_cast<char *>(args_) + offset;
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
dump_flag_ = RT_KERNEL_DUMPFLAG;
dump_args_ = static_cast<char *>(args_) + offset;
}

Status ge_ret = UpdateL2Data(kernel_def);
// update origin l2 data
if (UpdateL2Data(kernel_def) != SUCCESS) {
return RT_FAILED;
if (ge_ret != SUCCESS) {
return ge_ret;
}

vector<void *> virtual_io_addrs; // use virtual address for zero copy key.
@@ -698,13 +705,13 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel
rtError_t rt_ret = rtMalloc(&custom_info_.attr_handle, op_attr_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtMemcpy(custom_info_.attr_handle, op_attr_size, buffer.GetData(), op_attr_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

// args
@@ -731,14 +738,14 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel
rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret =
rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

davinci_model_->SetZeroCopyAddr(op_desc, input_data_addrs, input_data_addrs.data(), custom_info_.input_addrs,
@@ -784,7 +791,8 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) {
ctx_.genVariableBaseSize = davinci_model_->TotalVarMemSize();
ctx_.l2ctrlSize = sm_contrl_size;

if (UpdateCceArgs(sm_desc, flowtable, kernel_def) != SUCCESS) {
ret = UpdateCceArgs(sm_desc, flowtable, kernel_def);
if (ret != SUCCESS) {
GELOGE(ret, "update cce args fail");
return ret;
}
@@ -800,7 +808,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) {
rtError_t rt_ret = rtMalloc(&args_, kernel_def.args_size(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", kernel_def.args_size())

@@ -808,7 +816,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) {
rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

// L2
@@ -816,13 +824,13 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) {
rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}
return SUCCESS;
@@ -883,7 +891,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtMalloc) failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", args_size_)

@@ -891,7 +899,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
@@ -912,12 +920,12 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

return SUCCESS;
@@ -934,7 +942,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
rtError_t rt_ret = rtMalloc(&custom_info_.input_descs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

for (std::size_t i = 0; i < input_size; ++i) {
@@ -942,7 +950,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
const_cast<tagOpTensor *>(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

@@ -950,7 +958,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
rt_ret = rtMalloc(&custom_info_.input_addrs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

if (!input_data_addrs.empty()) {
@@ -958,7 +966,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

@@ -966,14 +974,14 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
rt_ret = rtMalloc(&custom_info_.output_descs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
for (std::size_t i = 0; i < output_size; ++i) {
rt_ret = rtMemcpy(static_cast<opTensor_t *>(custom_info_.output_descs) + i, sizeof(opTensor_t),
const_cast<tagOpTensor *>(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

@@ -981,7 +989,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
rt_ret = rtMalloc(&custom_info_.output_addrs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

if (!output_data_addrs.empty()) {
@@ -989,7 +997,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

@@ -1051,8 +1059,8 @@ Status KernelTaskInfo::UpdateCceArgs(std::string &sm_desc, std::string &flowtabl
Status status =
CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def);
if (status != SUCCESS) {
GELOGE(FAILED, "Call cce api failed");
return FAILED;
GELOGE(status, "Call cce api failed");
return status;
}
return SUCCESS;
}
@@ -1118,14 +1126,14 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe
rtError_t rt_ret = rtMalloc(&flowtable_, flowtable.size(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "flowtable refresh of cce scence.", flowtable.size())

rt_ret = rtMemcpy(flowtable_, flowtable.size(), flowtable.data(), flowtable.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

// modify flowtable addr in args


+ 1
- 0
src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h View File

@@ -165,6 +165,7 @@ class KernelTaskInfo : public TaskInfo {
void *aicpu_ext_info_addr_ = nullptr;

// For super kernel
void *skt_dump_args_ = nullptr;
uint32_t skt_id_;
std::string stub_func_name_;
bool is_l1_fusion_enable_;


+ 1
- 1
src/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc View File

@@ -59,7 +59,7 @@ Status LabelGotoExTaskInfo::Distribute() {
rtError_t rt_ret = rtLabelGotoEx(label_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

GELOGI("LabelGotoExTaskInfo Distribute Success.");


+ 1
- 1
src/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc View File

@@ -59,7 +59,7 @@ Status LabelSetTaskInfo::Distribute() {
rtError_t rt_ret = rtLabelSet(label_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

GELOGI("LabelSetTaskInfo Distribute Success.");


+ 2
- 2
src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc View File

@@ -98,13 +98,13 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
rtError_t rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtLabelListCpy(label_list_.data(), label_list_.size(), args_, args_size_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

GELOGI("LabelSwitchByIndexTaskInfo Init success, branch max: %u.", branch_max_);


+ 16
- 11
src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc View File

@@ -19,6 +19,10 @@
#include "framework/common/debug/ge_log.h"
#include "graph/load/new_model_manager/davinci_model.h"

namespace {
const uint32_t kAlignBytes = 64;
}

namespace ge {
Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("MemcpyAddrAsyncTaskInfo Init Start");
@@ -55,39 +59,40 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel

// malloc args memory
size_t args_size = sizeof(void *) * io_addrs.size();
rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM);
rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

args_align_ = reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(args_) / kAlignBytes + 1) * kAlignBytes);
// copy orign src/dst
GELOGI("src_args:%p, destMax:%zu, src_:%p, dst_args:%p, dst_:%p, count=%zu", args_, args_size, src_,
static_cast<uint8_t *>(args_) + args_size, dst_, io_addrs.size());
rt_ret = rtMemcpy(args_, args_size, io_addrs.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE);
GELOGI("src_args:%p, destMax:%zu, src_:%p, dst_args:%p, dst_:%p, count=%zu", args_align_, args_size, src_,
static_cast<uint8_t *>(args_align_) + args_size, dst_, io_addrs.size());
rt_ret = rtMemcpy(args_align_, args_size, io_addrs.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api for src failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

count_ = memcpy_async.count();
kind_ = memcpy_async.kind();
dst_max_ = memcpy_async.dst_max();
GELOGI("InitMemcpyAddrAsyncTaskInfo, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu, args:%p, size:%zu",
memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_, args_, args_size);
memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_, args_align_, args_size);

davinci_model->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), args_, args_size, 0);
davinci_model->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), args_align_, args_size, 0);
return SUCCESS;
}

Status MemcpyAddrAsyncTaskInfo::Distribute() {
GELOGI("MemcpyAddrAsyncTaskInfo Distribute Start, dst_max:%lu, count:%lu, kind:%u", dst_max_, count_, kind_);

rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_) + sizeof(void *)),
dst_max_, args_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_);
rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_align_) + sizeof(void *)),
dst_max_, args_align_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

return SUCCESS;


+ 3
- 1
src/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h View File

@@ -22,7 +22,8 @@
namespace ge {
class MemcpyAddrAsyncTaskInfo : public TaskInfo {
public:
MemcpyAddrAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), args_(nullptr), count_(0), kind_(0) {}
MemcpyAddrAsyncTaskInfo()
: dst_(nullptr), dst_max_(0), src_(nullptr), args_(nullptr), args_align_(nullptr), count_(0), kind_(0) {}

~MemcpyAddrAsyncTaskInfo() override {
src_ = nullptr;
@@ -46,6 +47,7 @@ class MemcpyAddrAsyncTaskInfo : public TaskInfo {
uint64_t dst_max_;
uint8_t *src_;
void *args_;
void *args_align_;
uint64_t count_;
uint32_t kind_;
};


+ 2
- 3
src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc View File

@@ -68,7 +68,7 @@ Status MemcpyAsyncTaskInfo::Distribute() {
rtError_t rt_ret = rtMemcpyAsync(dst_, dst_max_, src_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

GELOGI("MemcpyAsyncTaskInfo Distribute Success");
@@ -102,8 +102,7 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() {
io_addrs.emplace_back(reinterpret_cast<void *>(src_));
io_addrs.emplace_back(reinterpret_cast<void *>(dst_));

GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_),
"update memcpyasync in known node zero copy addr failed.");
davinci_model_->SetTotalIOAddrs(io_addrs);

GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success.");
return SUCCESS;


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save