@@ -18,6 +18,7 @@ Thanks goes to these wonderful people: | |||||
wangcong,weiyang,yanghaorang,xutianchun,shibeiji,zhouchao, tanghuikang, zhoulili, liujunzhu, zhengyuanhua, taoxiangdong | wangcong,weiyang,yanghaorang,xutianchun,shibeiji,zhouchao, tanghuikang, zhoulili, liujunzhu, zhengyuanhua, taoxiangdong | ||||
Contributions of any kind are welcome! | Contributions of any kind are welcome! | ||||
# Release 0.5.0-beta | # Release 0.5.0-beta | ||||
## Major Features and Improvements | ## Major Features and Improvements | ||||
@@ -63,5 +63,12 @@ struct HcomOpertion { | |||||
int32_t root; | int32_t root; | ||||
}; | }; | ||||
struct HcomRemoteAccessAddrInfo { | |||||
uint32_t remotetRankID; | |||||
uint64_t remoteAddr; // host embedding table address | |||||
uint64_t localAddr; // device HBM address | |||||
uint64_t length; // memory Length in Bytes | |||||
}; | |||||
} // namespace ge | } // namespace ge | ||||
#endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ | #endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ |
@@ -31,27 +31,37 @@ class ErrorManager { | |||||
/// | /// | ||||
/// @brief init | /// @brief init | ||||
/// @param [in] path current so path | |||||
/// @param [in] path: current so path | |||||
/// @return int 0(success) -1(fail) | /// @return int 0(success) -1(fail) | ||||
/// | /// | ||||
int Init(std::string path); | int Init(std::string path); | ||||
/// | /// | ||||
/// @brief Report error message | /// @brief Report error message | ||||
/// @param [in] errCode error code | |||||
/// @param [in] mapArgs parameter map | |||||
/// @param [in] error_code: error code | |||||
/// @param [in] args_map: parameter map | |||||
/// @return int 0(success) -1(fail) | /// @return int 0(success) -1(fail) | ||||
/// | /// | ||||
int ReportErrMessage(std::string error_code, const std::map<std::string, std::string> &args_map); | int ReportErrMessage(std::string error_code, const std::map<std::string, std::string> &args_map); | ||||
/// | |||||
/// @brief output error message | /// @brief output error message | ||||
/// @param [in] handle print handle | |||||
/// @param [in] handle: print handle | |||||
/// @return int 0(success) -1(fail) | /// @return int 0(success) -1(fail) | ||||
/// | /// | ||||
int OutputErrMessage(int handle); | int OutputErrMessage(int handle); | ||||
/// | |||||
/// @brief output message | |||||
/// @param [in] handle: print handle | |||||
/// @return int 0(success) -1(fail) | |||||
/// | |||||
int OutputMessage(int handle); | |||||
/// | |||||
/// @brief Report error message | /// @brief Report error message | ||||
/// @param [in] vector parameter key, vector parameter value | |||||
/// @param [in] key: vector parameter key | |||||
/// @param [in] value: vector parameter value | |||||
/// | /// | ||||
void ATCReportErrMessage(std::string error_code, const std::vector<std::string> &key = {}, | void ATCReportErrMessage(std::string error_code, const std::vector<std::string> &key = {}, | ||||
const std::vector<std::string> &value = {}); | const std::vector<std::string> &value = {}); | ||||
@@ -60,7 +70,7 @@ class ErrorManager { | |||||
struct ErrorInfo { | struct ErrorInfo { | ||||
std::string error_id; | std::string error_id; | ||||
std::string error_message; | std::string error_message; | ||||
std::vector<std::string> arglist; | |||||
std::vector<std::string> arg_list; | |||||
}; | }; | ||||
ErrorManager() {} | ErrorManager() {} | ||||
@@ -77,7 +87,8 @@ class ErrorManager { | |||||
bool is_init_ = false; | bool is_init_ = false; | ||||
std::map<std::string, ErrorInfo> error_map_; | std::map<std::string, ErrorInfo> error_map_; | ||||
std::vector<std::string> error_message_evc_; | |||||
std::vector<std::string> error_messages_; | |||||
std::vector<std::string> warning_messages_; | |||||
}; | }; | ||||
#endif // ERROR_MANAGER_H_ | #endif // ERROR_MANAGER_H_ |
@@ -82,6 +82,8 @@ class PlatformInfoManager { | |||||
void ParseVectorCoreMemoryRates(map<string, string> &vectorCoreMemoryRatesMap, PlatformInfo &platformInfoTemp); | void ParseVectorCoreMemoryRates(map<string, string> &vectorCoreMemoryRatesMap, PlatformInfo &platformInfoTemp); | ||||
void ParseCPUCache(map<string, string> &CPUCacheMap, PlatformInfo &platformInfoTemp); | |||||
void ParseVectorCoreintrinsicDtypeMap(map<string, string> &vectorCoreintrinsicDtypeMap, | void ParseVectorCoreintrinsicDtypeMap(map<string, string> &vectorCoreintrinsicDtypeMap, | ||||
PlatformInfo &platformInfoTemp); | PlatformInfo &platformInfoTemp); | ||||
@@ -73,6 +73,8 @@ typedef struct tagAiCoreSpec { | |||||
typedef struct tagAiCoreMemoryRates { | typedef struct tagAiCoreMemoryRates { | ||||
double ddrRate; | double ddrRate; | ||||
double ddrReadRate; | |||||
double ddrWriteRate; | |||||
double l2Rate; | double l2Rate; | ||||
double l2ReadRate; | double l2ReadRate; | ||||
double l2WriteRate; | double l2WriteRate; | ||||
@@ -86,6 +88,7 @@ typedef struct tagAiCoreMemoryRates { | |||||
} AiCoreMemoryRates; | } AiCoreMemoryRates; | ||||
typedef struct tagVectorCoreSpec { | typedef struct tagVectorCoreSpec { | ||||
double vecFreq; | |||||
uint64_t vecCalcSize; | uint64_t vecCalcSize; | ||||
uint64_t smaskBuffer; | uint64_t smaskBuffer; | ||||
uint64_t ubSize; | uint64_t ubSize; | ||||
@@ -94,10 +97,15 @@ typedef struct tagVectorCoreSpec { | |||||
uint64_t ubbankNum; | uint64_t ubbankNum; | ||||
uint64_t ubburstInOneBlock; | uint64_t ubburstInOneBlock; | ||||
uint64_t ubbankGroupNum; | uint64_t ubbankGroupNum; | ||||
uint64_t vectorRegSize; | |||||
uint64_t predicateRegSize; | |||||
uint64_t addressRegSize; | |||||
} VectorCoreSpec; | } VectorCoreSpec; | ||||
typedef struct tagVectorCoreMemoryRates { | typedef struct tagVectorCoreMemoryRates { | ||||
double ddrRate; | double ddrRate; | ||||
double ddrReadRate; | |||||
double ddrWriteRate; | |||||
double l2Rate; | double l2Rate; | ||||
double l2ReadRate; | double l2ReadRate; | ||||
double l2WriteRate; | double l2WriteRate; | ||||
@@ -105,6 +113,11 @@ typedef struct tagVectorCoreMemoryRates { | |||||
double ubToDdrRate; | double ubToDdrRate; | ||||
} VectorCoreMemoryRates; | } VectorCoreMemoryRates; | ||||
typedef struct tagCPUCache { | |||||
uint32_t AICPUSyncBySW; | |||||
uint32_t TSCPUSyncBySW; | |||||
} CPUCache; | |||||
typedef struct tagPlatformInfo { | typedef struct tagPlatformInfo { | ||||
StrInfo strInfo; | StrInfo strInfo; | ||||
SoCInfo socInfo; | SoCInfo socInfo; | ||||
@@ -113,6 +126,7 @@ typedef struct tagPlatformInfo { | |||||
map<string, vector<string>> aiCoreIntrinsicDtypeMap; | map<string, vector<string>> aiCoreIntrinsicDtypeMap; | ||||
VectorCoreSpec vectorCoreSpec; | VectorCoreSpec vectorCoreSpec; | ||||
VectorCoreMemoryRates vectorCoreMemoryRates; | VectorCoreMemoryRates vectorCoreMemoryRates; | ||||
CPUCache cpucache; | |||||
map<string, vector<string>> vectorCoreIntrinsicDtypeMap; | map<string, vector<string>> vectorCoreIntrinsicDtypeMap; | ||||
} PlatformInfo; | } PlatformInfo; | ||||
@@ -46,7 +46,6 @@ const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; | |||||
const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; | const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; | ||||
const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; | const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; | ||||
const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; | const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; | ||||
const char *const OPTION_EXEC_OP_DEBUG_LEVEL = "ge.exec.opDebugLevel"; | |||||
const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; | const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; | ||||
const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; | const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; | ||||
const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; | const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; | ||||
@@ -174,6 +173,9 @@ const char *const kDynamicBatchSize = "ge.dynamicBatchSize"; | |||||
// configure whether to use dynamic image size | // configure whether to use dynamic image size | ||||
const char *const kDynamicImageSize = "ge.dynamicImageSize"; | const char *const kDynamicImageSize = "ge.dynamicImageSize"; | ||||
// Configure whether to use dynamic dims | |||||
const char *const kDynamicDims = "ge.dynamicDims"; | |||||
// Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y, | // Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y, | ||||
// example: GA|RL, support configure multiple, split by | | // example: GA|RL, support configure multiple, split by | | ||||
const std::string AUTO_TUNE_MODE = "ge.autoTuneMode"; | const std::string AUTO_TUNE_MODE = "ge.autoTuneMode"; | ||||
@@ -269,6 +271,7 @@ static const char *const INPUT_SHAPE = "input_shape"; | |||||
static const char *const OP_NAME_MAP = "op_name_map"; | static const char *const OP_NAME_MAP = "op_name_map"; | ||||
static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; | static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; | ||||
static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; | static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; | ||||
static const char *const DYNAMIC_DIMS = kDynamicDims; | |||||
static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); | static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); | ||||
static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); | static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); | ||||
static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; | static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; | ||||
@@ -291,10 +294,11 @@ static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c | |||||
// for interface: aclgrphBuildModel | // for interface: aclgrphBuildModel | ||||
const std::set<std::string> ir_builder_suppported_options = { | const std::set<std::string> ir_builder_suppported_options = { | ||||
INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, DYNAMIC_BATCH_SIZE, | |||||
DYNAMIC_IMAGE_SIZE, INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, | |||||
AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, INPUT_FP16_NODES, | |||||
LOG_LEVEL}; | |||||
INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, | |||||
DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE, DYNAMIC_DIMS, | |||||
INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, | |||||
AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, | |||||
INPUT_FP16_NODES, LOG_LEVEL}; | |||||
// for interface: aclgrphBuildInitialize | // for interface: aclgrphBuildInitialize | ||||
const std::set<std::string> global_options = {CORE_TYPE, | const std::set<std::string> global_options = {CORE_TYPE, | ||||
SOC_VERSION, | SOC_VERSION, | ||||
@@ -343,6 +343,7 @@ class OpReg { | |||||
auto x_type = op.GetInputDesc(in_name).GetDataType(); \ | auto x_type = op.GetInputDesc(in_name).GetDataType(); \ | ||||
TensorDesc op_output_desc = op.GetOutputDesc(out_name); \ | TensorDesc op_output_desc = op.GetOutputDesc(out_name); \ | ||||
op_output_desc.SetShape(ge::Shape(x_shape)); \ | op_output_desc.SetShape(ge::Shape(x_shape)); \ | ||||
op_output_desc.SetOriginShape(ge::Shape(x_shape)); \ | |||||
op_output_desc.SetDataType(x_type); \ | op_output_desc.SetDataType(x_type); \ | ||||
return op.UpdateOutputDesc(out_name, op_output_desc); \ | return op.UpdateOutputDesc(out_name, op_output_desc); \ | ||||
} | } | ||||
@@ -232,7 +232,7 @@ | |||||
rtError_t _rt_ret = (expr); \ | rtError_t _rt_ret = (expr); \ | ||||
if (_rt_ret != RT_ERROR_NONE) { \ | if (_rt_ret != RT_ERROR_NONE) { \ | ||||
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ | DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ | ||||
return ge::RT_FAILED; \ | |||||
return RT_ERROR_TO_GE_STATUS(_rt_ret); \ | |||||
} \ | } \ | ||||
} while (0); | } while (0); | ||||
@@ -280,8 +280,25 @@ GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_REDUCE_SCATTER_FAILED, 47, "call hccl hcom r | |||||
// Executor module error code definition | // Executor module error code definition | ||||
GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized."); | GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized."); | ||||
GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 2, "GE AIPP is not exist."); | |||||
GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 3, "GE Dynamic AIPP is not support to query temporarily."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_PATH_INVALID, 2, "Model file path is invalid."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_KEY_PATH_INVALID, 3, "Key file path of model is invalid."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_ID_INVALID, 4, "Model id is invalid."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_DATA_SIZE_INVALID, 5, "Data size of model is invalid."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_WEIGHT_SIZE_INVALID, 6, "Weight size of model is invalid."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_PARTITION_NUM_INVALID, 7, "Partition number of model is invalid."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_QUEUE_ID_INVALID, 8, "Queue id of model is invalid."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, 9, "Model does not support encryption."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_READ_MODEL_FILE_FAILED, 10, "Failed to read model file."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_MODEL_REPEATED, 11, "The model is loaded repeatedly."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_MODEL_PARTITION_FAILED, 12, "Failed to load model partition."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, 13, "Failed to load weight partition."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_TASK_PARTITION_FAILED, 14, "Failed to load task partition."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, 15, "Failed to load kernel partition."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, 16, "Failed to allocate feature map memory."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, 17, "Failed to allocate weight memory."); | |||||
GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_VAR_MEM_FAILED, 18, "Failed to allocate variable memory."); | |||||
GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 19, "GE AIPP is not exist."); | |||||
GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 20, "GE Dynamic AIPP is not support to query temporarily."); | |||||
// Generator module error code definition | // Generator module error code definition | ||||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed."); | GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed."); | ||||
@@ -289,6 +306,8 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, 2, "Graph mana | |||||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph manager build graph failed."); | GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph manager build graph failed."); | ||||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); | GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); | ||||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); | GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); | ||||
#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast<Status>(RT_ERROR) | |||||
} // namespace ge | } // namespace ge | ||||
#endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ | #endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ |
@@ -339,6 +339,7 @@ REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext"); | |||||
REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); | REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); | ||||
REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") | REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") | ||||
REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); | REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); | ||||
REGISTER_OPTYPE_DECLARE(BITCAST, "Bitcast"); | |||||
// ANN dedicated operator | // ANN dedicated operator | ||||
REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean"); | REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean"); | ||||
@@ -432,6 +433,8 @@ REGISTER_OPTYPE_DECLARE(HCOMALLREDUCE, "HcomAllReduce"); | |||||
REGISTER_OPTYPE_DECLARE(HCOMREDUCESCATTER, "HcomReduceScatter"); | REGISTER_OPTYPE_DECLARE(HCOMREDUCESCATTER, "HcomReduceScatter"); | ||||
REGISTER_OPTYPE_DECLARE(HCOMSEND, "HcomSend"); | REGISTER_OPTYPE_DECLARE(HCOMSEND, "HcomSend"); | ||||
REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); | REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); | ||||
REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); | |||||
REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); | |||||
REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); | REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); | ||||
REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); | REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); | ||||
@@ -559,6 +562,16 @@ enum ModelCheckType { | |||||
}; | }; | ||||
/// | /// | ||||
/// @brief dynamic input type | |||||
/// | |||||
enum DynamicInputType { | |||||
FIXED = 0, // default mode | |||||
DYNAMIC_BATCH = 1, | |||||
DYNAMIC_IMAGE = 2, | |||||
DYNAMIC_DIMS = 3 | |||||
}; | |||||
/// | |||||
/// @brief magic number of the model file | /// @brief magic number of the model file | ||||
/// | /// | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FILE_MAGIC_NUM; | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FILE_MAGIC_NUM; | ||||
@@ -26,23 +26,26 @@ | |||||
#include "common/ge_types.h" | #include "common/ge_types.h" | ||||
#include "common/types.h" | #include "common/types.h" | ||||
#include "graph/tensor.h" | #include "graph/tensor.h" | ||||
#include "graph/ge_tensor.h" | |||||
#include "runtime/base.h" | #include "runtime/base.h" | ||||
namespace ge { | namespace ge { | ||||
class ModelListenerAdapter; | class ModelListenerAdapter; | ||||
class SingleOp; | class SingleOp; | ||||
class DynamicSingleOp; | |||||
struct RunModelData { | struct RunModelData { | ||||
uint32_t index; // Data index | uint32_t index; // Data index | ||||
uint32_t modelId; | uint32_t modelId; | ||||
std::vector<DataBuffer> blobs; // All input/output data buffer | |||||
uint32_t timestamp; // Data creation time | |||||
uint32_t timeout; // Processing timeout | |||||
uint64_t request_id = 0; // Request ID | |||||
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 | |||||
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 | |||||
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 | |||||
std::vector<DataBuffer> blobs; // All input/output data buffer | |||||
uint32_t timestamp; // Data creation time | |||||
uint32_t timeout; // Processing timeout | |||||
uint64_t request_id = 0; // Request ID | |||||
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 | |||||
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 | |||||
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 | |||||
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty | |||||
}; | }; | ||||
class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | ||||
@@ -87,16 +90,52 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
/// | /// | ||||
ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, | ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, | ||||
uint64_t image_width); | uint64_t image_width); | ||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Set dynamic dims info | |||||
/// @param [in] model_id: model id allocate from manager | |||||
/// @param [in] dynamic_input_addr: dynamic input addr created by user | |||||
/// @param [in] length: length of dynamic input addr | |||||
/// @param [in] dynamic_dim_num: number of dynamic dimension | |||||
/// @param [in] dynamic_dims: array of dynamic dimensions | |||||
/// @return execute result | |||||
/// | |||||
ge::Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||||
const std::vector<uint64_t> &dynamic_dims); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Get current dynamic dims info by combined dims | |||||
/// @param [in] model_id: model id allocate from manager | |||||
/// @param [in] combined_dims: array of combined dimensions | |||||
/// @param [out] cur_dynamic_dims: current dynamic dims | |||||
/// @return execute result | |||||
/// | |||||
ge::Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &combined_dims, | |||||
std::vector<uint64_t> &cur_dynamic_dims); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Get dynamic batch_info | /// @brief Get dynamic batch_info | ||||
/// @param [in] model_id | /// @param [in] model_id | ||||
/// @param [out] batch_info | /// @param [out] batch_info | ||||
/// @param [out] dynamic_type | |||||
/// @return execute result | |||||
/// | |||||
ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||||
int32_t &dynamic_type); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Get combined dynamic dims info | |||||
/// @param [in] model_id | |||||
/// @param [out] batch_info | |||||
/// @return execute result | /// @return execute result | ||||
/// | /// | ||||
ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||||
ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||||
ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info); | |||||
ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -209,6 +248,13 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | ||||
std::vector<DataBuffer> &outputs); | std::vector<DataBuffer> &outputs); | ||||
static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||||
DynamicSingleOp **single_op); | |||||
static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | |||||
const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, | |||||
std::vector<DataBuffer> &outputs); | |||||
static ge::Status ReleaseSingleOpResource(void *stream); | static ge::Status ReleaseSingleOpResource(void *stream); | ||||
ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); | ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); | ||||
@@ -0,0 +1,56 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_ | |||||
#define INC_FRAMEWORK_MEMORY_MEMORY_API_H_ | |||||
#include <string> | |||||
#include <vector> | |||||
#include "ge/ge_api_error_codes.h" | |||||
#include "runtime/mem.h" | |||||
namespace ge { | |||||
enum MemStorageType { | |||||
HBM = 0, | |||||
RDMA_HBM, | |||||
}; | |||||
struct HostVarInfo { | |||||
uint64_t base_addr; | |||||
uint64_t var_size; | |||||
}; | |||||
/// | |||||
/// \param size [in] rdma pool memory size to be allocated. | |||||
/// \param mem_type [in] memory type for rdma pool. | |||||
/// \return Status result of function | |||||
Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); | |||||
/// | |||||
/// \param var_info [in] host variable addr infos. | |||||
/// \param mem_type [in] memory type for rdma pool. | |||||
/// \return Status result of function | |||||
Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); | |||||
/// | |||||
/// \param var_name [in] var_name name of host variable. | |||||
/// \param base_addr [out] base_addr vase addr of host variable. | |||||
/// \param var_size [out] var_size memory_size of host variable. | |||||
/// \return Status result of function | |||||
Status GetVarBaseAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size); | |||||
} // namespace ge | |||||
#endif // INC_FRAMEWORK_MEMORY_MEMORY_API_H_ |
@@ -96,10 +96,6 @@ Status CheckCustomAiCpuOpLib(); | |||||
Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); | Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); | ||||
Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); | |||||
Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info); | |||||
void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | ||||
std::vector<std::string> &output_nodes_name); | std::vector<std::string> &output_nodes_name); | ||||
@@ -120,6 +120,7 @@ struct OmgContext { | |||||
bool is_dynamic_input = false; | bool is_dynamic_input = false; | ||||
std::string dynamic_batch_size; | std::string dynamic_batch_size; | ||||
std::string dynamic_image_size; | std::string dynamic_image_size; | ||||
std::string dynamic_dims; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -0,0 +1,110 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ | |||||
#define INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ | |||||
#include <google/protobuf/message.h> | |||||
#include "framework/common/types.h" | |||||
#include "framework/omg/omg_inner_types.h" | |||||
#include "graph/attr_value.h" | |||||
#include "graph/compute_graph.h" | |||||
#include "graph/ge_tensor.h" | |||||
#include "graph/graph.h" | |||||
#include "graph/op_desc.h" | |||||
#include "graph/operator.h" | |||||
#include "graph/range_vistor.h" | |||||
#include "graph/utils/attr_utils.h" | |||||
#include "graph/utils/graph_utils.h" | |||||
#include "graph/utils/op_desc_utils.h" | |||||
#include "graph/utils/tensor_utils.h" | |||||
using Status = domi::Status; | |||||
namespace domi { | |||||
using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>( | |||||
const google::protobuf::Message *root_proto, const std::string &graph)>; | |||||
class ModelParser { | |||||
public: | |||||
ModelParser() {} | |||||
virtual ~ModelParser() {} | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Analyze network model data | |||||
* @param [in] file Network model file path | |||||
* @param [in|out] graph Save the network information after analysis | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
virtual Status Parse(const char *file, ge::Graph &graph) = 0; | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Parse relevant data from memory and save it to graph | |||||
* @param [in] input Model file memory data | |||||
* @param [in|out] graph A graph for saving the model information after analysis | |||||
* @return SUCCESS | |||||
* @return FAILED | |||||
* @author | |||||
*/ | |||||
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Analyze network model data | |||||
* @param [in] proto network model | |||||
* @param [in|out] graph Save the network information after analysis | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Analyze callback model data in subgraph | |||||
* @param [in] proto network model | |||||
* @param [in] callback callback of subgraph | |||||
* @param [in|out] graph Save the network information after analysis | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, | |||||
ge::ComputeGraphPtr &graph) = 0; | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Convert model files to JSON format | |||||
* @param [in] model_file Model file path to be converted | |||||
* @param [out] json_file Converted JSON file path | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } | |||||
/* | |||||
* @ingroup domi_omg | |||||
* @brief Convert network data type | |||||
* @param [in] type Data type to be converted | |||||
* @return ge::DataType | |||||
*/ | |||||
virtual ge::DataType ConvertToGeDataType(const uint32_t type) = 0; | |||||
virtual Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0; | |||||
}; | |||||
} // namespace domi | |||||
#endif // INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ |
@@ -0,0 +1,92 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ | |||||
#define INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ | |||||
#include <google/protobuf/text_format.h> | |||||
#include "common/types.h" | |||||
#include "omg/omg_inner_types.h" | |||||
#include "proto/om.pb.h" | |||||
#include "graph/ge_tensor.h" | |||||
#include "graph/op_desc.h" | |||||
#include "graph/utils/op_desc_utils.h" | |||||
using google::protobuf::Message; | |||||
using Status = domi::Status; | |||||
namespace ge { | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Used to analyze operator information | |||||
* | |||||
*/ | |||||
class OpParser {
 public:
  /**
   * @ingroup domi_omg
   * @brief Destructor
   */
  virtual ~OpParser() {}

  /**
   * @ingroup domi_omg
   * @brief Parse operator parameters into an OpDesc
   * @param [in] op_src  Parameter data to be parsed
   * @param [out] op_desc  Operator description holding the parsed parameters
   * @return SUCCESS
   * @return FAILED
   */
  virtual Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0;

  /**
   * @ingroup domi_omg
   * @brief Parse operator parameters into an Operator
   * @param [in] op_src  Parameter data to be parsed
   * @param [out] op_dest  Operator holding the parsed parameters
   * @return SUCCESS
   * @return FAILED
   */
  virtual Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0;

  /**
   * @ingroup domi_omg
   * @brief Parse operator weight information
   * @param [in] op_src  Weight data to be parsed
   * @param [out] node  Node holding the parsed weight data
   * @return SUCCESS
   * @return FAILED
   */
  virtual Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0;

  /**
   * @ingroup domi_omg
   * @brief Get the format information according to the parameters in the operator
   * @param [in] op_src  Parameter data to be parsed
   * @param [out] format  Output the parsed format
   * @return SUCCESS
   * @return FAILED
   */
  virtual Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) {
    (void)op_src;
    // Indicates that the op does not provide a value for format
    format = domi::DOMI_TENSOR_RESERVED;
    return domi::SUCCESS;
  }
};
} // namespace ge | |||||
#endif // INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ |
@@ -0,0 +1,31 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ | |||||
#define INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ | |||||
#include <iostream> | |||||
#include <map> | |||||
#include <string> | |||||
#include "ge/ge_api_error_codes.h" | |||||
namespace ge {
// Initialize the parser with the given options
Status ParserInitialize(const std::map<std::string, std::string>& options);
// Finalize the parser, release all resources it holds
Status ParserFinalize();
}  // namespace ge
#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ |
@@ -0,0 +1,138 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_ | |||||
#define INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_ | |||||
#include <map> | |||||
#include <memory> | |||||
#include <mutex> | |||||
#include <string> | |||||
#include "framework/common/types.h" | |||||
#include "framework/omg/omg_inner_types.h" | |||||
using Status = domi::Status; | |||||
namespace domi { | |||||
class WeightsParser; | |||||
class ModelParser; | |||||
typedef std::shared_ptr<ModelParser> (*MODEL_PARSER_CREATOR_FUN)(void); | |||||
// Singleton factory that creates model parsers for different frameworks
class ModelParserFactory {
 public:
  // Returns the process-wide factory instance
  static ModelParserFactory *Instance();

  /**
   * @ingroup domi_omg
   * @brief Create a modelparser based on the type entered
   * @param [in] type  Framework type
   * @return Created modelparser
   */
  std::shared_ptr<ModelParser> CreateModelParser(const domi::FrameworkType type);

  /**
   * @ingroup domi_omg
   * @brief Register create function
   * @param [in] type  Framework type
   * @param [in] fun  ModelParser's create function
   */
  void RegisterCreator(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun);

 protected:
  ModelParserFactory() {}
  ~ModelParserFactory();

 private:
  // Maps each framework type to its registered creator function
  std::map<domi::FrameworkType, MODEL_PARSER_CREATOR_FUN> creator_map_;
};  // end class ModelParserFactory
// Helper whose constructor registers a ModelParser creator with the factory;
// instantiated as a global by REGISTER_MODEL_PARSER_CREATOR.
// NOTE(review): "Registerar" is a misspelling of "Registrar"; renaming would
// break the registration macros and existing users, so it is kept as-is.
class ModelParserRegisterar {
 public:
  ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun) {
    ModelParserFactory::Instance()->RegisterCreator(type, fun);
  }
  ~ModelParserRegisterar() {}
};
// Registration macro for model parsers: defines a creator function that
// constructs the given class (returning nullptr on any exception) and a
// global registrar object that registers the creator for the framework type.
#define REGISTER_MODEL_PARSER_CREATOR(type, clazz)               \
  std::shared_ptr<ModelParser> Creator_##type##_Model_Parser() { \
    std::shared_ptr<clazz> ptr = nullptr;                        \
    try {                                                        \
      /* std:: qualified: this header has no using-declaration   \
         for make_shared, so the unqualified form is fragile */  \
      ptr = std::make_shared<clazz>();                           \
    } catch (...) {                                              \
      ptr = nullptr;                                             \
    }                                                            \
    return std::shared_ptr<ModelParser>(ptr);                    \
  }                                                              \
  ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser)
typedef std::shared_ptr<WeightsParser> (*WEIGHTS_PARSER_CREATOR_FUN)(void); | |||||
// Singleton factory that creates weights parsers for different frameworks
class WeightsParserFactory {
 public:
  // Returns the process-wide factory instance
  static WeightsParserFactory *Instance();

  /**
   * @ingroup domi_omg
   * @brief Create weightsparser based on the type entered
   * @param [in] type  Framework type
   * @return Created weightsparser
   */
  std::shared_ptr<WeightsParser> CreateWeightsParser(const domi::FrameworkType type);

  /**
   * @ingroup domi_omg
   * @brief Register create function
   * @param [in] type  Framework type
   * @param [in] fun  WeightsParser's create function
   */
  void RegisterCreator(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun);

 protected:
  WeightsParserFactory() {}
  ~WeightsParserFactory();

 private:
  // Maps each framework type to its registered creator function
  std::map<domi::FrameworkType, WEIGHTS_PARSER_CREATOR_FUN> creator_map_;
};  // end class WeightsParserFactory
// Helper whose constructor registers a WeightsParser creator with the factory;
// instantiated as a global by REGISTER_WEIGHTS_PARSER_CREATOR.
// NOTE(review): "Registerar" is a misspelling of "Registrar"; kept for
// compatibility with the registration macros and existing users.
class WeightsParserRegisterar {
 public:
  WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun) {
    WeightsParserFactory::Instance()->RegisterCreator(type, fun);
  }
  ~WeightsParserRegisterar() {}
};
// Registration macro for weights parsers: defines a creator function that
// constructs the given class (returning nullptr on any exception) and a
// global registrar object that registers the creator for the framework type.
#define REGISTER_WEIGHTS_PARSER_CREATOR(type, clazz)                 \
  std::shared_ptr<WeightsParser> Creator_##type##_Weights_Parser() { \
    std::shared_ptr<clazz> ptr = nullptr;                            \
    try {                                                            \
      /* std:: qualified: this header has no using-declaration       \
         for make_shared, so the unqualified form is fragile */      \
      ptr = std::make_shared<clazz>();                               \
    } catch (...) {                                                  \
      ptr = nullptr;                                                 \
    }                                                                \
    return std::shared_ptr<WeightsParser>(ptr);                      \
  }                                                                  \
  WeightsParserRegisterar g_##type##_Weights_Parser_Creator(type, Creator_##type##_Weights_Parser)
}  // namespace domi (no trailing semicolon: namespaces are not declarations)
#endif  // INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_
@@ -0,0 +1,43 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_ | |||||
#define INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_ | |||||
#include <map> | |||||
#include <memory> | |||||
#include <string> | |||||
#include <unordered_map> | |||||
#include <utility> | |||||
#include <vector> | |||||
#include "external/register/register_fmk_types.h" | |||||
#include "external/register/register_types.h" | |||||
#include "framework/omg/omg_inner_types.h" | |||||
namespace ge { | |||||
struct ParserContext { | |||||
std::unordered_map<std::string, std::vector<int64_t>> input_dims; | |||||
domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; | |||||
; | |||||
RunMode run_mode = ONLY_PRE_CHECK; | |||||
std::string custom_proto_path; // save caffe custom proto path, used by caffe parse | |||||
std::string caffe_proto_path; // save caffe proto path, used by caffe parse | |||||
}; | |||||
ParserContext &GetParserContext(); | |||||
} // namespace ge | |||||
#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_ |
@@ -0,0 +1,74 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ | |||||
#define INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ | |||||
#include "graph/graph.h" | |||||
#include "graph/attr_value.h" | |||||
#include "graph/compute_graph.h" | |||||
#include "graph/ge_tensor.h" | |||||
#include "graph/op_desc.h" | |||||
#include "graph/operator.h" | |||||
#include "graph/range_vistor.h" | |||||
#include "graph/utils/attr_utils.h" | |||||
#include "graph/utils/op_desc_utils.h" | |||||
#include "graph/utils/tensor_utils.h" | |||||
namespace domi { | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Weight information resolver | |||||
* | |||||
*/ | |||||
class WeightsParser { | |||||
public: | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Constructor | |||||
*/ | |||||
WeightsParser() {} | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Deconstructor | |||||
*/ | |||||
virtual ~WeightsParser() {} | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Analyze weight data | |||||
* @param [in] file Path of weight file after training | |||||
* @param [in|out] graph Graph for saving weight information after analysis | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
virtual Status Parse(const char *file, ge::Graph &graph) = 0; | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Parse relevant data from memory and save it to graph | |||||
* @param [in] input Model file memory data | |||||
* @param [in|out] graph A graph for saving the model information after analysis | |||||
* @return SUCCESS | |||||
* @return FAILED | |||||
* @author | |||||
*/ | |||||
virtual Status ParseFromMemory(const char *input, uint32_t lengt, ge::ComputeGraphPtr &graph) = 0; | |||||
}; | |||||
} // namespace domi | |||||
#endif // INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_ |
@@ -87,11 +87,14 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A | |||||
// AddNode with NodePtr | // AddNode with NodePtr | ||||
NodePtr AddNode(NodePtr node); | NodePtr AddNode(NodePtr node); | ||||
NodePtr AddNode(OpDescPtr op); | NodePtr AddNode(OpDescPtr op); | ||||
NodePtr AddNode(OpDescPtr op, int64_t id); // for unserialize. | |||||
NodePtr AddNode(OpDescPtr op, int64_t id); // for unserialize | |||||
NodePtr AddNodeFront(NodePtr node); | NodePtr AddNodeFront(NodePtr node); | ||||
NodePtr AddNodeFront(const OpDescPtr &op); | NodePtr AddNodeFront(const OpDescPtr &op); | ||||
NodePtr AddInputNode(NodePtr node); | NodePtr AddInputNode(NodePtr node); | ||||
NodePtr AddOutputNode(NodePtr node); | NodePtr AddOutputNode(NodePtr node); | ||||
// insert node with specific pre_node | |||||
NodePtr AddNodeAfter(OpDescPtr &op, const NodePtr &pre_node); | |||||
NodePtr AddNodeAfter(NodePtr node, const NodePtr &pre_node); | |||||
graphStatus RemoveNode(const NodePtr &node); | graphStatus RemoveNode(const NodePtr &node); | ||||
graphStatus RemoveInputNode(const NodePtr &node); | graphStatus RemoveInputNode(const NodePtr &node); | ||||
@@ -185,6 +185,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_INPUT; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_OUTPUT; | |||||
// to be deleted | // to be deleted | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION; | ||||
@@ -934,12 +937,14 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PRED_VALUE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PRED_VALUE; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_NUM; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_NUM; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_LABEL; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_LABEL; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_BATCH; | |||||
// Control flow | // Control flow | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_SWITCH_COND; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_SWITCH_COND; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ACTIVE_STREAM_LIST; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ACTIVE_STREAM_LIST; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCHN_PRED_VALUE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCHN_PRED_VALUE; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_DYNAMIC_DIMS; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG; | ||||
@@ -983,6 +988,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NEE | |||||
// For mutil-batch | // For mutil-batch | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERT_BY_MBATCH; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERT_BY_MBATCH; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_TYPE; | |||||
// For inserted op | // For inserted op | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERTED_BY_GE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERTED_BY_GE; | ||||
@@ -1022,6 +1028,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_ADDR; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_ADDR; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_VALID_SIZE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_VALID_SIZE; | ||||
// for unregistered op | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNREGST_OPPATH; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNREGST_ATTRLIST; | |||||
// op overflow dump | // op overflow dump | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE; | ||||
@@ -1075,8 +1085,25 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX; | ||||
// atc user def dtype&format | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_DATATYPE; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_FORMAT; | |||||
// for fusion op plugin | // for fusion op plugin | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE; | ||||
// graph partition for aicpu | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_END_REAR_NODE_ENGINE_NAME; | |||||
// input and output memory type | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_VARIABLE_PLACEMENT; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INPUT_MEMORY_TYPE; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OUTPUT_MEMORY_TYPE; | |||||
// input_output_offset | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_BASIC_OFFSET; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET; | |||||
} // namespace ge | } // namespace ge | ||||
#endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ | #endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ |
@@ -67,6 +67,9 @@ class ModelSerializeImp { | |||||
bool HandleNodeNameRef(); | bool HandleNodeNameRef(); | ||||
bool UnserializeOpDesc(OpDescPtr &opDesc, proto::OpDef &opDefProto); | bool UnserializeOpDesc(OpDescPtr &opDesc, proto::OpDef &opDefProto); | ||||
void AttrDefToOpDesc(OpDescPtr &op_desc, std::vector<string> &key_in, std::vector<string> &key_out, | |||||
std::vector<uint32_t> &value_in, std::vector<uint32_t> &value_out, std::vector<string> &opt); | |||||
void OpDescToAttrDef(const ConstOpDescPtr &op_desc, proto::OpDef *op_def_proto); | |||||
bool UnserializeNode(ComputeGraphPtr &graph, proto::OpDef &opDefProto); | bool UnserializeNode(ComputeGraphPtr &graph, proto::OpDef &opDefProto); | ||||
@@ -159,10 +159,6 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder { | |||||
std::map<string, uint32_t> GetAllInputName() const; | std::map<string, uint32_t> GetAllInputName() const; | ||||
void SetAllInputName(const std::map<string, uint32_t> &input_name_idx); | |||||
std::vector<string> GetAllOptionalInputName() const; | |||||
std::map<string, uint32_t> GetAllOutputName(); | std::map<string, uint32_t> GetAllOutputName(); | ||||
bool UpdateInputName(std::map<string, uint32_t> inputNameIdx); | bool UpdateInputName(std::map<string, uint32_t> inputNameIdx); | ||||
@@ -300,6 +296,8 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder { | |||||
std::map<std::string, SubgraphType> subgraph_ir_names_to_type_; | std::map<std::string, SubgraphType> subgraph_ir_names_to_type_; | ||||
vector<GeTensorDescPtr> inputs_desc_{}; | vector<GeTensorDescPtr> inputs_desc_{}; | ||||
map<string, uint32_t> input_name_idx_{}; | |||||
std::unordered_set<string> optional_input_names_{}; | |||||
vector<GeTensorDescPtr> outputs_desc_{}; | vector<GeTensorDescPtr> outputs_desc_{}; | ||||
map<string, uint32_t> output_name_idx_{}; | map<string, uint32_t> output_name_idx_{}; | ||||
std::function<graphStatus(Operator &)> infer_func_ = nullptr; | std::function<graphStatus(Operator &)> infer_func_ = nullptr; | ||||
@@ -62,18 +62,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string ComputeGraph::GetName() co | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void ComputeGraph::SetName(const string &name) { name_ = name; } | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void ComputeGraph::SetName(const string &name) { name_ = name; } | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY size_t ComputeGraph::GetAllNodesSize() const { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY size_t ComputeGraph::GetAllNodesSize() const { | ||||
size_t s = nodes_.size(); | |||||
for (const auto &sub_graph : sub_graph_) { | |||||
s += sub_graph->GetAllNodesSize(); | |||||
} | |||||
return s; | |||||
return GetAllNodes().size(); | |||||
} | } | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraph::Vistor<NodePtr> ComputeGraph::GetAllNodes() const { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraph::Vistor<NodePtr> ComputeGraph::GetAllNodes() const { | ||||
if (sub_graph_.empty()) { | |||||
return Vistor<NodePtr>(shared_from_this(), nodes_); | |||||
} | |||||
std::vector<std::shared_ptr<ComputeGraph>> subgraphs; | std::vector<std::shared_ptr<ComputeGraph>> subgraphs; | ||||
return AllGraphNodes(subgraphs); | return AllGraphNodes(subgraphs); | ||||
} | } | ||||
@@ -277,7 +269,7 @@ NodePtr ComputeGraph::AddNodeFront(NodePtr node) { | |||||
NodePtr ComputeGraph::AddNodeFront(const OpDescPtr &op) { | NodePtr ComputeGraph::AddNodeFront(const OpDescPtr &op) { | ||||
if (op == nullptr) { | if (op == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null."); | |||||
return nullptr; | return nullptr; | ||||
} | } | ||||
op->SetId(nodes_.size()); | op->SetId(nodes_.size()); | ||||
@@ -287,9 +279,38 @@ NodePtr ComputeGraph::AddNodeFront(const OpDescPtr &op) { | |||||
return AddNodeFront(node_ptr); | return AddNodeFront(node_ptr); | ||||
} | } | ||||
// Inserts an existing node into this graph immediately after pre_node.
// Returns the inserted node, or nullptr if any pointer is null or pre_node
// is not a direct node of this graph.
NodePtr ComputeGraph::AddNodeAfter(NodePtr node, const NodePtr &pre_node) {
  if (node == nullptr || node->GetOpDesc() == nullptr || pre_node == nullptr) {
    GELOGE(GRAPH_FAILED, "The node ptr or op desc should not be null.");
    return nullptr;
  }
  // Locate pre_node before mutating the new node, so that a failed lookup
  // leaves the node's id untouched (the original set the id first, leaving a
  // stale id on failure).
  auto node_iter = std::find(nodes_.begin(), nodes_.end(), pre_node);
  if (node_iter == nodes_.end()) {
    GELOGE(GRAPH_FAILED, "Cannot find pre_node in nodes_.");
    return nullptr;
  }
  node->GetOpDesc()->SetId(nodes_.size());
  nodes_.insert(node_iter + 1, node);
  return node;
}
// Wraps the OpDesc in a new Node owned by this graph, then inserts it
// immediately after pre_node via the NodePtr overload.
// Returns the inserted node, or nullptr on any failure.
NodePtr ComputeGraph::AddNodeAfter(OpDescPtr &op, const NodePtr &pre_node) {
  if (op == nullptr) {
    GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null.");
    return nullptr;
  }
  op->SetId(nodes_.size());
  // new (std::nothrow): allocation failure yields nullptr instead of throwing.
  NodePtr node_ptr = shared_ptr<Node>(new (std::nothrow) Node(op, shared_from_this()));
  GE_IF_BOOL_EXEC(node_ptr == nullptr, GELOGE(GRAPH_FAILED, "node_ptr is NULL!!!"); return nullptr);
  GE_IF_BOOL_EXEC(node_ptr->Init() != GRAPH_SUCCESS, GELOGE(GRAPH_FAILED, "node init failed."); return nullptr);
  return AddNodeAfter(node_ptr, pre_node);
}
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(NodePtr node) { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(NodePtr node) { | ||||
if (node == nullptr || node->GetOpDesc() == nullptr) { | if (node == nullptr || node->GetOpDesc() == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The node ptr should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The node ptr should not be null."); | |||||
return nullptr; | return nullptr; | ||||
} | } | ||||
node->GetOpDesc()->SetId((int64_t)GetDirectNodesSize()); | node->GetOpDesc()->SetId((int64_t)GetDirectNodesSize()); | ||||
@@ -299,7 +320,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(Nod | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(OpDescPtr op) { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(OpDescPtr op) { | ||||
if (op == nullptr) { | if (op == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null."); | |||||
return nullptr; | return nullptr; | ||||
} | } | ||||
op->SetId(GetDirectNodesSize()); | op->SetId(GetDirectNodesSize()); | ||||
@@ -311,7 +332,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(OpD | |||||
NodePtr ComputeGraph::AddNode(OpDescPtr op, int64_t id) { // for unserialize. | NodePtr ComputeGraph::AddNode(OpDescPtr op, int64_t id) { // for unserialize. | ||||
if (op == nullptr) { | if (op == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null."); | |||||
return nullptr; | return nullptr; | ||||
} | } | ||||
op->SetId(id); | op->SetId(id); | ||||
@@ -324,7 +345,7 @@ NodePtr ComputeGraph::AddNode(OpDescPtr op, int64_t id) { // for unserialize. | |||||
NodePtr ComputeGraph::AddInputNode(NodePtr node) { | NodePtr ComputeGraph::AddInputNode(NodePtr node) { | ||||
if (node == nullptr) { | if (node == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The node ptr should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The node ptr should not be null."); | |||||
return nullptr; | return nullptr; | ||||
} | } | ||||
input_nodes_.push_back(node); | input_nodes_.push_back(node); | ||||
@@ -336,7 +357,7 @@ NodePtr ComputeGraph::AddInputNode(NodePtr node) { | |||||
NodePtr ComputeGraph::AddOutputNode(NodePtr node) { | NodePtr ComputeGraph::AddOutputNode(NodePtr node) { | ||||
if (node == nullptr || node->GetOpDesc() == nullptr) { | if (node == nullptr || node->GetOpDesc() == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The node ptr or opdesc should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The node ptr or opdesc should not be null."); | |||||
return nullptr; | return nullptr; | ||||
} | } | ||||
@@ -372,7 +393,7 @@ graphStatus ComputeGraph::RemoveConstInput(const NodePtr &node) { | |||||
if (out_anchor->GetOwnerNode()->GetType() == CONSTANT || out_anchor->GetOwnerNode()->GetType() == CONSTANTOP) { | if (out_anchor->GetOwnerNode()->GetType() == CONSTANT || out_anchor->GetOwnerNode()->GetType() == CONSTANTOP) { | ||||
GE_CHK_BOOL_RET_STATUS(GraphUtils::RemoveEdge(out_anchor, in_anchor) == GRAPH_SUCCESS, GRAPH_FAILED, | GE_CHK_BOOL_RET_STATUS(GraphUtils::RemoveEdge(out_anchor, in_anchor) == GRAPH_SUCCESS, GRAPH_FAILED, | ||||
"Remove edge from const op failed."); | "Remove edge from const op failed."); | ||||
if (out_anchor->GetOwnerNode()->GetOutDataNodes().size() == 0) { | |||||
if (out_anchor->GetOwnerNode()->GetOutNodes().size() == 0) { | |||||
GELOGI("Remove const op %s.", out_anchor->GetOwnerNode()->GetName().c_str()); | GELOGI("Remove const op %s.", out_anchor->GetOwnerNode()->GetName().c_str()); | ||||
auto iter = find(nodes_.begin(), nodes_.end(), out_anchor->GetOwnerNode()); | auto iter = find(nodes_.begin(), nodes_.end(), out_anchor->GetOwnerNode()); | ||||
if (iter != nodes_.end()) { | if (iter != nodes_.end()) { | ||||
@@ -386,7 +407,7 @@ graphStatus ComputeGraph::RemoveConstInput(const NodePtr &node) { | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::RemoveNode(const NodePtr &node) { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::RemoveNode(const NodePtr &node) { | ||||
if (node == nullptr) { | if (node == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The node ptr should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The node ptr should not be null."); | |||||
return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
} | } | ||||
@@ -415,7 +436,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::RemoveN | |||||
// Used in sub_graph scenes | // Used in sub_graph scenes | ||||
graphStatus ComputeGraph::RemoveInputNode(const NodePtr &node) { | graphStatus ComputeGraph::RemoveInputNode(const NodePtr &node) { | ||||
if (node == nullptr) { | if (node == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The node ptr should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The node ptr should not be null."); | |||||
return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
} | } | ||||
@@ -430,7 +451,7 @@ graphStatus ComputeGraph::RemoveInputNode(const NodePtr &node) { | |||||
// Used in sub_graph scenes | // Used in sub_graph scenes | ||||
graphStatus ComputeGraph::RemoveOutputNode(const NodePtr &node) { | graphStatus ComputeGraph::RemoveOutputNode(const NodePtr &node) { | ||||
if (node == nullptr) { | if (node == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The node ptr should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The node ptr should not be null."); | |||||
return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
} | } | ||||
@@ -451,7 +472,7 @@ graphStatus ComputeGraph::RemoveOutputNode(const NodePtr &node) { | |||||
std::shared_ptr<ComputeGraph> ComputeGraph::AddSubGraph(std::shared_ptr<ComputeGraph> sub_graph) { | std::shared_ptr<ComputeGraph> ComputeGraph::AddSubGraph(std::shared_ptr<ComputeGraph> sub_graph) { | ||||
if (sub_graph == nullptr) { | if (sub_graph == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The graph ptr should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The graph ptr should not be null."); | |||||
return nullptr; | return nullptr; | ||||
} | } | ||||
sub_graph_.push_back(sub_graph); | sub_graph_.push_back(sub_graph); | ||||
@@ -461,7 +482,7 @@ std::shared_ptr<ComputeGraph> ComputeGraph::AddSubGraph(std::shared_ptr<ComputeG | |||||
graphStatus ComputeGraph::RemoveSubGraph(const std::shared_ptr<ComputeGraph> &sub_graph) { | graphStatus ComputeGraph::RemoveSubGraph(const std::shared_ptr<ComputeGraph> &sub_graph) { | ||||
if (sub_graph == nullptr) { | if (sub_graph == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "The graph ptr should be not null."); | |||||
GELOGE(GRAPH_FAILED, "The graph ptr should not be null."); | |||||
return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
} | } | ||||
@@ -500,8 +521,7 @@ ComputeGraph::AddSubgraph(const std::string &name, const std::shared_ptr<Compute | |||||
return GRAPH_PARAM_INVALID; | return GRAPH_PARAM_INVALID; | ||||
} | } | ||||
if (!this->parent_graph_.expired()) { | if (!this->parent_graph_.expired()) { | ||||
GE_LOGE("The subgraphs can only be added to the root graph"); | |||||
return GRAPH_PARAM_INVALID; | |||||
GELOGW("The subgraphs should only be added to the root graph"); | |||||
} | } | ||||
if (name != subgraph->GetName()) { | if (name != subgraph->GetName()) { | ||||
GELOGW("The subgraph name %s is different with input %s", subgraph->GetName().c_str(), name.c_str()); | GELOGW("The subgraph name %s is different with input %s", subgraph->GetName().c_str(), name.c_str()); | ||||
@@ -653,7 +673,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::InsertE | |||||
GELOGW("node or OpDescPtr is nullptr."); | GELOGW("node or OpDescPtr is nullptr."); | ||||
continue; | continue; | ||||
} | } | ||||
GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "The node should be not null."); return GRAPH_FAILED); | |||||
GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "The node should not be null."); return GRAPH_FAILED); | |||||
if (node->GetOpDesc()->GetType() == RECV) { | if (node->GetOpDesc()->GetType() == RECV) { | ||||
auto iter = find(node_vec.begin(), node_vec.end(), node); | auto iter = find(node_vec.begin(), node_vec.end(), node); | ||||
if (iter == node_vec.end()) { | if (iter == node_vec.end()) { | ||||
@@ -799,7 +819,8 @@ graphStatus ComputeGraph::CollectBreadthOutNode(const NodePtr &node, std::map<No | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::TopologicalSorting() { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::TopologicalSorting() { | ||||
auto ret = TopologicalSortingGraph(); | auto ret = TopologicalSortingGraph(); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Sub graph partition Failed"); | |||||
GraphUtils::DumpGEGraphToOnnx(*this, "black_box"); | |||||
GELOGE(ret, "Graph [%s] topological sort failed, saved to file black_box", name_.c_str()); | |||||
return ret; | return ret; | ||||
} | } | ||||
@@ -1117,9 +1138,11 @@ graphStatus ComputeGraph::RemoveExtraOutEdge(const NodePtr &node) { | |||||
} | } | ||||
graphStatus ComputeGraph::Verify() { | graphStatus ComputeGraph::Verify() { | ||||
bool is_unknown_graph = GetGraphUnknownFlag(); | |||||
for (const auto &node_ptr : GetAllNodes()) { | for (const auto &node_ptr : GetAllNodes()) { | ||||
GE_CHECK_NOTNULL(node_ptr); | GE_CHECK_NOTNULL(node_ptr); | ||||
GE_CHECK_NOTNULL(node_ptr->GetOpDesc()); | GE_CHECK_NOTNULL(node_ptr->GetOpDesc()); | ||||
GE_IF_BOOL_EXEC(is_unknown_graph, continue); | |||||
GE_CHK_BOOL_EXEC(node_ptr->GetOpDesc()->CommonVerify() == GRAPH_SUCCESS, return GRAPH_FAILED, | GE_CHK_BOOL_EXEC(node_ptr->GetOpDesc()->CommonVerify() == GRAPH_SUCCESS, return GRAPH_FAILED, | ||||
"Verifying %s failed.", node_ptr->GetName().c_str()); | "Verifying %s failed.", node_ptr->GetName().c_str()); | ||||
} | } | ||||
@@ -158,6 +158,10 @@ const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; | |||||
const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims"; | const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims"; | ||||
const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size"; | const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size"; | ||||
// Identify node connecting to input and output | |||||
const std::string ATTR_NAME_NODE_CONNECT_INPUT = "_is_connected_to_data"; | |||||
const std::string ATTR_NAME_NODE_CONNECT_OUTPUT = "_is_connected_to_netoutput"; | |||||
// To be deleted | // To be deleted | ||||
const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; | const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; | ||||
const std::string PERMUTE_RESHAPE_FUSION = "permute_reshape_fusion"; | const std::string PERMUTE_RESHAPE_FUSION = "permute_reshape_fusion"; | ||||
@@ -905,6 +909,7 @@ const std::string ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE = "is_end_of_inputmem_l | |||||
const std::string ATTR_NAME_PRED_VALUE = "_pred_value"; | const std::string ATTR_NAME_PRED_VALUE = "_pred_value"; | ||||
const std::string ATTR_NAME_BATCH_NUM = "_batch_num"; | const std::string ATTR_NAME_BATCH_NUM = "_batch_num"; | ||||
const std::string ATTR_NAME_BATCH_LABEL = "_batch_label"; | const std::string ATTR_NAME_BATCH_LABEL = "_batch_label"; | ||||
const std::string ATTR_NAME_COMBINED_BATCH = "_combined_batch"; | |||||
// Control flow | // Control flow | ||||
const std::string ATTR_NAME_STREAM_SWITCH_COND = "switch_condition"; | const std::string ATTR_NAME_STREAM_SWITCH_COND = "switch_condition"; | ||||
@@ -914,6 +919,7 @@ const std::string ATTR_NAME_SWITCHN_PRED_VALUE = "switch_pred_value"; | |||||
const std::string ATTR_NAME_ITERATORS_PER_LOOP = "iterations_per_loop"; | const std::string ATTR_NAME_ITERATORS_PER_LOOP = "iterations_per_loop"; | ||||
const std::string ATTR_NAME_FLOW_CTRL_NODE_FLAG = "is_flow_ctrl_node"; | const std::string ATTR_NAME_FLOW_CTRL_NODE_FLAG = "is_flow_ctrl_node"; | ||||
const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE = "subgraph_first_active"; | const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE = "subgraph_first_active"; | ||||
const std::string ATTR_NAME_COMBINED_DYNAMIC_DIMS = "combined_dynamic_dims"; | |||||
const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL = "_switch_branch_node_label"; | const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL = "_switch_branch_node_label"; | ||||
const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG = "_switch_true_branch_flag"; | const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG = "_switch_true_branch_flag"; | ||||
@@ -983,6 +989,8 @@ const std::string ATTR_INSERT_BY_MBATCH = "mbatch-inserted-node"; | |||||
const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS = "_mbatch_origin_input_dims"; | const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS = "_mbatch_origin_input_dims"; | ||||
const std::string ATTR_DYNAMIC_TYPE = "mbatch_dynamic_type"; | |||||
// For inserted op | // For inserted op | ||||
const std::string ATTR_INSERTED_BY_GE = "_inserted_by_ge"; | const std::string ATTR_INSERTED_BY_GE = "_inserted_by_ge"; | ||||
@@ -1021,6 +1029,10 @@ const std::string ATTR_NAME_VALID_OUTPUT_SHAPE_LIST_LIST = "_valid_output_shape_ | |||||
const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST = "_input_offset_list_list"; | const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST = "_input_offset_list_list"; | ||||
const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_output_offset_list_list"; | const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_output_offset_list_list"; | ||||
// for unregistered op | |||||
const std::string ATTR_NAME_UNREGST_OPPATH = "_unregst_oppath"; | |||||
const std::string ATTR_NAME_UNREGST_ATTRLIST = "_unregst_attrlist"; | |||||
// used for Horovod | // used for Horovod | ||||
const std::string ATTR_INTER_EVENT_IDENTIFY = "event_id"; | const std::string ATTR_INTER_EVENT_IDENTIFY = "event_id"; | ||||
const std::string ATTR_HOROVOD_ATTR_REDUCE_TYPE = "reduce_op"; | const std::string ATTR_HOROVOD_ATTR_REDUCE_TYPE = "reduce_op"; | ||||
@@ -1032,6 +1044,23 @@ const std::string ATTR_NAME_HCCL_FUSED_FLAG = "_hccl_fused_node"; | |||||
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr"; | const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr"; | ||||
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index"; | const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index"; | ||||
// atc user def dtype&format | |||||
const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type"; | |||||
const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format"; | |||||
// for fusion op plugin | // for fusion op plugin | ||||
const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE = "_fusionop_original_type"; | const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE = "_fusionop_original_type"; | ||||
// graph partition for aicpu | |||||
const std::string ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME = "pld_front_node_engine_name"; | |||||
const std::string ATTR_NAME_END_REAR_NODE_ENGINE_NAME = "end_rear_node_engine_name"; | |||||
// input and output memory type | |||||
const std::string ATTR_VARIABLE_PLACEMENT = "_variable_placement"; | |||||
const std::string ATTR_INPUT_MEMORY_TYPE = "_input_memory_type"; | |||||
const std::string ATTR_OUTPUT_MEMORY_TYPE = "_output_memory_type"; | |||||
// input_output_offset | |||||
const std::string ATTR_ZERO_COPY_BASIC_OFFSET = "_zero_copy_basic_offset"; | |||||
const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET = "_zero_copy_relative_offset"; | |||||
} // namespace ge | } // namespace ge |
@@ -1216,27 +1216,16 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CloneOpDesc( | |||||
GE_CHK_BOOL_EXEC(imp.UnserializeOpDesc(op_desc, *op_def), return op_desc, "op_desc unserialize failed"); | GE_CHK_BOOL_EXEC(imp.UnserializeOpDesc(op_desc, *op_def), return op_desc, "op_desc unserialize failed"); | ||||
op_desc->extAttrs_ = org_op_desc->extAttrs_; | op_desc->extAttrs_ = org_op_desc->extAttrs_; | ||||
if (op_desc->HasAttr("_input_name_idx_key")) { | |||||
if (op_desc->DelAttr("_input_name_idx_key") != SUCCESS) { | |||||
GELOGE(GRAPH_FAILED, "DelAttr _input_name_idx_key failed."); | |||||
} | |||||
// This function may be called by some passes of fusion engine, in this condition, do not need these attribute | |||||
if (!op_desc->input_name_idx_.empty()) { | |||||
op_desc->input_name_idx_.clear(); | |||||
} | } | ||||
if (op_desc->HasAttr("_input_name_idx_value")) { | |||||
if (op_desc->DelAttr("_input_name_idx_value") != SUCCESS) { | |||||
GELOGE(GRAPH_FAILED, "DelAttr _input_name_idx_value failed."); | |||||
} | |||||
} | |||||
if (op_desc->HasAttr("_opt_input")) { | |||||
if (op_desc->DelAttr("_opt_input") != SUCCESS) { | |||||
GELOGE(GRAPH_FAILED, "DelAttr _opt_input failed."); | |||||
} | |||||
} | |||||
if (!op_desc->output_name_idx_.empty()) { | if (!op_desc->output_name_idx_.empty()) { | ||||
op_desc->output_name_idx_.clear(); | op_desc->output_name_idx_.clear(); | ||||
} | } | ||||
if (!op_desc->optional_input_names_.empty()) { | |||||
op_desc->optional_input_names_.clear(); | |||||
} | |||||
return op_desc; | return op_desc; | ||||
} | } | ||||
@@ -1260,6 +1249,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CopyOpDesc(c | |||||
op_desc->extAttrs_ = org_op_desc->extAttrs_; | op_desc->extAttrs_ = org_op_desc->extAttrs_; | ||||
op_desc->input_name_idx_.insert(org_op_desc->input_name_idx_.begin(), org_op_desc->input_name_idx_.end()); | |||||
op_desc->optional_input_names_.insert(org_op_desc->optional_input_names_.begin(), | |||||
org_op_desc->optional_input_names_.end()); | |||||
op_desc->output_name_idx_.insert(org_op_desc->output_name_idx_.begin(), org_op_desc->output_name_idx_.end()); | op_desc->output_name_idx_.insert(org_op_desc->output_name_idx_.begin(), org_op_desc->output_name_idx_.end()); | ||||
op_desc->infer_func_ = org_op_desc->infer_func_; | op_desc->infer_func_ = org_op_desc->infer_func_; | ||||
@@ -124,6 +124,7 @@ LOCAL_SRC_FILES := \ | |||||
../../out/graph/lib64/stub/operator.cc \ | ../../out/graph/lib64/stub/operator.cc \ | ||||
../../out/graph/lib64/stub/operator_factory.cc \ | ../../out/graph/lib64/stub/operator_factory.cc \ | ||||
../../out/graph/lib64/stub/tensor.cc \ | ../../out/graph/lib64/stub/tensor.cc \ | ||||
../../out/graph/lib64/stub/inference_context.cc \ | |||||
LOCAL_SHARED_LIBRARIES := | LOCAL_SHARED_LIBRARIES := | ||||
@@ -201,6 +202,7 @@ LOCAL_SRC_FILES := \ | |||||
../../out/graph/lib64/stub/operator.cc \ | ../../out/graph/lib64/stub/operator.cc \ | ||||
../../out/graph/lib64/stub/operator_factory.cc \ | ../../out/graph/lib64/stub/operator_factory.cc \ | ||||
../../out/graph/lib64/stub/tensor.cc \ | ../../out/graph/lib64/stub/tensor.cc \ | ||||
../../out/graph/lib64/stub/inference_context.cc \ | |||||
LOCAL_SHARED_LIBRARIES := | LOCAL_SHARED_LIBRARIES := | ||||
@@ -128,21 +128,42 @@ bool ModelSerializeImp::SerializeOpDesc(const ConstOpDescPtr &op_desc, proto::Op | |||||
for (const std::string &name : op_desc->GetSubgraphInstanceNames()) { | for (const std::string &name : op_desc->GetSubgraphInstanceNames()) { | ||||
op_def_proto->add_subgraph_name(name); | op_def_proto->add_subgraph_name(name); | ||||
} | } | ||||
if (!op_desc->output_name_idx_.empty()) { | |||||
proto::AttrDef key; | |||||
proto::AttrDef value; | |||||
for (auto &item : op_desc->output_name_idx_) { | |||||
key.mutable_list()->add_s(item.first); | |||||
value.mutable_list()->add_i(item.second); | |||||
} | |||||
auto op_desc_attr = op_def_proto->mutable_attr(); | |||||
op_desc_attr->insert({"_output_name_key", key}); | |||||
op_desc_attr->insert({"_output_name_value", value}); | |||||
} | |||||
OpDescToAttrDef(op_desc, op_def_proto); | |||||
} | } | ||||
return true; | return true; | ||||
} | } | ||||
void ModelSerializeImp::OpDescToAttrDef(const ConstOpDescPtr &op_desc, proto::OpDef *op_def_proto) { | |||||
proto::AttrDef key_in; | |||||
proto::AttrDef value_in; | |||||
auto op_desc_attr = op_def_proto->mutable_attr(); | |||||
if (!op_desc->input_name_idx_.empty()) { | |||||
for (auto &item : op_desc->input_name_idx_) { | |||||
key_in.mutable_list()->add_s(item.first); | |||||
value_in.mutable_list()->add_i(item.second); | |||||
} | |||||
op_desc_attr->insert({"_input_name_key", key_in}); | |||||
op_desc_attr->insert({"_input_name_value", value_in}); | |||||
} | |||||
proto::AttrDef key_out; | |||||
proto::AttrDef value_out; | |||||
if (!op_desc->output_name_idx_.empty()) { | |||||
for (auto &item : op_desc->output_name_idx_) { | |||||
key_out.mutable_list()->add_s(item.first); | |||||
value_out.mutable_list()->add_i(item.second); | |||||
} | |||||
op_desc_attr->insert({"_output_name_key", key_out}); | |||||
op_desc_attr->insert({"_output_name_value", value_out}); | |||||
} | |||||
proto::AttrDef opt_input; | |||||
if (!op_desc->optional_input_names_.empty()) { | |||||
for (auto &item : op_desc->optional_input_names_) { | |||||
opt_input.mutable_list()->add_s(item); | |||||
} | |||||
op_desc_attr->insert({"_opt_input", opt_input}); | |||||
} | |||||
} | |||||
bool ModelSerializeImp::SerializeNode(const NodePtr &node, proto::OpDef *op_def_proto, bool is_dump) { | bool ModelSerializeImp::SerializeNode(const NodePtr &node, proto::OpDef *op_def_proto, bool is_dump) { | ||||
if (node == nullptr || op_def_proto == nullptr) { | if (node == nullptr || op_def_proto == nullptr) { | ||||
GELOGE(GRAPH_FAILED, "Input Para Node Invalid"); | GELOGE(GRAPH_FAILED, "Input Para Node Invalid"); | ||||
@@ -236,13 +257,70 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ModelSerializeImp::Unseriali | |||||
} | } | ||||
} | } | ||||
void ModelSerializeImp::AttrDefToOpDesc(OpDescPtr &op_desc, std::vector<string> &key_in, std::vector<string> &key_out, | |||||
std::vector<uint32_t> &value_in, std::vector<uint32_t> &value_out, | |||||
std::vector<string> &opt_input) { | |||||
if (!key_in.empty()) { | |||||
if (key_in.size() != value_in.size()) { | |||||
GELOGW("Key and value vector size is different. key_size: %zu, value_size: %zu.", key_out.size(), | |||||
value_in.size()); | |||||
} else { | |||||
for (uint32_t i = 0; i < key_in.size(); ++i) { | |||||
op_desc->input_name_idx_.insert(std::pair<string, uint32_t>(key_in.at(i), value_in.at(i))); | |||||
} | |||||
} | |||||
} | |||||
if (!key_out.empty()) { | |||||
if (key_out.size() != value_out.size()) { | |||||
GELOGW("Key and value vector size is different. key_size: %zu, value_size: %zu.", key_out.size(), | |||||
value_out.size()); | |||||
} else { | |||||
for (uint32_t i = 0; i < key_out.size(); ++i) { | |||||
op_desc->output_name_idx_.insert(std::pair<string, uint32_t>(key_out.at(i), value_out.at(i))); | |||||
} | |||||
} | |||||
} | |||||
if (!opt_input.empty()) { | |||||
for (const auto &i : opt_input) { | |||||
op_desc->optional_input_names_.insert(i); | |||||
} | |||||
} | |||||
} | |||||
bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_def_proto) { | bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_def_proto) { | ||||
std::vector<string> key; | |||||
std::vector<uint32_t> value; | |||||
std::vector<string> opt_input; | |||||
std::vector<string> key_in; | |||||
std::vector<uint32_t> value_in; | |||||
if (op_def_proto.attr().count("_opt_input") > 0) { | |||||
auto &name_list = op_def_proto.attr().at("_opt_input").list(); | |||||
for (const auto &item_s : name_list.s()) { | |||||
opt_input.push_back(item_s); | |||||
} | |||||
auto op_desc_attr = op_def_proto.mutable_attr(); | |||||
op_desc_attr->erase("_opt_input"); | |||||
} | |||||
if (op_def_proto.attr().count("_input_name_key") > 0) { | |||||
auto &output_name_key_list = op_def_proto.attr().at("_input_name_key").list(); | |||||
for (const auto &item_s : output_name_key_list.s()) { | |||||
key_in.push_back(item_s); | |||||
} | |||||
auto op_desc_attr = op_def_proto.mutable_attr(); | |||||
op_desc_attr->erase("_input_name_key"); | |||||
} | |||||
if (op_def_proto.attr().count("_input_name_value") > 0) { | |||||
auto &input_name_value_list = op_def_proto.attr().at("_input_name_value").list(); | |||||
for (const auto &item_i : input_name_value_list.i()) { | |||||
value_in.push_back(static_cast<uint32_t>(item_i)); | |||||
} | |||||
auto op_desc_attr = op_def_proto.mutable_attr(); | |||||
op_desc_attr->erase("_input_name_value"); | |||||
} | |||||
std::vector<string> key_out; | |||||
std::vector<uint32_t> value_out; | |||||
if (op_def_proto.attr().count("_output_name_key") > 0) { | if (op_def_proto.attr().count("_output_name_key") > 0) { | ||||
auto &output_name_key_list = op_def_proto.attr().at("_output_name_key").list(); | auto &output_name_key_list = op_def_proto.attr().at("_output_name_key").list(); | ||||
for (const auto &item_s : output_name_key_list.s()) { | for (const auto &item_s : output_name_key_list.s()) { | ||||
key.push_back(item_s); | |||||
key_out.push_back(item_s); | |||||
} | } | ||||
auto op_desc_attr = op_def_proto.mutable_attr(); | auto op_desc_attr = op_def_proto.mutable_attr(); | ||||
op_desc_attr->erase("_output_name_key"); | op_desc_attr->erase("_output_name_key"); | ||||
@@ -250,7 +328,7 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d | |||||
if (op_def_proto.attr().count("_output_name_value") > 0) { | if (op_def_proto.attr().count("_output_name_value") > 0) { | ||||
auto &output_name_value_list = op_def_proto.attr().at("_output_name_value").list(); | auto &output_name_value_list = op_def_proto.attr().at("_output_name_value").list(); | ||||
for (const auto &item_i : output_name_value_list.i()) { | for (const auto &item_i : output_name_value_list.i()) { | ||||
value.push_back(static_cast<uint32_t>(item_i)); | |||||
value_out.push_back(static_cast<uint32_t>(item_i)); | |||||
} | } | ||||
auto op_desc_attr = op_def_proto.mutable_attr(); | auto op_desc_attr = op_def_proto.mutable_attr(); | ||||
op_desc_attr->erase("_output_name_value"); | op_desc_attr->erase("_output_name_value"); | ||||
@@ -281,15 +359,8 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d | |||||
op_desc->SetSubgraphInstanceName(graph_index++, name); | op_desc->SetSubgraphInstanceName(graph_index++, name); | ||||
} | } | ||||
if (key.size() != 0) { | |||||
if (key.size() != value.size()) { | |||||
GELOGE(GRAPH_FAILED, "twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size()); | |||||
} else { | |||||
for (uint32_t i = 0; i < key.size(); ++i) { | |||||
op_desc->output_name_idx_.insert(std::pair<string, uint32_t>(key.at(i), value.at(i))); | |||||
} | |||||
} | |||||
} | |||||
// insert name index by key and value | |||||
AttrDefToOpDesc(op_desc, key_in, key_out, value_in, value_out, opt_input); | |||||
return true; | return true; | ||||
} | } | ||||
@@ -449,9 +449,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InDataAnchorPtr Node::GetInDataAn | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AnchorPtr Node::GetInAnchor(int idx) const { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AnchorPtr Node::GetInAnchor(int idx) const { | ||||
// Idx can't be less than -1 or >= in_data_anchors_.size(), -1 means index of control anchor_ | // Idx can't be less than -1 or >= in_data_anchors_.size(), -1 means index of control anchor_ | ||||
if (idx < -1 || idx >= static_cast<int>(in_data_anchors_.size())) { | if (idx < -1 || idx >= static_cast<int>(in_data_anchors_.size())) { | ||||
ErrorManager::GetInstance().ATCReportErrMessage( | |||||
"E19019", {"opname", "index", "anchorname", "optype"}, | |||||
{GetName().c_str(), std::to_string(idx), "in_anchor", GetType().c_str()}); | |||||
GELOGW("Op[%s] doesn't have index[%d]'s in_anchor which optype is %s.", GetName().c_str(), idx, GetType().c_str()); | GELOGW("Op[%s] doesn't have index[%d]'s in_anchor which optype is %s.", GetName().c_str(), idx, GetType().c_str()); | ||||
return nullptr; | return nullptr; | ||||
} else { | } else { | ||||
@@ -743,26 +740,27 @@ graphStatus Node::Verify() const { | |||||
const string aipp_data_type = "AippData"; | const string aipp_data_type = "AippData"; | ||||
const string const_type = "Const"; | const string const_type = "Const"; | ||||
const string variable_type = "Variable"; | const string variable_type = "Variable"; | ||||
bool is_unknown_graph = GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||||
GE_CHK_BOOL_EXEC(op_ != nullptr, return GRAPH_FAILED, "original OpDesc is nullptr"); | GE_CHK_BOOL_EXEC(op_ != nullptr, return GRAPH_FAILED, "original OpDesc is nullptr"); | ||||
for (const auto &in_anchor_ptr : GetAllInDataAnchors()) { | |||||
if (in_anchor_ptr == nullptr) { | |||||
GELOGW("in anchor ptr is null"); | |||||
continue; | |||||
} | |||||
bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type || | |||||
op_->GetType() == const_type || op_->GetType() == variable_type || | |||||
op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0; | |||||
if (!valid_anchor) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"name", "index"}, | |||||
{GetName(), std::to_string(in_anchor_ptr->GetIdx())}); | |||||
GELOGE(GRAPH_FAILED, "operator %s's input %d is not linked.", GetName().c_str(), in_anchor_ptr->GetIdx()); | |||||
return GRAPH_FAILED; | |||||
if (!is_unknown_graph) { | |||||
for (const auto &in_anchor_ptr : GetAllInDataAnchors()) { | |||||
GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue); | |||||
bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type || | |||||
op_->GetType() == const_type || op_->GetType() == variable_type || | |||||
op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0; | |||||
if (!valid_anchor) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"}, | |||||
{GetName(), std::to_string(in_anchor_ptr->GetIdx())}); | |||||
GELOGE(GRAPH_FAILED, "operator %s's input %d is not linked.", GetName().c_str(), in_anchor_ptr->GetIdx()); | |||||
return GRAPH_FAILED; | |||||
} | |||||
} | } | ||||
} | } | ||||
string frameworkop_type = "FrameworkOp"; | string frameworkop_type = "FrameworkOp"; | ||||
if (op_->GetType() != frameworkop_type) { | |||||
bool need_update_name = op_->GetType() != frameworkop_type && !is_unknown_graph; | |||||
if (need_update_name) { | |||||
auto node_op = ge::OperatorFactoryImpl::CreateOperator("node_op", op_->GetType()); | auto node_op = ge::OperatorFactoryImpl::CreateOperator("node_op", op_->GetType()); | ||||
if (node_op.IsEmpty()) { | if (node_op.IsEmpty()) { | ||||
GELOGW("get op from OperatorFactory fail. opType: %s", op_->GetType().c_str()); | GELOGW("get op from OperatorFactory fail. opType: %s", op_->GetType().c_str()); | ||||
@@ -782,7 +780,7 @@ graphStatus Node::Verify() const { | |||||
} | } | ||||
node_op.BreakConnect(); | node_op.BreakConnect(); | ||||
} | } | ||||
GE_IF_BOOL_EXEC(is_unknown_graph, return GRAPH_SUCCESS;); | |||||
if (op_->CommonVerify() == GRAPH_SUCCESS) { | if (op_->CommonVerify() == GRAPH_SUCCESS) { | ||||
Operator op_proxy = ge::OpDescUtils::CreateOperatorFromNode(shared_from_this()); | Operator op_proxy = ge::OpDescUtils::CreateOperatorFromNode(shared_from_this()); | ||||
auto verify_func = op_->GetVerifyFunc(); | auto verify_func = op_->GetVerifyFunc(); | ||||
@@ -64,12 +64,6 @@ const std::string ATTR_NAME_IS_INPUT_CONST = "is_input_const"; | |||||
const std::string ATTR_NAME_OP_INFER_DEPENDS = "_op_infer_depends"; | const std::string ATTR_NAME_OP_INFER_DEPENDS = "_op_infer_depends"; | ||||
const std::string ATTR_NAME_OPT_INPUT = "_opt_input"; | |||||
const std::string ATTR_NAME_INPUT_NAME_IDX_KEY = "_input_name_idx_key"; | |||||
const std::string ATTR_NAME_INPUT_NAME_IDX_VALUE = "_input_name_idx_value"; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDesc::OpDesc() { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDesc::OpDesc() { | ||||
op_def_.InitDefault(); | op_def_.InitDefault(); | ||||
if (op_def_.GetProtoMsg() != nullptr) { | if (op_def_.GetProtoMsg() != nullptr) { | ||||
@@ -211,8 +205,7 @@ graphStatus OpDesc::AddInputDesc(uint32_t index, const ge::GeTensorDesc &input_d | |||||
} | } | ||||
graphStatus OpDesc::AddInputDesc(const string &name, const ge::GeTensorDesc &input_desc) { | graphStatus OpDesc::AddInputDesc(const string &name, const ge::GeTensorDesc &input_desc) { | ||||
auto input_name_idx = GetAllInputName(); | |||||
if (input_name_idx.find(name) != input_name_idx.end()) { | |||||
if (input_name_idx_.find(name) != input_name_idx_.end()) { | |||||
GELOGI("input %s is exist, update it", name.c_str()); | GELOGI("input %s is exist, update it", name.c_str()); | ||||
graphStatus ret = UpdateInputDesc(name, input_desc); | graphStatus ret = UpdateInputDesc(name, input_desc); | ||||
return ret; | return ret; | ||||
@@ -224,17 +217,15 @@ graphStatus OpDesc::AddInputDesc(const string &name, const ge::GeTensorDesc &inp | |||||
return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
} | } | ||||
inputs_desc_.push_back(in_desc); | inputs_desc_.push_back(in_desc); | ||||
(void)input_name_idx.insert(make_pair(name, index)); | |||||
SetAllInputName(input_name_idx); | |||||
(void)input_name_idx_.insert(make_pair(name, index)); | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
} | } | ||||
graphStatus OpDesc::AddInputDescMiddle(const string &name, const unsigned int num, size_t index) { | graphStatus OpDesc::AddInputDescMiddle(const string &name, const unsigned int num, size_t index) { | ||||
auto input_name_idx = GetAllInputName(); | |||||
for (unsigned int i = 0; i < num; i++) { | for (unsigned int i = 0; i < num; i++) { | ||||
string input_name = name + std::to_string(i); | string input_name = name + std::to_string(i); | ||||
GE_CHK_BOOL_RET_STATUS((input_name_idx.find(input_name) == input_name_idx.end()), GRAPH_FAILED, | |||||
GE_CHK_BOOL_RET_STATUS((input_name_idx_.find(input_name) == input_name_idx_.end()), GRAPH_FAILED, | |||||
"Add input tensor_desc is existed. name[%s]", input_name.c_str()); | "Add input tensor_desc is existed. name[%s]", input_name.c_str()); | ||||
std::shared_ptr<GeTensorDesc> in_desc = ComGraphMakeShared<GeTensorDesc>(GeTensorDesc()); | std::shared_ptr<GeTensorDesc> in_desc = ComGraphMakeShared<GeTensorDesc>(GeTensorDesc()); | ||||
@@ -251,24 +242,22 @@ graphStatus OpDesc::AddInputDescMiddle(const string &name, const unsigned int nu | |||||
(void)inputs_desc_.insert(inputs_desc_.begin() + index + i, in_desc); | (void)inputs_desc_.insert(inputs_desc_.begin() + index + i, in_desc); | ||||
// Update index in input_name_idx | // Update index in input_name_idx | ||||
for (auto it = input_name_idx.begin(); it != input_name_idx.end(); ++it) { | |||||
for (auto it = input_name_idx_.begin(); it != input_name_idx_.end(); ++it) { | |||||
if (it->second >= (index + i)) { | if (it->second >= (index + i)) { | ||||
it->second += 1; | it->second += 1; | ||||
} | } | ||||
} | } | ||||
(void)input_name_idx.insert(make_pair(input_name, i + index)); | |||||
(void)input_name_idx_.insert(make_pair(input_name, i + index)); | |||||
} | } | ||||
SetAllInputName(input_name_idx); | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
graphStatus OpDesc::AddInputDescForward(const string &name, const unsigned int num) { | graphStatus OpDesc::AddInputDescForward(const string &name, const unsigned int num) { | ||||
auto input_name_idx = GetAllInputName(); | |||||
for (unsigned int i = 0; i < num; i++) { | for (unsigned int i = 0; i < num; i++) { | ||||
string input_name = name + std::to_string(i); | string input_name = name + std::to_string(i); | ||||
GE_CHK_BOOL_RET_STATUS((input_name_idx.find(input_name) == input_name_idx.end()), GRAPH_FAILED, | |||||
GE_CHK_BOOL_RET_STATUS((input_name_idx_.find(input_name) == input_name_idx_.end()), GRAPH_FAILED, | |||||
"Add input tensor_desc is existed. name[%s]", input_name.c_str()); | "Add input tensor_desc is existed. name[%s]", input_name.c_str()); | ||||
std::shared_ptr<GeTensorDesc> in_desc = ComGraphMakeShared<GeTensorDesc>(GeTensorDesc()); | std::shared_ptr<GeTensorDesc> in_desc = ComGraphMakeShared<GeTensorDesc>(GeTensorDesc()); | ||||
@@ -279,13 +268,12 @@ graphStatus OpDesc::AddInputDescForward(const string &name, const unsigned int n | |||||
(void)inputs_desc_.insert(inputs_desc_.begin(), in_desc); | (void)inputs_desc_.insert(inputs_desc_.begin(), in_desc); | ||||
// Update index in input_name_idx | // Update index in input_name_idx | ||||
for (auto it = input_name_idx.begin(); it != input_name_idx.end(); ++it) { | |||||
for (auto it = input_name_idx_.begin(); it != input_name_idx_.end(); ++it) { | |||||
it->second += 1; | it->second += 1; | ||||
} | } | ||||
(void)input_name_idx.insert(make_pair(input_name, 0)); | |||||
(void)input_name_idx_.insert(make_pair(input_name, 0)); | |||||
} | } | ||||
SetAllInputName(input_name_idx); | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
@@ -316,19 +304,10 @@ graphStatus OpDesc::AddOutputDescForward(const string &name, const unsigned int | |||||
graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorDesc &input_desc) { | graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorDesc &input_desc) { | ||||
if (OpDesc::AddInputDesc(name, input_desc) == GRAPH_FAILED) return GRAPH_FAILED; | if (OpDesc::AddInputDesc(name, input_desc) == GRAPH_FAILED) return GRAPH_FAILED; | ||||
vector<string> optional_input_names; | |||||
(void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); | |||||
optional_input_names.push_back(name); | |||||
(void)AttrUtils::SetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); | |||||
(void)optional_input_names_.insert(name); | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
std::vector<string> OpDesc::GetAllOptionalInputName() const { | |||||
vector<string> optional_input_names; | |||||
(void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); | |||||
return optional_input_names; | |||||
} | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus | ||||
OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { | OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { | ||||
GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. index[%u]", index); | GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. index[%u]", index); | ||||
@@ -343,12 +322,11 @@ OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { | |||||
} | } | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescMembersAreEqual(const OpDesc &r_op_desc) const { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescMembersAreEqual(const OpDesc &r_op_desc) const { | ||||
return ( | |||||
IsEqual(this->GetAllInputName(), r_op_desc.GetAllInputName(), "OpDesc.GetAllInputName()") && | |||||
IsEqual(this->output_name_idx_, r_op_desc.output_name_idx_, "OpDesc.output_name_idx_") && | |||||
IsEqual(this->GetAllOptionalInputName(), r_op_desc.GetAllOptionalInputName(), "OpDesc.GetAllOptionalInputName()") && | |||||
IsEqual(this->engine_name_, r_op_desc.engine_name_, "OpDesc.engine_name_") && | |||||
IsEqual(this->op_kernel_lib_name_, r_op_desc.op_kernel_lib_name_, "OpDesc.op_kernel_lib_name_")); | |||||
return (IsEqual(this->input_name_idx_, r_op_desc.input_name_idx_, "OpDesc.input_name_idx_") && | |||||
IsEqual(this->output_name_idx_, r_op_desc.output_name_idx_, "OpDesc.output_name_idx_") && | |||||
IsEqual(this->optional_input_names_, r_op_desc.optional_input_names_, "OpDesc.optional_input_names_") && | |||||
IsEqual(this->engine_name_, r_op_desc.engine_name_, "OpDesc.engine_name_") && | |||||
IsEqual(this->op_kernel_lib_name_, r_op_desc.op_kernel_lib_name_, "OpDesc.op_kernel_lib_name_")); | |||||
} | } | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescAttrsAreEqual(const OpDesc &r_op_desc) const { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescAttrsAreEqual(const OpDesc &r_op_desc) const { | ||||
@@ -422,9 +400,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::operator==(const OpD | |||||
} | } | ||||
graphStatus OpDesc::UpdateInputDesc(const string &name, const ge::GeTensorDesc &tensor_Desc) { | graphStatus OpDesc::UpdateInputDesc(const string &name, const ge::GeTensorDesc &tensor_Desc) { | ||||
auto input_name_idx = GetAllInputName(); | |||||
auto it = input_name_idx.find(name); | |||||
if (it == input_name_idx.end()) { | |||||
auto it = input_name_idx_.find(name); | |||||
if (it == input_name_idx_.end()) { | |||||
GELOGW("Cann't find the input desc. name[%s]", name.c_str()); | GELOGW("Cann't find the input desc. name[%s]", name.c_str()); | ||||
return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
} | } | ||||
@@ -444,9 +421,8 @@ graphStatus OpDesc::UpdateInputDesc(const string &name, const ge::GeTensorDesc & | |||||
} | } | ||||
bool OpDesc::InputIsSet(const string &name) const { | bool OpDesc::InputIsSet(const string &name) const { | ||||
auto input_name_idx = GetAllInputName(); | |||||
auto it = input_name_idx.find(name); | |||||
if (it != input_name_idx.end()) { | |||||
auto it = input_name_idx_.find(name); | |||||
if (it != input_name_idx_.end()) { | |||||
GE_IF_BOOL_EXEC(it->second >= inputs_desc_.size(), GELOGE(GRAPH_FAILED, "it->second is invalid."); return false); | GE_IF_BOOL_EXEC(it->second >= inputs_desc_.size(), GELOGE(GRAPH_FAILED, "it->second is invalid."); return false); | ||||
auto tensor_desc = inputs_desc_[it->second]; | auto tensor_desc = inputs_desc_[it->second]; | ||||
GE_IF_BOOL_EXEC(tensor_desc == nullptr, GELOGE(GRAPH_FAILED, "tensor_desc is null."); return false); | GE_IF_BOOL_EXEC(tensor_desc == nullptr, GELOGE(GRAPH_FAILED, "tensor_desc is null."); return false); | ||||
@@ -464,9 +440,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc OpDesc::GetInputDesc | |||||
} | } | ||||
GeTensorDesc OpDesc::GetInputDesc(const string &name) const { | GeTensorDesc OpDesc::GetInputDesc(const string &name) const { | ||||
auto input_name_idx = GetAllInputName(); | |||||
auto it = input_name_idx.find(name); | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), GeTensorDesc()); | |||||
auto it = input_name_idx_.find(name); | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), GeTensorDesc()); | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(it->second < inputs_desc_.size(), GeTensorDesc()); | GE_CHK_BOOL_RET_STATUS_NOLOG(it->second < inputs_desc_.size(), GeTensorDesc()); | ||||
return *(inputs_desc_[it->second].get()); | return *(inputs_desc_[it->second].get()); | ||||
} | } | ||||
@@ -476,7 +451,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDescPtr OpDesc::MutableIn | |||||
if (inputs_desc_[index] == nullptr) { | if (inputs_desc_[index] == nullptr) { | ||||
return nullptr; | return nullptr; | ||||
} | } | ||||
GE_CHK_BOOL_RET_STATUS(inputs_desc_[index]->IsValid() == GRAPH_SUCCESS, nullptr, "input desc is invalid"); | |||||
if (inputs_desc_[index]->IsValid() != GRAPH_SUCCESS) { | |||||
GELOGW("input desc is invalid"); | |||||
return nullptr; | |||||
} | |||||
return inputs_desc_[index]; | return inputs_desc_[index]; | ||||
} | } | ||||
@@ -491,12 +469,11 @@ GeTensorDescPtr OpDesc::MutableInputDesc(const string &name) const { | |||||
} | } | ||||
GE_FUNC_HOST_VISIBILITY OpDesc::Vistor<string> OpDesc::GetAllInputNames() const { | GE_FUNC_HOST_VISIBILITY OpDesc::Vistor<string> OpDesc::GetAllInputNames() const { | ||||
auto input_name_idx = GetAllInputName(); | |||||
vector<string> names; | vector<string> names; | ||||
if (input_name_idx.empty()) { | |||||
if (input_name_idx_.empty()) { | |||||
return OpDesc::Vistor<string>(shared_from_this(), names); | return OpDesc::Vistor<string>(shared_from_this(), names); | ||||
} | } | ||||
for (std::pair<string, uint32_t> input : input_name_idx) { | |||||
for (std::pair<string, uint32_t> input : input_name_idx_) { | |||||
names.push_back(input.first); | names.push_back(input.first); | ||||
} | } | ||||
return OpDesc::Vistor<string>(shared_from_this(), names); | return OpDesc::Vistor<string>(shared_from_this(), names); | ||||
@@ -672,9 +649,8 @@ OpDesc::GetInputDescPtrDfault(uint32_t index) const { | |||||
} | } | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ConstGeTensorDescPtr OpDesc::GetInputDescPtr(const string &name) const { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ConstGeTensorDescPtr OpDesc::GetInputDescPtr(const string &name) const { | ||||
auto input_name_idx = GetAllInputName(); | |||||
auto it = input_name_idx.find(name); | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), shared_ptr<const GeTensorDesc>()); | |||||
auto it = input_name_idx_.find(name); | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), shared_ptr<const GeTensorDesc>()); | |||||
return inputs_desc_[it->second]; | return inputs_desc_[it->second]; | ||||
} | } | ||||
@@ -708,45 +684,12 @@ graphStatus OpDesc::AddDynamicOutputDesc(const string &name, const unsigned int | |||||
} | } | ||||
bool OpDesc::IsOptionalInput(const string &name) const { | bool OpDesc::IsOptionalInput(const string &name) const { | ||||
vector<string> optional_input_names; | |||||
(void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); | |||||
for (auto &item : optional_input_names) { | |||||
if (item == name) { | |||||
return true; | |||||
} | |||||
} | |||||
return false; | |||||
return optional_input_names_.find(name) != optional_input_names_.end(); | |||||
} | } | ||||
bool OpDesc::IsOptionalInput(uint32_t index) const { return IsOptionalInput(GetInputNameByIndex(index)); } | bool OpDesc::IsOptionalInput(uint32_t index) const { return IsOptionalInput(GetInputNameByIndex(index)); } | ||||
std::map<string, uint32_t> OpDesc::GetAllInputName() const { | |||||
std::map<string, uint32_t> input_name_idx; | |||||
std::vector<string> key; | |||||
std::vector<uint32_t> value; | |||||
(void)AttrUtils::GetListStr(this, ATTR_NAME_INPUT_NAME_IDX_KEY, key); | |||||
(void)AttrUtils::GetListInt(this, ATTR_NAME_INPUT_NAME_IDX_VALUE, value); | |||||
if (key.size() != value.size()) { | |||||
GE_LOGE("twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size()); | |||||
} else { | |||||
for (uint32_t i = 0; i < key.size(); ++i) { | |||||
input_name_idx.insert(std::pair<string, uint32_t>(key.at(i), value.at(i))); | |||||
} | |||||
} | |||||
return input_name_idx; | |||||
} | |||||
void OpDesc::SetAllInputName(const std::map<string, uint32_t> &input_name_idx) { | |||||
std::vector<string> key; | |||||
std::vector<uint32_t> value; | |||||
for (auto &item : input_name_idx) { | |||||
key.emplace_back(item.first); | |||||
value.emplace_back(item.second); | |||||
} | |||||
(void)AttrUtils::SetListStr(this, ATTR_NAME_INPUT_NAME_IDX_KEY, key); | |||||
(void)AttrUtils::SetListInt(this, ATTR_NAME_INPUT_NAME_IDX_VALUE, value); | |||||
} | |||||
std::map<string, uint32_t> OpDesc::GetAllInputName() const { return input_name_idx_; } | |||||
std::map<string, uint32_t> OpDesc::GetAllOutputName() { return output_name_idx_; } | std::map<string, uint32_t> OpDesc::GetAllOutputName() { return output_name_idx_; } | ||||
@@ -757,7 +700,6 @@ bool OpDesc::UpdateInputName(std::map<string, uint32_t> input_name_idx) { | |||||
auto factory_map_size = input_name_idx.size(); | auto factory_map_size = input_name_idx.size(); | ||||
// It indicates that some inputs have no optionalname. | // It indicates that some inputs have no optionalname. | ||||
// The redundant optionalname of factory needs to be deleted and then assigned | // The redundant optionalname of factory needs to be deleted and then assigned | ||||
auto all_input_name_idx = GetAllInputName(); | |||||
if (input_map_size < factory_map_size) { | if (input_map_size < factory_map_size) { | ||||
GELOGI("UpdateInputName org inputname map size: %zu, factory inputname map size: %zu", input_map_size, | GELOGI("UpdateInputName org inputname map size: %zu, factory inputname map size: %zu", input_map_size, | ||||
factory_map_size); | factory_map_size); | ||||
@@ -770,18 +712,17 @@ bool OpDesc::UpdateInputName(std::map<string, uint32_t> input_name_idx) { | |||||
} | } | ||||
if (input_name_idx.size() == input_map_size) { | if (input_name_idx.size() == input_map_size) { | ||||
GELOGI("UpdateInputName"); | GELOGI("UpdateInputName"); | ||||
all_input_name_idx = input_name_idx; | |||||
input_name_idx_ = input_name_idx; | |||||
} else { | } else { | ||||
ret = false; | ret = false; | ||||
GELOGW("after UpdateInputName factoryName map size : %zu", input_name_idx.size()); | GELOGW("after UpdateInputName factoryName map size : %zu", input_name_idx.size()); | ||||
} | } | ||||
} else if (input_map_size == factory_map_size) { | } else if (input_map_size == factory_map_size) { | ||||
all_input_name_idx = input_name_idx; | |||||
input_name_idx_ = input_name_idx; | |||||
} else { | } else { | ||||
ret = false; | ret = false; | ||||
GELOGW("org inputname map size: %zu, factory inputname map size: %zu", input_map_size, factory_map_size); | GELOGW("org inputname map size: %zu, factory inputname map size: %zu", input_map_size, factory_map_size); | ||||
} | } | ||||
SetAllInputName(all_input_name_idx); | |||||
return ret; | return ret; | ||||
} | } | ||||
@@ -924,21 +865,19 @@ graphStatus OpDesc::CommonVerify() const { | |||||
} | } | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetInputNameByIndex(uint32_t index) const { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetInputNameByIndex(uint32_t index) const { | ||||
auto input_name_idx = GetAllInputName(); | |||||
auto it = input_name_idx.begin(); | |||||
for (; it != input_name_idx.end(); ++it) { | |||||
auto it = input_name_idx_.begin(); | |||||
for (; it != input_name_idx_.end(); ++it) { | |||||
if (it->second == index) { | if (it->second == index) { | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), ""); | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), ""); | |||||
return it->first; | return it->first; | ||||
} | } | ||||
int OpDesc::GetInputIndexByName(const string &name) const { | int OpDesc::GetInputIndexByName(const string &name) const { | ||||
auto input_name_idx = GetAllInputName(); | |||||
auto it_find = input_name_idx.find(name); | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != input_name_idx.end(), -1); | |||||
auto it_find = input_name_idx_.find(name); | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != input_name_idx_.end(), -1); | |||||
return static_cast<int>(it_find->second); | return static_cast<int>(it_find->second); | ||||
} | } | ||||
@@ -1231,12 +1170,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector<bool> OpDesc::GetIsInputCo | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDesc::RestoreInputNameIdx(const string &name, | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDesc::RestoreInputNameIdx(const string &name, | ||||
const int &index) { | const int &index) { | ||||
auto input_name_idx = GetAllInputName(); | |||||
if (input_name_idx.find(name) != input_name_idx.end()) { | |||||
if (input_name_idx_.find(name) != input_name_idx_.end()) { | |||||
GELOGI("Restore input name index is existed. name[%s]", name.c_str()); | GELOGI("Restore input name index is existed. name[%s]", name.c_str()); | ||||
} | } | ||||
(void)input_name_idx.insert(make_pair(name, index)); | |||||
SetAllInputName(input_name_idx); | |||||
(void)input_name_idx_.insert(make_pair(name, index)); | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
@@ -170,6 +170,7 @@ graphStatus RefRelations::Impl::BuildRefRelationsForWhile( | |||||
// data_nodes has been sorted | // data_nodes has been sorted | ||||
// for while, input num must be same as output num | // for while, input num must be same as output num | ||||
auto input_num = root_node->GetAllInDataAnchorsSize(); | auto input_num = root_node->GetAllInDataAnchorsSize(); | ||||
NodePtr netoutput = nullptr; | |||||
size_t ref_i = 0; | size_t ref_i = 0; | ||||
while (ref_i < input_num) { | while (ref_i < input_num) { | ||||
@@ -212,10 +213,44 @@ graphStatus RefRelations::Impl::BuildRefRelationsForWhile( | |||||
cell_netoutput_in.in_out = NODE_IN; | cell_netoutput_in.in_out = NODE_IN; | ||||
cell_netoutput_in.in_out_idx = ele.second; | cell_netoutput_in.in_out_idx = ele.second; | ||||
ref_i_all_refs.emplace_back(cell_netoutput_in); | ref_i_all_refs.emplace_back(cell_netoutput_in); | ||||
netoutput = ele.first; | |||||
} | } | ||||
node_refs.emplace_back(ref_i_all_refs); | node_refs.emplace_back(ref_i_all_refs); | ||||
ref_i++; | ref_i++; | ||||
} | } | ||||
/* There exist scene like the follows, it means data0 data1 netoutput 0'th | |||||
* and 1'th tensor should be the same addr. | |||||
* Data0 Data1 | |||||
* \/ | |||||
* /\ | |||||
* netoutput | |||||
*/ | |||||
if (netoutput == nullptr) { | |||||
return GRAPH_SUCCESS; | |||||
} | |||||
for (const auto &in_anchor : netoutput->GetAllInDataAnchors()) { | |||||
auto peer_out_data_anchor = in_anchor->GetPeerOutAnchor(); | |||||
if (peer_out_data_anchor == nullptr) { | |||||
continue; | |||||
} | |||||
auto peer_out_data_node = peer_out_data_anchor->GetOwnerNode(); | |||||
if (peer_out_data_node == nullptr || peer_out_data_node->GetOpDesc() == nullptr) { | |||||
GELOGW("Node[%s]\'s peer_out_data_node or peer_out_data_node desc is null", (netoutput->GetName()).c_str()); | |||||
continue; | |||||
} | |||||
if (peer_out_data_node->GetType() != DATA) { | |||||
continue; | |||||
} | |||||
auto in_data_anchor_idx = in_anchor->GetIdx(); | |||||
auto net_in_desc = netoutput->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_data_anchor_idx)); | |||||
int ref_d; | |||||
int ref_n; | |||||
(void)AttrUtils::GetInt(peer_out_data_node->GetOpDesc(), kRefIndex, ref_d); | |||||
(void)AttrUtils::GetInt(net_in_desc, kRefIndex, ref_n); | |||||
node_refs[ref_d].insert(node_refs[ref_d].end(), node_refs[ref_n].begin(), node_refs[ref_n].end()); | |||||
node_refs[ref_n].insert(node_refs[ref_n].end(), node_refs[ref_d].begin(), node_refs[ref_d].end()); | |||||
} | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
@@ -49,10 +49,6 @@ graphStatus ReverseBrushWhileBodySubGraph(const ConstNodePtr &node) { | |||||
} | } | ||||
for (const auto &node_sub : sub_graph_body->GetAllNodes()) { | for (const auto &node_sub : sub_graph_body->GetAllNodes()) { | ||||
if (node_sub->GetInDataNodes().size() == 0) { | |||||
continue; | |||||
} | |||||
for (size_t i = 0; i < node_sub->GetAllInDataAnchorsSize(); i++) { | for (size_t i = 0; i < node_sub->GetAllInDataAnchorsSize(); i++) { | ||||
auto input_desc = node_sub->GetOpDesc()->MutableInputDesc(i); | auto input_desc = node_sub->GetOpDesc()->MutableInputDesc(i); | ||||
(void)input_desc->SetUnknownDimNumShape(); | (void)input_desc->SetUnknownDimNumShape(); | ||||
@@ -303,11 +299,11 @@ graphStatus UpdateParentNodeOutTensor(const ConstNodePtr &node) { | |||||
} | } | ||||
} // namespace | } // namespace | ||||
void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase) { | void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase) { | ||||
if (node == nullptr) { | |||||
GELOGE(GRAPH_FAILED, "node is null"); | |||||
if (!IsLogEnable(GE, DLOG_DEBUG)) { | |||||
return; | return; | ||||
} | } | ||||
if (!IsLogEnable(GE, DLOG_DEBUG)) { | |||||
if (node == nullptr) { | |||||
GELOGE(GRAPH_FAILED, "node is null"); | |||||
return; | return; | ||||
} | } | ||||
ge::OpDescPtr op_desc = node->GetOpDesc(); | ge::OpDescPtr op_desc = node->GetOpDesc(); | ||||
@@ -325,6 +321,18 @@ void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::str | |||||
TypeUtils::FormatToSerialString(input_desc->GetFormat()) + " "; | TypeUtils::FormatToSerialString(input_desc->GetFormat()) + " "; | ||||
} | } | ||||
str += input_desc_str; | str += input_desc_str; | ||||
input_desc_str = "input origin shape: "; | |||||
for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { | |||||
input_desc_str += "["; | |||||
for (int64_t dim : input_desc->GetOriginShape().GetDims()) { | |||||
input_desc_str += std::to_string(dim) + " "; | |||||
} | |||||
input_desc_str += "]"; | |||||
input_desc_str += ":" + TypeUtils::DataTypeToSerialString(input_desc->GetOriginDataType()) + ":" + | |||||
TypeUtils::FormatToSerialString(input_desc->GetOriginFormat()) + " "; | |||||
} | |||||
str += input_desc_str; | |||||
} | } | ||||
if (op_desc->GetAllOutputsDescSize() != 0) { | if (op_desc->GetAllOutputsDescSize() != 0) { | ||||
@@ -342,6 +350,21 @@ void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::str | |||||
TypeUtils::FormatToSerialString(output_desc->GetFormat()) + " "; | TypeUtils::FormatToSerialString(output_desc->GetFormat()) + " "; | ||||
} | } | ||||
str += output_desc_str; | str += output_desc_str; | ||||
output_desc_str = "output origin shape: "; | |||||
for (const auto &output_desc : op_desc->GetAllOutputsDescPtr()) { | |||||
if (output_desc == nullptr) { | |||||
continue; | |||||
} | |||||
output_desc_str += "["; | |||||
for (int64_t dim : output_desc->GetOriginShape().GetDims()) { | |||||
output_desc_str += std::to_string(dim) + " "; | |||||
} | |||||
output_desc_str += "]"; | |||||
output_desc_str += ":" + TypeUtils::DataTypeToSerialString(output_desc->GetOriginDataType()) + ":" + | |||||
TypeUtils::FormatToSerialString(output_desc->GetOriginFormat()) + " "; | |||||
} | |||||
str += output_desc_str; | |||||
} | } | ||||
GELOGD("Shape dump [%s], Node name: [%s]. %s", phase.c_str(), node->GetName().c_str(), str.c_str()); | GELOGD("Shape dump [%s], Node name: [%s]. %s", phase.c_str(), node->GetName().c_str(), str.c_str()); | ||||
} | } | ||||
@@ -362,7 +385,6 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator & | |||||
return ret; | return ret; | ||||
} | } | ||||
} | } | ||||
// Get infer func and execute | // Get infer func and execute | ||||
ret = op_desc->CallInferFunc(op); | ret = op_desc->CallInferFunc(op); | ||||
if (ret == GRAPH_PARAM_INVALID) { | if (ret == GRAPH_PARAM_INVALID) { | ||||
@@ -479,19 +501,20 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh | |||||
GELOGE(GRAPH_FAILED, "Verifying %s failed.", node->GetName().c_str()); | GELOGE(GRAPH_FAILED, "Verifying %s failed.", node->GetName().c_str()); | ||||
return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
} | } | ||||
PrintInOutTensorShape(node, "before_infershape"); | |||||
Operator op = OpDescUtils::CreateOperatorFromNode(node); | |||||
auto inference_context = CreateInferenceContext(context_map, node); | |||||
if (inference_context == nullptr) { | |||||
GELOGE(GRAPH_FAILED, "inference context is null"); | |||||
return GRAPH_FAILED; | |||||
bool is_unknown_graph = node->GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||||
if (!is_unknown_graph) { | |||||
auto inference_context = CreateInferenceContext(context_map, node); | |||||
if (inference_context == nullptr) { | |||||
GELOGE(GRAPH_FAILED, "inference context is null"); | |||||
return GRAPH_FAILED; | |||||
} | |||||
GELOGD("create context for node:%s, marks %zu", node->GetName().c_str(), inference_context->GetMarks().size()); | |||||
op.SetInferenceContext(inference_context); | |||||
} | } | ||||
GELOGD("create context for node:%s, marks %zu", node->GetName().c_str(), inference_context->GetMarks().size()); | |||||
PrintInOutTensorShape(node, "before_infershape"); | |||||
Operator op = OpDescUtils::CreateOperatorFromNode(node); | |||||
op.SetInferenceContext(inference_context); | |||||
graphStatus status = InferShapeAndType(node, op, before_subgraph); | graphStatus status = InferShapeAndType(node, op, before_subgraph); | ||||
if (status == GRAPH_PARAM_INVALID || status == GRAPH_SUCCESS) { | if (status == GRAPH_PARAM_INVALID || status == GRAPH_SUCCESS) { | ||||
(void)ge::NodeUtils::UpdatePeerNodeInputDesc(node); | (void)ge::NodeUtils::UpdatePeerNodeInputDesc(node); | ||||
@@ -499,16 +522,17 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh | |||||
GELOGE(GRAPH_FAILED, "%s call infer function failed.", node->GetName().c_str()); | GELOGE(GRAPH_FAILED, "%s call infer function failed.", node->GetName().c_str()); | ||||
return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
} | } | ||||
auto ctx_after_infer = op.GetInferenceContext(); | |||||
if (ctx_after_infer != nullptr) { | |||||
GELOGD("[%s] after infershape. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size()); | |||||
if (!ctx_after_infer->GetOutputHandleShapesAndTypes().empty() || !ctx_after_infer->GetMarks().empty()) { | |||||
GELOGD("[%s] set inference context after. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size()); | |||||
(void)context_map.emplace(node, ctx_after_infer); | |||||
if (!is_unknown_graph) { | |||||
auto ctx_after_infer = op.GetInferenceContext(); | |||||
if (ctx_after_infer != nullptr) { | |||||
GELOGD("[%s] after infershape. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size()); | |||||
if (!ctx_after_infer->GetOutputHandleShapesAndTypes().empty() || !ctx_after_infer->GetMarks().empty()) { | |||||
GELOGD("[%s] set inference context after. mark:%zu", node->GetName().c_str(), | |||||
ctx_after_infer->GetMarks().size()); | |||||
(void)context_map.emplace(node, ctx_after_infer); | |||||
} | |||||
} | } | ||||
} | } | ||||
PrintInOutTensorShape(node, "after_infershape"); | PrintInOutTensorShape(node, "after_infershape"); | ||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
@@ -1,18 +1,18 @@ | |||||
/** | /** | ||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
* You may obtain a copy of the License at | * You may obtain a copy of the License at | ||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | * http://www.apache.org/licenses/LICENSE-2.0 | ||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | * Unless required by applicable law or agreed to in writing, software | ||||
* distributed under the License is distributed on an "AS IS" BASIS, | * distributed under the License is distributed on an "AS IS" BASIS, | ||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
* See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
* limitations under the License. | * limitations under the License. | ||||
*/ | |||||
*/ | |||||
#ifndef COMMON_GRAPH_UTILS_GE_IR_UTILS_H_ | #ifndef COMMON_GRAPH_UTILS_GE_IR_UTILS_H_ | ||||
#define COMMON_GRAPH_UTILS_GE_IR_UTILS_H_ | #define COMMON_GRAPH_UTILS_GE_IR_UTILS_H_ | ||||
@@ -295,14 +295,16 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||||
if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
} | } | ||||
bool is_unknown_graph = node_ptr->GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||||
if (is_unknown_graph) { | |||||
return GRAPH_SUCCESS; | |||||
} | |||||
for (const auto &out_anchor : node_ptr->GetAllOutDataAnchors()) { | for (const auto &out_anchor : node_ptr->GetAllOutDataAnchors()) { | ||||
auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); | auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); | ||||
ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast<uint32_t>(output_tensor->GetShape().GetDims().size())); | ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast<uint32_t>(output_tensor->GetShape().GetDims().size())); | ||||
bool is_unknown_graph = node_ptr->GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||||
if (!is_unknown_graph) { | |||||
output_tensor->SetOriginShape(output_tensor->GetShape()); | |||||
output_tensor->SetOriginDataType(output_tensor->GetDataType()); | |||||
} | |||||
output_tensor->SetOriginShape(output_tensor->GetShape()); | |||||
output_tensor->SetOriginDataType(output_tensor->GetDataType()); | |||||
GELOGD("node name is %s, origin shape is %ld, origin format is %s, origin data type is %s", | GELOGD("node name is %s, origin shape is %ld, origin format is %s, origin data type is %s", | ||||
node_ptr->GetName().c_str(), output_tensor->GetOriginShape().GetShapeSize(), | node_ptr->GetName().c_str(), output_tensor->GetOriginShape().GetShapeSize(), | ||||
TypeUtils::FormatToSerialString(output_tensor->GetOriginFormat()).c_str(), | TypeUtils::FormatToSerialString(output_tensor->GetOriginFormat()).c_str(), | ||||
@@ -321,8 +323,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||||
GELOGI("Peer input opdesc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d", | GELOGI("Peer input opdesc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d", | ||||
peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), output_tensor->GetShape().GetDimNum(), | peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), output_tensor->GetShape().GetDimNum(), | ||||
output_tensor->GetDataType(), output_tensor->GetOriginDataType()); | output_tensor->GetDataType(), output_tensor->GetOriginDataType()); | ||||
peer_input_desc->SetShape(output_tensor->GetShape()); | |||||
peer_input_desc->SetOriginShape(output_tensor->GetOriginShape()); | peer_input_desc->SetOriginShape(output_tensor->GetOriginShape()); | ||||
peer_input_desc->SetShape(output_tensor->GetShape()); | |||||
peer_input_desc->SetDataType(output_tensor->GetDataType()); | peer_input_desc->SetDataType(output_tensor->GetDataType()); | ||||
peer_input_desc->SetOriginDataType(output_tensor->GetOriginDataType()); | peer_input_desc->SetOriginDataType(output_tensor->GetOriginDataType()); | ||||
std::vector<std::pair<int64_t, int64_t>> shape_range; | std::vector<std::pair<int64_t, int64_t>> shape_range; | ||||
@@ -337,6 +339,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||||
} | } | ||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
bool NodeUtils::IsInNodesEmpty(const Node &node) { | bool NodeUtils::IsInNodesEmpty(const Node &node) { | ||||
for (const auto &in_anchor : node.in_data_anchors_) { | for (const auto &in_anchor : node.in_data_anchors_) { | ||||
if (in_anchor != nullptr) { | if (in_anchor != nullptr) { | ||||
@@ -446,6 +449,7 @@ std::string NodeUtils::GetNodeType(const Node &node) { | |||||
(void)AttrUtils::GetStr(node.GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type); | (void)AttrUtils::GetStr(node.GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type); | ||||
return type; | return type; | ||||
} | } | ||||
ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { | ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { | ||||
auto op_desc = node.GetOpDesc(); | auto op_desc = node.GetOpDesc(); | ||||
if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
@@ -498,6 +502,14 @@ bool NodeUtils::IsSubgraphInput(const NodePtr &node) { | |||||
return false; | return false; | ||||
} | } | ||||
if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | ||||
bool is_unknown_shape = false; | |||||
(void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); | |||||
if (is_unknown_shape) return false; | |||||
} | |||||
if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) && | |||||
kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 && | |||||
kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) { | |||||
return false; | return false; | ||||
} | } | ||||
@@ -519,7 +531,16 @@ bool NodeUtils::IsSubgraphOutput(const NodePtr &node) { | |||||
if (parent_op_desc == nullptr) { | if (parent_op_desc == nullptr) { | ||||
return false; | return false; | ||||
} | } | ||||
if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | ||||
bool is_unknown_shape = false; | |||||
(void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); | |||||
if (is_unknown_shape) return false; | |||||
} | |||||
if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) && | |||||
kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 && | |||||
kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) { | |||||
return false; | return false; | ||||
} | } | ||||
@@ -95,7 +95,18 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | ||||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | ||||
"graph/load/new_model_manager/task_info/task_info.cc" | "graph/load/new_model_manager/task_info/task_info.cc" | ||||
"graph/manager/*.cc" | |||||
"graph/manager/graph_caching_allocator.cc" | |||||
"graph/manager/graph_context.cc" | |||||
"graph/manager/graph_manager.cc" | |||||
"graph/manager/graph_manager_utils.cc" | |||||
"graph/manager/graph_mem_allocator.cc" | |||||
"graph/manager/graph_var_manager.cc" | |||||
"graph/manager/model_manager/event_manager.cc" | |||||
"graph/manager/trans_var_data_utils.cc" | |||||
"graph/manager/util/debug.cc" | |||||
"graph/manager/util/hcom_util.cc" | |||||
"graph/manager/util/rt_context_util.cc" | |||||
"graph/manager/util/variable_accelerate_ctrl.cc" | |||||
"graph/manager/model_manager/event_manager.cc" | "graph/manager/model_manager/event_manager.cc" | ||||
"graph/manager/util/debug.cc" | "graph/manager/util/debug.cc" | ||||
"graph/manager/util/hcom_util.cc" | "graph/manager/util/hcom_util.cc" | ||||
@@ -240,7 +251,17 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | "graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | ||||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | "graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | ||||
"graph/load/new_model_manager/task_info/task_info.cc" | "graph/load/new_model_manager/task_info/task_info.cc" | ||||
"graph/manager/*.cc" | |||||
"graph/manager/graph_caching_allocator.cc" | |||||
"graph/manager/graph_context.cc" | |||||
"graph/manager/graph_manager.cc" | |||||
"graph/manager/graph_manager_utils.cc" | |||||
"graph/manager/graph_mem_allocator.cc" | |||||
"graph/manager/graph_var_manager.cc" | |||||
"graph/manager/model_manager/event_manager.cc" | |||||
"graph/manager/trans_var_data_utils.cc" | |||||
"graph/manager/util/debug.cc" | |||||
"graph/manager/util/rt_context_util.cc" | |||||
"graph/manager/util/variable_accelerate_ctrl.cc" | |||||
"graph/manager/model_manager/event_manager.cc" | "graph/manager/model_manager/event_manager.cc" | ||||
"graph/manager/util/debug.cc" | "graph/manager/util/debug.cc" | ||||
"graph/manager/util/rt_context_util.cc" | "graph/manager/util/rt_context_util.cc" | ||||
@@ -54,6 +54,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
"helper/om_file_helper.cc" | "helper/om_file_helper.cc" | ||||
"math/fp16_math.cc" | "math/fp16_math.cc" | ||||
"model_parser/base.cc" | "model_parser/base.cc" | ||||
# "model_parser/graph_parser_util.cc" | |||||
"model_saver.cc" | "model_saver.cc" | ||||
"op/attr_value_util.cc" | "op/attr_value_util.cc" | ||||
"op/ge_op_utils.cc" | "op/ge_op_utils.cc" | ||||
@@ -21,7 +21,6 @@ | |||||
#include <sstream> | #include <sstream> | ||||
#include <string> | #include <string> | ||||
#include <vector> | #include <vector> | ||||
#include "external/graph/types.h" | #include "external/graph/types.h" | ||||
#include "graph/ge_tensor.h" | #include "graph/ge_tensor.h" | ||||
@@ -182,7 +182,7 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { | |||||
} | } | ||||
void TBEPluginManager::LoadCustomOpLib() { | void TBEPluginManager::LoadCustomOpLib() { | ||||
LoadPluginSo(); | |||||
LoadPluginSo(options_); | |||||
std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | ||||
GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | ||||
@@ -193,10 +193,13 @@ void TBEPluginManager::LoadCustomOpLib() { | |||||
} | } | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo() { | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo( | |||||
const std::map<string, string> &options) { | |||||
vector<string> file_list; | vector<string> file_list; | ||||
string caffe_parser_path; | string caffe_parser_path; | ||||
std::string plugin_path; | std::string plugin_path; | ||||
options_ = options; | |||||
GetCustomOpPath(plugin_path); | GetCustomOpPath(plugin_path); | ||||
// Whether there are files in the plugin so path | // Whether there are files in the plugin so path | ||||
@@ -48,7 +48,7 @@ class TBEPluginManager { | |||||
static void InitPreparation(const std::map<string, string> &options); | static void InitPreparation(const std::map<string, string> &options); | ||||
void LoadPluginSo(); | |||||
void LoadPluginSo(const std::map<string, string> &options); | |||||
private: | private: | ||||
TBEPluginManager() = default; | TBEPluginManager() = default; | ||||
@@ -36,6 +36,7 @@ GE_COMMON_LOCAL_SRC_FILES := \ | |||||
properties_manager.cc \ | properties_manager.cc \ | ||||
types.cc\ | types.cc\ | ||||
model_parser/base.cc \ | model_parser/base.cc \ | ||||
model_parser/graph_parser_util.cc \ | |||||
tbe_kernel_store.cc \ | tbe_kernel_store.cc \ | ||||
op/attr_value_util.cc \ | op/attr_value_util.cc \ | ||||
op/ge_op_utils.cc \ | op/ge_op_utils.cc \ | ||||
@@ -91,9 +91,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||||
} | } | ||||
auto ge_model_weight = ge_model->GetWeight(); | auto ge_model_weight = ge_model->GetWeight(); | ||||
GELOGI("WEIGHTS_DATA size is %zu , %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); | GELOGI("WEIGHTS_DATA size is %zu , %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); | ||||
if (SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, ge_model_weight.GetData(), | |||||
ge_model_weight.GetSize()) != SUCCESS) { | |||||
GELOGW("Add weight partition failed"); // weight is not necessary | |||||
// weight is not necessary | |||||
if (ge_model_weight.GetSize() > 0) { | |||||
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, | |||||
ge_model_weight.GetData(), ge_model_weight.GetSize()), | |||||
"Add weight partition failed"); | |||||
} | } | ||||
TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); | TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); | ||||
@@ -239,45 +241,48 @@ ModelHelper::SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::strin | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(const ge::ModelData &model_data) { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(const ge::ModelData &model_data) { | ||||
if (model_data.model_data == nullptr || model_data.model_len == 0) { | if (model_data.model_data == nullptr || model_data.model_len == 0) { | ||||
GELOGE(FAILED, "Model_data is nullptr, or model_data_size is 0"); | |||||
return FAILED; | |||||
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0"); | |||||
return GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||||
} | } | ||||
if (is_assign_model_) { | if (is_assign_model_) { | ||||
GELOGE(FAILED, "Model helper has already loaded!"); | |||||
return FAILED; | |||||
GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); | |||||
return GE_EXEC_LOAD_MODEL_REPEATED; | |||||
} | } | ||||
if (ReleaseLocalModelData() != SUCCESS) { | if (ReleaseLocalModelData() != SUCCESS) { | ||||
GELOGE(FAILED, "ReleaseLocalModelData failed."); | |||||
return FAILED; | |||||
GELOGE(INTERNAL_ERROR, "ReleaseLocalModelData failed."); | |||||
return INTERNAL_ERROR; | |||||
} | } | ||||
Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | |||||
if (ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_) != SUCCESS) { | if (ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_) != SUCCESS) { | ||||
GELOGE(FAILED, "Parse model content failed!"); | |||||
return FAILED; | |||||
GELOGE(status, "Parse model content failed!"); | |||||
return status; | |||||
} | } | ||||
file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data); | file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data); | ||||
OmFileLoadHelper om_load_helper; | OmFileLoadHelper om_load_helper; | ||||
if (om_load_helper.Init(model_addr_tmp_, model_len_tmp_) != SUCCESS) { | |||||
GELOGE(FAILED, "Om_load_helper init failed"); | |||||
status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "Om_load_helper init failed"); | |||||
model_addr_tmp_ = nullptr; | model_addr_tmp_ = nullptr; | ||||
return FAILED; | |||||
return status; | |||||
} | } | ||||
auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_addr_tmp_); | auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_addr_tmp_); | ||||
if (partition_table->num == kOriginalOmPartitionNum) { | if (partition_table->num == kOriginalOmPartitionNum) { | ||||
model_addr_tmp_ = nullptr; | model_addr_tmp_ = nullptr; | ||||
GELOGE(FAILED, "om model is error,please use executable om model"); | |||||
return FAILED; | |||||
GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "om model is error,please use executable om model"); | |||||
return GE_EXEC_MODEL_PARTITION_NUM_INVALID; | |||||
} | } | ||||
// Encrypt model need to del temp model/no encrypt model don't need to del model | // Encrypt model need to del temp model/no encrypt model don't need to del model | ||||
model_addr_tmp_ = nullptr; | model_addr_tmp_ = nullptr; | ||||
if (GenerateGeModel(om_load_helper) != SUCCESS) { | |||||
GELOGE(FAILED, "GenerateGeModel failed"); | |||||
return FAILED; | |||||
status = GenerateGeModel(om_load_helper); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "GenerateGeModel failed"); | |||||
return status; | |||||
} | } | ||||
is_assign_model_ = true; | is_assign_model_ = true; | ||||
@@ -289,19 +294,19 @@ Status ModelHelper::GenerateGeModel(OmFileLoadHelper &om_load_helper) { | |||||
GE_CHECK_NOTNULL(model_); | GE_CHECK_NOTNULL(model_); | ||||
Status ret = LoadModelData(om_load_helper); | Status ret = LoadModelData(om_load_helper); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | |||||
return GE_EXEC_LOAD_MODEL_PARTITION_FAILED; | |||||
} | } | ||||
ret = LoadWeights(om_load_helper); | ret = LoadWeights(om_load_helper); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | |||||
return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; | |||||
} | } | ||||
ret = LoadTask(om_load_helper); | ret = LoadTask(om_load_helper); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | |||||
return GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||||
} | } | ||||
ret = LoadTBEKernelStore(om_load_helper); | ret = LoadTBEKernelStore(om_load_helper); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return ret; | |||||
return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -41,8 +41,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(c | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(uint8_t *model_data, | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(uint8_t *model_data, | ||||
const uint32_t model_data_size) { | const uint32_t model_data_size) { | ||||
if (LoadModelPartitionTable(model_data, model_data_size) != SUCCESS) { | |||||
return FAILED; | |||||
Status status = LoadModelPartitionTable(model_data, model_data_size); | |||||
if (status != SUCCESS) { | |||||
return status; | |||||
} | } | ||||
is_inited_ = true; | is_inited_ = true; | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -66,7 +67,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod | |||||
} | } | ||||
if (!found) { | if (!found) { | ||||
if (type != ModelPartitionType::TBE_KERNELS) { | |||||
if (type != ModelPartitionType::TBE_KERNELS && type != ModelPartitionType::WEIGHTS_DATA) { | |||||
GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast<int>(type)); | GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast<int>(type)); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -83,7 +84,9 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { | |||||
// Model length too small | // Model length too small | ||||
if (model.model_len < (sizeof(ModelFileHeader) + sizeof(ModelPartitionTable))) { | if (model.model_len < (sizeof(ModelFileHeader) + sizeof(ModelPartitionTable))) { | ||||
GELOGE(PARAM_INVALID, "Invalid model. length < sizeof(ModelFileHeader) + sizeof(ModelPartitionTable)."); | |||||
GELOGE(PARAM_INVALID, | |||||
"Invalid model. length[%u] < sizeof(ModelFileHeader)[%zu] + sizeof(ModelPartitionTable)[%zu].", | |||||
model.model_len, sizeof(ModelFileHeader), sizeof(ModelPartitionTable)); | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
@@ -93,9 +96,9 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { | |||||
if ((model_header->length != model.model_len - sizeof(ModelFileHeader)) || | if ((model_header->length != model.model_len - sizeof(ModelFileHeader)) || | ||||
(MODEL_FILE_MAGIC_NUM != model_header->magic)) { | (MODEL_FILE_MAGIC_NUM != model_header->magic)) { | ||||
GELOGE(PARAM_INVALID, | GELOGE(PARAM_INVALID, | ||||
"Invalid model. file_header->length(%u) + sizeof(ModelFileHeader)(%zu) != model->model_len(%u) || " | |||||
"MODEL_FILE_MAGIC_NUM != file_header->magic", | |||||
model_header->length, sizeof(ModelFileHeader), model.model_len); | |||||
"Invalid model. file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || " | |||||
"MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", | |||||
model_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, model_header->magic); | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -112,16 +115,16 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||||
// Original model partition include graph-info | // Original model partition include graph-info | ||||
if ((partition_table->num != PARTITION_SIZE) && (partition_table->num != (PARTITION_SIZE - 1)) && | if ((partition_table->num != PARTITION_SIZE) && (partition_table->num != (PARTITION_SIZE - 1)) && | ||||
(partition_table->num != 1)) { | (partition_table->num != 1)) { | ||||
GELOGE(PARAM_INVALID, "Invalid partition_table->num:%u", partition_table->num); | |||||
return PARAM_INVALID; | |||||
GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "Invalid partition_table->num:%u", partition_table->num); | |||||
return GE_EXEC_MODEL_PARTITION_NUM_INVALID; | |||||
} | } | ||||
size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | ||||
GELOGI("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | GELOGI("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | ||||
partition_table->num, sizeof(ModelFileHeader), mem_offset); | partition_table->num, sizeof(ModelFileHeader), mem_offset); | ||||
if (model_data_size <= mem_offset) { | if (model_data_size <= mem_offset) { | ||||
GELOGE(PARAM_INVALID, "invalid model data, partition_table->num:%u, model data size %u", partition_table->num, | |||||
model_data_size); | |||||
return PARAM_INVALID; | |||||
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | |||||
partition_table->num, model_data_size); | |||||
return GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||||
} | } | ||||
for (uint32_t i = 0; i < partition_table->num; i++) { | for (uint32_t i = 0; i < partition_table->num; i++) { | ||||
ModelPartition partition; | ModelPartition partition; | ||||
@@ -131,9 +134,9 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||||
context_.partition_datas_.push_back(partition); | context_.partition_datas_.push_back(partition); | ||||
if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) { | if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) { | ||||
GELOGE(PARAM_INVALID, "The partition size %zu is greater than the model data size %u.", | |||||
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.", | |||||
partition.size + mem_offset, model_data_size); | partition.size + mem_offset, model_data_size); | ||||
return PARAM_INVALID; | |||||
return GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||||
} | } | ||||
mem_offset += partition.size; | mem_offset += partition.size; | ||||
GELOGI("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | GELOGI("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | ||||
@@ -35,15 +35,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro | |||||
ge::ModelData &model_data) { | ge::ModelData &model_data) { | ||||
std::string real_path = RealPath(model_path); | std::string real_path = RealPath(model_path); | ||||
if (real_path.empty()) { | if (real_path.empty()) { | ||||
GELOGE(PARAM_INVALID, "Model file path '%s' is invalid", model_path); | |||||
return PARAM_INVALID; | |||||
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); | |||||
return GE_EXEC_MODEL_PATH_INVALID; | |||||
} | } | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(model_path) == -1, return FAILED, "File size not valid."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(model_path) == -1, return GE_EXEC_READ_MODEL_FILE_FAILED, | |||||
"File size not valid."); | |||||
std::ifstream fs(real_path.c_str(), std::ifstream::binary); | std::ifstream fs(real_path.c_str(), std::ifstream::binary); | ||||
GE_CHK_BOOL_RET_STATUS(fs.is_open(), FAILED, "Open file failed! path:%s", model_path); | |||||
GE_CHK_BOOL_RET_STATUS(fs.is_open(), GE_EXEC_READ_MODEL_FILE_FAILED, "Open file failed! path:%s", model_path); | |||||
// get length of file: | // get length of file: | ||||
(void)fs.seekg(0, std::ifstream::end); | (void)fs.seekg(0, std::ifstream::end); | ||||
@@ -55,7 +56,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro | |||||
char *data = new (std::nothrow) char[len]; | char *data = new (std::nothrow) char[len]; | ||||
if (data == nullptr) { | if (data == nullptr) { | ||||
GELOGE(MEMALLOC_FAILED, "Load model From file failed, bad memory allocation occur. (need:%ld)", len); | |||||
GELOGE(MEMALLOC_FAILED, "Load model From file failed, bad memory allocation occur. (need:%u)", len); | |||||
return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
} | } | ||||
@@ -79,31 +80,33 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::ParseMo | |||||
GE_CHECK_NOTNULL(model.model_data); | GE_CHECK_NOTNULL(model.model_data); | ||||
// Model length too small | // Model length too small | ||||
GE_CHK_BOOL_RET_STATUS(model.model_len >= sizeof(ModelFileHeader), PARAM_INVALID, | |||||
"Invalid model. length < sizeof(ModelFileHeader)."); | |||||
GE_CHK_BOOL_RET_STATUS(model.model_len >= sizeof(ModelFileHeader), GE_EXEC_MODEL_DATA_SIZE_INVALID, | |||||
"Invalid model. Model data size %u must be greater than or equal to %zu.", model.model_len, | |||||
sizeof(ModelFileHeader)); | |||||
// Get file header | // Get file header | ||||
auto file_header = reinterpret_cast<ModelFileHeader *>(model.model_data); | auto file_header = reinterpret_cast<ModelFileHeader *>(model.model_data); | ||||
// Determine whether the file length and magic number match | // Determine whether the file length and magic number match | ||||
GE_CHK_BOOL_RET_STATUS( | GE_CHK_BOOL_RET_STATUS( | ||||
file_header->length == model.model_len - sizeof(ModelFileHeader) && file_header->magic == MODEL_FILE_MAGIC_NUM, | file_header->length == model.model_len - sizeof(ModelFileHeader) && file_header->magic == MODEL_FILE_MAGIC_NUM, | ||||
PARAM_INVALID, | |||||
"Invalid model. file_header->length + sizeof(ModelFileHeader) != model->model_len || MODEL_FILE_MAGIC_NUM != " | |||||
"file_header->magic"); | |||||
GE_EXEC_MODEL_DATA_SIZE_INVALID, | |||||
"Invalid model. file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || " | |||||
"MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", | |||||
file_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, file_header->magic); | |||||
Status res = SUCCESS; | Status res = SUCCESS; | ||||
// Get data address | // Get data address | ||||
uint8_t *data = reinterpret_cast<uint8_t *>(model.model_data) + sizeof(ModelFileHeader); | uint8_t *data = reinterpret_cast<uint8_t *>(model.model_data) + sizeof(ModelFileHeader); | ||||
if (file_header->is_encrypt == ModelEncryptType::UNENCRYPTED) { // Unencrypted model | if (file_header->is_encrypt == ModelEncryptType::UNENCRYPTED) { // Unencrypted model | ||||
GE_CHK_BOOL_RET_STATUS(model.key.empty(), PARAM_INVALID, | |||||
GE_CHK_BOOL_RET_STATUS(model.key.empty(), GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, | |||||
"Invalid param. model is unencrypted, but key is not empty."); | "Invalid param. model is unencrypted, but key is not empty."); | ||||
model_data = data; | model_data = data; | ||||
model_len = file_header->length; | model_len = file_header->length; | ||||
GELOGI("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader)); | GELOGI("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader)); | ||||
} else { | } else { | ||||
GELOGE(PARAM_INVALID, "Invalid model. ModelEncryptType not supported."); | |||||
res = PARAM_INVALID; | |||||
GELOGE(GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, "Invalid model. ModelEncryptType not supported."); | |||||
res = GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION; | |||||
} | } | ||||
return res; | return res; | ||||
@@ -0,0 +1,483 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include "graph_parser_util.h" | |||||
#include <memory> | |||||
#include "common/auth/file_saver.h" | |||||
#include "common/convert/pb2json.h" | |||||
#include "common/debug/log.h" | |||||
#include "common/debug/memory_dumper.h" | |||||
#include "common/model_parser/base.h" | |||||
#include "common/model_saver.h" | |||||
#include "common/properties_manager.h" | |||||
#include "common/string_util.h" | |||||
#include "common/types.h" | |||||
#include "common/util.h" | |||||
#include "common/util/error_manager/error_manager.h" | |||||
#include "framework/common/debug/ge_log.h" | |||||
#include "framework/omg/parser/parser_inner_ctx.h" | |||||
#include "graph/compute_graph.h" | |||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/optimize/common/params.h" | |||||
#include "graph/utils/type_utils.h" | |||||
#include "omg/omg_inner_types.h" | |||||
#include "omg/parser/model_parser.h" | |||||
#include "omg/parser/parser_factory.h" | |||||
#include "omg/parser/weights_parser.h" | |||||
#include "parser/common/pre_checker.h" | |||||
#include "proto/ge_ir.pb.h" | |||||
#include "register/op_registry.h" | |||||
#include "external/register/register_types.h" | |||||
namespace ge { | |||||
namespace { | |||||
// The function is incomplete. Currently, only l2_optimize, off_optimize is supported. | |||||
const char *const kInputShapeSample1 = "\"input_name1:n1,c1,h1,w1\""; | |||||
const char *const kInputShapeSample2 = "\"input_name1:1,3,224,224\""; | |||||
const char *const kSplitError1 = "size not equal to 2 split by \":\""; | |||||
const char *const kEmptyError = "can not be empty"; | |||||
const char *const kFloatNumError = "exist float number"; | |||||
const char *const kDigitError = "is not digit"; | |||||
vector<string> SplitInputShape(const std::string &input_shape) { | |||||
vector<string> shape_pair_vec; | |||||
size_t pos = input_shape.rfind(":"); | |||||
if (pos != std::string::npos) { | |||||
shape_pair_vec.emplace_back(input_shape.substr(0, pos)); | |||||
shape_pair_vec.emplace_back(input_shape.substr(pos + 1, input_shape.size() - pos)); | |||||
} | |||||
return shape_pair_vec; | |||||
} | |||||
static std::map<std::string, ge::DataType> output_type_str_to_datatype = { | |||||
{"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; | |||||
static bool CheckInputTrueOrFalse(const std::string &s, const std::string &atc_param) { | |||||
if ((s == "true") || (s == "false")) { | |||||
return true; | |||||
} else { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"parameter", "value"}, {atc_param, s}); | |||||
GELOGE(PARAM_INVALID, "Input parameter[--%s]'s value[%s] must be true or false.", atc_param.c_str(), s.c_str()); | |||||
return false; | |||||
} | |||||
} | |||||
bool CheckDigitStr(std::string &str) { | |||||
for (char c : str) { | |||||
if (!isdigit(c)) { | |||||
GELOGE(domi::FAILED, "value[%s] is not positive integer", str.c_str()); | |||||
return false; | |||||
} | |||||
} | |||||
return true; | |||||
} | |||||
Status StringToInt(std::string &str, int32_t &value) { | |||||
try { | |||||
if (!CheckDigitStr(str)) { | |||||
GELOGE(PARAM_INVALID, "Invalid of digit string: %s ", str.c_str()); | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str}); | |||||
return PARAM_INVALID; | |||||
} | |||||
value = stoi(str); | |||||
} catch (std::invalid_argument &) { | |||||
GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch invalid_argument.", str.c_str()); | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str}); | |||||
return PARAM_INVALID; | |||||
} catch (std::out_of_range &) { | |||||
GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch out_of_range.", str.c_str()); | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"output_type", str}); | |||||
return PARAM_INVALID; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status VerifyOutputTypeAndOutNodes(std::vector<std::string> &out_type_vec) { | |||||
std::vector<std::pair<std::string, int32_t>> user_out_nodes = domi::GetContext().user_out_nodes; | |||||
std::set<std::string> out_nodes_info; | |||||
for (uint32_t i = 0; i < user_out_nodes.size(); ++i) { | |||||
// out_nodes set should include output_type and output_format | |||||
std::string tmp = user_out_nodes[i].first + ":" + to_string(user_out_nodes[i].second); | |||||
out_nodes_info.emplace(tmp); | |||||
} | |||||
for (uint32_t i = 0; i < out_type_vec.size(); ++i) { | |||||
if (out_nodes_info.find(out_type_vec[i]) == out_nodes_info.end()) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10059", {"value"}, {out_type_vec[i]}); | |||||
GELOGE(domi::FAILED, "Can not find this node (%s) in out_nodes.", out_type_vec[i].c_str()); | |||||
return domi::FAILED; | |||||
} | |||||
} | |||||
return domi::SUCCESS; | |||||
} | |||||
Status ParseOutputType(const std::string &output_type, std::map<std::string, vector<uint32_t>> &out_type_index_map, | |||||
std::map<std::string, vector<ge::DataType>> &out_type_dt_map) { | |||||
if (output_type.find(':') == std::string::npos) { | |||||
GELOGI("output_type is not multiple nodes, means all out nodes"); | |||||
auto it = output_type_str_to_datatype.find(output_type); | |||||
if (it == output_type_str_to_datatype.end()) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10042", {"value"}, {output_type}); | |||||
GELOGE(ge::PARAM_INVALID, "Invalid value for --output_type[%s], only support DT_FLOAT, DT_FLOAT16, DT_UINT8!!", | |||||
output_type.c_str()); | |||||
return domi::FAILED; | |||||
} | |||||
return domi::SUCCESS; | |||||
} | |||||
std::vector<std::string> out_type_vec; | |||||
vector<string> nodes_v = StringUtils::Split(output_type, ';'); | |||||
for (const string &node : nodes_v) { | |||||
vector<string> node_index_type_v = StringUtils::Split(node, ':'); | |||||
if (node_index_type_v.size() != 3) { // The size must be 3. | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10058", {"value"}, {node}); | |||||
GELOGE(PARAM_INVALID, | |||||
"The param of output_type is invalid, the correct format is [opname:index:dtype]," | |||||
"while the actual input is %s.", | |||||
node.c_str()); | |||||
return domi::FAILED; | |||||
} | |||||
ge::DataType tmp_dt; | |||||
std::string node_name = StringUtils::Trim(node_index_type_v[0]); | |||||
std::string index_str = StringUtils::Trim(node_index_type_v[1]); | |||||
int32_t index; | |||||
if (StringToInt(index_str, index) != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s.", index_str.c_str()); | |||||
return domi::FAILED; | |||||
} | |||||
std::string dt_value = StringUtils::Trim(node_index_type_v[2]); | |||||
auto it = output_type_str_to_datatype.find(dt_value); | |||||
if (it == output_type_str_to_datatype.end()) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10042", {"value"}, {dt_value}); | |||||
GELOGE(ge::PARAM_INVALID, "output_type [%s] is invalid.", dt_value.c_str()); | |||||
return domi::FAILED; | |||||
} else { | |||||
tmp_dt = it->second; | |||||
} | |||||
out_type_vec.push_back(node_name + ":" + index_str); | |||||
auto it_index = out_type_index_map.find(node_name); | |||||
if (it_index == out_type_index_map.end()) { | |||||
vector<uint32_t> tmp_vec; | |||||
tmp_vec.push_back(index); | |||||
out_type_index_map.emplace(node_name, tmp_vec); | |||||
} else { | |||||
it_index->second.push_back(index); | |||||
} | |||||
auto it_dt = out_type_dt_map.find(node_name); | |||||
if (it_dt == out_type_dt_map.end()) { | |||||
vector<ge::DataType> tmp_vec; | |||||
tmp_vec.push_back(tmp_dt); | |||||
out_type_dt_map.emplace(node_name, tmp_vec); | |||||
} else { | |||||
it_dt->second.push_back(tmp_dt); | |||||
} | |||||
} | |||||
return VerifyOutputTypeAndOutNodes(out_type_vec); | |||||
} | |||||
/// @brief Validates one user-specified output node.
///        The node must not be an input Data node and the requested output
///        index must address an existing output anchor.
/// @param [in] op_desc  descriptor of the candidate output node
/// @param [in] index    output index requested via --out_nodes
/// @return domi::SUCCESS when valid, domi::FAILED otherwise
Status CheckOutNode(ge::OpDescPtr op_desc, int32_t index) {
  // Input Data nodes can never serve as graph outputs.
  if (op_desc->GetType() == DATA) {
    GELOGE(domi::FAILED, "out_nodes [%s] can not be set input data, please check", op_desc->GetName().c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10068", {"parameter", "value", "reason"},
                                                    {"out_nodes", op_desc->GetName(), "it can not be set input data"});
    return domi::FAILED;
  }
  const int32_t out_size = op_desc->GetOutputsSize();
  const bool index_in_range = (index >= 0) && (index < out_size);
  if (index_in_range) {
    return domi::SUCCESS;
  }
  // Index out of [0, out_size): report through both the log and ATC errors.
  GELOGE(domi::FAILED,
         "out_node [%s] output index:%d must be smaller "
         "than node output size:%d and can not be negative!",
         op_desc->GetName().c_str(), index, out_size);
  std::string fail_reason = "output index:" + to_string(index) +
                            " must be smaller than output size:" + to_string(out_size) + " and can not be negative!";
  ErrorManager::GetInstance().ATCReportErrMessage("E10060", {"parameter", "value", "reason"},
                                                  {"out_nodes", op_desc->GetName(), fail_reason});
  return domi::FAILED;
}
/// @brief Collects the (node, output-index) pairs contributed by one leaf node.
///        A plain leaf exports all of its own output anchors; a NETOUTPUT node
///        is resolved to the producer nodes feeding its input anchors.
/// @param [in]  node               leaf node to expand
/// @param [out] output_nodes_info  accumulated (node, index) output pairs
/// @param [out] output_nodes_name  accumulated output node names (parallel list)
/// @return SUCCESS, or domi::FAILED when a descriptor/anchor is missing
Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
                     std::vector<std::string> &output_nodes_name) {
  ge::OpDescPtr op_desc = node->GetOpDesc();
  if (op_desc == nullptr) {
    GELOGE(domi::FAILED, "Get outnode op desc fail.");
    return domi::FAILED;
  }
  if (node->GetType() == NETOUTPUT) {
    // Trace each NetOutput input back to the node that actually produces it.
    for (const auto &in_anchor : node->GetAllInDataAnchors()) {
      auto peer_out_anchor = in_anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        GELOGE(domi::FAILED, "Get leaf node op desc fail.");
        return domi::FAILED;
      }
      auto src_node = peer_out_anchor->GetOwnerNode();
      output_nodes_info.push_back(std::make_pair(src_node, peer_out_anchor->GetIdx()));
      output_nodes_name.push_back(src_node->GetName());
    }
  } else {
    // Every output anchor of an ordinary leaf node becomes a graph output.
    const size_t output_num = op_desc->GetOutputsSize();
    for (size_t idx = 0; idx < output_num; ++idx) {
      output_nodes_info.push_back(std::make_pair(node, idx));
      output_nodes_name.push_back(node->GetName());
    }
  }
  return SUCCESS;
}
} // namespace | |||||
/// @brief Parses the --is_output_adjust_hw_layout option, a comma-separated
///        list of "true"/"false" flags, into the global output_formats list
///        (true -> NC1HWC0, false -> ND).
/// @param [in] is_output_fp16  raw option string; empty means nothing to do
/// @return SUCCESS, or PARAM_INVALID when an entry is not "true"/"false"
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputFp16NodesFormat(const string &is_output_fp16) {
  if (is_output_fp16.empty()) {
    return SUCCESS;
  }
  vector<domiTensorFormat_t> &output_formats = domi::GetContext().output_formats;
  output_formats.clear();
  vector<string> node_format_vec = StringUtils::Split(is_output_fp16, ',');
  for (auto &is_fp16 : node_format_vec) {
    // Bug fix: StringUtils::Trim returns the trimmed copy (see its other call
    // sites in this file); the old code discarded the return value, so entries
    // with surrounding whitespace (e.g. " true") were never trimmed and failed
    // the true/false check below.
    is_fp16 = StringUtils::Trim(is_fp16);
    if (!CheckInputTrueOrFalse(is_fp16, "is_output_adjust_hw_layout")) {
      GELOGE(PARAM_INVALID, "Invalid Param, is_output_adjust_hw_layout only support true/false: but is [%s]",
             is_output_fp16.c_str());
      return PARAM_INVALID;
    }
    if (is_fp16 == "false") {
      output_formats.push_back(DOMI_TENSOR_ND);
    } else if (is_fp16 == "true") {
      output_formats.push_back(domi::DOMI_TENSOR_NC1HWC0);
    }
  }
  return SUCCESS;
}
/// @brief Resolves the graph's output nodes and records them on the compute
///        graph. Outputs come from the user's --out_nodes list when given,
///        otherwise from all leaf nodes; --output_type data types and NC1HWC0
///        output formats are attached to the matching nodes as attributes.
/// @param [in] graph        graph whose outputs are being configured
/// @param [in] output_type  --output_type option value ("op:index:dtype;..."), may be empty
/// @param [in] output       --output option value (currently unused in this body — presumably kept for interface compatibility; verify against callers)
/// @return domi::SUCCESS, or domi::FAILED on any lookup/validation failure
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph,
                                                                          const std::string &output_type,
                                                                          const std::string &output) {
  ge::ComputeGraphPtr compute_graph = ge::GraphUtils::GetComputeGraph(graph);
  GE_CHECK_NOTNULL(compute_graph);
  std::vector<std::pair<std::string, int32_t>> user_out_nodes = domi::GetContext().user_out_nodes;
  std::vector<domiTensorFormat_t> output_formats = domi::GetContext().output_formats;
  std::vector<std::pair<ge::NodePtr, int32_t>> output_nodes_info;
  std::vector<std::string> output_nodes_name;
  // Per-node output dtype requests parsed out of --output_type:
  // node name -> output indices, and node name -> corresponding data types.
  std::map<std::string, vector<uint32_t>> out_type_index_map;
  std::map<std::string, vector<ge::DataType>> out_type_dt_map;
  if (!output_type.empty()) {
    if (ParseOutputType(output_type, out_type_index_map, out_type_dt_map) != SUCCESS) {
      GELOGE(domi::FAILED, "Parse output_type failed.");
      return domi::FAILED;
    }
  }
  // User declared outputs
  for (uint32_t i = 0; i < user_out_nodes.size(); ++i) {
    // Each requested output node must exist in the graph ...
    ge::NodePtr out_node = compute_graph->FindNode(user_out_nodes[i].first);
    if (out_node == nullptr) {
      GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str());
      return domi::FAILED;
    }
    auto op_desc = out_node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    // ... and must pass validation (not a Data node, index in range).
    if (CheckOutNode(op_desc, user_out_nodes[i].second) != SUCCESS) {
      GELOGE(domi::FAILED, "Check out node (%s) fail.", user_out_nodes[i].first.c_str());
      return domi::FAILED;
    }
    // output_formats is positionally aligned with user_out_nodes; a missing
    // entry simply means no format override for this output.
    if (i < output_formats.size()) {
      if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) {
        GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str());
        // Failure to set the attribute is tolerated (warning only).
        if (!ge::AttrUtils::SetBool(op_desc, "output_set_fp16_nc1hwc0", true)) {
          GELOGW("The output node [%s] set NC1HWC0 failed", user_out_nodes[i].first.c_str());
        }
      }
    }
    // Attach the requested output data types/indices from --output_type, if any.
    auto it_index = out_type_index_map.find(user_out_nodes[i].first);
    auto it_dt = out_type_dt_map.find(user_out_nodes[i].first);
    if ((it_index != out_type_index_map.end()) && (it_dt != out_type_dt_map.end())) {
      GELOGI("The output node [%s] need to be set output_type", user_out_nodes[i].first.c_str());
      (void)ge::AttrUtils::SetListDataType(op_desc, "_output_dt_list", it_dt->second);
      (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second);
    }
    output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second));
    output_nodes_name.push_back(out_node->GetName());
  }
  // default output node (leaf)
  if (user_out_nodes.empty()) {
    // A leaf is any node with data inputs but no data outputs.
    for (ge::NodePtr node : compute_graph->GetDirectNode()) {
      if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) {
        Status ret = GetOutputLeaf(node, output_nodes_info, output_nodes_name);
        GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail.");
      }
    }
  }
  // Publish the resolved outputs to the graph and the global ATC context.
  compute_graph->SetGraphOutNodesInfo(output_nodes_info);
  domi::GetContext().net_out_nodes = output_nodes_name;
  return domi::SUCCESS;
}
/// @brief Parses the --input_shape option ("name:d1,d2,...;name:d1,...") into
///        both a lookup map and an ordered (name, dims) list.
/// @param [in]  input_shape       raw option string
/// @param [out] shape_map         name -> dimension list lookup
/// @param [out] user_shape_map    (name, dimension list) pairs in user order
/// @param [in]  is_dynamic_input  when true, non-positive dims (e.g. -1) are accepted
/// @return true on success; false on any malformed entry (reported via ATC errors)
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ParseInputShape(
    const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map,
    vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) {
  vector<string> shape_vec = StringUtils::Split(input_shape, ';');
  const int DEFAULT_SHAPE_PAIR_SIZE = 2;
  for (const auto &shape : shape_vec) {
    // Each entry must split into exactly "name" and "d1,d2,...".
    vector<string> shape_pair_vec = SplitInputShape(shape);
    if (shape_pair_vec.size() != DEFAULT_SHAPE_PAIR_SIZE) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                      {shape, kSplitError1, kInputShapeSample1});
      GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
             shape.c_str(), kSplitError1, kInputShapeSample1);
      return false;
    }
    if (shape_pair_vec[1].empty()) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                      {shape, kEmptyError, kInputShapeSample1});
      GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
             shape.c_str(), kEmptyError, kInputShapeSample1);
      return false;
    }
    vector<string> shape_value_strs = StringUtils::Split(shape_pair_vec[1], ',');
    vector<int64_t> shape_values;
    for (auto &shape_value_str : shape_value_strs) {
      // Reject floating-point looking values up front; stol would silently
      // truncate "1.5" to 1.
      if (std::string::npos != shape_value_str.find('.')) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                        {shape, kFloatNumError, kInputShapeSample2});
        GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
               shape.c_str(), kFloatNumError, kInputShapeSample2);
        return false;
      }
      long left_result = 0;
      try {
        // stol may throw invalid_argument/out_of_range; both are handled below.
        left_result = stol(StringUtils::Trim(shape_value_str));
        if (!shape_value_str.empty() && (shape_value_str.front() == '-')) {
          // The value maybe dynamic shape [-1], need substr it and verify isdigit.
          shape_value_str = shape_value_str.substr(1);
        }
        for (char c : shape_value_str) {
          // Bug fix: cast to unsigned char before isdigit — passing a negative
          // plain char (possible for non-ASCII bytes) is undefined behavior.
          if (!isdigit(static_cast<unsigned char>(c))) {
            ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                            {shape, kDigitError, kInputShapeSample2});
            GELOGE(PARAM_INVALID, "--input_shape's shape value[%s] is not digit", shape_value_str.c_str());
            return false;
          }
        }
      } catch (const std::out_of_range &) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        // Log-message fix: repaired mojibake apostrophe and "execption" typo.
        GELOGW("Input parameter[--input_shape]'s value[%s] cause out of range exception!", shape_value_str.c_str());
        return false;
      } catch (const std::invalid_argument &) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        GELOGW("Input parameter[--input_shape]'s value[%s] cause invalid argument!", shape_value_str.c_str());
        return false;
      } catch (...) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10015", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        // Log-message fix: repaired "unkown execption" typos.
        GELOGW("Input parameter[--input_shape]'s value[%s] cause unknown exception!", shape_value_str.c_str());
        return false;
      }
      int64_t result = left_result;
      // - 1 is not currently supported
      if (!is_dynamic_input && result <= 0) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10011", {"shape", "result"}, {shape, std::to_string(result)});
        GELOGW(
          "Input parameter[--input_shape]'s shape value[%s] is invalid, "
          "expect positive integer, but value is %ld.",
          shape.c_str(), result);
        return false;
      }
      shape_values.push_back(result);
    }
    shape_map.emplace(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values));
    user_shape_map.push_back(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values));
  }
  return true;
}
/// @brief Parses the --out_nodes option ("name:index;name:index;...") into the
///        global omg context (out_nodes_map and user_out_nodes).
/// @param [in] out_nodes  raw option string; empty means nothing to do
/// @return SUCCESS, or PARAM_INVALID on any malformed entry
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputNodes(const string &out_nodes) {
  if (out_nodes.empty()) {
    return SUCCESS;
  }
  try {
    domi::GetContext().out_nodes_map.clear();
    domi::GetContext().user_out_nodes.clear();
    for (const string &node : StringUtils::Split(out_nodes, ';')) {
      vector<string> key_value_v = StringUtils::Split(node, ':');
      if (key_value_v.size() != 2) {  // Each entry must be exactly "name:index".
        ErrorManager::GetInstance().ATCReportErrMessage(
          "E10068", {"parameter", "value", "reason"},
          {"out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""});
        GELOGE(PARAM_INVALID,
               "The input format of --out_nodes is invalid, the correct format is "
               "\"node_name1:0;node_name1:1;node_name2:0\", while the actual input is %s.",
               node.c_str());
        return PARAM_INVALID;
      }
      // stoi may throw invalid_argument/out_of_range; pre-validate digits first.
      if (!CheckDigitStr(key_value_v[1])) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes});
        GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str());
        return PARAM_INVALID;
      }
      int32_t index = stoi(StringUtils::Trim(key_value_v[1]));
      const string &node_name = key_value_v[0];
      auto iter = domi::GetContext().out_nodes_map.find(node_name);
      if (iter == domi::GetContext().out_nodes_map.end()) {
        // First index seen for this node name.
        domi::GetContext().out_nodes_map.emplace(node_name, std::vector<int32_t>{index});
      } else {
        iter->second.emplace_back(index);
      }
      domi::GetContext().user_out_nodes.push_back(std::make_pair(node_name, index));
    }
  } catch (std::invalid_argument &) {
    GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes});
    return PARAM_INVALID;
  } catch (std::out_of_range &) {
    GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"out_nodes", out_nodes});
    return PARAM_INVALID;
  }
  return SUCCESS;
}
/// @brief Loads the op-name mapping configuration file (--op_name_map) and
///        publishes the parsed key/value pairs into the global ATC context.
/// @param [in] op_conf  path to the configuration file; null/empty is a no-op
/// @return SUCCESS, or FAILED when the file cannot be parsed
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOpConf(const char *op_conf) {
  const bool has_conf = (op_conf != nullptr) && (*op_conf != '\0');
  if (!has_conf) {
    return SUCCESS;
  }
  // Key/value pairs in the file are separated by ":".
  PropertiesManager::Instance().SetPropertyDelimiter(OP_CONF_DELIMITER);
  // Parse the op_conf configuration item file.
  if (!PropertiesManager::Instance().Init(op_conf)) {
    GELOGE(FAILED, "op_name_map init failed!");
    return FAILED;
  }
  // Hand the resulting map to the ATC global context.
  domi::GetContext().op_conf_map = PropertiesManager::Instance().GetPropertyMap();
  return SUCCESS;
}
} // namespace ge |
@@ -0,0 +1,68 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef GE_COMMON_GRAPH_PARSER_UTIL_H_ | |||||
#define GE_COMMON_GRAPH_PARSER_UTIL_H_ | |||||
#include <google/protobuf/message.h> | |||||
#include <string> | |||||
#include <unordered_map> | |||||
#include <vector> | |||||
#include "framework/common/types.h" | |||||
#include "framework/omg/omg_inner_types.h" | |||||
#include "framework/omg/parser/parser_inner_ctx.h" | |||||
#include "proto/ge_ir.pb.h" | |||||
#include "proto/om.pb.h" | |||||
#include "graph/compute_graph.h" | |||||
#include "graph/graph.h" | |||||
#include "graph/model.h" | |||||
#include "runtime/kernel.h" | |||||
using domi::Status; | |||||
using std::pair; | |||||
using std::string; | |||||
using std::unordered_map; | |||||
using std::vector; | |||||
namespace ge { | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief init omg context | |||||
* @return void | |||||
*/ | |||||
Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); | |||||
Status ParseOutputFp16NodesFormat(const string &is_output_fp16); | |||||
Status ParseOutputNodes(const string &out_nodes); | |||||
bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map, | |||||
vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input); | |||||
Status ParseOpConf(const char *op_conf); | |||||
} // namespace ge | |||||
namespace domi { | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief get omg context | |||||
* @return reference of OmgContext | |||||
*/ | |||||
ge::OmgContext &GetContext(); | |||||
} // namespace domi | |||||
#endif // GE_COMMON_GRAPH_PARSER_UTIL_H_ |
@@ -16,15 +16,12 @@ | |||||
#include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
#include <nlohmann/json.hpp> | |||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include "framework/common/string_util.h" | #include "framework/common/string_util.h" | ||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "runtime/base.h" | #include "runtime/base.h" | ||||
using Json = nlohmann::json; | |||||
namespace { | namespace { | ||||
const char *const kJobID = "jobID"; | const char *const kJobID = "jobID"; | ||||
const char *const kDeviceID = "deviceID"; | const char *const kDeviceID = "deviceID"; | ||||
@@ -35,6 +32,7 @@ const char *const kEvents = "events"; | |||||
const char *const kAiCoreEvents = "ai_core_events"; | const char *const kAiCoreEvents = "ai_core_events"; | ||||
const char *const kName = "name"; | const char *const kName = "name"; | ||||
const char *const kTraceID = "traceId"; | const char *const kTraceID = "traceId"; | ||||
const char *const kProfDir = "resultPath"; | |||||
const size_t kReportMaxLen = 2048; | const size_t kReportMaxLen = 2048; | ||||
} // namespace | } // namespace | ||||
@@ -100,6 +98,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
Json start_prof_conf = Json::parse(config); | Json start_prof_conf = Json::parse(config); | ||||
Json &prof_conf = start_prof_conf[kStartCfg][0]; | Json &prof_conf = start_prof_conf[kStartCfg][0]; | ||||
job_id_ = prof_conf[kJobID]; | job_id_ = prof_conf[kJobID]; | ||||
auto iter = prof_conf.find(kProfDir); | |||||
if (iter != prof_conf.end()) { | |||||
prof_dir_ = prof_conf[kProfDir]; | |||||
} | |||||
Json &device_id = prof_conf[kDeviceID]; | Json &device_id = prof_conf[kDeviceID]; | ||||
if (device_id.size() != 0) { | if (device_id.size() != 0) { | ||||
vector<int32_t>().swap(device_id_); | vector<int32_t>().swap(device_id_); | ||||
@@ -126,23 +128,36 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
} | } | ||||
} | } | ||||
GELOGI("Profiling json config from acl:%s", config.c_str()); | |||||
Json &features = prof_conf[kFeatures]; | Json &features = prof_conf[kFeatures]; | ||||
if (ParseFeaturesFromAclCfg(features) != SUCCESS) { | |||||
GELOGE(FAILED, "Parse feature from acl cfg failed."); | |||||
return FAILED; | |||||
} | |||||
is_profiling_ = true; | |||||
} catch (...) { | |||||
GELOGE(FAILED, "Json conf is not invalid !"); | |||||
return ge::PARAM_INVALID; | |||||
} | |||||
#endif | |||||
return ge::SUCCESS; | |||||
} | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg( | |||||
const Json &features) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | |||||
try { | |||||
for (size_t i = 0; i < features.size(); ++i) { | for (size_t i = 0; i < features.size(); ++i) { | ||||
Json &feature = features[i]; | |||||
const Json &feature = features[i]; | |||||
if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) { | if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) { | ||||
continue; | continue; | ||||
} | } | ||||
const std::string &name = feature[kName]; | const std::string &name = feature[kName]; | ||||
if (name == "op_trace") { | if (name == "op_trace") { | ||||
GELOGI("Op trace config from acl"); | |||||
Json &conf = feature[kConf]; | |||||
Json &events = conf[0][kEvents]; | |||||
const Json &conf = feature[kConf]; | |||||
const Json &events = conf[0][kEvents]; | |||||
const std::string &ai_core_events = events[0][kAiCoreEvents]; | const std::string &ai_core_events = events[0][kAiCoreEvents]; | ||||
GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str()); | GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str()); | ||||
is_op_trace_ = true; | is_op_trace_ = true; | ||||
// op trace get conf | |||||
ProfMgrConf prof_mgr_conf; | ProfMgrConf prof_mgr_conf; | ||||
int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf); | int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf); | ||||
if (result != 0) { | if (result != 0) { | ||||
@@ -154,10 +169,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_); | GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_); | ||||
} else if (name == "task_trace") { | } else if (name == "task_trace") { | ||||
is_op_trace_ = false; | is_op_trace_ = false; | ||||
if (feature.find(kConf) != feature.end()) { | |||||
const Json &conf = feature[kConf]; | |||||
std::stringstream task_trace_conf; | |||||
task_trace_conf << conf; | |||||
task_trace_conf_ = task_trace_conf.str(); | |||||
} | |||||
GELOGI("Task trace config from acl"); | GELOGI("Task trace config from acl"); | ||||
} else if (name == "system_trace") { | } else if (name == "system_trace") { | ||||
is_op_trace_ = false; | is_op_trace_ = false; | ||||
Json &conf = feature[kConf]; | |||||
const Json &conf = feature[kConf]; | |||||
std::stringstream system_trace_conf; | std::stringstream system_trace_conf; | ||||
system_trace_conf << conf; | system_trace_conf << conf; | ||||
system_trace_conf_ = system_trace_conf.str(); | system_trace_conf_ = system_trace_conf.str(); | ||||
@@ -165,10 +186,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
} | } | ||||
profiling_opts_.push_back(name); | profiling_opts_.push_back(name); | ||||
} | } | ||||
is_profiling_ = true; | |||||
} catch (...) { | } catch (...) { | ||||
GELOGE(FAILED, "Json conf is not invalid !"); | |||||
GELOGE(ge::PARAM_INVALID, "Json conf feature is not invalid !"); | |||||
return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
} | } | ||||
#endif | #endif | ||||
@@ -235,6 +254,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||||
p_device[kDeviceID] = std::to_string(device_id); | p_device[kDeviceID] = std::to_string(device_id); | ||||
p_device[kJobID] = job_id_; | p_device[kJobID] = job_id_; | ||||
p_device[kTraceID] = std::to_string(GetContext().TraceId()); | p_device[kTraceID] = std::to_string(GetContext().TraceId()); | ||||
if (!prof_dir_.empty()) { | |||||
p_device[kProfDir] = prof_dir_; | |||||
GELOGI("Prof dir: %s.", prof_dir_.c_str()); | |||||
} | |||||
Json features; | Json features; | ||||
if (is_op_trace_) { | if (is_op_trace_) { | ||||
@@ -258,6 +281,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||||
Json f; | Json f; | ||||
if (profiling_opts_[i] == "system_trace") { | if (profiling_opts_[i] == "system_trace") { | ||||
f[kConf] = nlohmann::json::parse(system_trace_conf_); | f[kConf] = nlohmann::json::parse(system_trace_conf_); | ||||
} else if (profiling_opts_[i] == "task_trace") { | |||||
if (!task_trace_conf_.empty()) { | |||||
f[kConf] = nlohmann::json::parse(task_trace_conf_); | |||||
} | |||||
} | } | ||||
f[kName] = profiling_opts_[i]; | f[kName] = profiling_opts_[i]; | ||||
features[i] = f; | features[i] = f; | ||||
@@ -17,6 +17,7 @@ | |||||
#ifndef GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | #ifndef GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | ||||
#define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | #define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | ||||
#include <nlohmann/json.hpp> | |||||
#include <map> | #include <map> | ||||
#include <string> | #include <string> | ||||
#include <vector> | #include <vector> | ||||
@@ -30,6 +31,7 @@ | |||||
using std::map; | using std::map; | ||||
using std::string; | using std::string; | ||||
using std::vector; | using std::vector; | ||||
using Json = nlohmann::json; | |||||
namespace ge { | namespace ge { | ||||
const std::string GE_PROFILING_MODULE = "Framework"; | const std::string GE_PROFILING_MODULE = "Framework"; | ||||
@@ -84,11 +86,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
void PluginUnInit(const std::string &module) const; | void PluginUnInit(const std::string &module) const; | ||||
private: | private: | ||||
ge::Status ParseFeaturesFromAclCfg(const Json &feature); | |||||
bool is_profiling_ = false; | bool is_profiling_ = false; | ||||
bool is_op_trace_ = false; | bool is_op_trace_ = false; | ||||
bool is_load_ = false; | bool is_load_ = false; | ||||
int32_t op_trace_iter_num_ = 0; | int32_t op_trace_iter_num_ = 0; | ||||
string job_id_; | string job_id_; | ||||
string prof_dir_; | |||||
vector<int32_t> device_id_; | vector<int32_t> device_id_; | ||||
vector<string> op_trace_conf_; | vector<string> op_trace_conf_; | ||||
vector<string> profiling_opts_; | vector<string> profiling_opts_; | ||||
@@ -96,6 +100,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
string recv_profiling_config_; | string recv_profiling_config_; | ||||
string send_profiling_config_; | string send_profiling_config_; | ||||
string system_trace_conf_; | string system_trace_conf_; | ||||
string task_trace_conf_; | |||||
const ProfilingEngineImpl engine_; | const ProfilingEngineImpl engine_; | ||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -292,6 +292,7 @@ REGISTER_OPTYPE_DEFINE(BASICLSTMCELL, "BasicLSTMCell"); | |||||
REGISTER_OPTYPE_DEFINE(GETNEXT, "GetNext"); | REGISTER_OPTYPE_DEFINE(GETNEXT, "GetNext"); | ||||
REGISTER_OPTYPE_DEFINE(INITDATA, "InitData"); | REGISTER_OPTYPE_DEFINE(INITDATA, "InitData"); | ||||
REGISTER_OPTYPE_DEFINE(REFIDENTITY, "RefIdentity"); | REGISTER_OPTYPE_DEFINE(REFIDENTITY, "RefIdentity"); | ||||
REGISTER_OPTYPE_DEFINE(BITCAST, "Bitcast"); | |||||
/***************Ann special operator*************************/ | /***************Ann special operator*************************/ | ||||
REGISTER_OPTYPE_DEFINE(ANN_MEAN, "AnnMean"); | REGISTER_OPTYPE_DEFINE(ANN_MEAN, "AnnMean"); | ||||
@@ -382,6 +383,8 @@ REGISTER_OPTYPE_DEFINE(HCOMALLREDUCE, "HcomAllReduce"); | |||||
REGISTER_OPTYPE_DEFINE(HCOMREDUCESCATTER, "HcomReduceScatter"); | REGISTER_OPTYPE_DEFINE(HCOMREDUCESCATTER, "HcomReduceScatter"); | ||||
REGISTER_OPTYPE_DEFINE(HCOMSEND, "HcomSend"); | REGISTER_OPTYPE_DEFINE(HCOMSEND, "HcomSend"); | ||||
REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); | REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); | ||||
REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead"); | |||||
REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite"); | |||||
REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); | REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); | ||||
REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); | REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); | ||||
@@ -363,7 +363,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const | |||||
std::map<std::string, std::string> args_map; | std::map<std::string, std::string> args_map; | ||||
if (file_path.empty()) { | if (file_path.empty()) { | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {atc_param}); | ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {atc_param}); | ||||
GELOGW("Input parameter's value is empty."); | |||||
GELOGW("Input parameter %s is empty.", file_path.c_str()); | |||||
return false; | return false; | ||||
} | } | ||||
std::string real_path = RealPath(file_path.c_str()); | std::string real_path = RealPath(file_path.c_str()); | ||||
@@ -181,13 +181,12 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||||
GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str()); | GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str()); | ||||
return ""; | return ""; | ||||
} | } | ||||
string ge_core_type; | |||||
std::string ge_core_type; | |||||
Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type); | Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type); | ||||
if (ret != SUCCESS) { | |||||
GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE"); | |||||
} | |||||
string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine; | |||||
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE")); | |||||
std::string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine; | |||||
GELOGD("engine type will exclude: %s", exclude_core_Type.c_str()); | GELOGD("engine type will exclude: %s", exclude_core_Type.c_str()); | ||||
std::map<std::string, std::string> unsupported_reasons; | std::map<std::string, std::string> unsupported_reasons; | ||||
for (const auto &it : op_infos) { | for (const auto &it : op_infos) { | ||||
if (it.engine == exclude_core_Type) { | if (it.engine == exclude_core_Type) { | ||||
@@ -204,7 +203,7 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||||
checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time; | checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time; | ||||
op_desc->SetOpEngineName(it.engine); | op_desc->SetOpEngineName(it.engine); | ||||
op_desc->SetOpKernelLibName(kernel_name); | op_desc->SetOpKernelLibName(kernel_name); | ||||
GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s into op_desc %s", kernel_name.c_str(), | |||||
GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s to op_desc %s", kernel_name.c_str(), | |||||
it.engine.c_str(), op_desc->GetName().c_str()); | it.engine.c_str(), op_desc->GetName().c_str()); | ||||
return it.engine; | return it.engine; | ||||
} else { | } else { | ||||
@@ -222,6 +221,9 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||||
unsupported_reasons.emplace(kernel_name, unsupported_reason); | unsupported_reasons.emplace(kernel_name, unsupported_reason); | ||||
GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s", | GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s", | ||||
kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | ||||
if (!op_desc->HasAttr("_is_ge_op")) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("W11001", {"opname"}, {op_desc->GetName()}); | |||||
} | |||||
} | } | ||||
} else { | } else { | ||||
GELOGW( | GELOGW( | ||||
@@ -371,7 +373,7 @@ Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std: | |||||
} | } | ||||
Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle handle) { | Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle handle) { | ||||
GELOGI("Begin to read json file"); | |||||
GELOGD("Begin to read json file"); | |||||
if (file_path.empty()) { | if (file_path.empty()) { | ||||
GELOGE(FAILED, "Json path %s is not valid", file_path.c_str()); | GELOGE(FAILED, "Json path %s is not valid", file_path.c_str()); | ||||
return FAILED; | return FAILED; | ||||
@@ -406,12 +408,12 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
ifs.close(); | ifs.close(); | ||||
GELOGI("Read json file success"); | |||||
GELOGD("Read json file success"); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status DNNEngineManager::CheckJsonFile() { | Status DNNEngineManager::CheckJsonFile() { | ||||
GELOGI("Begin to check json file"); | |||||
GELOGD("Begin to check json file"); | |||||
for (auto &it : engines_map_) { | for (auto &it : engines_map_) { | ||||
std::string engine_name = it.first; | std::string engine_name = it.first; | ||||
int count = 0; | int count = 0; | ||||
@@ -431,7 +433,7 @@ Status DNNEngineManager::CheckJsonFile() { | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} | } | ||||
GELOGI("Check json file success"); | |||||
GELOGD("Check json file success"); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
} // namespace ge | } // namespace ge |
@@ -60,6 +60,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||||
"../graph/load/new_model_manager/task_info/task_info.cc" | "../graph/load/new_model_manager/task_info/task_info.cc" | ||||
"../graph/load/new_model_manager/tbe_handle_store.cc" | "../graph/load/new_model_manager/tbe_handle_store.cc" | ||||
"../graph/load/new_model_manager/zero_copy_task.cc" | "../graph/load/new_model_manager/zero_copy_task.cc" | ||||
"../graph/load/new_model_manager/zero_copy_offset.cc" | |||||
"../graph/manager/graph_caching_allocator.cc" | "../graph/manager/graph_caching_allocator.cc" | ||||
"../graph/manager/graph_manager_utils.cc" | "../graph/manager/graph_manager_utils.cc" | ||||
"../graph/manager/graph_mem_allocator.cc" | "../graph/manager/graph_mem_allocator.cc" | ||||
@@ -36,6 +36,9 @@ | |||||
#include "mmpa/mmpa_api.h" | #include "mmpa/mmpa_api.h" | ||||
#include "single_op/single_op_manager.h" | #include "single_op/single_op_manager.h" | ||||
using std::string; | |||||
using std::vector; | |||||
namespace { | namespace { | ||||
const size_t kDynamicBatchSizeVecSize = 1; | const size_t kDynamicBatchSizeVecSize = 1; | ||||
const size_t kStaticBatchInfoSize = 1; | const size_t kStaticBatchInfoSize = 1; | ||||
@@ -102,20 +105,36 @@ void SetDynamicInputDataFlag(const ge::RunModelData &input_data, const std::vect | |||||
ge::InputData &inputs) { | ge::InputData &inputs) { | ||||
inputs.is_dynamic_batch = true; | inputs.is_dynamic_batch = true; | ||||
std::string batch_label; | std::string batch_label; | ||||
size_t match_idx = 0; | |||||
for (size_t i = 0; i < batch_info.size(); ++i) { | for (size_t i = 0; i < batch_info.size(); ++i) { | ||||
if (batch_info[i].size() == kDynamicBatchSizeVecSize && | |||||
batch_info[i][0] == static_cast<int64_t>(input_data.dynamic_batch_size)) { | |||||
batch_label = kBatchLabel + std::to_string(i); | |||||
inputs.batch_label = batch_label; | |||||
// dynamic_dims | |||||
if (input_data.dynamic_dims.size() != 0) { | |||||
bool is_match = true; | |||||
for (size_t j = 0; j < static_cast<size_t>(input_data.dynamic_dims.size()); ++j) { | |||||
if (static_cast<uint64_t>(batch_info[i][j]) != input_data.dynamic_dims[j]) { | |||||
is_match = false; | |||||
break; | |||||
} | |||||
} | |||||
if (is_match) { | |||||
match_idx = i; | |||||
break; | |||||
} | |||||
// dynamic_batch_size | |||||
} else if (batch_info[i].size() == kDynamicBatchSizeVecSize && | |||||
batch_info[i][0] == static_cast<int64_t>(input_data.dynamic_batch_size)) { | |||||
match_idx = i; | |||||
break; | break; | ||||
// dynamic_image_size | |||||
} else if (batch_info[i].size() == kDynamicImageSizeVecSize && | } else if (batch_info[i].size() == kDynamicImageSizeVecSize && | ||||
batch_info[i][0] == static_cast<int64_t>(input_data.dynamic_image_height) && | batch_info[i][0] == static_cast<int64_t>(input_data.dynamic_image_height) && | ||||
batch_info[i][1] == static_cast<int64_t>(input_data.dynamic_image_width)) { | batch_info[i][1] == static_cast<int64_t>(input_data.dynamic_image_width)) { | ||||
batch_label = kBatchLabel + std::to_string(i); | |||||
inputs.batch_label = batch_label; | |||||
match_idx = i; | |||||
break; | break; | ||||
} | } | ||||
} | } | ||||
batch_label = kBatchLabel + std::to_string(match_idx); | |||||
inputs.batch_label = batch_label; | |||||
GELOGI("current batch label:%s", batch_label.c_str()); | GELOGI("current batch label:%s", batch_label.c_str()); | ||||
} | } | ||||
@@ -225,39 +244,41 @@ Status GeExecutor::Finalize() { | |||||
Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | ||||
uint64_t batch_size) { | uint64_t batch_size) { | ||||
if (dynamic_input_addr == nullptr) { | if (dynamic_input_addr == nullptr) { | ||||
GELOGE(FAILED, "Dynamic input addr is nullptr!"); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "Dynamic input addr is nullptr!"); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
uint64_t size = sizeof(uint64_t); | uint64_t size = sizeof(uint64_t); | ||||
if (length < size) { | if (length < size) { | ||||
GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, size); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, size); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
// Verify whether the input dynamic batch matches the model gear | // Verify whether the input dynamic batch matches the model gear | ||||
std::vector<std::vector<int64_t>> batch_info; | std::vector<std::vector<int64_t>> batch_info; | ||||
std::vector<uint64_t> batch_num{batch_size}; | std::vector<uint64_t> batch_num{batch_size}; | ||||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | |||||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Get dynamic input info failed."); | |||||
return FAILED; | |||||
GELOGE(ret, "Get dynamic input info failed."); | |||||
return ret; | |||||
} | } | ||||
if (!IsDynamicBatchSizeMatchModel(batch_size, batch_info)) { | if (!IsDynamicBatchSizeMatchModel(batch_size, batch_info)) { | ||||
GELOGE(FAILED, "The current dynamic input does not match the gear of the model."); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "The current dynamic input does not match the gear of the model."); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
ret = GraphExecutor::SetDynamicSize(model_id, batch_num); | |||||
ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast<int32_t>(DYNAMIC_BATCH)); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Set dynamic size failed"); | |||||
return FAILED; | |||||
GELOGE(ret, "Set dynamic size failed"); | |||||
return ret; | |||||
} | } | ||||
// memcpy dynamic_batch_size from host to device | // memcpy dynamic_batch_size from host to device | ||||
if (rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { | |||||
GELOGE(FAILED, "memcpy dynamic batch input data failed!"); | |||||
return FAILED; | |||||
rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "memcpy dynamic batch input data failed! ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -265,40 +286,42 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad | |||||
Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | ||||
uint64_t image_height, uint64_t image_width) { | uint64_t image_height, uint64_t image_width) { | ||||
if (dynamic_input_addr == nullptr) { | if (dynamic_input_addr == nullptr) { | ||||
GELOGE(FAILED, "Dynamic input addr is nullptr!"); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "Dynamic input addr is nullptr!"); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint64_t); | uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint64_t); | ||||
if (length < dynamic_input_size) { | if (length < dynamic_input_size) { | ||||
GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
// Verify whether the input dynamic resolution matches the model gear | // Verify whether the input dynamic resolution matches the model gear | ||||
std::vector<std::vector<int64_t>> batch_info; | std::vector<std::vector<int64_t>> batch_info; | ||||
std::vector<uint64_t> batch_num{image_height, image_width}; | std::vector<uint64_t> batch_num{image_height, image_width}; | ||||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | |||||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Get dynamic input info failed."); | |||||
return FAILED; | |||||
GELOGE(ret, "Get dynamic input info failed."); | |||||
return ret; | |||||
} | } | ||||
if (!IsDynamicImageSizeMatchModel(image_height, image_width, batch_info)) { | if (!IsDynamicImageSizeMatchModel(image_height, image_width, batch_info)) { | ||||
GELOGE(FAILED, "The current dynamic input does not match the gear of the model."); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "The current dynamic input does not match the gear of the model."); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
ret = GraphExecutor::SetDynamicSize(model_id, batch_num); | |||||
ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast<int32_t>(DYNAMIC_IMAGE)); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Set dynamic size failed"); | |||||
return FAILED; | |||||
GELOGE(ret, "Set dynamic size failed"); | |||||
return ret; | |||||
} | } | ||||
// Memcpy dynamic resolution height from host to device | // Memcpy dynamic resolution height from host to device | ||||
if (rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) != | |||||
RT_ERROR_NONE) { | |||||
GELOGE(FAILED, "memcpy dynamic resolution input data failed!"); | |||||
return FAILED; | |||||
rtError_t rt_ret = | |||||
rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "memcpy dynamic resolution input data failed! ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
uint64_t remain_size = length - sizeof(uint64_t); | uint64_t remain_size = length - sizeof(uint64_t); | ||||
@@ -311,16 +334,109 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) { | |||||
Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||||
const vector<uint64_t> &dynamic_dims) { | |||||
if (dynamic_input_addr == nullptr) { | |||||
GELOGE(FAILED, "Dynamic input addr is nullptr!"); | |||||
return FAILED; | |||||
} | |||||
Status ret = GraphExecutor::SetDynamicSize(model_id, dynamic_dims, static_cast<int32_t>(DYNAMIC_DIMS)); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(FAILED, "Set dynamic size failed"); | |||||
return FAILED; | |||||
} | |||||
vector<uint64_t> cur_dynamic_dims; | |||||
if (GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims) != SUCCESS) { | |||||
GELOGE(FAILED, "GetCurDynamicDims failed."); | |||||
return FAILED; | |||||
} | |||||
size_t dynamic_dim_num = cur_dynamic_dims.size(); | |||||
uint64_t dynamic_input_size = static_cast<uint64_t>(dynamic_dim_num * sizeof(uint64_t)); | |||||
if (length < dynamic_input_size) { | |||||
GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | |||||
return FAILED; | |||||
} | |||||
for (uint32_t i = 0; i < dynamic_dim_num; ++i) { | |||||
// Memcpy dynamic dim[i] from host to device | |||||
if (rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(dynamic_input_addr) + sizeof(uint64_t) * i), | |||||
length - sizeof(uint64_t) * i, &cur_dynamic_dims[i], sizeof(uint64_t), | |||||
RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { | |||||
GELOGE(FAILED, "memcpy dynamic resolution input data failed!"); | |||||
return FAILED; | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> &combined_dims, | |||||
vector<uint64_t> &cur_dynamic_dims) { | |||||
vector<vector<int64_t>> combined_batch; | |||||
if (GraphExecutor::GetCombinedDynamicDims(model_id, combined_batch) != SUCCESS) { | |||||
GELOGE(FAILED, "Get combined dynamic dims info failed."); | |||||
return FAILED; | |||||
} | |||||
if (combined_batch.empty()) { | |||||
GELOGE(FAILED, "Combined dynamic dims is empty."); | |||||
return FAILED; | |||||
} | |||||
if (combined_dims.size() != combined_batch[0].size()) { | |||||
GELOGE(FAILED, "Input dynamic dims's dimension size[%zu] is different from model[%zu].", combined_dims.size(), | |||||
combined_batch[0].size()); | |||||
return FAILED; | |||||
} | |||||
bool matched = false; | |||||
size_t idx = 0; | |||||
for (size_t i = 0; i < combined_batch.size(); i++) { | |||||
bool is_match = true; | |||||
for (size_t j = 0; j < combined_dims.size(); j++) { | |||||
if (combined_dims[j] != static_cast<uint64_t>(combined_batch[i][j])) { | |||||
is_match = false; | |||||
break; | |||||
} | |||||
} | |||||
if (is_match) { | |||||
idx = i; | |||||
matched = true; | |||||
break; | |||||
} | |||||
} | |||||
if (!matched) { | |||||
GELOGE(FAILED, "Input dynamic dims can not match model."); | |||||
return FAILED; | |||||
} | |||||
// batch_info save the dynamic info of combined_dims | |||||
vector<vector<int64_t>> batch_info; | |||||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||||
if (GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type) != SUCCESS) { | |||||
GELOGE(FAILED, "Get dynamic input info failed."); | |||||
return FAILED; | |||||
} | |||||
cur_dynamic_dims.clear(); | |||||
for (size_t i = 0; i < batch_info[idx].size(); i++) { | |||||
cur_dynamic_dims.emplace_back(static_cast<uint64_t>(batch_info[idx][i])); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||||
GELOGI("Begin to get current shape"); | GELOGI("Begin to get current shape"); | ||||
if (!isInit_) { | if (!isInit_) { | ||||
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | ||||
return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
} | } | ||||
Status ret = GraphExecutor::GetCurShape(model_id, batch_info); | |||||
Status ret = GraphExecutor::GetCurShape(model_id, batch_info, dynamic_type); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Get current shape failed"); | |||||
return FAILED; | |||||
GELOGE(ret, "Get current shape failed"); | |||||
return ret; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -330,12 +446,12 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||||
const kAippDynamicPara &aippParms) { | const kAippDynamicPara &aippParms) { | ||||
GELOGI("Enter to SetDynamicAippData."); | GELOGI("Enter to SetDynamicAippData."); | ||||
if (dynamic_input_addr == nullptr) { | if (dynamic_input_addr == nullptr) { | ||||
GELOGE(FAILED, "Dynamic aipp input addr is nullptr!"); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "Dynamic aipp input addr is nullptr!"); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
if (aippBatchPara.empty()) { | if (aippBatchPara.empty()) { | ||||
GELOGE(FAILED, "aippBatchPara is empty."); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "aippBatchPara is empty."); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
uint64_t batch_num = aippBatchPara.size(); | uint64_t batch_num = aippBatchPara.size(); | ||||
uint64_t real_aippParms_size = sizeof(kAippDynamicPara) - sizeof(kAippDynamicBatchPara); | uint64_t real_aippParms_size = sizeof(kAippDynamicPara) - sizeof(kAippDynamicBatchPara); | ||||
@@ -345,24 +461,25 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||||
"batch num is %lu, struct_len is %lu", | "batch num is %lu, struct_len is %lu", | ||||
model_id, length, batch_num, struct_len); | model_id, length, batch_num, struct_len); | ||||
if (struct_len > length) { | if (struct_len > length) { | ||||
GELOGE(FAILED, "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
// Memcpy real kAippDynamicBatchPara from host to device | // Memcpy real kAippDynamicBatchPara from host to device | ||||
if (rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE) != | |||||
RT_ERROR_NONE) { | |||||
GELOGE(FAILED, "memcpy real_aippParms_size failed!"); | |||||
return FAILED; | |||||
rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "memcpy real_aippParms_size failed! ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
uint64_t remain_len = length - real_aippParms_size; | uint64_t remain_len = length - real_aippParms_size; | ||||
uint8_t *aipp_batch_para_dev = reinterpret_cast<uint8_t *>(dynamic_input_addr) + real_aippParms_size; | uint8_t *aipp_batch_para_dev = reinterpret_cast<uint8_t *>(dynamic_input_addr) + real_aippParms_size; | ||||
for (uint64_t i = 0; i < batch_num; ++i) { | for (uint64_t i = 0; i < batch_num; ++i) { | ||||
if (rtMemcpy(reinterpret_cast<void *>(aipp_batch_para_dev + i * sizeof(kAippDynamicBatchPara)), | |||||
(remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]), sizeof(kAippDynamicBatchPara), | |||||
RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { | |||||
GELOGE(FAILED, "memcpy kAippDynamicBatchPara input data failed!"); | |||||
return FAILED; | |||||
rt_ret = rtMemcpy(reinterpret_cast<void *>(aipp_batch_para_dev + i * sizeof(kAippDynamicBatchPara)), | |||||
(remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]), | |||||
sizeof(kAippDynamicBatchPara), RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "memcpy kAippDynamicBatchPara input data failed! ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -429,7 +546,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||||
} | } | ||||
Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); | Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed."); | |||||
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
return GraphLoader::UnloadModel(model_id); | return GraphLoader::UnloadModel(model_id); | ||||
@@ -468,17 +585,19 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||||
output_formats, new_model_desc); | output_formats, new_model_desc); | ||||
if (ret != domi::SUCCESS) { | if (ret != domi::SUCCESS) { | ||||
GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); | GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); | ||||
return TransferDomiErrorCode(ret); | |||||
return ret; | |||||
} | } | ||||
if (input_formats.size() != input_desc_infos.size()) { | if (input_formats.size() != input_desc_infos.size()) { | ||||
GELOGE(ge::FAILED, "input_formats.size() != input_desc_infos.size()."); | |||||
return ge::FAILED; | |||||
GELOGE(ge::PARAM_INVALID, "input_formats size %zu is not equal to input_desc_infos size %zu.", input_formats.size(), | |||||
input_desc_infos.size()); | |||||
return ge::PARAM_INVALID; | |||||
} | } | ||||
if (output_formats.size() != output_desc_infos.size()) { | if (output_formats.size() != output_desc_infos.size()) { | ||||
GELOGE(ge::FAILED, "output_formats.size() != output_desc_infos.size()."); | |||||
return ge::FAILED; | |||||
GELOGE(ge::PARAM_INVALID, "output_formats size %zu is not equal to output_desc_infos size %zu.", | |||||
output_formats.size(), output_desc_infos.size()); | |||||
return ge::PARAM_INVALID; | |||||
} | } | ||||
// Transfer data to TensorDesc | // Transfer data to TensorDesc | ||||
@@ -494,16 +613,18 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||||
/// @brief Get dynamic batch_info | /// @brief Get dynamic batch_info | ||||
/// @param [in] model_id | /// @param [in] model_id | ||||
/// @param [out] batch_info | /// @param [out] batch_info | ||||
/// @param [out] dynamic_type | |||||
/// @return execute result | /// @return execute result | ||||
/// | /// | ||||
Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) { | |||||
Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||||
int32_t &dynamic_type) { | |||||
GELOGI("Begin to get dynamic batch info."); | GELOGI("Begin to get dynamic batch info."); | ||||
if (!isInit_) { | if (!isInit_) { | ||||
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | ||||
return GE_EXEC_NOT_INIT; | return GE_EXEC_NOT_INIT; | ||||
} | } | ||||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | |||||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "GetDynamicBatchInfo failed."); | GELOGE(ret, "GetDynamicBatchInfo failed."); | ||||
return ret; | return ret; | ||||
@@ -515,6 +636,30 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Get combined dynamic dims info | |||||
/// @param [in] model_id | |||||
/// @param [out] batch_info | |||||
/// @return execute result | |||||
/// | |||||
Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64_t>> &batch_info) { | |||||
GELOGI("Begin to get combined dynamic dims info."); | |||||
if (!isInit_) { | |||||
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
return GE_EXEC_NOT_INIT; | |||||
} | |||||
Status ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "GetCombinedDynamicDims failed."); | |||||
return ret; | |||||
} | |||||
GELOGI("Get combined dynamic dims succ."); | |||||
return SUCCESS; | |||||
} | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Get AIPP input format | /// @brief Get AIPP input format | ||||
/// @param [in] model_id | /// @param [in] model_id | ||||
/// @param [in] index | /// @param [in] index | ||||
@@ -628,8 +773,8 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da | |||||
string filePath = RealPath(path.c_str()); | string filePath = RealPath(path.c_str()); | ||||
if (filePath.empty()) { | if (filePath.empty()) { | ||||
GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str()); | |||||
return ge::FAILED; | |||||
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "File path is invalid. please check your text file '%s'.", path.c_str()); | |||||
return GE_EXEC_MODEL_PATH_INVALID; | |||||
} | } | ||||
GELOGI("load modelData from file: %s.", path.c_str()); | GELOGI("load modelData from file: %s.", path.c_str()); | ||||
std::string key_path; | std::string key_path; | ||||
@@ -710,12 +855,20 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||||
GetDomiOutputData(run_output_data, output_data); | GetDomiOutputData(run_output_data, output_data); | ||||
if ((run_input_data.dynamic_batch_size != 0) || (run_input_data.dynamic_image_width != 0) || | if ((run_input_data.dynamic_batch_size != 0) || (run_input_data.dynamic_image_width != 0) || | ||||
(run_input_data.dynamic_image_height != 0)) { | |||||
(run_input_data.dynamic_image_height != 0) || (run_input_data.dynamic_dims.size() != 0)) { | |||||
std::vector<std::vector<int64_t>> batch_info; | std::vector<std::vector<int64_t>> batch_info; | ||||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | |||||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Get dynamic input info failed."); | |||||
return FAILED; | |||||
GELOGE(ret, "Get dynamic input info failed."); | |||||
return ret; | |||||
} | |||||
if (dynamic_type == static_cast<int32_t>(DYNAMIC_DIMS)) { | |||||
ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(FAILED, "Get dynamic input info failed."); | |||||
return FAILED; | |||||
} | |||||
} | } | ||||
if (!batch_info.empty()) { | if (!batch_info.empty()) { | ||||
SetDynamicInputDataFlag(run_input_data, batch_info, input_data); | SetDynamicInputDataFlag(run_input_data, batch_info, input_data); | ||||
@@ -790,6 +943,11 @@ Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelDa | |||||
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op); | return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op); | ||||
} | } | ||||
Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||||
DynamicSingleOp **single_op) { | |||||
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op); | |||||
} | |||||
Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | ||||
std::vector<DataBuffer> &outputs) { | std::vector<DataBuffer> &outputs) { | ||||
if (executor == nullptr) { | if (executor == nullptr) { | ||||
@@ -800,13 +958,21 @@ Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer | |||||
return executor->ExecuteAsync(inputs, outputs); | return executor->ExecuteAsync(inputs, outputs); | ||||
} | } | ||||
ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector<GeTensorDesc> &input_desc, | |||||
const vector<DataBuffer> &inputs, vector<GeTensorDesc> &output_desc, | |||||
vector<DataBuffer> &outputs) { | |||||
GE_CHECK_NOTNULL(executor); | |||||
return executor->ExecuteAsync(input_desc, inputs, output_desc, outputs); | |||||
} | |||||
Status GeExecutor::ReleaseSingleOpResource(void *stream) { | Status GeExecutor::ReleaseSingleOpResource(void *stream) { | ||||
return SingleOpManager::GetInstance().ReleaseResource(stream); | return SingleOpManager::GetInstance().ReleaseResource(stream); | ||||
} | } | ||||
Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { | Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { | ||||
std::vector<std::vector<int64_t>> batch_info; | std::vector<std::vector<int64_t>> batch_info; | ||||
Status ret = GetDynamicBatchInfo(model_id, batch_info); | |||||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||||
Status ret = GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Calc batch info size failed. ret = %d", ret); | GELOGE(ret, "Calc batch info size failed. ret = %d", ret); | ||||
return ret; | return ret; | ||||
@@ -26,6 +26,7 @@ local_ge_executor_src_files := \ | |||||
../graph/load/new_model_manager/data_inputer.cc \ | ../graph/load/new_model_manager/data_inputer.cc \ | ||||
../graph/load/new_model_manager/data_dumper.cc \ | ../graph/load/new_model_manager/data_dumper.cc \ | ||||
../graph/load/new_model_manager/zero_copy_task.cc \ | ../graph/load/new_model_manager/zero_copy_task.cc \ | ||||
../graph/load/new_model_manager/zero_copy_offset.cc \ | |||||
../graph/load/new_model_manager/task_info/task_info.cc \ | ../graph/load/new_model_manager/task_info/task_info.cc \ | ||||
../graph/load/new_model_manager/task_info/event_record_task_info.cc \ | ../graph/load/new_model_manager/task_info/event_record_task_info.cc \ | ||||
../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | ||||
@@ -79,6 +80,7 @@ local_ge_executor_shared_library := \ | |||||
libslog \ | libslog \ | ||||
libmmpa \ | libmmpa \ | ||||
libgraph \ | libgraph \ | ||||
libregister \ | |||||
libmsprof \ | libmsprof \ | ||||
local_ge_executor_ldflags := -lrt -ldl \ | local_ge_executor_ldflags := -lrt -ldl \ | ||||
@@ -128,6 +130,7 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
libslog \ | libslog \ | ||||
libmmpa \ | libmmpa \ | ||||
libgraph \ | libgraph \ | ||||
libregister \ | |||||
libmsprof \ | libmsprof \ | ||||
LOCAL_LDFLAGS += $(local_ge_executor_ldflags) | LOCAL_LDFLAGS += $(local_ge_executor_ldflags) | ||||
@@ -153,6 +156,7 @@ LOCAL_C_INCLUDES := $(local_ge_executor_c_include) | |||||
LOCAL_STATIC_LIBRARIES := \ | LOCAL_STATIC_LIBRARIES := \ | ||||
libge_common \ | libge_common \ | ||||
libgraph \ | libgraph \ | ||||
libregister \ | |||||
libprotobuf \ | libprotobuf \ | ||||
LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
@@ -184,6 +188,7 @@ LOCAL_C_INCLUDES := $(local_ge_executor_c_include) | |||||
LOCAL_STATIC_LIBRARIES := \ | LOCAL_STATIC_LIBRARIES := \ | ||||
libge_common \ | libge_common \ | ||||
libgraph \ | libgraph \ | ||||
libregister \ | |||||
libprotobuf \ | libprotobuf \ | ||||
LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
@@ -70,6 +70,7 @@ OMG_HOST_SRC_FILES := \ | |||||
graph/passes/resource_pair_remove_control_pass.cc \ | graph/passes/resource_pair_remove_control_pass.cc \ | ||||
graph/passes/pass_utils.cc \ | graph/passes/pass_utils.cc \ | ||||
graph/passes/base_pass.cc \ | graph/passes/base_pass.cc \ | ||||
graph/passes/bitcast_pass.cc \ | |||||
graph/passes/constant_folding_pass.cc \ | graph/passes/constant_folding_pass.cc \ | ||||
graph/passes/aicpu_constant_folding_pass.cc \ | graph/passes/aicpu_constant_folding_pass.cc \ | ||||
graph/passes/reshape_remove_pass.cc \ | graph/passes/reshape_remove_pass.cc \ | ||||
@@ -91,8 +92,10 @@ OMG_HOST_SRC_FILES := \ | |||||
graph/passes/print_op_pass.cc \ | graph/passes/print_op_pass.cc \ | ||||
graph/passes/no_use_reshape_remove_pass.cc \ | graph/passes/no_use_reshape_remove_pass.cc \ | ||||
graph/passes/iterator_op_pass.cc \ | graph/passes/iterator_op_pass.cc \ | ||||
graph/passes/input_output_connection_identify_pass.cc \ | |||||
graph/passes/atomic_addr_clean_pass.cc \ | graph/passes/atomic_addr_clean_pass.cc \ | ||||
graph/passes/mark_same_addr_pass.cc \ | graph/passes/mark_same_addr_pass.cc \ | ||||
graph/passes/mark_graph_unknown_status_pass.cc \ | |||||
graph/common/omg_util.cc \ | graph/common/omg_util.cc \ | ||||
graph/common/bcast.cc \ | graph/common/bcast.cc \ | ||||
graph/passes/dimension_compute_pass.cc \ | graph/passes/dimension_compute_pass.cc \ | ||||
@@ -107,6 +110,7 @@ OMG_HOST_SRC_FILES := \ | |||||
graph/passes/isolated_op_remove_pass.cc \ | graph/passes/isolated_op_remove_pass.cc \ | ||||
graph/passes/permute_pass.cc \ | graph/passes/permute_pass.cc \ | ||||
graph/passes/ctrl_edge_transfer_pass.cc \ | graph/passes/ctrl_edge_transfer_pass.cc \ | ||||
graph/passes/end_of_sequence_add_control_pass.cc \ | |||||
host_kernels/broadcast_gradient_args_kernel.cc \ | host_kernels/broadcast_gradient_args_kernel.cc \ | ||||
host_kernels/greater_kernel.cc \ | host_kernels/greater_kernel.cc \ | ||||
host_kernels/gather_v2_kernel.cc \ | host_kernels/gather_v2_kernel.cc \ | ||||
@@ -185,6 +189,8 @@ OMG_HOST_SRC_FILES := \ | |||||
graph/passes/hccl_group_pass.cc \ | graph/passes/hccl_group_pass.cc \ | ||||
graph/passes/switch_fusion_pass.cc \ | graph/passes/switch_fusion_pass.cc \ | ||||
graph/passes/switch_split_pass.cc \ | graph/passes/switch_split_pass.cc \ | ||||
graph/passes/memcpy_addr_async_pass.cc \ | |||||
graph/passes/set_input_output_offset_pass.cc \ | |||||
OMG_DEVICE_SRC_FILES := $(OMG_HOST_SRC_FILES) | OMG_DEVICE_SRC_FILES := $(OMG_HOST_SRC_FILES) | ||||
@@ -203,6 +209,7 @@ OME_HOST_SRC_FILES := \ | |||||
graph/load/new_model_manager/tbe_handle_store.cc \ | graph/load/new_model_manager/tbe_handle_store.cc \ | ||||
graph/load/new_model_manager/cpu_queue_schedule.cc \ | graph/load/new_model_manager/cpu_queue_schedule.cc \ | ||||
graph/load/new_model_manager/zero_copy_task.cc \ | graph/load/new_model_manager/zero_copy_task.cc \ | ||||
graph/load/new_model_manager/zero_copy_offset.cc \ | |||||
graph/load/new_model_manager/data_dumper.cc \ | graph/load/new_model_manager/data_dumper.cc \ | ||||
graph/load/new_model_manager/task_info/task_info.cc \ | graph/load/new_model_manager/task_info/task_info.cc \ | ||||
graph/load/new_model_manager/task_info/event_record_task_info.cc \ | graph/load/new_model_manager/task_info/event_record_task_info.cc \ | ||||
@@ -1,5 +1,5 @@ | |||||
/** | /** | ||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | * | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
@@ -13,7 +13,6 @@ | |||||
* See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
#define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
@@ -61,5 +61,6 @@ REGISTER_OP_CREATOR(SwitchN, GeDeletedOp); | |||||
REGISTER_OP_CREATOR(RefMerge, GeDeletedOp); | REGISTER_OP_CREATOR(RefMerge, GeDeletedOp); | ||||
REGISTER_OP_CREATOR(RefSwitch, GeDeletedOp); | REGISTER_OP_CREATOR(RefSwitch, GeDeletedOp); | ||||
REGISTER_OP_CREATOR(TransShape, GeDeletedOp); | REGISTER_OP_CREATOR(TransShape, GeDeletedOp); | ||||
REGISTER_OP_CREATOR(Bitcast, GeDeletedOp); | |||||
} // namespace ge_local | } // namespace ge_local | ||||
} // namespace ge | } // namespace ge |
@@ -78,6 +78,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
graph/load/new_model_manager/task_info/task_info.cc \ | graph/load/new_model_manager/task_info/task_info.cc \ | ||||
graph/load/new_model_manager/tbe_handle_store.cc \ | graph/load/new_model_manager/tbe_handle_store.cc \ | ||||
graph/load/new_model_manager/zero_copy_task.cc \ | graph/load/new_model_manager/zero_copy_task.cc \ | ||||
graph/load/new_model_manager/zero_copy_offset.cc \ | |||||
graph/manager/graph_context.cc \ | graph/manager/graph_context.cc \ | ||||
graph/manager/graph_manager.cc \ | graph/manager/graph_manager.cc \ | ||||
graph/manager/graph_manager_utils.cc \ | graph/manager/graph_manager_utils.cc \ | ||||
@@ -98,10 +99,13 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
graph/passes/addn_pass.cc \ | graph/passes/addn_pass.cc \ | ||||
graph/passes/aicpu_constant_folding_pass.cc \ | graph/passes/aicpu_constant_folding_pass.cc \ | ||||
graph/passes/assert_pass.cc \ | graph/passes/assert_pass.cc \ | ||||
graph/passes/input_output_connection_identify_pass.cc \ | |||||
graph/passes/atomic_addr_clean_pass.cc \ | graph/passes/atomic_addr_clean_pass.cc \ | ||||
graph/passes/mark_same_addr_pass.cc \ | graph/passes/mark_same_addr_pass.cc \ | ||||
graph/passes/mark_graph_unknown_status_pass.cc \ | |||||
graph/partition/dynamic_shape_partition.cc \ | graph/partition/dynamic_shape_partition.cc \ | ||||
graph/passes/base_pass.cc \ | graph/passes/base_pass.cc \ | ||||
graph/passes/bitcast_pass.cc \ | |||||
graph/passes/cast_remove_pass.cc \ | graph/passes/cast_remove_pass.cc \ | ||||
graph/passes/cast_translate_pass.cc \ | graph/passes/cast_translate_pass.cc \ | ||||
graph/passes/common_subexpression_elimination_pass.cc \ | graph/passes/common_subexpression_elimination_pass.cc \ | ||||
@@ -214,6 +218,9 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
graph/passes/variable_prepare_op_pass.cc \ | graph/passes/variable_prepare_op_pass.cc \ | ||||
graph/passes/variable_ref_delete_op_pass.cc \ | graph/passes/variable_ref_delete_op_pass.cc \ | ||||
graph/passes/variable_ref_useless_control_out_delete_pass.cc \ | graph/passes/variable_ref_useless_control_out_delete_pass.cc \ | ||||
graph/passes/end_of_sequence_add_control_pass.cc \ | |||||
graph/passes/memcpy_addr_async_pass.cc \ | |||||
graph/passes/set_input_output_offset_pass.cc \ | |||||
graph/preprocess/graph_preprocess.cc \ | graph/preprocess/graph_preprocess.cc \ | ||||
graph/preprocess/insert_op/ge_aipp_op.cc \ | graph/preprocess/insert_op/ge_aipp_op.cc \ | ||||
graph/preprocess/insert_op/util_insert_aipp_op.cc \ | graph/preprocess/insert_op/util_insert_aipp_op.cc \ | ||||
@@ -23,15 +23,15 @@ | |||||
#include "common/util/error_manager/error_manager.h" | #include "common/util/error_manager/error_manager.h" | ||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "ge/ge_api.h" | #include "ge/ge_api.h" | ||||
#include "graph/ge_context.h" | |||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/ge_context.h" | |||||
#include "graph/manager/graph_manager.h" | #include "graph/manager/graph_manager.h" | ||||
#include "graph/manager/util/rt_context_util.h" | #include "graph/manager/util/rt_context_util.h" | ||||
#include "graph/opsproto_manager.h" | #include "graph/opsproto_manager.h" | ||||
#include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
#include "model/ge_model.h" | |||||
#include "init/gelib.h" | #include "init/gelib.h" | ||||
#include "model/ge_model.h" | |||||
using std::map; | using std::map; | ||||
using std::string; | using std::string; | ||||
@@ -46,6 +46,16 @@ const char *const kFileNameSuffix = "online"; | |||||
std::map<ge::OpEngineType, std::string> engine_type_map{ | std::map<ge::OpEngineType, std::string> engine_type_map{ | ||||
{ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; | {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; | ||||
bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { | |||||
for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) { | |||||
if (tensor_desc->MutableShape().IsUnknownShape()) { | |||||
GELOGI("Contains unknown shape input. set is_dynamic_input to true."); | |||||
return true; | |||||
} | |||||
} | |||||
return false; | |||||
} | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
@@ -55,6 +65,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi | |||||
GELOGI("CheckEngineType: use default engine."); | GELOGI("CheckEngineType: use default engine."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// get op engine name | // get op engine name | ||||
string op_engine_name; | string op_engine_name; | ||||
auto iter = engine_type_map.find(engine_type); | auto iter = engine_type_map.find(engine_type); | ||||
@@ -65,6 +76,12 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi | |||||
GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast<int>(engine_type)); | GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast<int>(engine_type)); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
if (op_desc->HasAttr(ATTR_NAME_UNREGST_OPPATH)) { | |||||
op_desc->SetOpEngineName(op_engine_name); | |||||
op_desc->SetOpKernelLibName(op_engine_name); | |||||
return SUCCESS; | |||||
} | |||||
// set op engine name and opkernelLib. when engine support | // set op engine name and opkernelLib. when engine support | ||||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | ||||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | ||||
@@ -195,18 +212,19 @@ static void GetOpsProtoPath(string &opsproto_path) { | |||||
class GeGenerator::Impl { | class GeGenerator::Impl { | ||||
public: | public: | ||||
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models); | |||||
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models); | |||||
Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); | Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); | ||||
Status SaveParams(GeModelPtr &ge_model, const string &type, const map<string, GeAttrValue> &attrs, | Status SaveParams(GeModelPtr &ge_model, const string &type, const map<string, GeAttrValue> &attrs, | ||||
const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | ||||
Status GenerateInfershapeGraph(const Graph &graph, GraphId &graph_id); | |||||
Status GenerateInfershapeGraph(const Graph &graph); | |||||
GraphManager graph_manager_; | GraphManager graph_manager_; | ||||
SaveParam save_param_; | SaveParam save_param_; | ||||
bool is_offline_ = true; | bool is_offline_ = true; | ||||
bool is_singleop_unregistered_ = false; | |||||
private: | private: | ||||
static std::string Trim(const std::string &str); | static std::string Trim(const std::string &str); | ||||
@@ -280,10 +298,9 @@ Status GeGenerator::GenerateOnlineModel(const Graph &graph, const vector<GeTenso | |||||
} | } | ||||
Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) { | Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) { | ||||
GraphId graph_id; | |||||
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | ||||
Status ret = impl_->GenerateInfershapeGraph(graph, graph_id); | |||||
Status ret = impl_->GenerateInfershapeGraph(graph); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Dump infershape json failed"); | GELOGE(ret, "Dump infershape json failed"); | ||||
if (impl_->graph_manager_.Finalize() != SUCCESS) { | if (impl_->graph_manager_.Finalize() != SUCCESS) { | ||||
@@ -422,11 +439,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||||
} else { | } else { | ||||
ge::RtContextUtil::GetInstance().SetNormalModeContext(ctx); | ge::RtContextUtil::GetInstance().SetNormalModeContext(ctx); | ||||
} | } | ||||
GraphId graph_id; | |||||
GeRootModelPtr ge_root_model = nullptr; | GeRootModelPtr ge_root_model = nullptr; | ||||
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | ||||
impl_->is_offline_ = is_offline; | impl_->is_offline_ = is_offline; | ||||
Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model); | |||||
Status ret = impl_->BuildModel(graph, inputs, ge_root_model); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Build model failed."); | GELOGE(ret, "Build model failed."); | ||||
if (impl_->graph_manager_.Finalize() != SUCCESS) { | if (impl_->graph_manager_.Finalize() != SUCCESS) { | ||||
@@ -478,6 +495,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
domi::GetContext().is_dynamic_input = ContainsDynamicInpus(*op_desc); | |||||
if (op_desc->HasAttr(ATTR_NAME_UNREGST_OPPATH)) { | |||||
impl_->is_singleop_unregistered_ = true; | |||||
} | |||||
// 0. Save original attributes. | // 0. Save original attributes. | ||||
OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); | OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); | ||||
GE_CHECK_NOTNULL(op_desc_tmp); | GE_CHECK_NOTNULL(op_desc_tmp); | ||||
@@ -494,9 +517,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
// 2. Create ComputeGraph. | // 2. Create ComputeGraph. | ||||
string name = ge::CurrentTimeInStr() + "_" + model_file_name; | string name = ge::CurrentTimeInStr() + "_" + model_file_name; | ||||
ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(name); | ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(name); | ||||
if (compute_graph == nullptr) { | |||||
return INTERNAL_ERROR; | |||||
} | |||||
GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR); | GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR); | ||||
// 3. Add Node to ComputeGraph. | // 3. Add Node to ComputeGraph. | ||||
@@ -529,16 +549,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); | Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); | ||||
GELOGI("ATC parser success in single op build."); | GELOGI("ATC parser success in single op build."); | ||||
GraphId graph_id; | |||||
GeRootModelPtr ge_root_model = nullptr; | GeRootModelPtr ge_root_model = nullptr; | ||||
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | ||||
impl_->is_offline_ = is_offline; | impl_->is_offline_ = is_offline; | ||||
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, graph_id, ge_root_model)); | |||||
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); | |||||
map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); | map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); | ||||
GE_CHECK_NOTNULL(ge_root_model); | GE_CHECK_NOTNULL(ge_root_model); | ||||
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | ||||
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | ||||
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||||
if (name_to_ge_model.empty()) { | |||||
GELOGE(PARAM_INVALID, "GetSubgraphInstanceNameToModel is empty."); | |||||
return PARAM_INVALID; | |||||
} | |||||
GeModelPtr &ge_model = name_to_ge_model.begin()->second; | |||||
GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | ||||
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | ||||
GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); | GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); | ||||
@@ -608,7 +631,7 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr & | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, | |||||
Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs, | |||||
GeRootModelPtr &ge_root_model) { | GeRootModelPtr &ge_root_model) { | ||||
static GraphId id = 0; | static GraphId id = 0; | ||||
const std::map<std::string, std::string> options; | const std::map<std::string, std::string> options; | ||||
@@ -627,19 +650,22 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
uint64_t session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us | uint64_t session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us | ||||
ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id); | |||||
if (is_singleop_unregistered_) { | |||||
ret = graph_manager_.BuildGraphForUnregisteredOp(id, inputs, ge_root_model, session_id); | |||||
} else { | |||||
ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id); | |||||
} | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id); | GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id); | ||||
return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | ||||
} | } | ||||
graph_id = id; | |||||
id += 1; | id += 1; | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &graph_id) { | |||||
Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph) { | |||||
static GraphId id = 0; | static GraphId id = 0; | ||||
const std::map<std::string, std::string> options; | const std::map<std::string, std::string> options; | ||||
Status ret = graph_manager_.AddGraph(id, graph, options); | Status ret = graph_manager_.AddGraph(id, graph, options); | ||||
@@ -654,8 +680,6 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &g | |||||
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager generate graph failed"); | GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager generate graph failed"); | ||||
return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | ||||
} | } | ||||
graph_id = id; | |||||
id += 1; | id += 1; | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -37,21 +37,6 @@ const int32_t kInvalidPerfLevel = -1; | |||||
namespace ge { | namespace ge { | ||||
GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {} | GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {} | ||||
Status GraphBuilder::MarkGraph(ComputeGraphPtr &graph) { | |||||
GE_CHECK_NOTNULL(graph); | |||||
bool is_unknown_shape = false; | |||||
for (const auto &node : graph->GetDirectNode()) { | |||||
GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), | |||||
"Get node[%s] shape status failed!", node->GetName().c_str()); | |||||
if (is_unknown_shape) { | |||||
break; | |||||
} | |||||
} | |||||
graph->SetGraphUnknownFlag(is_unknown_shape); | |||||
GELOGD("mark graph [%s] unknown status success! value is %d", graph->GetName().c_str(), is_unknown_shape); | |||||
return SUCCESS; | |||||
} | |||||
void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { | void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { | ||||
stream_max_parallel_num_ = options.stream_max_parallel_num; | stream_max_parallel_num_ = options.stream_max_parallel_num; | ||||
hcom_parallel_ = options.hcom_parallel; | hcom_parallel_ = options.hcom_parallel; | ||||
@@ -277,14 +262,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||||
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | ||||
uint64_t session_id) { | uint64_t session_id) { | ||||
GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | ||||
// mark unknown shape attr | |||||
for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { | |||||
auto status = MarkGraph(sub_graph); | |||||
if (status != SUCCESS) { | |||||
GELOGE(FAILED, "mark graph failed!"); | |||||
return status; | |||||
} | |||||
} | |||||
// Update Root Graph Data size | // Update Root Graph Data size | ||||
for (auto &node : comp_graph->GetDirectNode()) { | for (auto &node : comp_graph->GetDirectNode()) { | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
@@ -297,11 +274,22 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||||
} | } | ||||
// | // | ||||
for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { | for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { | ||||
// exclude functional subgraph in known subgraph | |||||
if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { | |||||
continue; | |||||
} | |||||
if (sub_graph->GetGraphUnknownFlag()) { | if (sub_graph->GetGraphUnknownFlag()) { | ||||
// unknown shape build flow | // unknown shape build flow | ||||
GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | ||||
"Build for unknown shape graph failed."); | "Build for unknown shape graph failed."); | ||||
} else { | } else { | ||||
// reset functional subgraph parent graph as known subgraph | |||||
for (const auto &node : sub_graph->GetDirectNode()) { | |||||
for (const auto &sub_graph_name : node->GetOpDesc()->GetSubgraphInstanceNames()) { | |||||
auto sub_sub_graph = comp_graph->GetSubgraph(sub_graph_name); | |||||
GE_CHK_STATUS_RET(sub_graph->AddSubgraph(sub_sub_graph), "Failed add subgraph to known graph."); | |||||
} | |||||
} | |||||
// known shape build flow | // known shape build flow | ||||
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), | GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), | ||||
"Build for known shape graph failed."); | "Build for known shape graph failed."); | ||||
@@ -450,6 +438,11 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) | |||||
GELOGI("Begin to calc dynamic shape graph data[%s] size.", op_desc->GetName().c_str()); | GELOGI("Begin to calc dynamic shape graph data[%s] size.", op_desc->GetName().c_str()); | ||||
// data op only has one output anchor | // data op only has one output anchor | ||||
ge::GeTensorDesc output_desc = op_desc->GetOutputDesc(0); | ge::GeTensorDesc output_desc = op_desc->GetOutputDesc(0); | ||||
if (output_desc.MutableShape().IsUnknownShape()) { | |||||
GELOGI("No need to update dynamic shape graph data output size for unknown shape data."); | |||||
return SUCCESS; | |||||
} | |||||
int64_t output_size = 0; | int64_t output_size = 0; | ||||
if (ge::TensorUtils::GetSize(output_desc, output_size) != SUCCESS) { | if (ge::TensorUtils::GetSize(output_desc, output_size) != SUCCESS) { | ||||
GELOGW("Get size failed!"); | GELOGW("Get size failed!"); | ||||
@@ -67,7 +67,6 @@ class GraphBuilder { | |||||
GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | ||||
Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | ||||
uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
Status MarkGraph(ComputeGraphPtr &graph); | |||||
int build_mode_; | int build_mode_; | ||||
std::map<std::string, int> stream_max_parallel_num_; | std::map<std::string, int> stream_max_parallel_num_; | ||||
@@ -55,6 +55,13 @@ using std::unordered_map; | |||||
using std::unordered_set; | using std::unordered_set; | ||||
using std::vector; | using std::vector; | ||||
void AlignMemOffset(size_t &mem_align_size) { | |||||
if (mem_align_size <= 0) { | |||||
return; | |||||
} | |||||
mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; | |||||
} | |||||
void MemoryBlock::SetHeadOffset(size_t offset) { | void MemoryBlock::SetHeadOffset(size_t offset) { | ||||
head_offset_ = offset; | head_offset_ = offset; | ||||
size_t child_offset = head_offset_; | size_t child_offset = head_offset_; | ||||
@@ -92,7 +99,7 @@ void MemoryBlock::Resize() { | |||||
} else { | } else { | ||||
size_t block_size = (child_block_size > *iter) ? child_block_size : *iter; | size_t block_size = (child_block_size > *iter) ? child_block_size : *iter; | ||||
if ((block_size > 0) && (block_size % MEM_ALIGN_SIZE != 0)) { | if ((block_size > 0) && (block_size % MEM_ALIGN_SIZE != 0)) { | ||||
block_size = (block_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; | |||||
AlignMemOffset(block_size); | |||||
} | } | ||||
block_size_ = block_size; | block_size_ = block_size; | ||||
if (last_continuous_block_) { | if (last_continuous_block_) { | ||||
@@ -101,6 +108,20 @@ void MemoryBlock::Resize() { | |||||
} | } | ||||
} | } | ||||
size_t MemoryBlock::AlignSize() const { | |||||
size_t align_block_size = 0; | |||||
auto iter = std::max_element(real_size_list_.begin(), real_size_list_.end()); | |||||
if (iter == real_size_list_.end()) { | |||||
GELOGW("real_size_list_ is empty"); | |||||
} else { | |||||
align_block_size = *iter; | |||||
if ((align_block_size > 0) && (align_block_size % MEM_ALIGN_SIZE != 0)) { | |||||
AlignMemOffset(align_block_size); | |||||
} | |||||
} | |||||
return align_block_size; | |||||
} | |||||
bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { | bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { | ||||
if (node_type_index_list_.empty()) { | if (node_type_index_list_.empty()) { | ||||
return false; | return false; | ||||
@@ -133,31 +154,69 @@ bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { | |||||
} | } | ||||
bool CanNotLifeReuse(MemoryBlock *block) { | bool CanNotLifeReuse(MemoryBlock *block) { | ||||
if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_ || block->continuous_block_) { | |||||
if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_) { | |||||
return true; | return true; | ||||
} | } | ||||
return false; | return false; | ||||
} | } | ||||
void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { | |||||
// continuous memory case:only real_size is maximum can be reused and only one continuous memory in one block | |||||
auto it_block = std::max_element(std::begin(block->NoAlignSizeList()), std::end(block->NoAlignSizeList())); | |||||
auto it_this = std::max_element(std::begin(NoAlignSizeList()), std::end(NoAlignSizeList())); | |||||
if (it_block != std::end(block->NoAlignSizeList()) && it_this != std::end(NoAlignSizeList())) { | |||||
if ((continuous_block_ && block->continuous_block_) || (continuous_block_ && (*it_this < *it_block)) || | |||||
(block->continuous_block_ && (*it_this > *it_block))) { | |||||
GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", *it_this, | |||||
continuous_block_, *it_block, block->continuous_block_); | |||||
return; | |||||
} | |||||
} | |||||
MemoryBlock *parent = nullptr; | |||||
MemoryBlock *child = nullptr; | |||||
// merge small block to large block | |||||
if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { | |||||
if ((block->child_offset_ + AlignSize()) <= *it_block) { | |||||
parent = block; | |||||
child = this; | |||||
} | |||||
} | |||||
if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) { | |||||
parent->child_blocks_.emplace_back(child); | |||||
parent->child_offset_ += child->AlignSize(); | |||||
child->deleted_block_ = true; | |||||
GELOGI( | |||||
"Add continuous block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" | |||||
" block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", | |||||
child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, | |||||
parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); | |||||
} | |||||
} | |||||
void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { | void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { | ||||
if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) { | if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) { | ||||
return; | return; | ||||
} | } | ||||
if (block->continuous_block_) { | |||||
AddContinuousLifeReuseBlock(block, total_node_depend_stream_life); | |||||
return; | |||||
} | |||||
MemoryBlock *parent = nullptr; | MemoryBlock *parent = nullptr; | ||||
MemoryBlock *child = nullptr; | MemoryBlock *child = nullptr; | ||||
// merge small block to large block | // merge small block to large block | ||||
if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { | if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { | ||||
if ((child_offset_ + block->block_size_) <= block_size_) { | |||||
if ((child_offset_ + block->AlignSize()) <= AlignSize()) { | |||||
parent = this; | parent = this; | ||||
child = block; | child = block; | ||||
} else if ((block->child_offset_ + block_size_) <= block->block_size_) { | |||||
} else if ((block->child_offset_ + AlignSize()) <= block->AlignSize()) { | |||||
parent = block; | parent = block; | ||||
child = this; | child = this; | ||||
} | } | ||||
} | } | ||||
if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) { | if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) { | ||||
parent->child_blocks_.emplace_back(child); | parent->child_blocks_.emplace_back(child); | ||||
parent->child_offset_ += child->block_size_; | |||||
parent->child_offset_ += child->AlignSize(); | |||||
child->deleted_block_ = true; | child->deleted_block_ = true; | ||||
GELOGI( | GELOGI( | ||||
"Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" | "Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" | ||||
@@ -431,7 +490,7 @@ size_t GetBlockSize(size_t size, const vector<int64_t> &ranges) { | |||||
} | } | ||||
GELOGW("Memory needed size:%zu is beyond the biggest block in memory ranges.", size); | GELOGW("Memory needed size:%zu is beyond the biggest block in memory ranges.", size); | ||||
return 0; | |||||
return size; | |||||
} | } | ||||
bool IsDirectOutputNode(const NodePtr &node, int idx) { | bool IsDirectOutputNode(const NodePtr &node, int idx) { | ||||
@@ -465,34 +524,8 @@ void ReduceReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t | |||||
} | } | ||||
bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const MemoryBlock &reusable_block, | bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const MemoryBlock &reusable_block, | ||||
size_t block_size, size_t real_size, bool continuous, int64_t atomic_addr_clean_id) { | |||||
size_t block_size, size_t real_size, bool continuous) { | |||||
bool can_reuse = false; | bool can_reuse = false; | ||||
// If node is before atomic_addr_clean node, the continus memory can't be reused. | |||||
if (!reusable_block.NodeTypeIndexList().empty()) { | |||||
auto node = reusable_block.NodeTypeIndexList()[0].node; | |||||
if (node != nullptr) { | |||||
auto op_desc = node->GetOpDesc(); | |||||
if (op_desc != nullptr) { | |||||
if ((op_desc->GetId() < atomic_addr_clean_id) && continuous) { | |||||
return false; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
// continuous memory case:only real_size is maximum can be reused and only one continuous memory in one block | |||||
if (continuous || reusable_block.continuous_block_) { | |||||
auto it = | |||||
std::max_element(std::begin(reusable_block.NoAlignSizeList()), std::end(reusable_block.NoAlignSizeList())); | |||||
if (it != std::end(reusable_block.NoAlignSizeList())) { | |||||
GE_IF_BOOL_EXEC((continuous && reusable_block.continuous_block_) || (continuous && (real_size < *it)) || | |||||
(reusable_block.continuous_block_ && (real_size > *it)), | |||||
GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", | |||||
real_size, continuous, *it, reusable_block.continuous_block_); | |||||
return false;); | |||||
} | |||||
} | |||||
if (reusable_block.Size() == block_size) { | if (reusable_block.Size() == block_size) { | ||||
can_reuse = true; | can_reuse = true; | ||||
} else { | } else { | ||||
@@ -683,6 +716,34 @@ void BlockMemAssigner::PrintSymbolMap() { | |||||
} | } | ||||
} | } | ||||
bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { | |||||
if (n == nullptr) { | |||||
GELOGE(FAILED, "Node is null."); | |||||
return false; | |||||
} | |||||
// Get the continuous output type of the node, default is false | |||||
bool is_output_continuous = false; | |||||
auto node_desc = n->GetOpDesc(); | |||||
if (node_desc == nullptr) { | |||||
GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); | |||||
return false; | |||||
} | |||||
// If GetBool fail, is_output_continuous is false. | |||||
(void)ge::AttrUtils::GetBool(node_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); | |||||
if (is_output_continuous) { | |||||
if (n->GetOwnerComputeGraph() != nullptr) { | |||||
string graph_name = n->GetOwnerComputeGraph()->GetName(); | |||||
GELOGI("%s name[%s] set continuous, output size[%u].", graph_name.c_str(), n->GetName().c_str(), | |||||
n->GetAllOutDataAnchorsSize()); | |||||
return true; | |||||
} | |||||
} | |||||
return false; | |||||
} | |||||
MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, | MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, | ||||
MemoryType mem_type, const NodePtr &n, uint32_t out_index, | MemoryType mem_type, const NodePtr &n, uint32_t out_index, | ||||
const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | ||||
@@ -699,7 +760,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && | is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && | ||||
reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); | reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); | ||||
auto stream_id = node_op_desc->GetStreamId(); | auto stream_id = node_op_desc->GetStreamId(); | ||||
if (is_reuse_memory) { | |||||
if (is_reuse_memory && !continuous) { | |||||
for (auto it = reusable_blocks_[stream_id].begin(); it != reusable_blocks_[stream_id].end(); ++it) { | for (auto it = reusable_blocks_[stream_id].begin(); it != reusable_blocks_[stream_id].end(); ++it) { | ||||
MemoryBlock *reusable_block = *it; | MemoryBlock *reusable_block = *it; | ||||
if (!IsPostReuse(reusable_block)) { | if (!IsPostReuse(reusable_block)) { | ||||
@@ -709,8 +770,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
} | } | ||||
// A node can reuse blocks of the same stream and preorder streams | // A node can reuse blocks of the same stream and preorder streams | ||||
auto id = GetAtomicAddrCleanId(); | |||||
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous, id)) { | |||||
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { | |||||
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); | reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); | ||||
if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | ||||
@@ -750,6 +810,47 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
return block; | return block; | ||||
} | } | ||||
MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | |||||
const bool is_op_reuse_mem) { | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); | |||||
auto node_op_desc = n->GetOpDesc(); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); | |||||
MemoryBlock *block = nullptr; | |||||
int64_t total_size = 0; | |||||
for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | |||||
auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | |||||
if (output_op_desc == nullptr) { | |||||
return nullptr; | |||||
} | |||||
int64_t size = 0; | |||||
if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | |||||
GELOGI("Get size failed"); | |||||
return nullptr; | |||||
} | |||||
size_t align_size = static_cast<size_t>(size); | |||||
AlignMemOffset(align_size); | |||||
total_size += align_size; | |||||
// only apply total size in first block | |||||
if (index != 0) { | |||||
zero_memory_list_.emplace_back(n, kOutput, index); | |||||
} | |||||
} | |||||
auto block_size = GetBlockSize(total_size, ranges); | |||||
GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", node_op_desc->GetName().c_str(), total_size, | |||||
block_size); | |||||
vector<bool> workspace_reuse_flag; | |||||
block = ApplyMemory(block_size, total_size, total_size, kOutput, n, 0, workspace_reuse_flag, is_op_reuse_mem, true); | |||||
if (block != nullptr) { | |||||
// hccl task need align header and tail | |||||
block->first_continuous_block_ = true; | |||||
block->last_continuous_block_ = true; | |||||
} | |||||
return block; | |||||
} | |||||
MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | ||||
const bool is_op_reuse_mem, const bool continuous) { | const bool is_op_reuse_mem, const bool continuous) { | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); | ||||
@@ -991,6 +1092,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
// Allocate memory for the current node and release node memory of the same size in the workspace | // Allocate memory for the current node and release node memory of the same size in the workspace | ||||
GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | ||||
ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id]);) | ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id]);) | ||||
if (IsContinuousOutput(node)) { | |||||
(void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | |||||
return SUCCESS; | |||||
} | |||||
for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | ||||
int64_t size = 0; | int64_t size = 0; | ||||
auto output_op_desc = op_desc->GetOutputDescPtr(i); | auto output_op_desc = op_desc->GetOutputDescPtr(i); | ||||
@@ -1017,7 +1122,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
continue; | continue; | ||||
} | } | ||||
// atomic can't be reused | // atomic can't be reused | ||||
if (is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic) { | |||||
bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic; | |||||
if (need_change) { | |||||
is_op_reuse_mem_ = false; | is_op_reuse_mem_ = false; | ||||
} | } | ||||
MemoryBlock *mem_block = ApplyOutMemory(node, i, ranges, is_op_reuse_mem_, out_node_set_continuous_input); | MemoryBlock *mem_block = ApplyOutMemory(node, i, ranges, is_op_reuse_mem_, out_node_set_continuous_input); | ||||
@@ -1225,10 +1331,12 @@ static bool CompareBlockIndex(MemoryBlock *left, MemoryBlock *right) { | |||||
/// @param [in] input blocks need continuous | /// @param [in] input blocks need continuous | ||||
/// @param [out] blocks after continuous order | /// @param [out] blocks after continuous order | ||||
/// @param [in/out] blocks ordered | /// @param [in/out] blocks ordered | ||||
/// @param [in] input or output | |||||
/// | /// | ||||
void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks, | void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks, | ||||
const std::map<MemoryBlock *, uint32_t> block_map, | const std::map<MemoryBlock *, uint32_t> block_map, | ||||
std::vector<MemoryBlock *> &dest_blocks, std::vector<MemoryBlock *> &continuous_blocks) { | |||||
std::vector<MemoryBlock *> &dest_blocks, std::vector<MemoryBlock *> &continuous_blocks, | |||||
const std::string &type) { | |||||
for (auto &memory_block : org_blocks) { | for (auto &memory_block : org_blocks) { | ||||
if (memory_block == nullptr || memory_block->deleted_block_) { | if (memory_block == nullptr || memory_block->deleted_block_) { | ||||
continue; | continue; | ||||
@@ -1245,7 +1353,7 @@ void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks, | |||||
for (auto &memory_block : continuous_blocks) { | for (auto &memory_block : continuous_blocks) { | ||||
GE_IF_BOOL_EXEC(memory_block == nullptr, continue); | GE_IF_BOOL_EXEC(memory_block == nullptr, continue); | ||||
GELOGI("Block continuous input index:%d", memory_block->input_index_); | |||||
GELOGI("Block continuous %s index:%d", type.c_str(), memory_block->input_index_); | |||||
count++; | count++; | ||||
if (count == 1) { | if (count == 1) { | ||||
memory_block->first_continuous_block_ = true; | memory_block->first_continuous_block_ = true; | ||||
@@ -1280,7 +1388,7 @@ void BlockMemAssigner::AssignContinuousBlocks() { | |||||
continuous_block_map.size(), continuous_blocks.size()); | continuous_block_map.size(), continuous_blocks.size()); | ||||
continue; | continue; | ||||
} | } | ||||
ReAssignContinuousBlocks(memory_blocks_, continuous_block_map, dest_memory_blocks, continuous_blocks); | |||||
ReAssignContinuousBlocks(memory_blocks_, continuous_block_map, dest_memory_blocks, continuous_blocks, "input"); | |||||
memory_blocks_.swap(dest_memory_blocks); | memory_blocks_.swap(dest_memory_blocks); | ||||
} | } | ||||
} | } | ||||
@@ -1292,14 +1400,25 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { | |||||
} | } | ||||
for (size_t i = 0; i < memory_blocks_.size(); ++i) { | for (size_t i = 0; i < memory_blocks_.size(); ++i) { | ||||
auto parent = memory_blocks_[i]; | auto parent = memory_blocks_[i]; | ||||
if (parent == nullptr || parent->deleted_block_) { | |||||
if (parent == nullptr || parent->deleted_block_ || parent->continuous_block_) { | |||||
continue; | continue; | ||||
} | } | ||||
if (parent->reuse_mem_ && !IsPostReuse(parent)) { | if (parent->reuse_mem_ && !IsPostReuse(parent)) { | ||||
parent->reuse_mem_ = false; | parent->reuse_mem_ = false; | ||||
} | } | ||||
for (size_t j = i + 1; j < memory_blocks_.size(); ++j) { | for (size_t j = i + 1; j < memory_blocks_.size(); ++j) { | ||||
parent->AddLifeReuseBlock(memory_blocks_[j], total_node_depend_stream_life_); | |||||
auto child = memory_blocks_[j]; | |||||
if (child == nullptr) { | |||||
continue; | |||||
} | |||||
// If node is before atomic_addr_clean node, the continus memory can't be reused. | |||||
if (!parent->NodeTypeIndexList().empty() && child->continuous_block_) { | |||||
auto node = parent->NodeTypeIndexList()[0].node; | |||||
if (node == nullptr || node->GetOpDesc() == nullptr || (node->GetOpDesc()->GetId() < GetAtomicAddrCleanId())) { | |||||
continue; | |||||
} | |||||
} | |||||
parent->AddLifeReuseBlock(child, total_node_depend_stream_life_); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -1450,8 +1569,8 @@ Status BlockMemAssigner::Assign() { | |||||
bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | ||||
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | ||||
(node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) || | |||||
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || | |||||
(node_type == HVDCALLBACKBROADCAST) || (node_type == HVDCALLBACKALLREDUCE); | |||||
(node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || | |||||
(node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || | |||||
(node_type == HVDCALLBACKBROADCAST); | |||||
} | } | ||||
} // namespace ge | } // namespace ge |
@@ -90,6 +90,8 @@ class MemoryBlock { | |||||
} | } | ||||
size_t Size() const { return block_size_; } | size_t Size() const { return block_size_; } | ||||
size_t AlignSize() const; | |||||
void SetHeadOffset(size_t offset); | void SetHeadOffset(size_t offset); | ||||
void SetTailOffset(size_t offset); | void SetTailOffset(size_t offset); | ||||
@@ -118,6 +120,8 @@ class MemoryBlock { | |||||
bool IsSameLabel(std::string &first_batch_label); | bool IsSameLabel(std::string &first_batch_label); | ||||
void AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life); | |||||
void AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &node_depend_stream_life); | void AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &node_depend_stream_life); | ||||
void SetLifeTimeEnd(size_t time); | void SetLifeTimeEnd(size_t time); | ||||
@@ -362,6 +366,10 @@ class BlockMemAssigner : public MemAssigner { | |||||
/// | /// | ||||
void ReuseBlocksByLifeTime(size_t range_size); | void ReuseBlocksByLifeTime(size_t range_size); | ||||
bool IsContinuousOutput(const NodePtr &n); | |||||
MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); | |||||
std::unordered_map<int64_t, std::vector<MemoryBlock *>> reusable_blocks_; | std::unordered_map<int64_t, std::vector<MemoryBlock *>> reusable_blocks_; | ||||
std::map<std::string, uint64_t> reusable_block_counts_; | std::map<std::string, uint64_t> reusable_block_counts_; | ||||
@@ -293,7 +293,8 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||||
} else if (is_loop_graph) { | } else if (is_loop_graph) { | ||||
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start)); | GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start)); | ||||
} else { | } else { | ||||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, mem_clean_start, mem_clean_size), "SetAtomicCleanAttr failed."); | |||||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}), | |||||
"SetAtomicCleanAttr failed."); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -441,35 +442,33 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node | |||||
GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); | GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); | ||||
vector<int64_t> output_list = out_op_desc->GetOutputOffset(); | vector<int64_t> output_list = out_op_desc->GetOutputOffset(); | ||||
if (out_op_desc->GetOutputsSize() > output_list.size()) { | |||||
if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { | |||||
GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", | GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", | ||||
out_op_desc->GetOutputsSize(), output_list.size()); | out_op_desc->GetOutputsSize(), output_list.size()); | ||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; | |||||
size_t mem_offset = output_list[0]; | |||||
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | ||||
output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_; | |||||
size_t pre_mem_offset = memory_offset_[0].mem_offset_; | |||||
output_list[out_data_anchor->GetIdx()] = mem_offset; | |||||
int64_t tensor_desc_size = 0; | int64_t tensor_desc_size = 0; | ||||
if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) != | if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) != | ||||
ge::SUCCESS) { | ge::SUCCESS) { | ||||
GELOGE(FAILED, "GetSize failed."); | GELOGE(FAILED, "GetSize failed."); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
memory_offset_[0].mem_offset_ += tensor_desc_size; | |||||
AlignMemOffset(MEM_ALIGN_SIZE); | |||||
mem_offset += tensor_desc_size; | |||||
if (mem_offset <= 0) { | |||||
return FAILED; | |||||
} | |||||
mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; | |||||
GELOGI( | GELOGI( | ||||
"[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " | |||||
"[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " | |||||
"real_size[%ld].", | "real_size[%ld].", | ||||
node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), | node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), | ||||
pre_mem_offset, out_op_desc->GetStreamId(), (memory_offset_[0].mem_offset_ - pre_mem_offset), tensor_desc_size); | |||||
output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); | |||||
} | } | ||||
out_op_desc->SetOutputOffset(output_list); | out_op_desc->SetOutputOffset(output_list); | ||||
memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; | |||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
} | } | ||||
@@ -809,14 +808,12 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt | |||||
} | } | ||||
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | ||||
if (compute_graph_ == nullptr) { | |||||
GELOGE(ge::PARAM_INVALID, "Graph must not be null."); | |||||
return ge::PARAM_INVALID; | |||||
} | |||||
GE_CHECK_NOTNULL(compute_graph_); | |||||
// Atomic op memory start addr | // Atomic op memory start addr | ||||
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | ||||
GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_); | GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_); | ||||
vector<NodePtr> connect_netoutput_nodes; | |||||
for (auto &node : compute_graph_->GetAllNodes()) { | for (auto &node : compute_graph_->GetAllNodes()) { | ||||
auto node_op_desc = node->GetOpDesc(); | auto node_op_desc = node->GetOpDesc(); | ||||
if (node_op_desc == nullptr) { | if (node_op_desc == nullptr) { | ||||
@@ -839,36 +836,20 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||||
return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
} | } | ||||
// Atomic op memory start addr of loop graph | |||||
int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||||
// Reassign atomic node output memory | |||||
Status ret = AssignAtomicOutputMemory(node); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Assign atomic output memory failed, node is %s.", node_op_desc->GetName().c_str()); | |||||
return ret; | |||||
vector<int> is_connect_netoutput; | |||||
// If GetBool fail, attr is_connect_netoutput is an empty vector. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); | |||||
if (!is_connect_netoutput.empty()) { | |||||
connect_netoutput_nodes.emplace_back(node); | |||||
continue; | |||||
} | } | ||||
// Check atomic workspace | |||||
map<string, map<int64_t, int64_t>> sub_node_workspace_info; | |||||
sub_node_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, sub_node_workspace_info); | |||||
if (!sub_node_workspace_info.empty()) { | |||||
bool is_fusion_node = false; | |||||
// If GetBool fail, is_fusion_node is false. | |||||
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); | |||||
if (is_fusion_node) { | |||||
// Assign fusion atomic node workspace memory | |||||
ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, sub_node_workspace_info); | |||||
} else { | |||||
// Assign single ordinary atomic node workspace memory, not include fusion node | |||||
ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, sub_node_workspace_info); | |||||
} | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); | |||||
return ret; | |||||
} | |||||
// Atomic op memory start addr of loop graph | |||||
int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||||
vector<int64_t> mem_offset_end; | |||||
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { | |||||
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); | |||||
return FAILED; | |||||
} | } | ||||
/// In networks with loop op, atomic op uses atomic_addr_clean op independently, | /// In networks with loop op, atomic op uses atomic_addr_clean op independently, | ||||
@@ -883,13 +864,80 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||||
// Set the address attr of atomic clean operator | // Set the address attr of atomic clean operator | ||||
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | ||||
if (atomic_mem_size != 0) { | if (atomic_mem_size != 0) { | ||||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, atomic_mem_start, atomic_mem_size), "SetAtomicCleanAttr failed."); | |||||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}), | |||||
"SetAtomicCleanAttr failed."); | |||||
} | |||||
} | |||||
if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) { | |||||
GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); | |||||
return FAILED; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, | |||||
vector<int64_t> &mem_offset_end) { | |||||
auto node_op_desc = node->GetOpDesc(); | |||||
// Assign atomic node output memory | |||||
Status ret = AssignAtomicOutputMemory(node, mem_offset_end); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str()); | |||||
return ret; | |||||
} | |||||
// Check and assign atomic node workspace memory | |||||
map<string, map<int64_t, int64_t>> atomic_workspace_info; | |||||
atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info); | |||||
if (!atomic_workspace_info.empty()) { | |||||
bool is_fusion_node = false; | |||||
// If GetBool fail, is_fusion_node is false. | |||||
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); | |||||
if (is_fusion_node) { | |||||
// Assign fusion atomic node workspace memory | |||||
ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end); | |||||
} else { | |||||
// Assign single ordinary atomic node workspace memory, not include fusion node | |||||
ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end); | |||||
} | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); | |||||
return ret; | |||||
} | } | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) { | |||||
for (auto &node : connect_netoutput_nodes) { | |||||
GE_CHECK_NOTNULL(node); | |||||
if (node->GetOpDesc() == nullptr) { | |||||
GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str()); | |||||
continue; | |||||
} | |||||
// Atomic memory start addr | |||||
int64_t original_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||||
GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.", | |||||
node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start); | |||||
vector<int64_t> mem_offset_end; | |||||
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { | |||||
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
// All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately. | |||||
if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) { | |||||
GELOGE(FAILED, "Failed to set atomic attr separately."); | |||||
return FAILED; | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status GraphMemoryAssigner::AssignReferenceMemory() { | Status GraphMemoryAssigner::AssignReferenceMemory() { | ||||
for (auto &node : compute_graph_->GetDirectNode()) { | for (auto &node : compute_graph_->GetDirectNode()) { | ||||
// Get the reference type of the node, default is false | // Get the reference type of the node, default is false | ||||
@@ -971,9 +1019,10 @@ bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) { | |||||
return true; | return true; | ||||
} | } | ||||
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { | |||||
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) { | |||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED); | GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED); | ||||
mem_offset_end.clear(); | |||||
GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str()); | GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str()); | ||||
vector<int64_t> atomic_output_index; | vector<int64_t> atomic_output_index; | ||||
@@ -996,24 +1045,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { | |||||
// If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here | // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here | ||||
bool is_assigned_mem = false; | bool is_assigned_mem = false; | ||||
if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) { | |||||
GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index); | |||||
return ge::PARAM_INVALID; | |||||
} | |||||
auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index); | |||||
GE_CHECK_NOTNULL(out_data_anchor); | |||||
auto input_anchors = out_data_anchor->GetPeerInDataAnchors(); | |||||
for (auto &input_anchor : input_anchors) { | |||||
auto output_node = input_anchor->GetOwnerNode(); | |||||
/// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address | |||||
/// has been assigned | |||||
vector<int64_t> atomic_input_index; | |||||
(void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index); | |||||
if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) { | |||||
is_assigned_mem = true; | |||||
break; | |||||
} | |||||
if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) { | |||||
GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str()); | |||||
return ge::FAILED; | |||||
} | } | ||||
// If you have already assigned an atomic address, skip it, and you don't need to reassign it. | // If you have already assigned an atomic address, skip it, and you don't need to reassign it. | ||||
@@ -1038,6 +1072,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { | |||||
memory_offset_[0].mem_offset_ += size; | memory_offset_[0].mem_offset_ += size; | ||||
AlignMemOffset(MEM_ALIGN_SIZE); | AlignMemOffset(MEM_ALIGN_SIZE); | ||||
mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); | |||||
} | } | ||||
op_desc->SetOutputOffset(output_list); | op_desc->SetOutputOffset(output_list); | ||||
@@ -1045,8 +1080,33 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { | |||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
} | } | ||||
Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index, | |||||
bool &is_mem_assigned) { | |||||
if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) { | |||||
GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index); | |||||
return ge::PARAM_INVALID; | |||||
} | |||||
auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index); | |||||
GE_CHECK_NOTNULL(out_data_anchor); | |||||
auto input_anchors = out_data_anchor->GetPeerInDataAnchors(); | |||||
for (auto &input_anchor : input_anchors) { | |||||
auto output_node = input_anchor->GetOwnerNode(); | |||||
/// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address | |||||
/// has been assigned | |||||
vector<int64_t> atomic_input_index; | |||||
(void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index); | |||||
if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) { | |||||
is_mem_assigned = true; | |||||
break; | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | ||||
map<string, map<int64_t, int64_t>> &workspace_info) { | |||||
map<string, map<int64_t, int64_t>> &workspace_info, | |||||
vector<int64_t> &mem_offset_end) { | |||||
GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str()); | GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str()); | ||||
vector<int64_t> workspace_vector = op_desc->GetWorkspace(); | vector<int64_t> workspace_vector = op_desc->GetWorkspace(); | ||||
@@ -1078,6 +1138,7 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc | |||||
op_desc->GetStreamId(), workspace_size, workspace_size); | op_desc->GetStreamId(), workspace_size, workspace_size); | ||||
memory_offset_[0].mem_offset_ += workspace_size; | memory_offset_[0].mem_offset_ += workspace_size; | ||||
mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); | |||||
} | } | ||||
} | } | ||||
op_desc->SetWorkspace(workspace_vector); | op_desc->SetWorkspace(workspace_vector); | ||||
@@ -1086,7 +1147,8 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc | |||||
} | } | ||||
Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | ||||
map<string, map<int64_t, int64_t>> &workspace_info) { | |||||
map<string, map<int64_t, int64_t>> &workspace_info, | |||||
vector<int64_t> &mem_offset_end) { | |||||
GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str()); | GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str()); | ||||
map<string, map<int64_t, int64_t>> sub_node_workspace_offset; | map<string, map<int64_t, int64_t>> sub_node_workspace_offset; | ||||
@@ -1108,6 +1170,7 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt | |||||
op_desc->GetStreamId(), workspace_size, workspace_size); | op_desc->GetStreamId(), workspace_size, workspace_size); | ||||
memory_offset_[0].mem_offset_ += workspace_size; | memory_offset_[0].mem_offset_ += workspace_size; | ||||
mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); | |||||
index_offset.insert(std::make_pair(workspace_index, workspace_offset)); | index_offset.insert(std::make_pair(workspace_index, workspace_offset)); | ||||
} | } | ||||
sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset)); | sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset)); | ||||
@@ -1287,6 +1350,47 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | |||||
const vector<int64_t> &mem_offset_end) { | |||||
GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start); | |||||
// Parsing offset and size vectors | |||||
vector<int64_t> memory_offset_start; | |||||
vector<int64_t> memory_offset_size; | |||||
memory_offset_start.emplace_back(atomic_mem_start); | |||||
for (size_t i = 0; i < mem_offset_end.size(); ++i) { | |||||
memory_offset_start.emplace_back(mem_offset_end[i]); | |||||
// Number 1 means element index | |||||
auto size = memory_offset_start[i + 1] - memory_offset_start[i]; | |||||
memory_offset_size.emplace_back(size); | |||||
} | |||||
memory_offset_start.pop_back(); | |||||
const auto &in_control_anchor = node->GetInControlAnchor(); | |||||
if (!memory_offset_size.empty() && in_control_anchor != nullptr) { | |||||
for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||||
if (peer_out_control_anchor == nullptr) { | |||||
continue; | |||||
} | |||||
auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); | |||||
auto peer_out_node_desc = peer_out_node->GetOpDesc(); | |||||
if (peer_out_node_desc == nullptr) { | |||||
continue; | |||||
} | |||||
GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(), | |||||
peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); | |||||
if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { | |||||
if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) { | |||||
GELOGE(FAILED, "Set atomic clean attr failed."); | |||||
return FAILED; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) { | Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) { | ||||
// set the address attr of atomic clean operator for loop graph | // set the address attr of atomic clean operator for loop graph | ||||
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | ||||
@@ -1308,7 +1412,7 @@ Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int6 | |||||
peer_out_node_desc->GetType().c_str()); | peer_out_node_desc->GetType().c_str()); | ||||
if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { | if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { | ||||
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, atomic_mem_start, atomic_mem_size), | |||||
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}), | |||||
GELOGE(FAILED, "SetAtomicCleanAttr failed."); | GELOGE(FAILED, "SetAtomicCleanAttr failed."); | ||||
return FAILED); | return FAILED); | ||||
} | } | ||||
@@ -1317,8 +1421,8 @@ Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int6 | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t atomic_mem_start, | |||||
int64_t atomic_mem_size) { | |||||
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector<int64_t> &atomic_mem_start, | |||||
const vector<int64_t> &atomic_mem_size) { | |||||
for (ge::NodePtr &node : compute_graph_->GetAllNodes()) { | for (ge::NodePtr &node : compute_graph_->GetAllNodes()) { | ||||
auto node_op_desc = node->GetOpDesc(); | auto node_op_desc = node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | ||||
@@ -1327,15 +1431,15 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t ato | |||||
((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { | ((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { | ||||
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace(); | vector<int64_t> workspace_vector = node_op_desc->GetWorkspace(); | ||||
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); | vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); | ||||
workspace_vector.emplace_back(atomic_mem_start); | |||||
workspace_byte_vector.emplace_back(atomic_mem_size); | |||||
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||||
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||||
node_op_desc->SetWorkspace(workspace_vector); | node_op_desc->SetWorkspace(workspace_vector); | ||||
node_op_desc->SetWorkspaceBytes(workspace_byte_vector); | node_op_desc->SetWorkspaceBytes(workspace_byte_vector); | ||||
std::vector<int64_t> mem_start_vector; | std::vector<int64_t> mem_start_vector; | ||||
// If GetListInt fail, mem_start_vector is empty. | // If GetListInt fail, mem_start_vector is empty. | ||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | ||||
mem_start_vector.emplace_back(atomic_mem_start); | |||||
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | ||||
GELOGE(FAILED, "SetListInt failed."); | GELOGE(FAILED, "SetListInt failed."); | ||||
return FAILED); | return FAILED); | ||||
@@ -1343,16 +1447,26 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t ato | |||||
std::vector<int64_t> mem_size_vector; | std::vector<int64_t> mem_size_vector; | ||||
// If GetListInt fail, mem_size_vector is empty. | // If GetListInt fail, mem_size_vector is empty. | ||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | ||||
mem_size_vector.emplace_back(atomic_mem_size); | |||||
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | ||||
GELOGE(FAILED, "SetListInt failed."); | GELOGE(FAILED, "SetListInt failed."); | ||||
return FAILED); | return FAILED); | ||||
GELOGI( | |||||
"[IMAS]SetAtomicCleanAttr : Set %s name[%s] output[%d] offset to [%ld] streamid[%ld] size[%ld] " | |||||
"realsize[%ld].", | |||||
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), 0, atomic_mem_start, | |||||
node->GetOpDesc()->GetStreamId(), atomic_mem_size, atomic_mem_size); | |||||
std::stringstream ss; | |||||
for (auto iter : atomic_mem_start) { | |||||
ss << iter << " "; | |||||
} | |||||
string atomic_mem_start_str = ss.str(); | |||||
ss.clear(); | |||||
ss.str(""); | |||||
for (auto iter : atomic_mem_size) { | |||||
ss << iter << " "; | |||||
} | |||||
string atomic_mem_size_str = ss.str(); | |||||
GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", | |||||
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), | |||||
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); | |||||
} | } | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -147,22 +147,33 @@ class GraphMemoryAssigner { | |||||
/// | /// | ||||
bool CheckInputIsSupportAtomic(const ge::NodePtr &node); | bool CheckInputIsSupportAtomic(const ge::NodePtr &node); | ||||
ge::Status AssignAtomicOutputMemory(const ge::NodePtr &node); | |||||
ge::Status GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index, bool &is_mem_assigned); | |||||
ge::Status AssignAtomicOutputMemory(const ge::NodePtr &node, std::vector<int64_t> &mem_offset_end); | |||||
ge::Status AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | ge::Status AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | ||||
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info); | |||||
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info, | |||||
std::vector<int64_t> &mem_offset_end); | |||||
ge::Status AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | ge::Status AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | ||||
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info); | |||||
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info, | |||||
std::vector<int64_t> &mem_offset_end); | |||||
ge::Status AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, std::vector<int64_t> &mem_offset_end); | |||||
ge::Status AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes); | |||||
ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | |||||
const std::vector<int64_t> &mem_offset_end); | |||||
/// | /// | ||||
/// @brief set loop graph atomic attr | /// @brief set loop graph atomic attr | ||||
/// @param node | |||||
/// @param node, atomic memory assignment start offset | |||||
/// @param atomic_mem_start: atomic op memory start address | /// @param atomic_mem_start: atomic op memory start address | ||||
/// | /// | ||||
ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start); | ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start); | ||||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, int64_t atomic_mem_start, int64_t atomic_mem_size); | |||||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector<int64_t> &atomic_mem_start, | |||||
const std::vector<int64_t> &atomic_mem_size); | |||||
void AlignMemOffset(const int64_t &mem_align_size); | void AlignMemOffset(const int64_t &mem_align_size); | ||||
@@ -42,10 +42,12 @@ | |||||
#include "graph/utils/op_desc_utils.h" | #include "graph/utils/op_desc_utils.h" | ||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
#include "graph/passes/memcpy_addr_async_pass.h" | |||||
#include "init/gelib.h" | #include "init/gelib.h" | ||||
#include "memory/memory_assigner.h" | #include "memory/memory_assigner.h" | ||||
#include "omg/version.h" | #include "omg/version.h" | ||||
#include "register/op_registry.h" | #include "register/op_registry.h" | ||||
#include "graph/passes/set_input_output_offset_pass.h" | |||||
using std::map; | using std::map; | ||||
using std::set; | using std::set; | ||||
@@ -668,12 +670,36 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { | |||||
GE_CHK_STATUS_RET(label_allocator.AssignFunctionalLabels(label_num_), "Assign label failed."); | GE_CHK_STATUS_RET(label_allocator.AssignFunctionalLabels(label_num_), "Assign label failed."); | ||||
GE_TIMESTAMP_END(AssignFunctionalLabels, "ModelBuilder::AssignFunctionalLabels"); | GE_TIMESTAMP_END(AssignFunctionalLabels, "ModelBuilder::AssignFunctionalLabels"); | ||||
// Add memcpy_addr_async node. | |||||
rtFeatureType_t feature_type = FEATURE_TYPE_MEMCPY; | |||||
int32_t feature_info = MEMCPY_INFO_SUPPORT_ZEROCOPY; | |||||
int64_t value = 0; | |||||
rtError_t rt_ret = rtGetRtCapability(feature_type, feature_info, &value); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "rtGetRtCapability failed."); | |||||
return RT_FAILED; | |||||
} else { | |||||
if (value == RT_CAPABILITY_SUPPORT) { | |||||
GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode); | |||||
MemcpyAddrAsyncPass memcpy_addr; | |||||
GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed."); | |||||
GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run."); | |||||
} else { | |||||
GELOGW("rtGetRtCapability not support memcpy_addr_async."); | |||||
} | |||||
} | |||||
GE_TIMESTAMP_START(AssignMemory); | GE_TIMESTAMP_START(AssignMemory); | ||||
MemoryAssigner mem_assigner(compute_graph_); | MemoryAssigner mem_assigner(compute_graph_); | ||||
GE_CHK_STATUS_RET(mem_assigner.AssignMemory(is_loop_graph_, mem_offset_, zero_copy_mem_size_), | GE_CHK_STATUS_RET(mem_assigner.AssignMemory(is_loop_graph_, mem_offset_, zero_copy_mem_size_), | ||||
"Assign Memory Failed!"); | "Assign Memory Failed!"); | ||||
GE_TIMESTAMP_END(AssignMemory, "GraphBuilder::AssignMemory"); | GE_TIMESTAMP_END(AssignMemory, "GraphBuilder::AssignMemory"); | ||||
GE_TIMESTAMP_START(SetInputOutputOffset); | |||||
SetInputOutputOffsetPass input_output_offset; | |||||
GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed."); | |||||
GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run."); | |||||
// Compile single op in graph build stage | // Compile single op in graph build stage | ||||
GE_TIMESTAMP_START(CompileSingleOp); | GE_TIMESTAMP_START(CompileSingleOp); | ||||
GE_CHK_STATUS_RET(CompileSingleOp(), "ATC builder CompileSingleOp() return fail."); | GE_CHK_STATUS_RET(CompileSingleOp(), "ATC builder CompileSingleOp() return fail."); | ||||
@@ -612,6 +612,33 @@ bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr | |||||
AttrUtils::HasAttr(activate_stream_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE)) { | AttrUtils::HasAttr(activate_stream_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE)) { | ||||
return false; | return false; | ||||
} | } | ||||
/// | |||||
/// stream_0 --> stream_2 --> stream_3 --> stream_4 | |||||
/// /\ | | |||||
/// | \/ | |||||
/// | stream_1 --> stream_5 --> stream_6 --> stream_7 | |||||
/// | /\ | | | |||||
/// | | \/ | | |||||
/// | |---------- stream_8 | | |||||
/// | | | |||||
/// |-----------------------------------------------------------| | |||||
/// | |||||
/// Exit1(S7) Exit2(S7) Exit3(S7) | |||||
/// \ / | | |||||
/// AddN(S1) NextIteration(S7) | |||||
/// | | | |||||
/// NextIteration(S1) / | |||||
/// | / | |||||
/// | / | |||||
/// StreamActive(S7) | |||||
/// | |||||
/// Event between Exit1/Exit2 and AddN should not be optimized | |||||
/// | |||||
if (IsActiveAfterNextIteration(activate_stream_node)) { | |||||
continue; | |||||
} | |||||
visited_nodes.insert(activate_stream_node); | visited_nodes.insert(activate_stream_node); | ||||
// nodes in stream link to streamActivate no need to add event before activated node | // nodes in stream link to streamActivate no need to add event before activated node | ||||
for (const auto &pre_activate_stream_node : activate_stream_node->GetInNodes()) { | for (const auto &pre_activate_stream_node : activate_stream_node->GetInNodes()) { | ||||
@@ -639,6 +666,18 @@ bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr | |||||
return false; | return false; | ||||
} | } | ||||
bool StreamAllocator::IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const { | |||||
if ((active_node_ptr == nullptr) || active_node_ptr->GetInControlNodes().empty()) { | |||||
return false; | |||||
} | |||||
for (const auto &in_node : active_node_ptr->GetInControlNodes()) { | |||||
if ((in_node->GetType() != NEXTITERATION) && (in_node->GetType() != REFNEXTITERATION)) { | |||||
return false; | |||||
} | |||||
} | |||||
return true; | |||||
} | |||||
// Split the stream according to the maximum number of nodes in the stream. | // Split the stream according to the maximum number of nodes in the stream. | ||||
Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | ||||
if (enable_single_stream_ || stream_num_ == 0) { | if (enable_single_stream_ || stream_num_ == 0) { | ||||
@@ -55,6 +55,7 @@ class StreamAllocator { | |||||
Status OptimizeByStreamActivate(); | Status OptimizeByStreamActivate(); | ||||
// Determine if the successor node of RecvNode is directly or indirectly activated by the SendNode precursor node | // Determine if the successor node of RecvNode is directly or indirectly activated by the SendNode precursor node | ||||
bool IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr, const NodePtr &recv_node_ptr) const; | bool IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr, const NodePtr &recv_node_ptr) const; | ||||
bool IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const; | |||||
Status SplitStreams(std::vector<std::set<int64_t>> &split_streams); | Status SplitStreams(std::vector<std::set<int64_t>> &split_streams); | ||||
bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const; | bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const; | ||||
@@ -86,10 +86,10 @@ Status GraphExecutor::SetGraphContext(GraphContextPtr graph_context_ptr) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) { | |||||
Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, int32_t dynamic_type) { | |||||
auto model_manager = ge::ModelManager::GetInstance(); | auto model_manager = ge::ModelManager::GetInstance(); | ||||
GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
Status ret = model_manager->SetDynamicSize(model_id, batch_num); | |||||
Status ret = model_manager->SetDynamicSize(model_id, batch_num, dynamic_type); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "SetDynamicSize failed"); | GELOGE(FAILED, "SetDynamicSize failed"); | ||||
return ret; | return ret; | ||||
@@ -486,12 +486,14 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<Inp | |||||
/// @brief Get dynamic batch_info | /// @brief Get dynamic batch_info | ||||
/// @param [in] model_id | /// @param [in] model_id | ||||
/// @param [out] batch_info | /// @param [out] batch_info | ||||
/// @param [out] dynamic_type | |||||
/// @return execute result | /// @return execute result | ||||
/// | /// | ||||
Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) { | |||||
Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||||
int32_t &dynamic_type) { | |||||
auto model_manager = ge::ModelManager::GetInstance(); | auto model_manager = ge::ModelManager::GetInstance(); | ||||
GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
Status ret = model_manager->GetDynamicBatchInfo(model_id, batch_info); | |||||
Status ret = model_manager->GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "GetDynamicBatchInfo failed."); | GELOGE(ret, "GetDynamicBatchInfo failed."); | ||||
return ret; | return ret; | ||||
@@ -499,12 +501,30 @@ Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::ve | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) { | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Get combined dynamic dims info | |||||
/// @param [in] model_id | |||||
/// @param [out] batch_info | |||||
/// @return execute result | |||||
/// | |||||
Status GraphExecutor::GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) { | |||||
auto model_manager = ge::ModelManager::GetInstance(); | |||||
GE_CHECK_NOTNULL(model_manager); | |||||
Status ret = model_manager->GetCombinedDynamicDims(model_id, batch_info); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "GetCombinedDynamicDims failed."); | |||||
return ret; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||||
auto model_manager = ge::ModelManager::GetInstance(); | auto model_manager = ge::ModelManager::GetInstance(); | ||||
GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
Status ret = model_manager->GetCurShape(model_id, batch_info); | |||||
Status ret = model_manager->GetCurShape(model_id, batch_info, dynamic_type); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "GetCurShape failed"); | |||||
GELOGE(ret, "GetCurShape failed"); | |||||
return ret; | return ret; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -56,7 +56,7 @@ class GraphExecutor { | |||||
Status SetGraphContext(GraphContextPtr graph_context_ptr); | Status SetGraphContext(GraphContextPtr graph_context_ptr); | ||||
static Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num); | |||||
static Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, int32_t dynamic_type); | |||||
void SetTrainFlag(bool is_train_graph); | void SetTrainFlag(bool is_train_graph); | ||||
@@ -80,11 +80,22 @@ class GraphExecutor { | |||||
/// @brief Get dynamic batch_info | /// @brief Get dynamic batch_info | ||||
/// @param [in] model_id | /// @param [in] model_id | ||||
/// @param [out] batch_info | /// @param [out] batch_info | ||||
/// @param [out] dynamic_type | |||||
/// @return execute result | /// @return execute result | ||||
/// | /// | ||||
static Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||||
static Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||||
int32_t &dynamic_type); | |||||
static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Get combined dynamic dims info | |||||
/// @param [in] model_id | |||||
/// @param [out] batch_info | |||||
/// @return execute result | |||||
/// | |||||
static Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||||
static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||||
static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | ||||
@@ -104,12 +104,11 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { | |||||
GE_CHECK_NOTNULL(cond_out_desc); | GE_CHECK_NOTNULL(cond_out_desc); | ||||
GeTensorDesc pred_desc = cond_out_desc->GetInputDesc(kCondOutputIndex); | GeTensorDesc pred_desc = cond_out_desc->GetInputDesc(kCondOutputIndex); | ||||
GeTensorDesc cond_desc(GeShape(pred_desc.GetShape().GetDims()), pred_desc.GetFormat(), DT_INT32); | |||||
// false ==> 0 ==> switch_labels[0] ==> body_leave_index | // false ==> 0 ==> switch_labels[0] ==> body_leave_index | ||||
// true ==> 1 ==> switch_labels[1] ==> body_enter_name | // true ==> 1 ==> switch_labels[1] ==> body_enter_name | ||||
const std::vector<uint32_t> switch_labels = {body_leave_index, body_enter_index}; | const std::vector<uint32_t> switch_labels = {body_leave_index, body_enter_index}; | ||||
NodePtr switch_node = AddLabelSwitchLeave(cond_graph, cond_leave_name, cond_desc, switch_labels); | |||||
NodePtr switch_node = AddLabelSwitchLeave(cond_graph, cond_leave_name, pred_desc, switch_labels); | |||||
if (switch_node == nullptr) { | if (switch_node == nullptr) { | ||||
GELOGE(INTERNAL_ERROR, "Subgraph: %s add label switch failed.", cond_graph->GetName().c_str()); | GELOGE(INTERNAL_ERROR, "Subgraph: %s add label switch failed.", cond_graph->GetName().c_str()); | ||||
return FAILED; | return FAILED; | ||||
@@ -36,20 +36,20 @@ GraphLoader::~GraphLoader() = default; | |||||
Status GraphLoader::UnloadModel(uint32_t model_id) { | Status GraphLoader::UnloadModel(uint32_t model_id) { | ||||
auto model_manager = ModelManager::GetInstance(); | auto model_manager = ModelManager::GetInstance(); | ||||
GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
GELOGI("UnLoad model begin, model_id:%u.", model_id); | |||||
GELOGI("UnLoad model begin, model id:%u.", model_id); | |||||
Status ret = model_manager->Stop(model_id); | Status ret = model_manager->Stop(model_id); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "UnloadModel: Stop failed."); | |||||
GELOGE(ret, "UnloadModel: Stop failed. model id:%u", model_id); | |||||
} | } | ||||
ret = model_manager->Unload(model_id); | ret = model_manager->Unload(model_id); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "UnloadModel: Unload failed."); | |||||
GELOGE(ret, "UnloadModel: Unload failed. model id:%u", model_id); | |||||
CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_UNLOAD); | CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_UNLOAD); | ||||
return ret; | return ret; | ||||
} | } | ||||
GELOGI("UnLoad model success, model_id:%u.", model_id); | |||||
GELOGI("UnLoad model success, model id:%u.", model_id); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -123,14 +123,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string | |||||
Status ret; | Status ret; | ||||
try { | try { | ||||
if (!CheckInputPathValid(path)) { | if (!CheckInputPathValid(path)) { | ||||
GELOGE(PARAM_INVALID, "model path is invalid: %s", path.c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); | |||||
return GE_EXEC_MODEL_PATH_INVALID; | |||||
} | } | ||||
GELOGI("Load model begin, model path is: %s", path.c_str()); | GELOGI("Load model begin, model path is: %s", path.c_str()); | ||||
if (!key_path.empty() && !CheckInputPathValid(key_path)) { | if (!key_path.empty() && !CheckInputPathValid(key_path)) { | ||||
GELOGE(PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); | |||||
return GE_EXEC_MODEL_KEY_PATH_INVALID; | |||||
} | } | ||||
ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); | ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); | ||||
@@ -16,6 +16,7 @@ | |||||
#include "graph/load/new_model_manager/cpu_queue_schedule.h" | #include "graph/load/new_model_manager/cpu_queue_schedule.h" | ||||
#include "common/debug/ge_log.h" | #include "common/debug/ge_log.h" | ||||
#include "common/debug/log.h" | |||||
namespace { | namespace { | ||||
const uint32_t kCoreDim = 1; // for rtCpuKernelLaunch | const uint32_t kCoreDim = 1; // for rtCpuKernelLaunch | ||||
@@ -58,7 +59,7 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) { | |||||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
in_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(MbufQueueInfo); | in_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(MbufQueueInfo); | ||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | ||||
@@ -69,7 +70,7 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) { | |||||
status = rtMemcpy(args_, args_size_, &queue_info, sizeof(MbufQueueInfo), RT_MEMCPY_HOST_TO_DEVICE); | status = rtMemcpy(args_, args_size_, &queue_info, sizeof(MbufQueueInfo), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -84,7 +85,7 @@ Status CpuTaskModelDequeue::Distribute() { | |||||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelDequeue, kCoreDim, args_, args_size_, nullptr, stream_); | rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelDequeue, kCoreDim, args_, args_size_, nullptr, stream_); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelDequeue failed, status: 0x%X", status); | GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelDequeue failed, status: 0x%X", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
GELOGI("Cpu kernel launch model dequeue task success."); | GELOGI("Cpu kernel launch model dequeue task success."); | ||||
@@ -98,24 +99,24 @@ Status CpuTaskModelDequeue::Distribute() { | |||||
/// @param [in] outside_addrs: model input/output memory addr | /// @param [in] outside_addrs: model input/output memory addr | ||||
/// @return: 0 for success / others for failed | /// @return: 0 for success / others for failed | ||||
/// | /// | ||||
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, | |||||
std::map<const void *, std::vector<void *>> &outside_addrs) { | |||||
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) { | |||||
if ((args_ != nullptr) || (args_size_ > 0)) { | if ((args_ != nullptr) || (args_size_ > 0)) { | ||||
GELOGE(FAILED, "Task already initialized, size: %u", args_size_); | GELOGE(FAILED, "Task already initialized, size: %u", args_size_); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
args_size_ = sizeof(AddrMapInfo); | args_size_ = sizeof(AddrMapInfo); | ||||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||||
return RT_FAILED; | |||||
} | |||||
GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM)); | |||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | ||||
AddrMapInfo addr_map_info; | AddrMapInfo addr_map_info; | ||||
for (const auto &addrs : outside_addrs) { | |||||
addr_map_info.addr_num += addrs.second.size(); | |||||
for (auto &addrs : outside_addrs) { | |||||
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||||
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | |||||
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | |||||
for (const auto &virtual_args_addr : virtual_args_addrs) { | |||||
addr_map_info.addr_num += virtual_args_addr.second.size(); | |||||
} | |||||
} | } | ||||
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); | GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); | ||||
@@ -123,38 +124,31 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, | |||||
size_t index = 0; | size_t index = 0; | ||||
vector<uint64_t> src_addrs; | vector<uint64_t> src_addrs; | ||||
vector<uint64_t> dst_addrs; | vector<uint64_t> dst_addrs; | ||||
for (const auto &addrs : outside_addrs) { | |||||
for (size_t i = 0; i < addrs.second.size(); ++i) { | |||||
src_addrs.push_back(mbuf_list.at(index)); | |||||
dst_addrs.push_back(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(addrs.second.at(i)))); | |||||
for (auto &addrs : outside_addrs) { | |||||
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||||
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | |||||
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | |||||
for (const auto &virtual_args_addr : virtual_args_addrs) { | |||||
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { | |||||
src_addrs.push_back(mbuf_list.at(index)); | |||||
dst_addrs.push_back(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | |||||
} | |||||
} | } | ||||
index++; | index++; | ||||
} | } | ||||
// malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs | // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs | ||||
status = rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||||
return RT_FAILED; | |||||
} | |||||
status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(), | |||||
src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||||
return RT_FAILED; | |||||
} | |||||
GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); | |||||
rtError_t status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(), | |||||
src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||||
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status); | |||||
return RT_ERROR_TO_GE_STATUS(status);) | |||||
status = rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM); | |||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||||
return RT_FAILED; | |||||
} | |||||
GE_CHK_RT_RET(rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); | |||||
status = rtMemcpy(dst_addr_, dst_addrs.size() * sizeof(uint64_t), dst_addrs.data(), | status = rtMemcpy(dst_addr_, dst_addrs.size() * sizeof(uint64_t), dst_addrs.data(), | ||||
dst_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | dst_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||||
return RT_FAILED; | |||||
} | |||||
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status); | |||||
return RT_ERROR_TO_GE_STATUS(status);) | |||||
// src_addr_list is init to src_addr, which is the point to src_addrs | // src_addr_list is init to src_addr, which is the point to src_addrs | ||||
if (!src_addrs.empty() && !dst_addrs.empty()) { | if (!src_addrs.empty() && !dst_addrs.empty()) { | ||||
@@ -164,10 +158,8 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, | |||||
} | } | ||||
status = rtMemcpy(args_, args_size_, &addr_map_info, sizeof(AddrMapInfo), RT_MEMCPY_HOST_TO_DEVICE); | status = rtMemcpy(args_, args_size_, &addr_map_info, sizeof(AddrMapInfo), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (status != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||||
return RT_FAILED; | |||||
} | |||||
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status); | |||||
return RT_ERROR_TO_GE_STATUS(status);) | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -180,7 +172,7 @@ Status CpuTaskZeroCopy::Distribute() { | |||||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskZeroCopy, kCoreDim, args_, args_size_, nullptr, stream_); | rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskZeroCopy, kCoreDim, args_, args_size_, nullptr, stream_); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ZeroCopy failed, status: 0x%X", status); | GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ZeroCopy failed, status: 0x%X", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
GELOGI("Cpu kernel launch zero copy task success."); | GELOGI("Cpu kernel launch zero copy task success."); | ||||
@@ -225,7 +217,7 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb | |||||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
out_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(PrepareOutputInfo); | out_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(PrepareOutputInfo); | ||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | ||||
@@ -239,7 +231,7 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb | |||||
status = rtMemcpy(args_, args_size_, &prepare, sizeof(PrepareOutputInfo), RT_MEMCPY_HOST_TO_DEVICE); | status = rtMemcpy(args_, args_size_, &prepare, sizeof(PrepareOutputInfo), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -254,7 +246,7 @@ Status CpuTaskPrepareOutput::Distribute() { | |||||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskPrepareOutput, kCoreDim, args_, args_size_, nullptr, stream_); | rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskPrepareOutput, kCoreDim, args_, args_size_, nullptr, stream_); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch PrepareOutput failed, status: 0x%X", status); | GELOGE(RT_FAILED, "Call rt CpuKernelLaunch PrepareOutput failed, status: 0x%X", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
GELOGI("Cpu kernel launch prepare output task success."); | GELOGI("Cpu kernel launch prepare output task success."); | ||||
@@ -279,7 +271,7 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) { | |||||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | ||||
@@ -289,7 +281,7 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) { | |||||
status = rtMemcpy(args_, args_size_, &queue_info, args_size_, RT_MEMCPY_HOST_TO_DEVICE); | status = rtMemcpy(args_, args_size_, &queue_info, args_size_, RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -304,7 +296,7 @@ Status CpuTaskModelEnqueue::Distribute() { | |||||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelEnqueue, kCoreDim, args_, args_size_, nullptr, stream_); | rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelEnqueue, kCoreDim, args_, args_size_, nullptr, stream_); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelEnqueue failed, status: 0x%X", status); | GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelEnqueue failed, status: 0x%X", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
GELOGI("Cpu kernel launch model enqueue task success."); | GELOGI("Cpu kernel launch model enqueue task success."); | ||||
@@ -336,7 +328,7 @@ Status CpuTaskActiveEntry::Distribute() { | |||||
rtError_t ret = rtStreamActive(active_stream_, stream_); | rtError_t ret = rtStreamActive(active_stream_, stream_); | ||||
if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt StreamActive failed, ret: 0x%X", ret); | GELOGE(RT_FAILED, "Call rt StreamActive failed, ret: 0x%X", ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(ret); | |||||
} | } | ||||
GELOGI("Cpu kernel launch active entry task success."); | GELOGI("Cpu kernel launch active entry task success."); | ||||
@@ -359,14 +351,14 @@ Status CpuTaskWaitEndGraph::Init(uint32_t model_id) { | |||||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | ||||
status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); | status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -381,7 +373,7 @@ Status CpuTaskWaitEndGraph::Distribute() { | |||||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_); | rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch WaitEndGraph failed, status: 0x%X", status); | GELOGE(RT_FAILED, "Call rt CpuKernelLaunch WaitEndGraph failed, status: 0x%X", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
GELOGI("Cpu kernel launch wait end task success."); | GELOGI("Cpu kernel launch wait end task success."); | ||||
@@ -404,14 +396,14 @@ Status CpuTaskModelRepeat::Init(uint32_t model_id) { | |||||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | ||||
status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); | status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -426,7 +418,7 @@ Status CpuTaskModelRepeat::Distribute() { | |||||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelRepeat, kCoreDim, args_, args_size_, nullptr, stream_); | rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelRepeat, kCoreDim, args_, args_size_, nullptr, stream_); | ||||
if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelRepeat failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelRepeat failed, status: 0x%x", status); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(status); | |||||
} | } | ||||
GELOGI("Cpu kernel launch repeat task success."); | GELOGI("Cpu kernel launch repeat task success."); | ||||
@@ -22,6 +22,7 @@ | |||||
#include "common/ge_inner_error_codes.h" | #include "common/ge_inner_error_codes.h" | ||||
#include "graph/load/new_model_manager/task_info/task_info.h" | #include "graph/load/new_model_manager/task_info/task_info.h" | ||||
#include "graph/load/new_model_manager/zero_copy_offset.h" | |||||
#include "runtime/kernel.h" | #include "runtime/kernel.h" | ||||
namespace ge { | namespace ge { | ||||
@@ -93,7 +94,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo { | |||||
~CpuTaskZeroCopy() override; | ~CpuTaskZeroCopy() override; | ||||
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } | Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } | ||||
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, std::vector<void *>> &outside_addrs); | |||||
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs); | |||||
Status Distribute() override; | Status Distribute() override; | ||||
@@ -487,8 +487,8 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in | |||||
size_t proto_size = op_mapping_info.ByteSizeLong(); | size_t proto_size = op_mapping_info.ByteSizeLong(); | ||||
bool ret = op_mapping_info.SerializeToString(&proto_str); | bool ret = op_mapping_info.SerializeToString(&proto_str); | ||||
if (!ret || proto_size == 0) { | if (!ret || proto_size == 0) { | ||||
GELOGE(FAILED, "Protobuf SerializeToString failed, proto size %zu.", proto_size); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
if (dev_mem_load_ != nullptr) { | if (dev_mem_load_ != nullptr) { | ||||
@@ -499,20 +499,20 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in | |||||
rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "load dump information.", proto_size) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "load dump information.", proto_size) | ||||
rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size); | rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
load_flag_ = true; | load_flag_ = true; | ||||
@@ -525,8 +525,8 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_ | |||||
size_t proto_size = op_mapping_info.ByteSizeLong(); | size_t proto_size = op_mapping_info.ByteSizeLong(); | ||||
bool ret = op_mapping_info.SerializeToString(&proto_str); | bool ret = op_mapping_info.SerializeToString(&proto_str); | ||||
if (!ret || proto_size == 0) { | if (!ret || proto_size == 0) { | ||||
GELOGE(FAILED, "Protobuf SerializeToString failed, proto size %zu.", proto_size); | |||||
return FAILED; | |||||
GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size); | |||||
return PARAM_INVALID; | |||||
} | } | ||||
if (dev_mem_unload_ != nullptr) { | if (dev_mem_unload_ != nullptr) { | ||||
@@ -537,20 +537,20 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_ | |||||
rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "unload dump information.", proto_size) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "unload dump information.", proto_size) | ||||
rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size); | rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
load_flag_ = false; | load_flag_ = false; | ||||
GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size); | GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size); | ||||
@@ -588,18 +588,20 @@ Status DataDumper::LoadDumpInfo() { | |||||
task.mutable_op()->set_op_type(op_desc->GetType()); | task.mutable_op()->set_op_type(op_desc->GetType()); | ||||
if (dump_properties_.GetDumpMode() == kDumpOutput) { | if (dump_properties_.GetDumpMode() == kDumpOutput) { | ||||
if (DumpOutput(op_iter, task) != SUCCESS) { | |||||
GELOGE(FAILED, "Dump output failed"); | |||||
return FAILED; | |||||
Status ret = DumpOutput(op_iter, task); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Dump output failed"); | |||||
return ret; | |||||
} | } | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
continue; | continue; | ||||
} | } | ||||
if (dump_properties_.GetDumpMode() == kDumpInput) { | if (dump_properties_.GetDumpMode() == kDumpInput) { | ||||
if (op_iter.is_task) { | if (op_iter.is_task) { | ||||
if (DumpInput(op_iter, task) != SUCCESS) { | |||||
GELOGE(FAILED, "Dump input failed"); | |||||
return FAILED; | |||||
Status ret = DumpInput(op_iter, task); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Dump input failed"); | |||||
return ret; | |||||
} | } | ||||
} | } | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
@@ -608,14 +610,14 @@ Status DataDumper::LoadDumpInfo() { | |||||
if (dump_properties_.GetDumpMode() == kDumpAll) { | if (dump_properties_.GetDumpMode() == kDumpAll) { | ||||
auto ret = DumpOutput(op_iter, task); | auto ret = DumpOutput(op_iter, task); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Dump output failed when in dumping all"); | |||||
return FAILED; | |||||
GELOGE(ret, "Dump output failed when in dumping all"); | |||||
return ret; | |||||
} | } | ||||
if (op_iter.is_task) { | if (op_iter.is_task) { | ||||
ret = DumpInput(op_iter, task); | ret = DumpInput(op_iter, task); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Dump input failed when in dumping all"); | |||||
return FAILED; | |||||
GELOGE(ret, "Dump input failed when in dumping all"); | |||||
return ret; | |||||
} | } | ||||
} | } | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
@@ -630,8 +632,8 @@ Status DataDumper::LoadDumpInfo() { | |||||
if (!op_list_.empty() || is_op_debug_) { | if (!op_list_.empty() || is_op_debug_) { | ||||
auto ret = ExecuteLoadDumpInfo(op_mapping_info); | auto ret = ExecuteLoadDumpInfo(op_mapping_info); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Execute load dump info failed"); | |||||
return FAILED; | |||||
GELOGE(ret, "Execute load dump info failed"); | |||||
return ret; | |||||
} | } | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -702,8 +704,8 @@ Status DataDumper::UnloadDumpInfo() { | |||||
} | } | ||||
auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); | auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Execute unload dump info failed"); | |||||
return FAILED; | |||||
GELOGE(ret, "Execute unload dump info failed"); | |||||
return ret; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -28,14 +28,15 @@ | |||||
#include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
#include "common/helper/om_file_helper.h" | #include "common/helper/om_file_helper.h" | ||||
#include "common/opskernel/ge_task_info.h" | #include "common/opskernel/ge_task_info.h" | ||||
#include "common/types.h" | |||||
#include "common/properties_manager.h" | #include "common/properties_manager.h" | ||||
#include "common/types.h" | |||||
#include "framework/common/util.h" | #include "framework/common/util.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/load/new_model_manager/aipp_utils.h" | |||||
#include "graph/load/new_model_manager/data_dumper.h" | #include "graph/load/new_model_manager/data_dumper.h" | ||||
#include "graph/load/new_model_manager/data_inputer.h" | #include "graph/load/new_model_manager/data_inputer.h" | ||||
#include "graph/load/new_model_manager/model_utils.h" | #include "graph/load/new_model_manager/model_utils.h" | ||||
#include "graph/load/new_model_manager/aipp_utils.h" | |||||
#include "graph/load/new_model_manager/zero_copy_offset.h" | |||||
#include "graph/load/new_model_manager/zero_copy_task.h" | #include "graph/load/new_model_manager/zero_copy_task.h" | ||||
#include "graph/model.h" | #include "graph/model.h" | ||||
#include "graph/node.h" | #include "graph/node.h" | ||||
@@ -285,11 +286,20 @@ class DavinciModel { | |||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Get dynamic batch_info | /// @brief Get dynamic batch_info | ||||
/// @param [out] batch_info | /// @param [out] batch_info | ||||
/// @param [out] dynamic_type | |||||
/// @return execute result | /// @return execute result | ||||
/// | /// | ||||
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info) const; | |||||
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const; | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Get combined dynamic dims info | |||||
/// @param [out] batch_info | |||||
/// @return None | |||||
/// | |||||
void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const; | |||||
void GetCurShape(std::vector<int64_t> &batch_info); | |||||
void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||||
void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | ||||
@@ -416,7 +426,7 @@ class DavinciModel { | |||||
void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args, | void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args, | ||||
size_t size, size_t offset); | size_t size, size_t offset); | ||||
void SetDynamicSize(const std::vector<uint64_t> &batch_num); | |||||
void SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type); | |||||
bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } | bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } | ||||
@@ -456,6 +466,9 @@ class DavinciModel { | |||||
void *cur_args = static_cast<char *>(args_) + offset; | void *cur_args = static_cast<char *>(args_) + offset; | ||||
return cur_args; | return cur_args; | ||||
} | } | ||||
void SetTotalIOAddrs(vector<void *> &io_addrs) { | |||||
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); | |||||
} | |||||
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | ||||
int64_t GetFixedAddrsSize(string tensor_name); | int64_t GetFixedAddrsSize(string tensor_name); | ||||
void *GetCurrentFixedAddr(int64_t offset) const { | void *GetCurrentFixedAddr(int64_t offset) const { | ||||
@@ -474,7 +487,8 @@ class DavinciModel { | |||||
Status MallocKnownArgs(); | Status MallocKnownArgs(); | ||||
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
Status UpdateKnownZeroCopyAddr(vector<void *> &io_addrs, uint32_t args_offset); | |||||
Status UpdateKnownZeroCopyAddr(); | |||||
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | |||||
Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | ||||
Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | ||||
@@ -513,22 +527,6 @@ class DavinciModel { | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Save Data address info for ZeroCopy. | |||||
/// @param [in] const std::vector<void *> &outside_addrs | |||||
/// @return None. | |||||
/// | |||||
void SetInputOutsideAddr(const std::vector<void *> &outside_addrs); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Save NetOutput address info for ZeroCopy. | |||||
/// @param [in] const std::vector<void *> &outside_addrs | |||||
/// @return None. | |||||
/// | |||||
void SetOutputOutsideAddr(const std::vector<void *> &outside_addrs); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Copy Check input size and model op size. | /// @brief Copy Check input size and model op size. | ||||
/// @param [in] const int64_t &input_size: input size. | /// @param [in] const int64_t &input_size: input size. | ||||
/// @param [in] const int64_t &op_size: model op size. | /// @param [in] const int64_t &op_size: model op size. | ||||
@@ -564,7 +562,7 @@ class DavinciModel { | |||||
/// @param [in] batch_label: batch label for multi-batch scenes | /// @param [in] batch_label: batch label for multi-batch scenes | ||||
/// @return SUCCESS handle successfully / others handle failed | /// @return SUCCESS handle successfully / others handle failed | ||||
/// | /// | ||||
Status UpdateIoTaskArgs(const map<uint32_t, pair<int64_t, void *>> &data_info, bool is_input, | |||||
Status UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | |||||
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label); | const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label); | ||||
Status CopyInputData(const InputData &input_data, bool device_data = false); | Status CopyInputData(const InputData &input_data, bool device_data = false); | ||||
@@ -706,8 +704,7 @@ class DavinciModel { | |||||
/// | /// | ||||
Status BindInputQueue(); | Status BindInputQueue(); | ||||
Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, | |||||
std::map<const void *, std::vector<void *>> &outside_addrs); | |||||
Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -816,8 +813,12 @@ class DavinciModel { | |||||
vector<OpDescPtr> variable_op_list_; | vector<OpDescPtr> variable_op_list_; | ||||
std::map<uint32_t, std::pair<int64_t, void *>> input_data_info_; // Virtual address from Data output. | |||||
std::map<uint32_t, std::pair<int64_t, void *>> output_data_info_; // Virtual address from NetOutput input. | |||||
std::map<uint32_t, ZeroCopyOffset> new_input_data_info_; | |||||
std::map<uint32_t, ZeroCopyOffset> new_output_data_info_; | |||||
std::map<const void *, ZeroCopyOffset> new_input_outside_addrs_; | |||||
std::map<const void *, ZeroCopyOffset> new_output_outside_addrs_; | |||||
std::vector<void *> real_virtual_addrs_; | |||||
// output op: save cce op actual needed memory size | // output op: save cce op actual needed memory size | ||||
vector<int64_t> output_memory_size_list_; | vector<int64_t> output_memory_size_list_; | ||||
@@ -849,9 +850,7 @@ class DavinciModel { | |||||
std::mutex outside_addrs_mutex_; | std::mutex outside_addrs_mutex_; | ||||
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | ||||
std::set<const void *> copy_only_addrs_; // Address need copy to original place. | std::set<const void *> copy_only_addrs_; // Address need copy to original place. | ||||
// {node_addr, {addr_in_task_args}} | |||||
std::map<const void *, std::vector<void *>> input_outside_addrs_; // Key is virtual address from Data. | |||||
std::map<const void *, std::vector<void *>> output_outside_addrs_; // Key is virtual address from NetOutput. | |||||
// {op_id, batch_label} | // {op_id, batch_label} | ||||
std::map<int64_t, std::string> zero_copy_op_id_batch_label_; | std::map<int64_t, std::string> zero_copy_op_id_batch_label_; | ||||
// {batch_label, addrs} | // {batch_label, addrs} | ||||
@@ -920,8 +919,13 @@ class DavinciModel { | |||||
int64_t total_fixed_addr_size_ = 0; | int64_t total_fixed_addr_size_ = 0; | ||||
std::map<const void *, void *> knonw_input_data_info_; | std::map<const void *, void *> knonw_input_data_info_; | ||||
std::map<const void *, void *> knonw_output_data_info_; | std::map<const void *, void *> knonw_output_data_info_; | ||||
vector<void *> total_io_addrs_; | |||||
vector<void *> orig_total_io_addrs_; | |||||
bool base_addr_not_changed_ = false; | |||||
vector<vector<int64_t>> batch_info_; | vector<vector<int64_t>> batch_info_; | ||||
std::vector<std::vector<int64_t>> combined_batch_info_; | |||||
int32_t dynamic_type_ = 0; | |||||
vector<uint64_t> batch_size_; | vector<uint64_t> batch_size_; | ||||
// key: input tensor name, generally rts op; | // key: input tensor name, generally rts op; | ||||
@@ -70,11 +70,11 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u | |||||
auto kernel_size = sizeof(uint64_t) * (v_aicpu_kernel.size()); | auto kernel_size = sizeof(uint64_t) * (v_aicpu_kernel.size()); | ||||
rtError_t rt_ret = rtMalloc(&aicpu_kernel_addr, kernel_size, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&aicpu_kernel_addr, kernel_size, RT_MEMORY_HBM); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | ||||
return RT_FAILED;) | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
rt_ret = rtMemcpy(aicpu_kernel_addr, kernel_size, v_aicpu_kernel.data(), kernel_size, RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(aicpu_kernel_addr, kernel_size, v_aicpu_kernel.data(), kernel_size, RT_MEMCPY_HOST_TO_DEVICE); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||||
GE_CHK_RT(rtFree(aicpu_kernel_addr)); return FAILED;) | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||||
GE_CHK_RT(rtFree(aicpu_kernel_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
uint64_t kernel_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(aicpu_kernel_addr)); | uint64_t kernel_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(aicpu_kernel_addr)); | ||||
param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr; | param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr; | ||||
// In the scene of loading once and running many times, the kernel needs to be destroyed many times, | // In the scene of loading once and running many times, the kernel needs to be destroyed many times, | ||||
@@ -84,64 +84,64 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u | |||||
rtError_t rt_ret = rtMalloc(&(devicebase), sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&(devicebase), sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "malloc device memory failed."); | |||||
GELOGE(RT_FAILED, "malloc device memory failed. ret: 0x%X", rt_ret); | |||||
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = | rt_ret = | ||||
rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); | rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "memory copy to device failed."); | |||||
GELOGE(RT_FAILED, "memory copy to device failed. ret: 0x%X", rt_ret); | |||||
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | ||||
GE_CHK_RT(rtFree(devicebase)); | GE_CHK_RT(rtFree(devicebase)); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rtStream_t stream = nullptr; | rtStream_t stream = nullptr; | ||||
rt_ret = rtStreamCreate(&stream, 0); | rt_ret = rtStreamCreate(&stream, 0); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "create stream failed."); | |||||
GELOGE(RT_FAILED, "create stream failed. ret: 0x%X", rt_ret); | |||||
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | ||||
GE_CHK_RT(rtFree(devicebase)); | GE_CHK_RT(rtFree(devicebase)); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtKernelLaunchEx(devicebase, sizeof(STR_FWK_OP_KERNEL), 0, stream); | rt_ret = rtKernelLaunchEx(devicebase, sizeof(STR_FWK_OP_KERNEL), 0, stream); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "rtKernelLaunchEx failed."); | |||||
GELOGE(RT_FAILED, "rtKernelLaunchEx failed. ret: 0x%X", rt_ret); | |||||
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | ||||
GE_CHK_RT(rtFree(devicebase)); | GE_CHK_RT(rtFree(devicebase)); | ||||
GE_CHK_RT(rtStreamDestroy(stream)); | GE_CHK_RT(rtStreamDestroy(stream)); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtStreamSynchronize(stream); | rt_ret = rtStreamSynchronize(stream); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "rtStreamSynchronize failed."); | |||||
GELOGE(RT_FAILED, "rtStreamSynchronize failed. ret: 0x%X", rt_ret); | |||||
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | ||||
GE_CHK_RT(rtFree(devicebase)); | GE_CHK_RT(rtFree(devicebase)); | ||||
GE_CHK_RT(rtStreamDestroy(stream)); | GE_CHK_RT(rtStreamDestroy(stream)); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
if (aicpu_kernel_addr != nullptr) { | if (aicpu_kernel_addr != nullptr) { | ||||
rt_ret = rtFree(aicpu_kernel_addr); | rt_ret = rtFree(aicpu_kernel_addr); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "free memory failed."); | |||||
GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret); | |||||
GE_CHK_RT(rtFree(devicebase)); | GE_CHK_RT(rtFree(devicebase)); | ||||
GE_CHK_RT(rtStreamDestroy(stream)); | GE_CHK_RT(rtStreamDestroy(stream)); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
} | } | ||||
rt_ret = rtFree(devicebase); | rt_ret = rtFree(devicebase); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "free memory failed."); | |||||
GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret); | |||||
GE_CHK_RT(rtStreamDestroy(stream)); | GE_CHK_RT(rtStreamDestroy(stream)); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtStreamDestroy(stream); | rt_ret = rtStreamDestroy(stream); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "rtStreamDestroy failed."); | |||||
return FAILED; | |||||
GELOGE(RT_FAILED, "rtStreamDestroy failed. ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -168,8 +168,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||||
std::lock_guard<std::mutex> lock(map_mutex_); | std::lock_guard<std::mutex> lock(map_mutex_); | ||||
auto it = model_map_.find(model_id); | auto it = model_map_.find(model_id); | ||||
if (it == model_map_.end()) { | if (it == model_map_.end()) { | ||||
GELOGE(PARAM_INVALID, "model id %u does not exists.", model_id); | |||||
return PARAM_INVALID; | |||||
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); | |||||
return GE_EXEC_MODEL_ID_INVALID; | |||||
} | } | ||||
uint64_t session_id = it->second->GetSessionId(); | uint64_t session_id = it->second->GetSessionId(); | ||||
GELOGI("Destroy aicpu session for infer, session id is %u.", session_id); | GELOGI("Destroy aicpu session for infer, session id is %u.", session_id); | ||||
@@ -223,10 +223,11 @@ Status ModelManager::SetDevice(int32_t deviceId) const { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) { | |||||
ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, | |||||
int32_t dynamic_type) { | |||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHECK_NOTNULL(davinci_model); | GE_CHECK_NOTNULL(davinci_model); | ||||
davinci_model->SetDynamicSize(batch_num); | |||||
davinci_model->SetDynamicSize(batch_num, dynamic_type); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -332,8 +333,8 @@ Status ModelManager::DeleteModel(uint32_t id) { | |||||
} else if (hybrid_model_it != hybrid_model_map_.end()) { | } else if (hybrid_model_it != hybrid_model_map_.end()) { | ||||
(void)hybrid_model_map_.erase(hybrid_model_it); | (void)hybrid_model_map_.erase(hybrid_model_it); | ||||
} else { | } else { | ||||
GELOGE(PARAM_INVALID, "model id %u does not exists.", id); | |||||
return PARAM_INVALID; | |||||
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); | |||||
return GE_EXEC_MODEL_ID_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -386,7 +387,7 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d | |||||
std::shared_ptr<DavinciModel> model = GetModel(model_id); | std::shared_ptr<DavinciModel> model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid Model ID %u in InputData! ", model_id); | |||||
GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! ", model_id); | |||||
GE_IF_BOOL_EXEC(model->GetDataInputTid() == 0, model->SetDataInputTid(mmGetTid())); | GE_IF_BOOL_EXEC(model->GetDataInputTid() == 0, model->SetDataInputTid(mmGetTid())); | ||||
@@ -422,7 +423,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT | |||||
for (size_t i = 0; i < inputs.size(); ++i) { | for (size_t i = 0; i < inputs.size(); ++i) { | ||||
DataBuffer data; | DataBuffer data; | ||||
data.data = inputs[i].data; | data.data = inputs[i].data; | ||||
data.length = static_cast<uint32_t>(inputs[i].length); | |||||
data.length = inputs[i].length; | |||||
input_data.blobs.push_back(data); | input_data.blobs.push_back(data); | ||||
} | } | ||||
@@ -442,7 +443,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid Model ID %u in InputData! ", model_id); | |||||
GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! ", model_id); | |||||
DataInputer *inputer = model->GetDataInputer(); | DataInputer *inputer = model->GetDataInputer(); | ||||
GE_CHECK_NOTNULL(inputer); | GE_CHECK_NOTNULL(inputer); | ||||
@@ -472,7 +473,7 @@ Status ModelManager::Start(uint32_t model_id) { | |||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to start! ", model_id); | |||||
Status status = davinci_model->ModelRunStart(); | Status status = davinci_model->ModelRunStart(); | ||||
if (status == SUCCESS) { | if (status == SUCCESS) { | ||||
@@ -499,7 +500,7 @@ Status ModelManager::Stop(uint32_t model_id) { | |||||
} | } | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to stop!", model_id); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to stop!", model_id); | |||||
Status status = davinci_model->ModelRunStop(); | Status status = davinci_model->ModelRunStop(); | ||||
if (status == SUCCESS) { | if (status == SUCCESS) { | ||||
@@ -661,7 +662,7 @@ Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_siz | |||||
} | } | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid Model ID %u !", | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid model id %u!", | |||||
model_id); | model_id); | ||||
max_size = davinci_model->TotalMemSize(); | max_size = davinci_model->TotalMemSize(); | ||||
@@ -671,8 +672,8 @@ Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_siz | |||||
Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | ||||
vector<InputOutputDescInfo> &output_desc) { | vector<InputOutputDescInfo> &output_desc) { | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | |||||
"GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", | |||||
model_id); | |||||
return davinci_model->GetInputOutputDescInfo(input_desc, output_desc); | return davinci_model->GetInputOutputDescInfo(input_desc, output_desc); | ||||
} | } | ||||
@@ -682,8 +683,8 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||||
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats, | std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats, | ||||
bool new_model_desc) { | bool new_model_desc) { | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | |||||
"GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, | |||||
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | |||||
davinci_model->SetModelDescVersion(new_model_desc); | davinci_model->SetModelDescVersion(new_model_desc); | ||||
@@ -697,18 +698,35 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||||
/// @param [out] batch_info | /// @param [out] batch_info | ||||
/// @return execute result | /// @return execute result | ||||
/// | /// | ||||
Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) { | |||||
Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||||
int32_t &dynamic_type) { | |||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, | |||||
"GetDynamicBatchInfo failed, Invalid model id %u!", model_id); | |||||
return davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type); | |||||
} | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Get combined dynamic dims info | |||||
/// @param [in] model_id | |||||
/// @param [out] batch_info | |||||
/// @return execute result | |||||
/// | |||||
Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vector<int64_t>> &batch_info) { | |||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetDynamicBatchInfo Failed, Invalid Model ID %u !", | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetCombinedDynamicDims Failed, Invalid Model ID %u!", | |||||
model_id); | model_id); | ||||
return davinci_model->GetDynamicBatchInfo(batch_info); | |||||
davinci_model->GetCombinedDynamicDims(batch_info); | |||||
return SUCCESS; | |||||
} | } | ||||
Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) { | |||||
Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHECK_NOTNULL(davinci_model); | GE_CHECK_NOTNULL(davinci_model); | ||||
davinci_model->GetCurShape(batch_info); | |||||
davinci_model->GetCurShape(batch_info, dynamic_type); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -724,8 +742,8 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, | |||||
std::vector<uint32_t> &inputFormats, | std::vector<uint32_t> &inputFormats, | ||||
std::vector<uint32_t> &outputFormats) { | std::vector<uint32_t> &outputFormats) { | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | |||||
"GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", | |||||
model_id); | |||||
return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); | return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); | ||||
} | } | ||||
@@ -767,8 +785,8 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { | |||||
Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | ||||
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | ||||
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID, | |||||
"input key file path is not valid, %s", strerror(errno)); | |||||
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, GE_EXEC_MODEL_KEY_PATH_INVALID, | |||||
"input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); | |||||
GenModelId(&model_id); | GenModelId(&model_id); | ||||
shared_ptr<DavinciModel> davinci_model = nullptr; | shared_ptr<DavinciModel> davinci_model = nullptr; | ||||
@@ -786,11 +804,11 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
try { | try { | ||||
davinci_model = std::make_shared<DavinciModel>(model.priority, listener); | davinci_model = std::make_shared<DavinciModel>(model.priority, listener); | ||||
} catch (std::bad_alloc &) { | } catch (std::bad_alloc &) { | ||||
GELOGE(FAILED, "Make shared failed"); | |||||
return FAILED; | |||||
GELOGE(MEMALLOC_FAILED, "Make shared failed"); | |||||
return MEMALLOC_FAILED; | |||||
} catch (...) { | } catch (...) { | ||||
GELOGE(FAILED, "Make shared failed since other exception raise"); | |||||
return FAILED; | |||||
GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise"); | |||||
return INTERNAL_ERROR; | |||||
} | } | ||||
ret = davinci_model->Assign(ge_model); | ret = davinci_model->Assign(ge_model); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
@@ -803,7 +821,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
rtError_t rt_ret = rtGetDevice(&device_id); | rtError_t rt_ret = rtGetDevice(&device_id); | ||||
if (rt_ret != RT_ERROR_NONE || device_id < 0) { | if (rt_ret != RT_ERROR_NONE || device_id < 0) { | ||||
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
davinci_model->SetDeviceId(device_id); | davinci_model->SetDeviceId(device_id); | ||||
davinci_model->SetOmName(model.om_name); | davinci_model->SetOmName(model.om_name); | ||||
@@ -851,8 +869,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, | Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, | ||||
const std::vector<uint32_t> &input_queue_ids, | const std::vector<uint32_t> &input_queue_ids, | ||||
const std::vector<uint32_t> &output_queue_ids) { | const std::vector<uint32_t> &output_queue_ids) { | ||||
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0, PARAM_INVALID, | |||||
"input key file path is not valid, %s", strerror(errno)); | |||||
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0, | |||||
GE_EXEC_MODEL_KEY_PATH_INVALID, "input key file path %s is not valid, %s", | |||||
model_data.key.c_str(), strerror(errno)); | |||||
ModelHelper model_helper; | ModelHelper model_helper; | ||||
Status ret = model_helper.LoadModel(model_data); | Status ret = model_helper.LoadModel(model_data); | ||||
@@ -863,8 +882,8 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d | |||||
shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(model_data.priority, nullptr); | shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(model_data.priority, nullptr); | ||||
if (davinci_model == nullptr) { | if (davinci_model == nullptr) { | ||||
GELOGE(FAILED, "create model failed."); | |||||
return FAILED; | |||||
GELOGE(MEMALLOC_FAILED, "create model failed."); | |||||
return MEMALLOC_FAILED; | |||||
} | } | ||||
ret = davinci_model->Assign(model_helper.GetGeModel()); | ret = davinci_model->Assign(model_helper.GetGeModel()); | ||||
@@ -916,7 +935,7 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d | |||||
Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | ||||
OutputData &output_data) { | OutputData &output_data) { | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); | |||||
if (davinci_model->NeedDestroyAicpuKernel()) { | if (davinci_model->NeedDestroyAicpuKernel()) { | ||||
GELOGI("Start to destroy specified aicpu kernel."); | GELOGI("Start to destroy specified aicpu kernel."); | ||||
@@ -973,29 +992,30 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me | |||||
auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_data); | auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_data); | ||||
if (partition_table->num == 1) { | if (partition_table->num == 1) { | ||||
GELOGE(FAILED, "om model is error,please use executable om model"); | |||||
return FAILED; | |||||
GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "om model is error,please use executable om model"); | |||||
return GE_EXEC_MODEL_PARTITION_NUM_INVALID; | |||||
} | } | ||||
ModelPartition task_partition; | ModelPartition task_partition; | ||||
if (om_file_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition) != SUCCESS) { | if (om_file_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition) != SUCCESS) { | ||||
GELOGE(FAILED, "get task model partition failed."); | |||||
return FAILED; | |||||
GELOGE(GE_EXEC_LOAD_TASK_PARTITION_FAILED, "get task model partition failed."); | |||||
return GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||||
} | } | ||||
std::shared_ptr<domi::ModelTaskDef> model_task_def = MakeShared<domi::ModelTaskDef>(); | std::shared_ptr<domi::ModelTaskDef> model_task_def = MakeShared<domi::ModelTaskDef>(); | ||||
if (model_task_def == nullptr) { | if (model_task_def == nullptr) { | ||||
return FAILED; | |||||
return MEMALLOC_FAILED; | |||||
} | } | ||||
if (task_partition.size != 0) { | if (task_partition.size != 0) { | ||||
if (!ReadProtoFromArray(task_partition.data, static_cast<int>(task_partition.size), model_task_def.get())) { | if (!ReadProtoFromArray(task_partition.data, static_cast<int>(task_partition.size), model_task_def.get())) { | ||||
GELOGE(FAILED, "ReadProtoFromArray failed."); | |||||
return FAILED; | |||||
GELOGE(GE_EXEC_LOAD_TASK_PARTITION_FAILED, "ReadProtoFromArray failed."); | |||||
return GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||||
} | } | ||||
} | } | ||||
ModelPartition partition_weight; | ModelPartition partition_weight; | ||||
ret = om_file_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition_weight); | ret = om_file_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition_weight); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Get weight partition failed. ret = %u", ret); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, | |||||
"Get weight partition failed. ret = %u", ret); | |||||
mem_size = model_task_def->memory_size(); | mem_size = model_task_def->memory_size(); | ||||
weight_size = partition_weight.size; | weight_size = partition_weight.size; | ||||
@@ -187,9 +187,19 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
/// @brief Get dynamic batch_info | /// @brief Get dynamic batch_info | ||||
/// @param [in] model_id | /// @param [in] model_id | ||||
/// @param [out] batch_info | /// @param [out] batch_info | ||||
/// @param [out] dynamic_type | |||||
/// @return execute result | /// @return execute result | ||||
/// | /// | ||||
ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||||
ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||||
int32_t &dynamic_type); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Get combined dynamic dims info | |||||
/// @param [in] model_id | |||||
/// @param [out] batch_info | |||||
/// @return execute result | |||||
/// | |||||
ge::Status GetCombinedDynamicDims(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -215,13 +225,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
std::vector<uint32_t> &inputFormats, | std::vector<uint32_t> &inputFormats, | ||||
std::vector<uint32_t> &outputFormats); | std::vector<uint32_t> &outputFormats); | ||||
ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info); | |||||
ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||||
ge::Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | ge::Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | ||||
ge::Status SetDevice(int32_t deviceId) const; | ge::Status SetDevice(int32_t deviceId) const; | ||||
ge::Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num); | |||||
ge::Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, int32_t dynamic_type); | |||||
/// | /// | ||||
/// @ingroup domi_ome | /// @ingroup domi_ome | ||||
@@ -56,6 +56,7 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { | |||||
if (tensor_size) { | if (tensor_size) { | ||||
v_input_size.push_back(tensor_size); | v_input_size.push_back(tensor_size); | ||||
} | } | ||||
GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); | |||||
continue; | continue; | ||||
} | } | ||||
@@ -64,6 +65,8 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { | |||||
GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); | GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); | ||||
continue); | continue); | ||||
GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); | |||||
v_input_size.push_back(tensor_size); | v_input_size.push_back(tensor_size); | ||||
} | } | ||||
@@ -34,7 +34,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); | Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "SetStream fail, stream_id:%u", task_def.stream_id()); | GELOGE(ret, "SetStream fail, stream_id:%u", task_def.stream_id()); | ||||
return FAILED; | |||||
return ret; | |||||
} | } | ||||
model_ = davinci_model->GetRtModelHandle(); | model_ = davinci_model->GetRtModelHandle(); | ||||
@@ -53,14 +53,14 @@ Status EndGraphTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); | rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtEndGraphEx failed, ret: 0x%x", rt_ret); | GELOGE(RT_FAILED, "Call rtEndGraphEx failed, ret: 0x%x", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
} else { | } else { | ||||
GELOGI("Start to call rtEndGraph"); | GELOGI("Start to call rtEndGraph"); | ||||
rtError_t rt_ret = rtEndGraph(model_, stream_); | rtError_t rt_ret = rtEndGraph(model_, stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtEndGraph failed, ret: 0x%x", rt_ret); | GELOGE(RT_FAILED, "Call rtEndGraph failed, ret: 0x%x", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
} | } | ||||
@@ -69,7 +69,7 @@ Status EndGraphTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); | rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
task_id_ = task_id; | task_id_ = task_id; | ||||
stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
@@ -49,7 +49,7 @@ Status EventRecordTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtEventRecord(event_, stream_); | rtError_t rt_ret = rtEventRecord(event_, stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -51,13 +51,13 @@ Status EventWaitTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtStreamWaitEvent(stream_, event_); | rtError_t rt_ret = rtStreamWaitEvent(stream_, event_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtEventReset(event_, stream_); | rt_ret = rtEventReset(event_, stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -40,7 +40,7 @@ Status FusionStartTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtKernelFusionStart(stream_); | rtError_t rt_ret = rtKernelFusionStart(stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("FusionStartTaskInfo Distribute Success."); | GELOGI("FusionStartTaskInfo Distribute Success."); | ||||
@@ -40,7 +40,7 @@ Status FusionStopTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtKernelFusionEnd(stream_); | rtError_t rt_ret = rtKernelFusionEnd(stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("FusionStopTaskInfo Distribute Success."); | GELOGI("FusionStopTaskInfo Distribute Success."); | ||||
@@ -73,24 +73,24 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m | |||||
// Only in Horovod scenario should get the inputName and GeShape | // Only in Horovod scenario should get the inputName and GeShape | ||||
ret = HcomOmeUtil::GetHorovodInputs(op_desc_, kernel_hccl_infos_); | ret = HcomOmeUtil::GetHorovodInputs(op_desc_, kernel_hccl_infos_); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "davinci_model: GetHorovodInputs fail! domi error: %u", ret); | |||||
return FAILED; | |||||
GELOGE(ret, "davinci_model: GetHorovodInputs fail! domi error: %u", ret); | |||||
return ret; | |||||
} | } | ||||
Status dmrt = HcomOmeUtil::GetHcclDataType(op_desc_, kernel_hccl_infos_); | Status dmrt = HcomOmeUtil::GetHcclDataType(op_desc_, kernel_hccl_infos_); | ||||
if (dmrt != SUCCESS) { | if (dmrt != SUCCESS) { | ||||
GELOGE(FAILED, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt); | |||||
return FAILED; | |||||
GELOGE(dmrt, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt); | |||||
return dmrt; | |||||
} | } | ||||
dmrt = HcomOmeUtil::GetHcclCount(op_desc_, kernel_hccl_infos_); | dmrt = HcomOmeUtil::GetHcclCount(op_desc_, kernel_hccl_infos_); | ||||
if (dmrt != SUCCESS) { | if (dmrt != SUCCESS) { | ||||
GELOGE(FAILED, "davinci_model: GetHcomCount fail! domi error: %u", dmrt); | |||||
return FAILED; | |||||
GELOGE(dmrt, "davinci_model: GetHcomCount fail! domi error: %u", dmrt); | |||||
return dmrt; | |||||
} | } | ||||
// Only HCOMBROADCAST and HVDCALLBACKBROADCAST need to get the rootId | // Only HCOMBROADCAST and HVDCALLBACKBROADCAST need to get the rootId | ||||
dmrt = HcomOmeUtil::GetAllRootId(op_desc_, kernel_hccl_infos_); | dmrt = HcomOmeUtil::GetAllRootId(op_desc_, kernel_hccl_infos_); | ||||
if (dmrt != SUCCESS) { | if (dmrt != SUCCESS) { | ||||
GELOGE(FAILED, "davinci_model: Get rootId fail! domi error: %u", dmrt); | |||||
return FAILED; | |||||
GELOGE(dmrt, "davinci_model: Get rootId fail! domi error: %u", dmrt); | |||||
return dmrt; | |||||
} | } | ||||
// GE's new process: hccl declares the number of streams required, creates a stream by GE, and sends it to hccl | // GE's new process: hccl declares the number of streams required, creates a stream by GE, and sends it to hccl | ||||
@@ -138,8 +138,8 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM | |||||
uint32_t max_task_count; | uint32_t max_task_count; | ||||
ret = rtGetMaxStreamAndTask(RT_NORMAL_STREAM, &max_stream_count, &max_task_count); | ret = rtGetMaxStreamAndTask(RT_NORMAL_STREAM, &max_stream_count, &max_task_count); | ||||
if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
GELOGE(FAILED, "Get max stream and task count by rts failed."); | |||||
return FAILED; | |||||
GELOGE(RT_FAILED, "Get max stream and task count by rts failed."); | |||||
return RT_ERROR_TO_GE_STATUS(ret); | |||||
} | } | ||||
max_node_of_hccl_stream_ = max_task_count / kMaxTaskOfStream; | max_node_of_hccl_stream_ = max_task_count / kMaxTaskOfStream; | ||||
} | } | ||||
@@ -153,8 +153,8 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM | |||||
ReuseStream(created_stream_num, davinci_model); | ReuseStream(created_stream_num, davinci_model); | ||||
ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model); | ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Create hccl stream failed."); | |||||
return FAILED; | |||||
GELOGE(RT_FAILED, "Create hccl stream failed."); | |||||
return RT_ERROR_TO_GE_STATUS(ret); | |||||
} | } | ||||
} | } | ||||
GELOGI("Initialize hccl slave stream success, hcclStreamNum =%ld", hccl_stream_num); | GELOGI("Initialize hccl slave stream success, hcclStreamNum =%ld", hccl_stream_num); | ||||
@@ -179,14 +179,14 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode | |||||
rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); | rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
// Create slave stream, inactive by default, activated by hccl | // Create slave stream, inactive by default, activated by hccl | ||||
rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM); | rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
(void)rtStreamDestroy(stream); | (void)rtStreamDestroy(stream); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGD("hccl_stream addr is=%p", stream); | GELOGD("hccl_stream addr is=%p", stream); | ||||
int64_t remain_cap = max_node_of_hccl_stream_ - 1; | int64_t remain_cap = max_node_of_hccl_stream_ - 1; | ||||
@@ -250,8 +250,7 @@ Status HcclTaskInfo::UpdateArgs() { | |||||
io_addrs.insert(io_addrs.end(), output_data_addrs_.begin(), output_data_addrs_.end()); | io_addrs.insert(io_addrs.end(), output_data_addrs_.begin(), output_data_addrs_.end()); | ||||
io_addrs.insert(io_addrs.end(), workspace_data_addrs_.begin(), workspace_data_addrs_.end()); | io_addrs.insert(io_addrs.end(), workspace_data_addrs_.begin(), workspace_data_addrs_.end()); | ||||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||||
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); | |||||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||||
GELOGI("HcclTaskInfo::UpdateArgs success."); | GELOGI("HcclTaskInfo::UpdateArgs success."); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -72,11 +72,11 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | ||||
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | ||||
return FAILED;) | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | ||||
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | ||||
return FAILED;) | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
} | } | ||||
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc_->GetName().c_str(), | GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc_->GetName().c_str(), | ||||
@@ -113,7 +113,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_output_addr)); | static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_output_addr)); | ||||
void *workspace_base_addr = nullptr; | void *workspace_base_addr = nullptr; | ||||
rtError_t rt_ret = rtMalloc(&workspace_base_addr, kernel_ex_def.task_info_size(), RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&workspace_base_addr, kernel_ex_def.task_info_size(), RT_MEMORY_HBM); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error, ret: Ox%X", rt_ret); return FAILED;); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: Ox%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);); | |||||
rt_ret = rtMemcpy(workspace_base_addr, kernel_ex_def.task_info_size(), kernel_ex_def.task_info().data(), | rt_ret = rtMemcpy(workspace_base_addr, kernel_ex_def.task_info_size(), kernel_ex_def.task_info().data(), | ||||
kernel_ex_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); | kernel_ex_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = | fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = | ||||
@@ -123,20 +124,23 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_); | fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_); | ||||
rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM); | rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
rt_ret = rtMemcpy(kernel_buf_, kernel_buf_size_, static_cast<void *>(&fwk_op_kernel), kernel_buf_size_, | rt_ret = rtMemcpy(kernel_buf_, kernel_buf_size_, static_cast<void *>(&fwk_op_kernel), kernel_buf_size_, | ||||
RT_MEMCPY_HOST_TO_DEVICE); | RT_MEMCPY_HOST_TO_DEVICE); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
GELOGI("KernelExTaskInfo knonw node Init Success."); | GELOGI("KernelExTaskInfo knonw node Init Success."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// 3. Set workspaceaddr, inputOutputDataAddr | // 3. Set workspaceaddr, inputOutputDataAddr | ||||
if (CopyTaskInfo(kernel_ex_def, rts_param, op_desc) != SUCCESS) { | |||||
GELOGE(FAILED, "copy task info to workspace failed."); | |||||
return FAILED; | |||||
Status ge_ret = CopyTaskInfo(kernel_ex_def, rts_param, op_desc); | |||||
if (ge_ret != SUCCESS) { | |||||
GELOGE(ge_ret, "copy task info to workspace failed."); | |||||
return ge_ret; | |||||
} | } | ||||
const vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc); | const vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc); | ||||
@@ -155,11 +159,12 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
auto addrs_size = sizeof(uint64_t) * (io_addrs.size()); | auto addrs_size = sizeof(uint64_t) * (io_addrs.size()); | ||||
if (addrs_size > 0) { | if (addrs_size > 0) { | ||||
rtError_t rt_ret = rtMalloc(&input_output_addr_, addrs_size, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&input_output_addr_, addrs_size, RT_MEMORY_HBM); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); return RT_FAILED;) | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
rt_ret = rtMemcpy(input_output_addr_, addrs_size, io_addrs.data(), addrs_size, RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(input_output_addr_, addrs_size, io_addrs.data(), addrs_size, RT_MEMCPY_HOST_TO_DEVICE); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||||
return FAILED;) | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | ||||
op_desc->GetName())) { | op_desc->GetName())) { | ||||
@@ -177,11 +182,13 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
// 4. Return result | // 4. Return result | ||||
rtError_t rt_ret = rtMalloc(&kernel_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&kernel_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
rt_ret = rtMemcpy(kernel_buf_, sizeof(STR_FWK_OP_KERNEL), static_cast<void *>(&fwk_op_kernel), | rt_ret = rtMemcpy(kernel_buf_, sizeof(STR_FWK_OP_KERNEL), static_cast<void *>(&fwk_op_kernel), | ||||
sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); | sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), input_output_addr_, addrs_size, 0); | davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), input_output_addr_, addrs_size, 0); | ||||
@@ -254,9 +261,7 @@ Status KernelExTaskInfo::UpdateArgs() { | |||||
} | } | ||||
} | } | ||||
} | } | ||||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||||
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); | |||||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||||
GELOGI("KernelExTaskInfo::UpdateArgs success."); | GELOGI("KernelExTaskInfo::UpdateArgs success."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -286,8 +291,8 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const | |||||
rtError_t rt_ret = rtMemcpy(workspace_data_addrs[0], kernel_def.task_info_size(), kernel_def.task_info().data(), | rtError_t rt_ret = rtMemcpy(workspace_data_addrs[0], kernel_def.task_info_size(), kernel_def.task_info().data(), | ||||
kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); | kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(FAILED, "rtMemcpy error: 0x%X", rt_ret); | |||||
return FAILED; | |||||
GELOGE(RT_FAILED, "rtMemcpy error: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -298,7 +303,7 @@ Status KernelExTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_); | rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
if (davinci_model_ == nullptr) { | if (davinci_model_ == nullptr) { | ||||
@@ -311,7 +316,7 @@ Status KernelExTaskInfo::Distribute() { | |||||
rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); | rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
task_id_ = task_id; | task_id_ = task_id; | ||||
stream_id_ = stream_id; | stream_id_ = stream_id; | ||||
@@ -326,7 +331,7 @@ Status KernelExTaskInfo::Release() { | |||||
rtError_t rt_ret = rtFree(kernel_buf_); | rtError_t rt_ret = rtFree(kernel_buf_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGW("rtFree error, ret: 0x%X", rt_ret); | GELOGW("rtFree error, ret: 0x%X", rt_ret); | ||||
ret = FAILED; | |||||
ret = RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} else { | } else { | ||||
kernel_buf_ = nullptr; | kernel_buf_ = nullptr; | ||||
} | } | ||||
@@ -335,7 +340,7 @@ Status KernelExTaskInfo::Release() { | |||||
rtError_t rt_ret = rtFree(input_output_addr_); | rtError_t rt_ret = rtFree(input_output_addr_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGW("rtFree error, ret: 0x%X", rt_ret); | GELOGW("rtFree error, ret: 0x%X", rt_ret); | ||||
ret = FAILED; | |||||
ret = RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} else { | } else { | ||||
input_output_addr_ = nullptr; | input_output_addr_ = nullptr; | ||||
} | } | ||||
@@ -344,7 +349,7 @@ Status KernelExTaskInfo::Release() { | |||||
rtError_t rt_ret = rtFree(ext_info_addr_); | rtError_t rt_ret = rtFree(ext_info_addr_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGW("rtFree ext_info_addr[%p] error, ret: 0x%X", ext_info_addr_, rt_ret); | GELOGW("rtFree ext_info_addr[%p] error, ret: 0x%X", ext_info_addr_, rt_ret); | ||||
ret = FAILED; | |||||
ret = RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} else { | } else { | ||||
ext_info_addr_ = nullptr; | ext_info_addr_ = nullptr; | ||||
} | } | ||||
@@ -99,13 +99,13 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||||
rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_); | rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", | ||||
kernel_def.stub_func().c_str()); | kernel_def.stub_func().c_str()); | ||||
return RT_FAILED;); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);); | |||||
} else if (kernel_type_ != cce::ccKernelType::AI_CPU) { | } else if (kernel_type_ != cce::ccKernelType::AI_CPU) { | ||||
rtError_t rt_ret; | rtError_t rt_ret; | ||||
rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); | rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | ||||
GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); | GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); | ||||
return RT_FAILED;); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);); | |||||
} | } | ||||
if (context.origin_op_index_size() > CC_FUSION_OP_MAX) { | if (context.origin_op_index_size() > CC_FUSION_OP_MAX) { | ||||
@@ -232,7 +232,7 @@ Status KernelTaskInfo::SuperKernelLaunch() { | |||||
skt_info_.last_dump_flag); | skt_info_.last_dump_flag); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "SuperKernelLaunch: Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "SuperKernelLaunch: Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
call_save_dump_ = true; | call_save_dump_ = true; | ||||
GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed"); | GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed"); | ||||
@@ -241,21 +241,24 @@ Status KernelTaskInfo::SuperKernelLaunch() { | |||||
// Create super kernel factory | // Create super kernel factory | ||||
skt::SuperKernelFactory *factory = &skt::SuperKernelFactory::GetInstance(); | skt::SuperKernelFactory *factory = &skt::SuperKernelFactory::GetInstance(); | ||||
// Init super kernel factory | // Init super kernel factory | ||||
if (factory->Init() != SUCCESS) { | |||||
GELOGE(RT_FAILED, "SuperKernelLaunch: SuperKernelFactory init failed"); | |||||
return RT_FAILED; | |||||
Status ge_ret = factory->Init(); | |||||
if (ge_ret != SUCCESS) { | |||||
GELOGE(ge_ret, "SuperKernelLaunch: SuperKernelFactory init failed"); | |||||
return ge_ret; | |||||
} | } | ||||
// Call the fuse API | // Call the fuse API | ||||
std::unique_ptr<skt::SuperKernel> superKernel = nullptr; | std::unique_ptr<skt::SuperKernel> superKernel = nullptr; | ||||
if (factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel) != SUCCESS) { | |||||
GELOGE(RT_FAILED, "SuperKernelLaunch: fuse call failed"); | |||||
return RT_FAILED; | |||||
ge_ret = factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel); | |||||
if (ge_ret != SUCCESS) { | |||||
GELOGE(ge_ret, "SuperKernelLaunch: fuse call failed"); | |||||
return ge_ret; | |||||
} | } | ||||
// Launch a super kernel | // Launch a super kernel | ||||
skt_dump_flag_ = GetDumpFlag(); | skt_dump_flag_ = GetDumpFlag(); | ||||
if (superKernel->Launch(skt_info_.last_stream, skt_dump_flag_) != SUCCESS) { | |||||
GELOGE(RT_FAILED, "SuperKernelLaunch: launch failed"); | |||||
return RT_FAILED; | |||||
ge_ret = superKernel->Launch(skt_info_.last_stream, skt_dump_flag_); | |||||
if (ge_ret != SUCCESS) { | |||||
GELOGE(ge_ret, "SuperKernelLaunch: launch failed"); | |||||
return ge_ret; | |||||
} | } | ||||
GELOGI("SuperKernelLaunch: success[skt_kernel_list size[%zu] skt_arg_list[%zu]]", skt_kernel_list.size(), | GELOGI("SuperKernelLaunch: success[skt_kernel_list size[%zu] skt_arg_list[%zu]]", skt_kernel_list.size(), | ||||
skt_arg_list.size()); | skt_arg_list.size()); | ||||
@@ -276,9 +279,9 @@ Status KernelTaskInfo::SaveSuperKernelInfo() { | |||||
skt_info_.last_dump_flag = dump_flag_; | skt_info_.last_dump_flag = dump_flag_; | ||||
skt_info_.dump_flag_list.push_back(dump_flag_); | skt_info_.dump_flag_list.push_back(dump_flag_); | ||||
skt_info_.op_desc_list.push_back(op_desc_); | skt_info_.op_desc_list.push_back(op_desc_); | ||||
skt_info_.dump_args_list.push_back(reinterpret_cast<uintptr_t>(dump_args_)); | |||||
skt_info_.dump_args_list.push_back(reinterpret_cast<uintptr_t>(skt_dump_args_)); | |||||
skt_info_.last_group_key = group_key_; | skt_info_.last_group_key = group_key_; | ||||
skt_info_.last_dump_args = reinterpret_cast<uintptr_t>(dump_args_); | |||||
skt_info_.last_dump_args = reinterpret_cast<uintptr_t>(skt_dump_args_); | |||||
skt_info_.last_op = op_desc_; | skt_info_.last_op = op_desc_; | ||||
// last node in a stream, just launch | // last node in a stream, just launch | ||||
if (IsMarkedLastNode()) { | if (IsMarkedLastNode()) { | ||||
@@ -345,15 +348,15 @@ Status KernelTaskInfo::SuperKernelDistribute() { | |||||
// 1.launch before | // 1.launch before | ||||
ret = SuperKernelLaunch(); | ret = SuperKernelLaunch(); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Call SuperKernelLaunch failed!"); | |||||
return FAILED; | |||||
GELOGE(ret, "Call SuperKernelLaunch failed!"); | |||||
return ret; | |||||
} | } | ||||
// 2.launch current | // 2.launch current | ||||
rtError_t rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_, | rtError_t rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_, | ||||
static_cast<rtSmDesc_t *>(sm_desc_), stream_, dump_flag_); | static_cast<rtSmDesc_t *>(sm_desc_), stream_, dump_flag_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return FAILED; | |||||
return rt_ret; | |||||
} | } | ||||
call_save_dump_ = true; | call_save_dump_ = true; | ||||
UpdateTaskId(); | UpdateTaskId(); | ||||
@@ -361,8 +364,8 @@ Status KernelTaskInfo::SuperKernelDistribute() { | |||||
} else { | } else { | ||||
ret = SaveSuperKernelInfo(); | ret = SaveSuperKernelInfo(); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Call SuperKernelLaunch failed!"); | |||||
return FAILED; | |||||
GELOGE(ret, "Call SuperKernelLaunch failed!"); | |||||
return ret; | |||||
} | } | ||||
GELOGI("Save Current task [block_dim:%u, size:%zu].", block_dim_, skt_info_.kernel_list.size()); | GELOGI("Save Current task [block_dim:%u, size:%zu].", block_dim_, skt_info_.kernel_list.size()); | ||||
} | } | ||||
@@ -403,7 +406,7 @@ Status KernelTaskInfo::Distribute() { | |||||
} | } | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
// set for task_id_ | // set for task_id_ | ||||
UpdateTaskId(); | UpdateTaskId(); | ||||
@@ -448,9 +451,7 @@ Status KernelTaskInfo::UpdateArgs() { | |||||
} | } | ||||
} | } | ||||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||||
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); | |||||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||||
GELOGI("KernelTaskInfo::UpdateArgs success."); | GELOGI("KernelTaskInfo::UpdateArgs success."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -459,26 +460,31 @@ Status KernelTaskInfo::Release() { | |||||
if (davinci_model_ != nullptr && davinci_model_->IsKnownNode()) { | if (davinci_model_ != nullptr && davinci_model_->IsKnownNode()) { | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
FreeRtMem(&args_); | |||||
FreeRtMem(&superkernel_device_args_addr_); | |||||
FreeRtMem(&superkernel_dev_nav_table_); | |||||
FreeRtMem(&flowtable_); | |||||
FreeRtMem(&custom_info_.input_descs); | |||||
FreeRtMem(&custom_info_.input_addrs); | |||||
FreeRtMem(&custom_info_.output_descs); | |||||
FreeRtMem(&custom_info_.output_addrs); | |||||
FreeRtMem(&custom_info_.attr_handle); | |||||
FreeRtMem(&aicpu_ext_info_addr_); | |||||
rtContext_t ctx = nullptr; | |||||
rtError_t ret = rtCtxGetCurrent(&ctx); | |||||
if (ret == RT_ERROR_NONE) { | |||||
FreeRtMem(&args_); | |||||
FreeRtMem(&superkernel_device_args_addr_); | |||||
FreeRtMem(&superkernel_dev_nav_table_); | |||||
FreeRtMem(&flowtable_); | |||||
FreeRtMem(&custom_info_.input_descs); | |||||
FreeRtMem(&custom_info_.input_addrs); | |||||
FreeRtMem(&custom_info_.output_descs); | |||||
FreeRtMem(&custom_info_.output_addrs); | |||||
FreeRtMem(&custom_info_.attr_handle); | |||||
FreeRtMem(&aicpu_ext_info_addr_); | |||||
} | |||||
if (ctx_.argsOffset != nullptr) { | if (ctx_.argsOffset != nullptr) { | ||||
delete[] ctx_.argsOffset; | delete[] ctx_.argsOffset; | ||||
ctx_.argsOffset = nullptr; | ctx_.argsOffset = nullptr; | ||||
} | } | ||||
rtError_t ret = (sm_desc_ != nullptr) ? rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE; | |||||
ret = (sm_desc_ != nullptr) ? rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE; | |||||
if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", static_cast<int>(ret)); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", static_cast<int>(ret)); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(ret); | |||||
} | } | ||||
sm_desc_ = nullptr; | sm_desc_ = nullptr; | ||||
@@ -508,13 +514,13 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||||
rtError_t rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM); | rtError_t rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -591,14 +597,14 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
// copy orign args | // copy orign args | ||||
rt_ret = rtMemcpy(args_, args_size_, kernel_def.args().data(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(args_, args_size_, kernel_def.args().data(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
vector<uint8_t> args_info(args_size_); | vector<uint8_t> args_info(args_size_); | ||||
errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_); | errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_); | ||||
@@ -617,7 +623,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
kAddrLen * tensor_device_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE); | kAddrLen * tensor_device_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(), | sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(), | ||||
kAddrLen * tensor_device_addrs.size()); | kAddrLen * tensor_device_addrs.size()); | ||||
@@ -625,16 +631,17 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||||
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
skt_dump_args_ = static_cast<char *>(args_) + offset; | |||||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | ||||
op_desc->GetName())) { | op_desc->GetName())) { | ||||
dump_flag_ = RT_KERNEL_DUMPFLAG; | dump_flag_ = RT_KERNEL_DUMPFLAG; | ||||
dump_args_ = static_cast<char *>(args_) + offset; | dump_args_ = static_cast<char *>(args_) + offset; | ||||
} | } | ||||
Status ge_ret = UpdateL2Data(kernel_def); | |||||
// update origin l2 data | // update origin l2 data | ||||
if (UpdateL2Data(kernel_def) != SUCCESS) { | |||||
return RT_FAILED; | |||||
if (ge_ret != SUCCESS) { | |||||
return ge_ret; | |||||
} | } | ||||
vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | ||||
@@ -698,13 +705,13 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel | |||||
rtError_t rt_ret = rtMalloc(&custom_info_.attr_handle, op_attr_size, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&custom_info_.attr_handle, op_attr_size, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtMemcpy(custom_info_.attr_handle, op_attr_size, buffer.GetData(), op_attr_size, RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(custom_info_.attr_handle, op_attr_size, buffer.GetData(), op_attr_size, RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
// args | // args | ||||
@@ -731,14 +738,14 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel | |||||
rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = | rt_ret = | ||||
rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); | rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
davinci_model_->SetZeroCopyAddr(op_desc, input_data_addrs, input_data_addrs.data(), custom_info_.input_addrs, | davinci_model_->SetZeroCopyAddr(op_desc, input_data_addrs, input_data_addrs.data(), custom_info_.input_addrs, | ||||
@@ -784,7 +791,8 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { | |||||
ctx_.genVariableBaseSize = davinci_model_->TotalVarMemSize(); | ctx_.genVariableBaseSize = davinci_model_->TotalVarMemSize(); | ||||
ctx_.l2ctrlSize = sm_contrl_size; | ctx_.l2ctrlSize = sm_contrl_size; | ||||
if (UpdateCceArgs(sm_desc, flowtable, kernel_def) != SUCCESS) { | |||||
ret = UpdateCceArgs(sm_desc, flowtable, kernel_def); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "update cce args fail"); | GELOGE(ret, "update cce args fail"); | ||||
return ret; | return ret; | ||||
} | } | ||||
@@ -800,7 +808,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { | |||||
rtError_t rt_ret = rtMalloc(&args_, kernel_def.args_size(), RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&args_, kernel_def.args_size(), RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", kernel_def.args_size()) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", kernel_def.args_size()) | ||||
@@ -808,7 +816,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { | |||||
rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); | rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
// L2 | // L2 | ||||
@@ -816,13 +824,13 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { | |||||
rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM); | rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -883,7 +891,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api(rtMalloc) failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api(rtMalloc) failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", args_size_) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", args_size_) | ||||
@@ -891,7 +899,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | ||||
@@ -912,12 +920,12 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||||
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | ||||
return FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -934,7 +942,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||||
rtError_t rt_ret = rtMalloc(&custom_info_.input_descs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&custom_info_.input_descs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
for (std::size_t i = 0; i < input_size; ++i) { | for (std::size_t i = 0; i < input_size; ++i) { | ||||
@@ -942,7 +950,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||||
const_cast<tagOpTensor *>(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE); | const_cast<tagOpTensor *>(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
} | } | ||||
@@ -950,7 +958,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||||
rt_ret = rtMalloc(&custom_info_.input_addrs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM); | rt_ret = rtMalloc(&custom_info_.input_addrs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
if (!input_data_addrs.empty()) { | if (!input_data_addrs.empty()) { | ||||
@@ -958,7 +966,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||||
RT_MEMCPY_HOST_TO_DEVICE); | RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
} | } | ||||
@@ -966,14 +974,14 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||||
rt_ret = rtMalloc(&custom_info_.output_descs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM); | rt_ret = rtMalloc(&custom_info_.output_descs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
for (std::size_t i = 0; i < output_size; ++i) { | for (std::size_t i = 0; i < output_size; ++i) { | ||||
rt_ret = rtMemcpy(static_cast<opTensor_t *>(custom_info_.output_descs) + i, sizeof(opTensor_t), | rt_ret = rtMemcpy(static_cast<opTensor_t *>(custom_info_.output_descs) + i, sizeof(opTensor_t), | ||||
const_cast<tagOpTensor *>(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE); | const_cast<tagOpTensor *>(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
} | } | ||||
@@ -981,7 +989,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||||
rt_ret = rtMalloc(&custom_info_.output_addrs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM); | rt_ret = rtMalloc(&custom_info_.output_addrs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
if (!output_data_addrs.empty()) { | if (!output_data_addrs.empty()) { | ||||
@@ -989,7 +997,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||||
RT_MEMCPY_HOST_TO_DEVICE); | RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
} | } | ||||
@@ -1051,8 +1059,8 @@ Status KernelTaskInfo::UpdateCceArgs(std::string &sm_desc, std::string &flowtabl | |||||
Status status = | Status status = | ||||
CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def); | CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def); | ||||
if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
GELOGE(FAILED, "Call cce api failed"); | |||||
return FAILED; | |||||
GELOGE(status, "Call cce api failed"); | |||||
return status; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -1118,14 +1126,14 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe | |||||
rtError_t rt_ret = rtMalloc(&flowtable_, flowtable.size(), RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&flowtable_, flowtable.size(), RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "flowtable refresh of cce scence.", flowtable.size()) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "flowtable refresh of cce scence.", flowtable.size()) | ||||
rt_ret = rtMemcpy(flowtable_, flowtable.size(), flowtable.data(), flowtable.size(), RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(flowtable_, flowtable.size(), flowtable.data(), flowtable.size(), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
// modify flowtable addr in args | // modify flowtable addr in args | ||||
@@ -165,6 +165,7 @@ class KernelTaskInfo : public TaskInfo { | |||||
void *aicpu_ext_info_addr_ = nullptr; | void *aicpu_ext_info_addr_ = nullptr; | ||||
// For super kernel | // For super kernel | ||||
void *skt_dump_args_ = nullptr; | |||||
uint32_t skt_id_; | uint32_t skt_id_; | ||||
std::string stub_func_name_; | std::string stub_func_name_; | ||||
bool is_l1_fusion_enable_; | bool is_l1_fusion_enable_; | ||||
@@ -59,7 +59,7 @@ Status LabelGotoExTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtLabelGotoEx(label_, stream_); | rtError_t rt_ret = rtLabelGotoEx(label_, stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("LabelGotoExTaskInfo Distribute Success."); | GELOGI("LabelGotoExTaskInfo Distribute Success."); | ||||
@@ -59,7 +59,7 @@ Status LabelSetTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtLabelSet(label_, stream_); | rtError_t rt_ret = rtLabelSet(label_, stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("LabelSetTaskInfo Distribute Success."); | GELOGI("LabelSetTaskInfo Distribute Success."); | ||||
@@ -98,13 +98,13 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||||
rtError_t rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtLabelListCpy(label_list_.data(), label_list_.size(), args_, args_size_); | rt_ret = rtLabelListCpy(label_list_.data(), label_list_.size(), args_, args_size_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("LabelSwitchByIndexTaskInfo Init success, branch max: %u.", branch_max_); | GELOGI("LabelSwitchByIndexTaskInfo Init success, branch max: %u.", branch_max_); | ||||
@@ -19,6 +19,10 @@ | |||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "graph/load/new_model_manager/davinci_model.h" | #include "graph/load/new_model_manager/davinci_model.h" | ||||
namespace { | |||||
const uint32_t kAlignBytes = 64; | |||||
} | |||||
namespace ge { | namespace ge { | ||||
Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | ||||
GELOGI("MemcpyAddrAsyncTaskInfo Init Start"); | GELOGI("MemcpyAddrAsyncTaskInfo Init Start"); | ||||
@@ -55,39 +59,40 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel | |||||
// malloc args memory | // malloc args memory | ||||
size_t args_size = sizeof(void *) * io_addrs.size(); | size_t args_size = sizeof(void *) * io_addrs.size(); | ||||
rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM); | |||||
rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, RT_MEMORY_HBM); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
args_align_ = reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(args_) / kAlignBytes + 1) * kAlignBytes); | |||||
// copy orign src/dst | // copy orign src/dst | ||||
GELOGI("src_args:%p, destMax:%zu, src_:%p, dst_args:%p, dst_:%p, count=%zu", args_, args_size, src_, | |||||
static_cast<uint8_t *>(args_) + args_size, dst_, io_addrs.size()); | |||||
rt_ret = rtMemcpy(args_, args_size, io_addrs.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
GELOGI("src_args:%p, destMax:%zu, src_:%p, dst_args:%p, dst_:%p, count=%zu", args_align_, args_size, src_, | |||||
static_cast<uint8_t *>(args_align_) + args_size, dst_, io_addrs.size()); | |||||
rt_ret = rtMemcpy(args_align_, args_size, io_addrs.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api for src failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api for src failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
count_ = memcpy_async.count(); | count_ = memcpy_async.count(); | ||||
kind_ = memcpy_async.kind(); | kind_ = memcpy_async.kind(); | ||||
dst_max_ = memcpy_async.dst_max(); | dst_max_ = memcpy_async.dst_max(); | ||||
GELOGI("InitMemcpyAddrAsyncTaskInfo, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu, args:%p, size:%zu", | GELOGI("InitMemcpyAddrAsyncTaskInfo, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu, args:%p, size:%zu", | ||||
memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_, args_, args_size); | |||||
memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_, args_align_, args_size); | |||||
davinci_model->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), args_, args_size, 0); | |||||
davinci_model->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), args_align_, args_size, 0); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status MemcpyAddrAsyncTaskInfo::Distribute() { | Status MemcpyAddrAsyncTaskInfo::Distribute() { | ||||
GELOGI("MemcpyAddrAsyncTaskInfo Distribute Start, dst_max:%lu, count:%lu, kind:%u", dst_max_, count_, kind_); | GELOGI("MemcpyAddrAsyncTaskInfo Distribute Start, dst_max:%lu, count:%lu, kind:%u", dst_max_, count_, kind_); | ||||
rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_) + sizeof(void *)), | |||||
dst_max_, args_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); | |||||
rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_align_) + sizeof(void *)), | |||||
dst_max_, args_align_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -22,7 +22,8 @@ | |||||
namespace ge { | namespace ge { | ||||
class MemcpyAddrAsyncTaskInfo : public TaskInfo { | class MemcpyAddrAsyncTaskInfo : public TaskInfo { | ||||
public: | public: | ||||
MemcpyAddrAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), args_(nullptr), count_(0), kind_(0) {} | |||||
MemcpyAddrAsyncTaskInfo() | |||||
: dst_(nullptr), dst_max_(0), src_(nullptr), args_(nullptr), args_align_(nullptr), count_(0), kind_(0) {} | |||||
~MemcpyAddrAsyncTaskInfo() override { | ~MemcpyAddrAsyncTaskInfo() override { | ||||
src_ = nullptr; | src_ = nullptr; | ||||
@@ -46,6 +47,7 @@ class MemcpyAddrAsyncTaskInfo : public TaskInfo { | |||||
uint64_t dst_max_; | uint64_t dst_max_; | ||||
uint8_t *src_; | uint8_t *src_; | ||||
void *args_; | void *args_; | ||||
void *args_align_; | |||||
uint64_t count_; | uint64_t count_; | ||||
uint32_t kind_; | uint32_t kind_; | ||||
}; | }; | ||||
@@ -68,7 +68,7 @@ Status MemcpyAsyncTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtMemcpyAsync(dst_, dst_max_, src_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); | rtError_t rt_ret = rtMemcpyAsync(dst_, dst_max_, src_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("MemcpyAsyncTaskInfo Distribute Success"); | GELOGI("MemcpyAsyncTaskInfo Distribute Success"); | ||||
@@ -102,8 +102,7 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { | |||||
io_addrs.emplace_back(reinterpret_cast<void *>(src_)); | io_addrs.emplace_back(reinterpret_cast<void *>(src_)); | ||||
io_addrs.emplace_back(reinterpret_cast<void *>(dst_)); | io_addrs.emplace_back(reinterpret_cast<void *>(dst_)); | ||||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||||
"update memcpyasync in known node zero copy addr failed."); | |||||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||||
GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success."); | GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success."); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -47,7 +47,7 @@ Status ProfilerTraceTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtProfilerTrace(log_id_, notify_, flat_, stream_); | rtError_t rt_ret = rtProfilerTrace(log_id_, notify_, flat_, stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("ProfilerTraceTaskInfo Distribute Success."); | GELOGI("ProfilerTraceTaskInfo Distribute Success."); | ||||
@@ -74,7 +74,7 @@ Status StreamActiveTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtStreamActive(active_stream_, stream_); | rtError_t rt_ret = rtStreamActive(active_stream_, stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("StreamActiveTaskInfo Distribute Success. activeStreamID:%p.", active_stream_); | GELOGI("StreamActiveTaskInfo Distribute Success. activeStreamID:%p.", active_stream_); | ||||
@@ -104,7 +104,7 @@ Status StreamSwitchTaskInfo::Distribute() { | |||||
rtError_t rt_ret = rtStreamSwitchEx(input_ptr_, cond_, value_ptr_, true_stream_, stream_, data_type_); | rtError_t rt_ret = rtStreamSwitchEx(input_ptr_, cond_, value_ptr_, true_stream_, stream_, data_type_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("StreamSwitchTaskInfo Distribute Success. cond:%d, stream:%p, datatype:%d.", cond_, true_stream_, data_type_); | GELOGI("StreamSwitchTaskInfo Distribute Success. cond:%d, stream:%p, datatype:%d.", cond_, true_stream_, data_type_); | ||||
@@ -22,10 +22,8 @@ | |||||
#include "graph/load/new_model_manager/model_utils.h" | #include "graph/load/new_model_manager/model_utils.h" | ||||
namespace { | namespace { | ||||
const uint32_t kDynamicBtachParamNum = 1; | |||||
const uint32_t kDynamicResolutionParamNum = 2; | |||||
const uint8_t kStreamSwitchnInputNum = 1; | const uint8_t kStreamSwitchnInputNum = 1; | ||||
} // namespace | |||||
} | |||||
namespace ge { | namespace ge { | ||||
Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | ||||
@@ -45,10 +43,6 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * | |||||
// set size_ | // set size_ | ||||
input_size_ = stream_switchn_def.size(); | input_size_ = stream_switchn_def.size(); | ||||
if (input_size_ != kDynamicBtachParamNum && input_size_ != kDynamicResolutionParamNum) { | |||||
GELOGE(FAILED, "The size of dynamic batch or imagesize input is 1 or 2, now it is %u.", input_size_); | |||||
return FAILED; | |||||
} | |||||
// set value_ptr_ | // set value_ptr_ | ||||
auto value = stream_switchn_def.target_value(); | auto value = stream_switchn_def.target_value(); | ||||
@@ -95,7 +89,7 @@ Status StreamSwitchNTaskInfo::Distribute() { | |||||
rtStreamSwitchN(input_ptr_, input_size_, value_ptr_, true_stream_ptr_, element_size_, stream_, data_type_); | rtStreamSwitchN(input_ptr_, input_size_, value_ptr_, true_stream_ptr_, element_size_, stream_, data_type_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | ||||
return RT_FAILED; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("StreamSwitchNTaskInfo Distribute Success. inputSize:%u, elementSize:%d, datatype:%d.", input_size_, | GELOGI("StreamSwitchNTaskInfo Distribute Success. inputSize:%u, elementSize:%d, datatype:%d.", input_size_, | ||||
@@ -26,13 +26,15 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { | |||||
reinterpret_cast<const void *>(reinterpret_cast<uintptr_t>(this->GetNavTableSize()))}; | reinterpret_cast<const void *>(reinterpret_cast<uintptr_t>(this->GetNavTableSize()))}; | ||||
rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failied. error: 0x%X", rt_ret); return FAILED;) | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failied. error: 0x%X", rt_ret); return FAILED;) | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, | rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream, | ||||
dump_flag); | dump_flag); | ||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelLaunchWithFlag failied. error: 0x%X", rt_ret); | |||||
return FAILED;) | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtKernelLaunchWithFlag failied. error: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
} // namespace skt | } // namespace skt | ||||