Author | SHA1 | Message | Date |
---|---|---|---|
|
885af56694 |
!212 fix securec download links due to mistakes made by openeuler community
Merge pull request !212 from yanghaoran/r0.6 |
4 years ago |
|
9f8bdb838e | fix securec download links due to mistakes made by openeuler community | 4 years ago |
|
2cb83c8f4d |
!52 Revert "Op debug feature"
Merge pull request !52 from yanghaoran/r0.6 |
4 years ago |
|
20f86e636b |
Revert "Op debug feature"
This reverts commit
|
4 years ago |
|
efd823cc18 |
!51 runpackage sync C75B050 for r0.6
Merge pull request !51 from HW_KK/r0.6 |
4 years ago |
|
ca11480c34 | runpackage sync C75B050 | 4 years ago |
|
db2ea7a6ff | update RELEASE.md. | 4 years ago |
@@ -1,3 +1,24 @@ | |||
# Release 0.6.0-beta | |||
## Major Features and Improvements | |||
- GE supports function control operators such as If/Case/While/For. | |||
- In a single operator call scenario, GE supports recording the correspondence between operators and tasks for performance commissioning. | |||
- GE supports new operator overflow positioning solution. | |||
## Bugfixes | |||
- Fix the problem that the aclmdlGetCurOutputDims interface failed to query output Dims in dynamic batch scenarios. | |||
- Fix the problem that the operator compilation options (advanced and advanced) cannot be selected. | |||
- Fix the problem that zero copy function cannot be performed in the scene of converging conditional operators after Data operators. | |||
- Fix the problem that the empty graph cannot be handled. | |||
## Thanks to our Contributors | |||
Thanks goes to these wonderful people: | |||
wangcong,weiyang,yanghaorang,xutianchun,shibeiji,zhouchao, tanghuikang, zhoulili, liujunzhu, zhengyuanhua, taoxiangdong | |||
Contributions of any kind are welcome! | |||
# Release 0.5.0-beta | |||
## Major Features and Improvements | |||
@@ -1,7 +1,7 @@ | |||
graphengine_add_pkg(securec | |||
VER 1.1.10 | |||
URL https://gitee.com/openeuler/bounds_checking_function/repository/archive/v1.1.10.tar.gz | |||
MD5 0782dd2351fde6920d31a599b23d8c91 | |||
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz | |||
MD5 193f0ca5246c1dd84920db34d2d8249f | |||
LIBS c_sec | |||
PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001 | |||
CMAKE_OPTION " " | |||
@@ -63,5 +63,12 @@ struct HcomOpertion { | |||
int32_t root; | |||
}; | |||
struct HcomRemoteAccessAddrInfo { | |||
uint32_t remotetRankID; | |||
uint64_t remoteAddr; // host embedding table address | |||
uint64_t localAddr; // device HBM address | |||
uint64_t length; // memory Length in Bytes | |||
}; | |||
} // namespace ge | |||
#endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_ |
@@ -31,27 +31,37 @@ class ErrorManager { | |||
/// | |||
/// @brief init | |||
/// @param [in] path current so path | |||
/// @param [in] path: current so path | |||
/// @return int 0(success) -1(fail) | |||
/// | |||
int Init(std::string path); | |||
/// | |||
/// @brief Report error message | |||
/// @param [in] errCode error code | |||
/// @param [in] mapArgs parameter map | |||
/// @param [in] error_code: error code | |||
/// @param [in] args_map: parameter map | |||
/// @return int 0(success) -1(fail) | |||
/// | |||
int ReportErrMessage(std::string error_code, const std::map<std::string, std::string> &args_map); | |||
/// | |||
/// @brief output error message | |||
/// @param [in] handle print handle | |||
/// @param [in] handle: print handle | |||
/// @return int 0(success) -1(fail) | |||
/// | |||
int OutputErrMessage(int handle); | |||
/// | |||
/// @brief output message | |||
/// @param [in] handle: print handle | |||
/// @return int 0(success) -1(fail) | |||
/// | |||
int OutputMessage(int handle); | |||
/// | |||
/// @brief Report error message | |||
/// @param [in] vector parameter key, vector parameter value | |||
/// @param [in] key: vector parameter key | |||
/// @param [in] value: vector parameter value | |||
/// | |||
void ATCReportErrMessage(std::string error_code, const std::vector<std::string> &key = {}, | |||
const std::vector<std::string> &value = {}); | |||
@@ -60,7 +70,7 @@ class ErrorManager { | |||
struct ErrorInfo { | |||
std::string error_id; | |||
std::string error_message; | |||
std::vector<std::string> arglist; | |||
std::vector<std::string> arg_list; | |||
}; | |||
ErrorManager() {} | |||
@@ -77,7 +87,8 @@ class ErrorManager { | |||
bool is_init_ = false; | |||
std::map<std::string, ErrorInfo> error_map_; | |||
std::vector<std::string> error_message_evc_; | |||
std::vector<std::string> error_messages_; | |||
std::vector<std::string> warning_messages_; | |||
}; | |||
#endif // ERROR_MANAGER_H_ |
@@ -82,6 +82,8 @@ class PlatformInfoManager { | |||
void ParseVectorCoreMemoryRates(map<string, string> &vectorCoreMemoryRatesMap, PlatformInfo &platformInfoTemp); | |||
void ParseCPUCache(map<string, string> &CPUCacheMap, PlatformInfo &platformInfoTemp); | |||
void ParseVectorCoreintrinsicDtypeMap(map<string, string> &vectorCoreintrinsicDtypeMap, | |||
PlatformInfo &platformInfoTemp); | |||
@@ -73,6 +73,8 @@ typedef struct tagAiCoreSpec { | |||
typedef struct tagAiCoreMemoryRates { | |||
double ddrRate; | |||
double ddrReadRate; | |||
double ddrWriteRate; | |||
double l2Rate; | |||
double l2ReadRate; | |||
double l2WriteRate; | |||
@@ -86,6 +88,7 @@ typedef struct tagAiCoreMemoryRates { | |||
} AiCoreMemoryRates; | |||
typedef struct tagVectorCoreSpec { | |||
double vecFreq; | |||
uint64_t vecCalcSize; | |||
uint64_t smaskBuffer; | |||
uint64_t ubSize; | |||
@@ -94,10 +97,15 @@ typedef struct tagVectorCoreSpec { | |||
uint64_t ubbankNum; | |||
uint64_t ubburstInOneBlock; | |||
uint64_t ubbankGroupNum; | |||
uint64_t vectorRegSize; | |||
uint64_t predicateRegSize; | |||
uint64_t addressRegSize; | |||
} VectorCoreSpec; | |||
typedef struct tagVectorCoreMemoryRates { | |||
double ddrRate; | |||
double ddrReadRate; | |||
double ddrWriteRate; | |||
double l2Rate; | |||
double l2ReadRate; | |||
double l2WriteRate; | |||
@@ -105,6 +113,11 @@ typedef struct tagVectorCoreMemoryRates { | |||
double ubToDdrRate; | |||
} VectorCoreMemoryRates; | |||
typedef struct tagCPUCache { | |||
uint32_t AICPUSyncBySW; | |||
uint32_t TSCPUSyncBySW; | |||
} CPUCache; | |||
typedef struct tagPlatformInfo { | |||
StrInfo strInfo; | |||
SoCInfo socInfo; | |||
@@ -113,6 +126,7 @@ typedef struct tagPlatformInfo { | |||
map<string, vector<string>> aiCoreIntrinsicDtypeMap; | |||
VectorCoreSpec vectorCoreSpec; | |||
VectorCoreMemoryRates vectorCoreMemoryRates; | |||
CPUCache cpucache; | |||
map<string, vector<string>> vectorCoreIntrinsicDtypeMap; | |||
} PlatformInfo; | |||
@@ -46,7 +46,6 @@ const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; | |||
const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; | |||
const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; | |||
const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; | |||
const char *const OPTION_EXEC_OP_DEBUG_LEVEL = "ge.exec.opDebugLevel"; | |||
const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; | |||
const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; | |||
const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; | |||
@@ -174,6 +173,9 @@ const char *const kDynamicBatchSize = "ge.dynamicBatchSize"; | |||
// configure whether to use dynamic image size | |||
const char *const kDynamicImageSize = "ge.dynamicImageSize"; | |||
// Configure whether to use dynamic dims | |||
const char *const kDynamicDims = "ge.dynamicDims"; | |||
// Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y, | |||
// example: GA|RL, support configure multiple, split by | | |||
const std::string AUTO_TUNE_MODE = "ge.autoTuneMode"; | |||
@@ -269,6 +271,7 @@ static const char *const INPUT_SHAPE = "input_shape"; | |||
static const char *const OP_NAME_MAP = "op_name_map"; | |||
static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; | |||
static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; | |||
static const char *const DYNAMIC_DIMS = kDynamicDims; | |||
static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); | |||
static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); | |||
static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; | |||
@@ -291,10 +294,11 @@ static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c | |||
// for interface: aclgrphBuildModel | |||
const std::set<std::string> ir_builder_suppported_options = { | |||
INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, DYNAMIC_BATCH_SIZE, | |||
DYNAMIC_IMAGE_SIZE, INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, | |||
AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, INPUT_FP16_NODES, | |||
LOG_LEVEL}; | |||
INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, | |||
DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE, DYNAMIC_DIMS, | |||
INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, | |||
AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, | |||
INPUT_FP16_NODES, LOG_LEVEL}; | |||
// for interface: aclgrphBuildInitialize | |||
const std::set<std::string> global_options = {CORE_TYPE, | |||
SOC_VERSION, | |||
@@ -343,6 +343,7 @@ class OpReg { | |||
auto x_type = op.GetInputDesc(in_name).GetDataType(); \ | |||
TensorDesc op_output_desc = op.GetOutputDesc(out_name); \ | |||
op_output_desc.SetShape(ge::Shape(x_shape)); \ | |||
op_output_desc.SetOriginShape(ge::Shape(x_shape)); \ | |||
op_output_desc.SetDataType(x_type); \ | |||
return op.UpdateOutputDesc(out_name, op_output_desc); \ | |||
} | |||
@@ -232,7 +232,7 @@ | |||
rtError_t _rt_ret = (expr); \ | |||
if (_rt_ret != RT_ERROR_NONE) { \ | |||
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ | |||
return ge::RT_FAILED; \ | |||
return RT_ERROR_TO_GE_STATUS(_rt_ret); \ | |||
} \ | |||
} while (0); | |||
@@ -280,8 +280,25 @@ GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_REDUCE_SCATTER_FAILED, 47, "call hccl hcom r | |||
// Executor module error code definition | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized."); | |||
GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 2, "GE AIPP is not exist."); | |||
GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 3, "GE Dynamic AIPP is not support to query temporarily."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_PATH_INVALID, 2, "Model file path is invalid."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_KEY_PATH_INVALID, 3, "Key file path of model is invalid."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_ID_INVALID, 4, "Model id is invalid."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_DATA_SIZE_INVALID, 5, "Data size of model is invalid."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_WEIGHT_SIZE_INVALID, 6, "Weight size of model is invalid."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_PARTITION_NUM_INVALID, 7, "Partition number of model is invalid."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_QUEUE_ID_INVALID, 8, "Queue id of model is invalid."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, 9, "Model does not support encryption."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_READ_MODEL_FILE_FAILED, 10, "Failed to read model file."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_MODEL_REPEATED, 11, "The model is loaded repeatedly."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_MODEL_PARTITION_FAILED, 12, "Failed to load model partition."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, 13, "Failed to load weight partition."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_TASK_PARTITION_FAILED, 14, "Failed to load task partition."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, 15, "Failed to load kernel partition."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, 16, "Failed to allocate feature map memory."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, 17, "Failed to allocate weight memory."); | |||
GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_VAR_MEM_FAILED, 18, "Failed to allocate variable memory."); | |||
GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 19, "GE AIPP is not exist."); | |||
GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 20, "GE Dynamic AIPP is not support to query temporarily."); | |||
// Generator module error code definition | |||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed."); | |||
@@ -289,6 +306,8 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, 2, "Graph mana | |||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph manager build graph failed."); | |||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); | |||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); | |||
#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast<Status>(RT_ERROR) | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ |
@@ -339,6 +339,7 @@ REGISTER_OPTYPE_DECLARE(GETNEXT, "GetNext"); | |||
REGISTER_OPTYPE_DECLARE(INITDATA, "InitData"); | |||
REGISTER_OPTYPE_DECLARE(TRANSSHAPE, "TransShape") | |||
REGISTER_OPTYPE_DECLARE(REFIDENTITY, "RefIdentity"); | |||
REGISTER_OPTYPE_DECLARE(BITCAST, "Bitcast"); | |||
// ANN dedicated operator | |||
REGISTER_OPTYPE_DECLARE(ANN_MEAN, "AnnMean"); | |||
@@ -432,6 +433,8 @@ REGISTER_OPTYPE_DECLARE(HCOMALLREDUCE, "HcomAllReduce"); | |||
REGISTER_OPTYPE_DECLARE(HCOMREDUCESCATTER, "HcomReduceScatter"); | |||
REGISTER_OPTYPE_DECLARE(HCOMSEND, "HcomSend"); | |||
REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); | |||
REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); | |||
REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); | |||
REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); | |||
REGISTER_OPTYPE_DECLARE(VARISINITIALIZEDOP, "VarIsInitializedOp"); | |||
@@ -559,6 +562,16 @@ enum ModelCheckType { | |||
}; | |||
/// | |||
/// @brief dynamic input type | |||
/// | |||
enum DynamicInputType { | |||
FIXED = 0, // default mode | |||
DYNAMIC_BATCH = 1, | |||
DYNAMIC_IMAGE = 2, | |||
DYNAMIC_DIMS = 3 | |||
}; | |||
/// | |||
/// @brief magic number of the model file | |||
/// | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FILE_MAGIC_NUM; | |||
@@ -26,23 +26,26 @@ | |||
#include "common/ge_types.h" | |||
#include "common/types.h" | |||
#include "graph/tensor.h" | |||
#include "graph/ge_tensor.h" | |||
#include "runtime/base.h" | |||
namespace ge { | |||
class ModelListenerAdapter; | |||
class SingleOp; | |||
class DynamicSingleOp; | |||
struct RunModelData { | |||
uint32_t index; // Data index | |||
uint32_t modelId; | |||
std::vector<DataBuffer> blobs; // All input/output data buffer | |||
uint32_t timestamp; // Data creation time | |||
uint32_t timeout; // Processing timeout | |||
uint64_t request_id = 0; // Request ID | |||
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 | |||
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 | |||
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 | |||
std::vector<DataBuffer> blobs; // All input/output data buffer | |||
uint32_t timestamp; // Data creation time | |||
uint32_t timeout; // Processing timeout | |||
uint64_t request_id = 0; // Request ID | |||
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 | |||
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 | |||
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 | |||
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty | |||
}; | |||
class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
@@ -87,16 +90,52 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
/// | |||
ge::Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, | |||
uint64_t image_width); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Set dynamic dims info | |||
/// @param [in] model_id: model id allocate from manager | |||
/// @param [in] dynamic_input_addr: dynamic input addr created by user | |||
/// @param [in] length: length of dynamic input addr | |||
/// @param [in] dynamic_dim_num: number of dynamic dimension | |||
/// @param [in] dynamic_dims: array of dynamic dimensions | |||
/// @return execute result | |||
/// | |||
ge::Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
const std::vector<uint64_t> &dynamic_dims); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get current dynamic dims info by combined dims | |||
/// @param [in] model_id: model id allocate from manager | |||
/// @param [in] combined_dims: array of combined dimensions | |||
/// @param [out] cur_dynamic_dims: current dynamic dims | |||
/// @return execute result | |||
/// | |||
ge::Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &combined_dims, | |||
std::vector<uint64_t> &cur_dynamic_dims); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get dynamic batch_info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @param [out] dynamic_type | |||
/// @return execute result | |||
/// | |||
ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
int32_t &dynamic_type); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get combined dynamic dims info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @return execute result | |||
/// | |||
ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
ge::Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info); | |||
ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
/// | |||
/// @ingroup ge | |||
@@ -209,6 +248,13 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
static ge::Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | |||
std::vector<DataBuffer> &outputs); | |||
static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
DynamicSingleOp **single_op); | |||
static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | |||
const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, | |||
std::vector<DataBuffer> &outputs); | |||
static ge::Status ReleaseSingleOpResource(void *stream); | |||
ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); | |||
@@ -35,9 +35,6 @@ class ModelRunner { | |||
bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id, | |||
std::shared_ptr<DavinciModel> davinci_model, std::shared_ptr<ModelListener> listener); | |||
bool DistributeTask(uint32_t model_id); | |||
bool LoadModelComplete(uint32_t model_id); | |||
const std::vector<uint32_t> &GetTaskIdList(uint32_t model_id) const; | |||
@@ -46,8 +43,6 @@ class ModelRunner { | |||
const std::map<std::string, std::shared_ptr<RuntimeInfo>> &GetRuntimeInfoMap(uint32_t model_id) const; | |||
void *GetModelHandle(uint32_t model_id) const; | |||
bool UnloadModel(uint32_t model_id); | |||
bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data); | |||
@@ -0,0 +1,56 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_ | |||
#define INC_FRAMEWORK_MEMORY_MEMORY_API_H_ | |||
#include <string> | |||
#include <vector> | |||
#include "ge/ge_api_error_codes.h" | |||
#include "runtime/mem.h" | |||
namespace ge { | |||
enum MemStorageType { | |||
HBM = 0, | |||
RDMA_HBM, | |||
}; | |||
struct HostVarInfo { | |||
uint64_t base_addr; | |||
uint64_t var_size; | |||
}; | |||
/// | |||
/// \param size [in] rdma pool memory size to be allocated. | |||
/// \param mem_type [in] memory type for rdma pool. | |||
/// \return Status result of function | |||
Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); | |||
/// | |||
/// \param var_info [in] host variable addr infos. | |||
/// \param mem_type [in] memory type for rdma pool. | |||
/// \return Status result of function | |||
Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); | |||
/// | |||
/// \param var_name [in] var_name name of host variable. | |||
/// \param base_addr [out] base_addr vase addr of host variable. | |||
/// \param var_size [out] var_size memory_size of host variable. | |||
/// \return Status result of function | |||
Status GetVarBaseAddrAndSize(const std::string &var_name, uint64_t &base_addr, uint64_t &var_size); | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_MEMORY_MEMORY_API_H_ |
@@ -96,10 +96,6 @@ Status CheckCustomAiCpuOpLib(); | |||
Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); | |||
Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); | |||
Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info); | |||
void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||
std::vector<std::string> &output_nodes_name); | |||
@@ -120,6 +120,7 @@ struct OmgContext { | |||
bool is_dynamic_input = false; | |||
std::string dynamic_batch_size; | |||
std::string dynamic_image_size; | |||
std::string dynamic_dims; | |||
}; | |||
} // namespace ge | |||
@@ -0,0 +1,110 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ | |||
#define INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ | |||
#include <google/protobuf/message.h> | |||
#include "framework/common/types.h" | |||
#include "framework/omg/omg_inner_types.h" | |||
#include "graph/attr_value.h" | |||
#include "graph/compute_graph.h" | |||
#include "graph/ge_tensor.h" | |||
#include "graph/graph.h" | |||
#include "graph/op_desc.h" | |||
#include "graph/operator.h" | |||
#include "graph/range_vistor.h" | |||
#include "graph/utils/attr_utils.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/utils/tensor_utils.h" | |||
using Status = domi::Status; | |||
namespace domi { | |||
using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>( | |||
const google::protobuf::Message *root_proto, const std::string &graph)>; | |||
class ModelParser { | |||
public: | |||
ModelParser() {} | |||
virtual ~ModelParser() {} | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Analyze network model data | |||
* @param [in] file Network model file path | |||
* @param [in|out] graph Save the network information after analysis | |||
* @return SUCCESS | |||
* @return Others failed | |||
*/ | |||
virtual Status Parse(const char *file, ge::Graph &graph) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Parse relevant data from memory and save it to graph | |||
* @param [in] input Model file memory data | |||
* @param [in|out] graph A graph for saving the model information after analysis | |||
* @return SUCCESS | |||
* @return FAILED | |||
* @author | |||
*/ | |||
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Analyze network model data | |||
* @param [in] proto network model | |||
* @param [in|out] graph Save the network information after analysis | |||
* @return SUCCESS | |||
* @return Others failed | |||
*/ | |||
virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Analyze callback model data in subgraph | |||
* @param [in] proto network model | |||
* @param [in] callback callback of subgraph | |||
* @param [in|out] graph Save the network information after analysis | |||
* @return SUCCESS | |||
* @return Others failed | |||
*/ | |||
virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, | |||
ge::ComputeGraphPtr &graph) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Convert model files to JSON format | |||
* @param [in] model_file Model file path to be converted | |||
* @param [out] json_file Converted JSON file path | |||
* @return SUCCESS | |||
* @return Others failed | |||
*/ | |||
virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } | |||
/* | |||
* @ingroup domi_omg | |||
* @brief Convert network data type | |||
* @param [in] type Data type to be converted | |||
* @return ge::DataType | |||
*/ | |||
virtual ge::DataType ConvertToGeDataType(const uint32_t type) = 0; | |||
virtual Status ParseAllGraph(const google::protobuf::Message *root_proto, ge::ComputeGraphPtr &root_graph) = 0; | |||
}; | |||
} // namespace domi | |||
#endif // INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ |
@@ -0,0 +1,92 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ | |||
#define INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ | |||
#include <google/protobuf/text_format.h> | |||
#include "common/types.h" | |||
#include "omg/omg_inner_types.h" | |||
#include "proto/om.pb.h" | |||
#include "graph/ge_tensor.h" | |||
#include "graph/op_desc.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
using google::protobuf::Message; | |||
using Status = domi::Status; | |||
namespace ge { | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Used to analyze operator information | |||
* | |||
*/ | |||
class OpParser { | |||
public: | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Deconstructor | |||
*/ | |||
virtual ~OpParser() {} | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Analytic operator parameters | |||
* @param [in] op_src Parameter data to be resolved | |||
* @param [out] graph Parsed parameter data | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Analytic operator parameters | |||
* @param [in] op_src Parameter data to be resolved | |||
* @param [out] Operator parameter data | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Analytic operator weight information | |||
* @param [in] op_src Weight data to be resolved | |||
* @param [out] op_dest Weight data after analysis | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
* @brief Get the format information according to the parameters in the operator | |||
* @param [in] op_src Parameter data to be resolved | |||
* @param [out] format Output the parsed format | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) { | |||
(void)op_src; | |||
// Indicates that the op does not provide a value for format | |||
format = domi::DOMI_TENSOR_RESERVED; | |||
return domi::SUCCESS; | |||
} | |||
}; | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ |
@@ -0,0 +1,31 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ | |||
#define INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ | |||
#include <iostream> | |||
#include <map> | |||
#include <string> | |||
#include "ge/ge_api_error_codes.h" | |||
namespace ge { | |||
// Initialize parser | |||
Status ParserInitialize(const std::map<std::string, std::string>& options); | |||
// Finalize parser, release all resources | |||
Status ParserFinalize(); | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ |
@@ -0,0 +1,138 @@ | |||
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_
#define INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_

#include <map>
#include <memory>
#include <mutex>
#include <string>
#include "framework/common/types.h"
#include "framework/omg/omg_inner_types.h"

using Status = domi::Status;

namespace domi {
class WeightsParser;
class ModelParser;

// Factory function type that creates a ModelParser instance.
typedef std::shared_ptr<ModelParser> (*MODEL_PARSER_CREATOR_FUN)(void);

// Create modelparser for different frameworks
class ModelParserFactory {
 public:
  // Singleton accessor; instance is defined in the implementation file.
  static ModelParserFactory *Instance();

  /**
   * @ingroup domi_omg
   * @brief Create a modelparser based on the type entered
   * @param [in] type Framework type
   * @return Created modelparser
   */
  std::shared_ptr<ModelParser> CreateModelParser(const domi::FrameworkType type);

  /**
   * @ingroup domi_omg
   * @brief Register create function
   * @param [in] type Framework type
   * @param [in] fun ModelParser's create function
   */
  void RegisterCreator(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun);

 protected:
  ModelParserFactory() {}
  ~ModelParserFactory();

 private:
  // Map from framework type to the registered creator function.
  std::map<domi::FrameworkType, MODEL_PARSER_CREATOR_FUN> creator_map_;
};  // end class ModelParserFactory

// Helper whose constructor registers a creator; instantiated at namespace
// scope by REGISTER_MODEL_PARSER_CREATOR so registration runs at load time.
class ModelParserRegisterar {
 public:
  ModelParserRegisterar(const domi::FrameworkType type, MODEL_PARSER_CREATOR_FUN fun) {
    ModelParserFactory::Instance()->RegisterCreator(type, fun);
  }
  ~ModelParserRegisterar() {}
};

// Registration macros for model parsers.
// Defines a creator returning nullptr on construction failure, then registers
// it via a global ModelParserRegisterar object.
#define REGISTER_MODEL_PARSER_CREATOR(type, clazz)               \
  std::shared_ptr<ModelParser> Creator_##type##_Model_Parser() { \
    std::shared_ptr<clazz> ptr = nullptr;                        \
    try {                                                        \
      ptr = std::make_shared<clazz>();                           \
    } catch (...) {                                              \
      ptr = nullptr;                                             \
    }                                                            \
    return std::shared_ptr<ModelParser>(ptr);                    \
  }                                                              \
  ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser)

// Factory function type that creates a WeightsParser instance.
typedef std::shared_ptr<WeightsParser> (*WEIGHTS_PARSER_CREATOR_FUN)(void);

// Create weightsparser for different frameworks
class WeightsParserFactory {
 public:
  // Singleton accessor; instance is defined in the implementation file.
  static WeightsParserFactory *Instance();

  /**
   * @ingroup domi_omg
   * @brief Create weightsparser based on the type entered
   * @param [in] type Framework type
   * @return Created weightsparser
   */
  std::shared_ptr<WeightsParser> CreateWeightsParser(const domi::FrameworkType type);

  /**
   * @ingroup domi_omg
   * @brief Register create function
   * @param [in] type Framework type
   * @param [in] fun WeightsParser's create function
   */
  void RegisterCreator(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun);

 protected:
  WeightsParserFactory() {}
  ~WeightsParserFactory();

 private:
  // Map from framework type to the registered creator function.
  std::map<domi::FrameworkType, WEIGHTS_PARSER_CREATOR_FUN> creator_map_;
};  // end class WeightsParserFactory

// Helper whose constructor registers a creator; instantiated at namespace
// scope by REGISTER_WEIGHTS_PARSER_CREATOR so registration runs at load time.
class WeightsParserRegisterar {
 public:
  WeightsParserRegisterar(const domi::FrameworkType type, WEIGHTS_PARSER_CREATOR_FUN fun) {
    WeightsParserFactory::Instance()->RegisterCreator(type, fun);
  }
  ~WeightsParserRegisterar() {}
};

// Register macro of weight resolver (same pattern as the model-parser macro).
#define REGISTER_WEIGHTS_PARSER_CREATOR(type, clazz)                 \
  std::shared_ptr<WeightsParser> Creator_##type##_Weights_Parser() { \
    std::shared_ptr<clazz> ptr = nullptr;                            \
    try {                                                            \
      ptr = std::make_shared<clazz>();                               \
    } catch (...) {                                                  \
      ptr = nullptr;                                                 \
    }                                                                \
    return std::shared_ptr<WeightsParser>(ptr);                      \
  }                                                                  \
  WeightsParserRegisterar g_##type##_Weights_Parser_Creator(type, Creator_##type##_Weights_Parser)
}  // namespace domi

#endif  // INC_FRAMEWORK_OMG_PARSER_PARSER_FACTORY_H_
@@ -0,0 +1,43 @@ | |||
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_
#define INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "external/register/register_fmk_types.h"
#include "external/register/register_types.h"
#include "framework/omg/omg_inner_types.h"

namespace ge {
// Process-wide state shared across parser components; obtain via GetParserContext().
struct ParserContext {
  // Input name -> dimension list, as supplied by the user/caller.
  std::unordered_map<std::string, std::vector<int64_t>> input_dims;
  // Tensor format of the model inputs; defaults to dimension-agnostic ND.
  domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND;
  RunMode run_mode = ONLY_PRE_CHECK;
  std::string custom_proto_path;  // save caffe custom proto path, used by caffe parse
  std::string caffe_proto_path;   // save caffe proto path, used by caffe parse
};

// Accessor for the shared context; defined in the implementation file.
ParserContext &GetParserContext();
}  // namespace ge

#endif  // INC_FRAMEWORK_OMG_PARSER_PARSER_INNER_CONTEXT_H_
@@ -0,0 +1,74 @@ | |||
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_
#define INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_

#include "graph/graph.h"
#include "graph/attr_value.h"
#include "graph/compute_graph.h"
#include "graph/ge_tensor.h"
#include "graph/op_desc.h"
#include "graph/operator.h"
#include "graph/range_vistor.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"

namespace domi {
/**
 * @ingroup domi_omg
 * @brief Weight information resolver
 *
 */
class WeightsParser {
 public:
  /**
   * @ingroup domi_omg
   * @brief Constructor
   */
  WeightsParser() {}

  /**
   * @ingroup domi_omg
   * @brief Destructor
   */
  virtual ~WeightsParser() {}

  /**
   * @ingroup domi_omg
   * @brief Analyze weight data
   * @param [in] file Path of weight file after training
   * @param [in|out] graph Graph for saving weight information after analysis
   * @return SUCCESS
   * @return Others failed
   */
  virtual Status Parse(const char *file, ge::Graph &graph) = 0;

  /**
   * @ingroup domi_omg
   * @brief Parse relevant data from memory and save it to graph
   * @param [in] input Model file memory data
   * @param [in] length Size in bytes of the memory buffer pointed to by input
   * @param [in|out] graph A graph for saving the model information after analysis
   * @return SUCCESS
   * @return FAILED
   * @author
   */
  virtual Status ParseFromMemory(const char *input, uint32_t length, ge::ComputeGraphPtr &graph) = 0;
};
}  // namespace domi

#endif  // INC_FRAMEWORK_OMG_PARSER_WEIGHTS_PARSER_H_
@@ -87,11 +87,14 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A | |||
// AddNode with NodePtr | |||
NodePtr AddNode(NodePtr node); | |||
NodePtr AddNode(OpDescPtr op); | |||
NodePtr AddNode(OpDescPtr op, int64_t id); // for unserialize. | |||
NodePtr AddNode(OpDescPtr op, int64_t id); // for unserialize | |||
NodePtr AddNodeFront(NodePtr node); | |||
NodePtr AddNodeFront(const OpDescPtr &op); | |||
NodePtr AddInputNode(NodePtr node); | |||
NodePtr AddOutputNode(NodePtr node); | |||
// insert node with specific pre_node | |||
NodePtr AddNodeAfter(OpDescPtr &op, const NodePtr &pre_node); | |||
NodePtr AddNodeAfter(NodePtr node, const NodePtr &pre_node); | |||
graphStatus RemoveNode(const NodePtr &node); | |||
graphStatus RemoveInputNode(const NodePtr &node); | |||
@@ -185,6 +185,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_INPUT; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_OUTPUT; | |||
// to be deleted | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION; | |||
@@ -934,12 +937,14 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PRED_VALUE; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_NUM; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_BATCH_LABEL; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_BATCH; | |||
// Control flow | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_SWITCH_COND; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ACTIVE_STREAM_LIST; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCHN_PRED_VALUE; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_COMBINED_DYNAMIC_DIMS; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG; | |||
@@ -983,6 +988,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NEE | |||
// For mutil-batch | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERT_BY_MBATCH; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_TYPE; | |||
// For inserted op | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INSERTED_BY_GE; | |||
@@ -1022,6 +1028,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_ADDR; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_INPUT_L1_VALID_SIZE; | |||
// for unregistered op | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNREGST_OPPATH; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_UNREGST_ATTRLIST; | |||
// op overflow dump | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE; | |||
@@ -1075,8 +1085,25 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX; | |||
// atc user def dtype&format | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_DATATYPE; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ATC_USER_DEFINE_FORMAT; | |||
// for fusion op plugin | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE; | |||
// graph partition for aicpu | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_END_REAR_NODE_ENGINE_NAME; | |||
// input and output memory type | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_VARIABLE_PLACEMENT; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_INPUT_MEMORY_TYPE; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OUTPUT_MEMORY_TYPE; | |||
// input_output_offset | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_BASIC_OFFSET; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET; | |||
} // namespace ge | |||
#endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ |
@@ -67,6 +67,9 @@ class ModelSerializeImp { | |||
bool HandleNodeNameRef(); | |||
bool UnserializeOpDesc(OpDescPtr &opDesc, proto::OpDef &opDefProto); | |||
void AttrDefToOpDesc(OpDescPtr &op_desc, std::vector<string> &key_in, std::vector<string> &key_out, | |||
std::vector<uint32_t> &value_in, std::vector<uint32_t> &value_out, std::vector<string> &opt); | |||
void OpDescToAttrDef(const ConstOpDescPtr &op_desc, proto::OpDef *op_def_proto); | |||
bool UnserializeNode(ComputeGraphPtr &graph, proto::OpDef &opDefProto); | |||
@@ -159,10 +159,6 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder { | |||
std::map<string, uint32_t> GetAllInputName() const; | |||
void SetAllInputName(const std::map<string, uint32_t> &input_name_idx); | |||
std::vector<string> GetAllOptionalInputName() const; | |||
std::map<string, uint32_t> GetAllOutputName(); | |||
bool UpdateInputName(std::map<string, uint32_t> inputNameIdx); | |||
@@ -300,6 +296,8 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder { | |||
std::map<std::string, SubgraphType> subgraph_ir_names_to_type_; | |||
vector<GeTensorDescPtr> inputs_desc_{}; | |||
map<string, uint32_t> input_name_idx_{}; | |||
std::unordered_set<string> optional_input_names_{}; | |||
vector<GeTensorDescPtr> outputs_desc_{}; | |||
map<string, uint32_t> output_name_idx_{}; | |||
std::function<graphStatus(Operator &)> infer_func_ = nullptr; | |||
@@ -62,18 +62,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string ComputeGraph::GetName() co | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void ComputeGraph::SetName(const string &name) { name_ = name; } | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY size_t ComputeGraph::GetAllNodesSize() const { | |||
size_t s = nodes_.size(); | |||
for (const auto &sub_graph : sub_graph_) { | |||
s += sub_graph->GetAllNodesSize(); | |||
} | |||
return s; | |||
return GetAllNodes().size(); | |||
} | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraph::Vistor<NodePtr> ComputeGraph::GetAllNodes() const { | |||
if (sub_graph_.empty()) { | |||
return Vistor<NodePtr>(shared_from_this(), nodes_); | |||
} | |||
std::vector<std::shared_ptr<ComputeGraph>> subgraphs; | |||
return AllGraphNodes(subgraphs); | |||
} | |||
@@ -277,7 +269,7 @@ NodePtr ComputeGraph::AddNodeFront(NodePtr node) { | |||
NodePtr ComputeGraph::AddNodeFront(const OpDescPtr &op) { | |||
if (op == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null."); | |||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null."); | |||
return nullptr; | |||
} | |||
op->SetId(nodes_.size()); | |||
@@ -287,9 +279,38 @@ NodePtr ComputeGraph::AddNodeFront(const OpDescPtr &op) { | |||
return AddNodeFront(node_ptr); | |||
} | |||
// Insert an already-constructed node into the direct node list, placing it
// immediately behind pre_node. Returns the inserted node, or nullptr when any
// input is null or pre_node is not found in this graph's direct nodes.
NodePtr ComputeGraph::AddNodeAfter(NodePtr node, const NodePtr &pre_node) {
  const bool invalid_input = (node == nullptr) || (node->GetOpDesc() == nullptr) || (pre_node == nullptr);
  if (invalid_input) {
    GELOGE(GRAPH_FAILED, "The node ptr or op desc should not be null.");
    return nullptr;
  }
  // Id mirrors the current direct-node count at insertion time.
  node->GetOpDesc()->SetId(nodes_.size());
  auto pos = std::find(nodes_.begin(), nodes_.end(), pre_node);
  if (pos == nodes_.end()) {
    GELOGE(GRAPH_FAILED, "Cannot find pre_node in nodes_.");
    return nullptr;
  }
  nodes_.insert(pos + 1, node);
  return node;
}
// Construct a Node from the given OpDesc, initialize it, and insert it into
// the direct node list immediately behind pre_node by delegating to the
// NodePtr overload. Returns the new node, or nullptr on any failure.
NodePtr ComputeGraph::AddNodeAfter(OpDescPtr &op, const NodePtr &pre_node) {
  if (op == nullptr) {
    GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null.");
    return nullptr;
  }
  // NOTE(review): the NodePtr overload sets the id again before inserting, so
  // this assignment appears redundant — confirm before removing.
  op->SetId(nodes_.size());
  NodePtr node_ptr = shared_ptr<Node>(new (std::nothrow) Node(op, shared_from_this()));
  // GE_IF_BOOL_EXEC embeds an early return: bail out if allocation or node
  // initialization fails.
  GE_IF_BOOL_EXEC(node_ptr == nullptr, GELOGE(GRAPH_FAILED, "node_ptr is NULL!!!"); return nullptr);
  GE_IF_BOOL_EXEC(node_ptr->Init() != GRAPH_SUCCESS, GELOGE(GRAPH_FAILED, "node init failed."); return nullptr);
  return AddNodeAfter(node_ptr, pre_node);
}
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(NodePtr node) { | |||
if (node == nullptr || node->GetOpDesc() == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The node ptr should be not null."); | |||
GELOGE(GRAPH_FAILED, "The node ptr should not be null."); | |||
return nullptr; | |||
} | |||
node->GetOpDesc()->SetId((int64_t)GetDirectNodesSize()); | |||
@@ -299,7 +320,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(Nod | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(OpDescPtr op) { | |||
if (op == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null."); | |||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null."); | |||
return nullptr; | |||
} | |||
op->SetId(GetDirectNodesSize()); | |||
@@ -311,7 +332,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::AddNode(OpD | |||
NodePtr ComputeGraph::AddNode(OpDescPtr op, int64_t id) { // for unserialize. | |||
if (op == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should be not null."); | |||
GELOGE(GRAPH_FAILED, "The OpDesc ptr should not be null."); | |||
return nullptr; | |||
} | |||
op->SetId(id); | |||
@@ -324,7 +345,7 @@ NodePtr ComputeGraph::AddNode(OpDescPtr op, int64_t id) { // for unserialize. | |||
NodePtr ComputeGraph::AddInputNode(NodePtr node) { | |||
if (node == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The node ptr should be not null."); | |||
GELOGE(GRAPH_FAILED, "The node ptr should not be null."); | |||
return nullptr; | |||
} | |||
input_nodes_.push_back(node); | |||
@@ -336,7 +357,7 @@ NodePtr ComputeGraph::AddInputNode(NodePtr node) { | |||
NodePtr ComputeGraph::AddOutputNode(NodePtr node) { | |||
if (node == nullptr || node->GetOpDesc() == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The node ptr or opdesc should be not null."); | |||
GELOGE(GRAPH_FAILED, "The node ptr or opdesc should not be null."); | |||
return nullptr; | |||
} | |||
@@ -372,7 +393,7 @@ graphStatus ComputeGraph::RemoveConstInput(const NodePtr &node) { | |||
if (out_anchor->GetOwnerNode()->GetType() == CONSTANT || out_anchor->GetOwnerNode()->GetType() == CONSTANTOP) { | |||
GE_CHK_BOOL_RET_STATUS(GraphUtils::RemoveEdge(out_anchor, in_anchor) == GRAPH_SUCCESS, GRAPH_FAILED, | |||
"Remove edge from const op failed."); | |||
if (out_anchor->GetOwnerNode()->GetOutDataNodes().size() == 0) { | |||
if (out_anchor->GetOwnerNode()->GetOutNodes().size() == 0) { | |||
GELOGI("Remove const op %s.", out_anchor->GetOwnerNode()->GetName().c_str()); | |||
auto iter = find(nodes_.begin(), nodes_.end(), out_anchor->GetOwnerNode()); | |||
if (iter != nodes_.end()) { | |||
@@ -386,7 +407,7 @@ graphStatus ComputeGraph::RemoveConstInput(const NodePtr &node) { | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::RemoveNode(const NodePtr &node) { | |||
if (node == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The node ptr should be not null."); | |||
GELOGE(GRAPH_FAILED, "The node ptr should not be null."); | |||
return GRAPH_FAILED; | |||
} | |||
@@ -415,7 +436,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::RemoveN | |||
// Used in sub_graph scenes | |||
graphStatus ComputeGraph::RemoveInputNode(const NodePtr &node) { | |||
if (node == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The node ptr should be not null."); | |||
GELOGE(GRAPH_FAILED, "The node ptr should not be null."); | |||
return GRAPH_FAILED; | |||
} | |||
@@ -430,7 +451,7 @@ graphStatus ComputeGraph::RemoveInputNode(const NodePtr &node) { | |||
// Used in sub_graph scenes | |||
graphStatus ComputeGraph::RemoveOutputNode(const NodePtr &node) { | |||
if (node == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The node ptr should be not null."); | |||
GELOGE(GRAPH_FAILED, "The node ptr should not be null."); | |||
return GRAPH_FAILED; | |||
} | |||
@@ -451,7 +472,7 @@ graphStatus ComputeGraph::RemoveOutputNode(const NodePtr &node) { | |||
std::shared_ptr<ComputeGraph> ComputeGraph::AddSubGraph(std::shared_ptr<ComputeGraph> sub_graph) { | |||
if (sub_graph == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The graph ptr should be not null."); | |||
GELOGE(GRAPH_FAILED, "The graph ptr should not be null."); | |||
return nullptr; | |||
} | |||
sub_graph_.push_back(sub_graph); | |||
@@ -461,7 +482,7 @@ std::shared_ptr<ComputeGraph> ComputeGraph::AddSubGraph(std::shared_ptr<ComputeG | |||
graphStatus ComputeGraph::RemoveSubGraph(const std::shared_ptr<ComputeGraph> &sub_graph) { | |||
if (sub_graph == nullptr) { | |||
GELOGE(GRAPH_FAILED, "The graph ptr should be not null."); | |||
GELOGE(GRAPH_FAILED, "The graph ptr should not be null."); | |||
return GRAPH_FAILED; | |||
} | |||
@@ -500,8 +521,7 @@ ComputeGraph::AddSubgraph(const std::string &name, const std::shared_ptr<Compute | |||
return GRAPH_PARAM_INVALID; | |||
} | |||
if (!this->parent_graph_.expired()) { | |||
GE_LOGE("The subgraphs can only be added to the root graph"); | |||
return GRAPH_PARAM_INVALID; | |||
GELOGW("The subgraphs should only be added to the root graph"); | |||
} | |||
if (name != subgraph->GetName()) { | |||
GELOGW("The subgraph name %s is different with input %s", subgraph->GetName().c_str(), name.c_str()); | |||
@@ -653,7 +673,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::InsertE | |||
GELOGW("node or OpDescPtr is nullptr."); | |||
continue; | |||
} | |||
GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "The node should be not null."); return GRAPH_FAILED); | |||
GE_IF_BOOL_EXEC(node == nullptr, GELOGE(GRAPH_FAILED, "The node should not be null."); return GRAPH_FAILED); | |||
if (node->GetOpDesc()->GetType() == RECV) { | |||
auto iter = find(node_vec.begin(), node_vec.end(), node); | |||
if (iter == node_vec.end()) { | |||
@@ -799,7 +819,8 @@ graphStatus ComputeGraph::CollectBreadthOutNode(const NodePtr &node, std::map<No | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::TopologicalSorting() { | |||
auto ret = TopologicalSortingGraph(); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Sub graph partition Failed"); | |||
GraphUtils::DumpGEGraphToOnnx(*this, "black_box"); | |||
GELOGE(ret, "Graph [%s] topological sort failed, saved to file black_box", name_.c_str()); | |||
return ret; | |||
} | |||
@@ -1117,9 +1138,11 @@ graphStatus ComputeGraph::RemoveExtraOutEdge(const NodePtr &node) { | |||
} | |||
graphStatus ComputeGraph::Verify() { | |||
bool is_unknown_graph = GetGraphUnknownFlag(); | |||
for (const auto &node_ptr : GetAllNodes()) { | |||
GE_CHECK_NOTNULL(node_ptr); | |||
GE_CHECK_NOTNULL(node_ptr->GetOpDesc()); | |||
GE_IF_BOOL_EXEC(is_unknown_graph, continue); | |||
GE_CHK_BOOL_EXEC(node_ptr->GetOpDesc()->CommonVerify() == GRAPH_SUCCESS, return GRAPH_FAILED, | |||
"Verifying %s failed.", node_ptr->GetName().c_str()); | |||
} | |||
@@ -158,6 +158,10 @@ const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; | |||
const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims"; | |||
const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size"; | |||
// Identify node connecting to input and output | |||
const std::string ATTR_NAME_NODE_CONNECT_INPUT = "_is_connected_to_data"; | |||
const std::string ATTR_NAME_NODE_CONNECT_OUTPUT = "_is_connected_to_netoutput"; | |||
// To be deleted | |||
const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; | |||
const std::string PERMUTE_RESHAPE_FUSION = "permute_reshape_fusion"; | |||
@@ -905,6 +909,7 @@ const std::string ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE = "is_end_of_inputmem_l | |||
const std::string ATTR_NAME_PRED_VALUE = "_pred_value"; | |||
const std::string ATTR_NAME_BATCH_NUM = "_batch_num"; | |||
const std::string ATTR_NAME_BATCH_LABEL = "_batch_label"; | |||
const std::string ATTR_NAME_COMBINED_BATCH = "_combined_batch"; | |||
// Control flow | |||
const std::string ATTR_NAME_STREAM_SWITCH_COND = "switch_condition"; | |||
@@ -914,6 +919,7 @@ const std::string ATTR_NAME_SWITCHN_PRED_VALUE = "switch_pred_value"; | |||
const std::string ATTR_NAME_ITERATORS_PER_LOOP = "iterations_per_loop"; | |||
const std::string ATTR_NAME_FLOW_CTRL_NODE_FLAG = "is_flow_ctrl_node"; | |||
const std::string ATTR_NAME_SUBGRAPH_FIRST_ACTIVE = "subgraph_first_active"; | |||
const std::string ATTR_NAME_COMBINED_DYNAMIC_DIMS = "combined_dynamic_dims"; | |||
const std::string ATTR_NAME_SWITCH_BRANCH_NODE_LABEL = "_switch_branch_node_label"; | |||
const std::string ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG = "_switch_true_branch_flag"; | |||
@@ -983,6 +989,8 @@ const std::string ATTR_INSERT_BY_MBATCH = "mbatch-inserted-node"; | |||
const std::string ATTR_MBATCH_ORIGIN_INPUT_DIMS = "_mbatch_origin_input_dims"; | |||
const std::string ATTR_DYNAMIC_TYPE = "mbatch_dynamic_type"; | |||
// For inserted op | |||
const std::string ATTR_INSERTED_BY_GE = "_inserted_by_ge"; | |||
@@ -1021,6 +1029,10 @@ const std::string ATTR_NAME_VALID_OUTPUT_SHAPE_LIST_LIST = "_valid_output_shape_ | |||
const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST = "_input_offset_list_list"; | |||
const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_output_offset_list_list"; | |||
// for unregistered op | |||
const std::string ATTR_NAME_UNREGST_OPPATH = "_unregst_oppath"; | |||
const std::string ATTR_NAME_UNREGST_ATTRLIST = "_unregst_attrlist"; | |||
// used for Horovod | |||
const std::string ATTR_INTER_EVENT_IDENTIFY = "event_id"; | |||
const std::string ATTR_HOROVOD_ATTR_REDUCE_TYPE = "reduce_op"; | |||
@@ -1032,6 +1044,23 @@ const std::string ATTR_NAME_HCCL_FUSED_FLAG = "_hccl_fused_node"; | |||
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr"; | |||
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index"; | |||
// atc user def dtype&format | |||
const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type"; | |||
const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format"; | |||
// for fusion op plugin | |||
const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE = "_fusionop_original_type"; | |||
// graph partition for aicpu | |||
const std::string ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME = "pld_front_node_engine_name"; | |||
const std::string ATTR_NAME_END_REAR_NODE_ENGINE_NAME = "end_rear_node_engine_name"; | |||
// input and output memory type | |||
const std::string ATTR_VARIABLE_PLACEMENT = "_variable_placement"; | |||
const std::string ATTR_INPUT_MEMORY_TYPE = "_input_memory_type"; | |||
const std::string ATTR_OUTPUT_MEMORY_TYPE = "_output_memory_type"; | |||
// input_output_offset | |||
const std::string ATTR_ZERO_COPY_BASIC_OFFSET = "_zero_copy_basic_offset"; | |||
const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET = "_zero_copy_relative_offset"; | |||
} // namespace ge |
@@ -1216,27 +1216,16 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CloneOpDesc( | |||
GE_CHK_BOOL_EXEC(imp.UnserializeOpDesc(op_desc, *op_def), return op_desc, "op_desc unserialize failed"); | |||
op_desc->extAttrs_ = org_op_desc->extAttrs_; | |||
if (op_desc->HasAttr("_input_name_idx_key")) { | |||
if (op_desc->DelAttr("_input_name_idx_key") != SUCCESS) { | |||
GELOGE(GRAPH_FAILED, "DelAttr _input_name_idx_key failed."); | |||
} | |||
// This function may be called by some passes of fusion engine, in this condition, do not need these attribute | |||
if (!op_desc->input_name_idx_.empty()) { | |||
op_desc->input_name_idx_.clear(); | |||
} | |||
if (op_desc->HasAttr("_input_name_idx_value")) { | |||
if (op_desc->DelAttr("_input_name_idx_value") != SUCCESS) { | |||
GELOGE(GRAPH_FAILED, "DelAttr _input_name_idx_value failed."); | |||
} | |||
} | |||
if (op_desc->HasAttr("_opt_input")) { | |||
if (op_desc->DelAttr("_opt_input") != SUCCESS) { | |||
GELOGE(GRAPH_FAILED, "DelAttr _opt_input failed."); | |||
} | |||
} | |||
if (!op_desc->output_name_idx_.empty()) { | |||
op_desc->output_name_idx_.clear(); | |||
} | |||
if (!op_desc->optional_input_names_.empty()) { | |||
op_desc->optional_input_names_.clear(); | |||
} | |||
return op_desc; | |||
} | |||
@@ -1260,6 +1249,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CopyOpDesc(c | |||
op_desc->extAttrs_ = org_op_desc->extAttrs_; | |||
op_desc->input_name_idx_.insert(org_op_desc->input_name_idx_.begin(), org_op_desc->input_name_idx_.end()); | |||
op_desc->optional_input_names_.insert(org_op_desc->optional_input_names_.begin(), | |||
org_op_desc->optional_input_names_.end()); | |||
op_desc->output_name_idx_.insert(org_op_desc->output_name_idx_.begin(), org_op_desc->output_name_idx_.end()); | |||
op_desc->infer_func_ = org_op_desc->infer_func_; | |||
@@ -124,6 +124,7 @@ LOCAL_SRC_FILES := \ | |||
../../out/graph/lib64/stub/operator.cc \ | |||
../../out/graph/lib64/stub/operator_factory.cc \ | |||
../../out/graph/lib64/stub/tensor.cc \ | |||
../../out/graph/lib64/stub/inference_context.cc \ | |||
LOCAL_SHARED_LIBRARIES := | |||
@@ -201,6 +202,7 @@ LOCAL_SRC_FILES := \ | |||
../../out/graph/lib64/stub/operator.cc \ | |||
../../out/graph/lib64/stub/operator_factory.cc \ | |||
../../out/graph/lib64/stub/tensor.cc \ | |||
../../out/graph/lib64/stub/inference_context.cc \ | |||
LOCAL_SHARED_LIBRARIES := | |||
@@ -128,21 +128,42 @@ bool ModelSerializeImp::SerializeOpDesc(const ConstOpDescPtr &op_desc, proto::Op | |||
for (const std::string &name : op_desc->GetSubgraphInstanceNames()) { | |||
op_def_proto->add_subgraph_name(name); | |||
} | |||
if (!op_desc->output_name_idx_.empty()) { | |||
proto::AttrDef key; | |||
proto::AttrDef value; | |||
for (auto &item : op_desc->output_name_idx_) { | |||
key.mutable_list()->add_s(item.first); | |||
value.mutable_list()->add_i(item.second); | |||
} | |||
auto op_desc_attr = op_def_proto->mutable_attr(); | |||
op_desc_attr->insert({"_output_name_key", key}); | |||
op_desc_attr->insert({"_output_name_value", value}); | |||
} | |||
OpDescToAttrDef(op_desc, op_def_proto); | |||
} | |||
return true; | |||
} | |||
void ModelSerializeImp::OpDescToAttrDef(const ConstOpDescPtr &op_desc, proto::OpDef *op_def_proto) { | |||
proto::AttrDef key_in; | |||
proto::AttrDef value_in; | |||
auto op_desc_attr = op_def_proto->mutable_attr(); | |||
if (!op_desc->input_name_idx_.empty()) { | |||
for (auto &item : op_desc->input_name_idx_) { | |||
key_in.mutable_list()->add_s(item.first); | |||
value_in.mutable_list()->add_i(item.second); | |||
} | |||
op_desc_attr->insert({"_input_name_key", key_in}); | |||
op_desc_attr->insert({"_input_name_value", value_in}); | |||
} | |||
proto::AttrDef key_out; | |||
proto::AttrDef value_out; | |||
if (!op_desc->output_name_idx_.empty()) { | |||
for (auto &item : op_desc->output_name_idx_) { | |||
key_out.mutable_list()->add_s(item.first); | |||
value_out.mutable_list()->add_i(item.second); | |||
} | |||
op_desc_attr->insert({"_output_name_key", key_out}); | |||
op_desc_attr->insert({"_output_name_value", value_out}); | |||
} | |||
proto::AttrDef opt_input; | |||
if (!op_desc->optional_input_names_.empty()) { | |||
for (auto &item : op_desc->optional_input_names_) { | |||
opt_input.mutable_list()->add_s(item); | |||
} | |||
op_desc_attr->insert({"_opt_input", opt_input}); | |||
} | |||
} | |||
bool ModelSerializeImp::SerializeNode(const NodePtr &node, proto::OpDef *op_def_proto, bool is_dump) { | |||
if (node == nullptr || op_def_proto == nullptr) { | |||
GELOGE(GRAPH_FAILED, "Input Para Node Invalid"); | |||
@@ -236,13 +257,70 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ModelSerializeImp::Unseriali | |||
} | |||
} | |||
void ModelSerializeImp::AttrDefToOpDesc(OpDescPtr &op_desc, std::vector<string> &key_in, std::vector<string> &key_out, | |||
std::vector<uint32_t> &value_in, std::vector<uint32_t> &value_out, | |||
std::vector<string> &opt_input) { | |||
if (!key_in.empty()) { | |||
if (key_in.size() != value_in.size()) { | |||
GELOGW("Key and value vector size is different. key_size: %zu, value_size: %zu.", key_out.size(), | |||
value_in.size()); | |||
} else { | |||
for (uint32_t i = 0; i < key_in.size(); ++i) { | |||
op_desc->input_name_idx_.insert(std::pair<string, uint32_t>(key_in.at(i), value_in.at(i))); | |||
} | |||
} | |||
} | |||
if (!key_out.empty()) { | |||
if (key_out.size() != value_out.size()) { | |||
GELOGW("Key and value vector size is different. key_size: %zu, value_size: %zu.", key_out.size(), | |||
value_out.size()); | |||
} else { | |||
for (uint32_t i = 0; i < key_out.size(); ++i) { | |||
op_desc->output_name_idx_.insert(std::pair<string, uint32_t>(key_out.at(i), value_out.at(i))); | |||
} | |||
} | |||
} | |||
if (!opt_input.empty()) { | |||
for (const auto &i : opt_input) { | |||
op_desc->optional_input_names_.insert(i); | |||
} | |||
} | |||
} | |||
bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_def_proto) { | |||
std::vector<string> key; | |||
std::vector<uint32_t> value; | |||
std::vector<string> opt_input; | |||
std::vector<string> key_in; | |||
std::vector<uint32_t> value_in; | |||
if (op_def_proto.attr().count("_opt_input") > 0) { | |||
auto &name_list = op_def_proto.attr().at("_opt_input").list(); | |||
for (const auto &item_s : name_list.s()) { | |||
opt_input.push_back(item_s); | |||
} | |||
auto op_desc_attr = op_def_proto.mutable_attr(); | |||
op_desc_attr->erase("_opt_input"); | |||
} | |||
if (op_def_proto.attr().count("_input_name_key") > 0) { | |||
auto &output_name_key_list = op_def_proto.attr().at("_input_name_key").list(); | |||
for (const auto &item_s : output_name_key_list.s()) { | |||
key_in.push_back(item_s); | |||
} | |||
auto op_desc_attr = op_def_proto.mutable_attr(); | |||
op_desc_attr->erase("_input_name_key"); | |||
} | |||
if (op_def_proto.attr().count("_input_name_value") > 0) { | |||
auto &input_name_value_list = op_def_proto.attr().at("_input_name_value").list(); | |||
for (const auto &item_i : input_name_value_list.i()) { | |||
value_in.push_back(static_cast<uint32_t>(item_i)); | |||
} | |||
auto op_desc_attr = op_def_proto.mutable_attr(); | |||
op_desc_attr->erase("_input_name_value"); | |||
} | |||
std::vector<string> key_out; | |||
std::vector<uint32_t> value_out; | |||
if (op_def_proto.attr().count("_output_name_key") > 0) { | |||
auto &output_name_key_list = op_def_proto.attr().at("_output_name_key").list(); | |||
for (const auto &item_s : output_name_key_list.s()) { | |||
key.push_back(item_s); | |||
key_out.push_back(item_s); | |||
} | |||
auto op_desc_attr = op_def_proto.mutable_attr(); | |||
op_desc_attr->erase("_output_name_key"); | |||
@@ -250,7 +328,7 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d | |||
if (op_def_proto.attr().count("_output_name_value") > 0) { | |||
auto &output_name_value_list = op_def_proto.attr().at("_output_name_value").list(); | |||
for (const auto &item_i : output_name_value_list.i()) { | |||
value.push_back(static_cast<uint32_t>(item_i)); | |||
value_out.push_back(static_cast<uint32_t>(item_i)); | |||
} | |||
auto op_desc_attr = op_def_proto.mutable_attr(); | |||
op_desc_attr->erase("_output_name_value"); | |||
@@ -281,15 +359,8 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d | |||
op_desc->SetSubgraphInstanceName(graph_index++, name); | |||
} | |||
if (key.size() != 0) { | |||
if (key.size() != value.size()) { | |||
GELOGE(GRAPH_FAILED, "twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size()); | |||
} else { | |||
for (uint32_t i = 0; i < key.size(); ++i) { | |||
op_desc->output_name_idx_.insert(std::pair<string, uint32_t>(key.at(i), value.at(i))); | |||
} | |||
} | |||
} | |||
// insert name index by key and value | |||
AttrDefToOpDesc(op_desc, key_in, key_out, value_in, value_out, opt_input); | |||
return true; | |||
} | |||
@@ -449,9 +449,6 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InDataAnchorPtr Node::GetInDataAn | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AnchorPtr Node::GetInAnchor(int idx) const { | |||
// Idx can't be less than -1 or >= in_data_anchors_.size(), -1 means index of control anchor_ | |||
if (idx < -1 || idx >= static_cast<int>(in_data_anchors_.size())) { | |||
ErrorManager::GetInstance().ATCReportErrMessage( | |||
"E19019", {"opname", "index", "anchorname", "optype"}, | |||
{GetName().c_str(), std::to_string(idx), "in_anchor", GetType().c_str()}); | |||
GELOGW("Op[%s] doesn't have index[%d]'s in_anchor which optype is %s.", GetName().c_str(), idx, GetType().c_str()); | |||
return nullptr; | |||
} else { | |||
@@ -743,26 +740,27 @@ graphStatus Node::Verify() const { | |||
const string aipp_data_type = "AippData"; | |||
const string const_type = "Const"; | |||
const string variable_type = "Variable"; | |||
bool is_unknown_graph = GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||
GE_CHK_BOOL_EXEC(op_ != nullptr, return GRAPH_FAILED, "original OpDesc is nullptr"); | |||
for (const auto &in_anchor_ptr : GetAllInDataAnchors()) { | |||
if (in_anchor_ptr == nullptr) { | |||
GELOGW("in anchor ptr is null"); | |||
continue; | |||
} | |||
bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type || | |||
op_->GetType() == const_type || op_->GetType() == variable_type || | |||
op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0; | |||
if (!valid_anchor) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"name", "index"}, | |||
{GetName(), std::to_string(in_anchor_ptr->GetIdx())}); | |||
GELOGE(GRAPH_FAILED, "operator %s's input %d is not linked.", GetName().c_str(), in_anchor_ptr->GetIdx()); | |||
return GRAPH_FAILED; | |||
if (!is_unknown_graph) { | |||
for (const auto &in_anchor_ptr : GetAllInDataAnchors()) { | |||
GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue); | |||
bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type || | |||
op_->GetType() == const_type || op_->GetType() == variable_type || | |||
op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0; | |||
if (!valid_anchor) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"}, | |||
{GetName(), std::to_string(in_anchor_ptr->GetIdx())}); | |||
GELOGE(GRAPH_FAILED, "operator %s's input %d is not linked.", GetName().c_str(), in_anchor_ptr->GetIdx()); | |||
return GRAPH_FAILED; | |||
} | |||
} | |||
} | |||
string frameworkop_type = "FrameworkOp"; | |||
if (op_->GetType() != frameworkop_type) { | |||
bool need_update_name = op_->GetType() != frameworkop_type && !is_unknown_graph; | |||
if (need_update_name) { | |||
auto node_op = ge::OperatorFactoryImpl::CreateOperator("node_op", op_->GetType()); | |||
if (node_op.IsEmpty()) { | |||
GELOGW("get op from OperatorFactory fail. opType: %s", op_->GetType().c_str()); | |||
@@ -782,7 +780,7 @@ graphStatus Node::Verify() const { | |||
} | |||
node_op.BreakConnect(); | |||
} | |||
GE_IF_BOOL_EXEC(is_unknown_graph, return GRAPH_SUCCESS;); | |||
if (op_->CommonVerify() == GRAPH_SUCCESS) { | |||
Operator op_proxy = ge::OpDescUtils::CreateOperatorFromNode(shared_from_this()); | |||
auto verify_func = op_->GetVerifyFunc(); | |||
@@ -64,12 +64,6 @@ const std::string ATTR_NAME_IS_INPUT_CONST = "is_input_const"; | |||
const std::string ATTR_NAME_OP_INFER_DEPENDS = "_op_infer_depends"; | |||
const std::string ATTR_NAME_OPT_INPUT = "_opt_input"; | |||
const std::string ATTR_NAME_INPUT_NAME_IDX_KEY = "_input_name_idx_key"; | |||
const std::string ATTR_NAME_INPUT_NAME_IDX_VALUE = "_input_name_idx_value"; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDesc::OpDesc() { | |||
op_def_.InitDefault(); | |||
if (op_def_.GetProtoMsg() != nullptr) { | |||
@@ -211,8 +205,7 @@ graphStatus OpDesc::AddInputDesc(uint32_t index, const ge::GeTensorDesc &input_d | |||
} | |||
graphStatus OpDesc::AddInputDesc(const string &name, const ge::GeTensorDesc &input_desc) { | |||
auto input_name_idx = GetAllInputName(); | |||
if (input_name_idx.find(name) != input_name_idx.end()) { | |||
if (input_name_idx_.find(name) != input_name_idx_.end()) { | |||
GELOGI("input %s is exist, update it", name.c_str()); | |||
graphStatus ret = UpdateInputDesc(name, input_desc); | |||
return ret; | |||
@@ -224,17 +217,15 @@ graphStatus OpDesc::AddInputDesc(const string &name, const ge::GeTensorDesc &inp | |||
return GRAPH_FAILED; | |||
} | |||
inputs_desc_.push_back(in_desc); | |||
(void)input_name_idx.insert(make_pair(name, index)); | |||
SetAllInputName(input_name_idx); | |||
(void)input_name_idx_.insert(make_pair(name, index)); | |||
return GRAPH_SUCCESS; | |||
} | |||
} | |||
graphStatus OpDesc::AddInputDescMiddle(const string &name, const unsigned int num, size_t index) { | |||
auto input_name_idx = GetAllInputName(); | |||
for (unsigned int i = 0; i < num; i++) { | |||
string input_name = name + std::to_string(i); | |||
GE_CHK_BOOL_RET_STATUS((input_name_idx.find(input_name) == input_name_idx.end()), GRAPH_FAILED, | |||
GE_CHK_BOOL_RET_STATUS((input_name_idx_.find(input_name) == input_name_idx_.end()), GRAPH_FAILED, | |||
"Add input tensor_desc is existed. name[%s]", input_name.c_str()); | |||
std::shared_ptr<GeTensorDesc> in_desc = ComGraphMakeShared<GeTensorDesc>(GeTensorDesc()); | |||
@@ -251,24 +242,22 @@ graphStatus OpDesc::AddInputDescMiddle(const string &name, const unsigned int nu | |||
(void)inputs_desc_.insert(inputs_desc_.begin() + index + i, in_desc); | |||
// Update index in input_name_idx | |||
for (auto it = input_name_idx.begin(); it != input_name_idx.end(); ++it) { | |||
for (auto it = input_name_idx_.begin(); it != input_name_idx_.end(); ++it) { | |||
if (it->second >= (index + i)) { | |||
it->second += 1; | |||
} | |||
} | |||
(void)input_name_idx.insert(make_pair(input_name, i + index)); | |||
(void)input_name_idx_.insert(make_pair(input_name, i + index)); | |||
} | |||
SetAllInputName(input_name_idx); | |||
return GRAPH_SUCCESS; | |||
} | |||
graphStatus OpDesc::AddInputDescForward(const string &name, const unsigned int num) { | |||
auto input_name_idx = GetAllInputName(); | |||
for (unsigned int i = 0; i < num; i++) { | |||
string input_name = name + std::to_string(i); | |||
GE_CHK_BOOL_RET_STATUS((input_name_idx.find(input_name) == input_name_idx.end()), GRAPH_FAILED, | |||
GE_CHK_BOOL_RET_STATUS((input_name_idx_.find(input_name) == input_name_idx_.end()), GRAPH_FAILED, | |||
"Add input tensor_desc is existed. name[%s]", input_name.c_str()); | |||
std::shared_ptr<GeTensorDesc> in_desc = ComGraphMakeShared<GeTensorDesc>(GeTensorDesc()); | |||
@@ -279,13 +268,12 @@ graphStatus OpDesc::AddInputDescForward(const string &name, const unsigned int n | |||
(void)inputs_desc_.insert(inputs_desc_.begin(), in_desc); | |||
// Update index in input_name_idx | |||
for (auto it = input_name_idx.begin(); it != input_name_idx.end(); ++it) { | |||
for (auto it = input_name_idx_.begin(); it != input_name_idx_.end(); ++it) { | |||
it->second += 1; | |||
} | |||
(void)input_name_idx.insert(make_pair(input_name, 0)); | |||
(void)input_name_idx_.insert(make_pair(input_name, 0)); | |||
} | |||
SetAllInputName(input_name_idx); | |||
return GRAPH_SUCCESS; | |||
} | |||
@@ -316,19 +304,10 @@ graphStatus OpDesc::AddOutputDescForward(const string &name, const unsigned int | |||
graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorDesc &input_desc) { | |||
if (OpDesc::AddInputDesc(name, input_desc) == GRAPH_FAILED) return GRAPH_FAILED; | |||
vector<string> optional_input_names; | |||
(void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); | |||
optional_input_names.push_back(name); | |||
(void)AttrUtils::SetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); | |||
(void)optional_input_names_.insert(name); | |||
return GRAPH_SUCCESS; | |||
} | |||
std::vector<string> OpDesc::GetAllOptionalInputName() const { | |||
vector<string> optional_input_names; | |||
(void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); | |||
return optional_input_names; | |||
} | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus | |||
OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { | |||
GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. index[%u]", index); | |||
@@ -343,12 +322,11 @@ OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { | |||
} | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescMembersAreEqual(const OpDesc &r_op_desc) const { | |||
return ( | |||
IsEqual(this->GetAllInputName(), r_op_desc.GetAllInputName(), "OpDesc.GetAllInputName()") && | |||
IsEqual(this->output_name_idx_, r_op_desc.output_name_idx_, "OpDesc.output_name_idx_") && | |||
IsEqual(this->GetAllOptionalInputName(), r_op_desc.GetAllOptionalInputName(), "OpDesc.GetAllOptionalInputName()") && | |||
IsEqual(this->engine_name_, r_op_desc.engine_name_, "OpDesc.engine_name_") && | |||
IsEqual(this->op_kernel_lib_name_, r_op_desc.op_kernel_lib_name_, "OpDesc.op_kernel_lib_name_")); | |||
return (IsEqual(this->input_name_idx_, r_op_desc.input_name_idx_, "OpDesc.input_name_idx_") && | |||
IsEqual(this->output_name_idx_, r_op_desc.output_name_idx_, "OpDesc.output_name_idx_") && | |||
IsEqual(this->optional_input_names_, r_op_desc.optional_input_names_, "OpDesc.optional_input_names_") && | |||
IsEqual(this->engine_name_, r_op_desc.engine_name_, "OpDesc.engine_name_") && | |||
IsEqual(this->op_kernel_lib_name_, r_op_desc.op_kernel_lib_name_, "OpDesc.op_kernel_lib_name_")); | |||
} | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::OpDescAttrsAreEqual(const OpDesc &r_op_desc) const { | |||
@@ -422,9 +400,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool OpDesc::operator==(const OpD | |||
} | |||
graphStatus OpDesc::UpdateInputDesc(const string &name, const ge::GeTensorDesc &tensor_Desc) { | |||
auto input_name_idx = GetAllInputName(); | |||
auto it = input_name_idx.find(name); | |||
if (it == input_name_idx.end()) { | |||
auto it = input_name_idx_.find(name); | |||
if (it == input_name_idx_.end()) { | |||
GELOGW("Cann't find the input desc. name[%s]", name.c_str()); | |||
return GRAPH_FAILED; | |||
} | |||
@@ -444,9 +421,8 @@ graphStatus OpDesc::UpdateInputDesc(const string &name, const ge::GeTensorDesc & | |||
} | |||
bool OpDesc::InputIsSet(const string &name) const { | |||
auto input_name_idx = GetAllInputName(); | |||
auto it = input_name_idx.find(name); | |||
if (it != input_name_idx.end()) { | |||
auto it = input_name_idx_.find(name); | |||
if (it != input_name_idx_.end()) { | |||
GE_IF_BOOL_EXEC(it->second >= inputs_desc_.size(), GELOGE(GRAPH_FAILED, "it->second is invalid."); return false); | |||
auto tensor_desc = inputs_desc_[it->second]; | |||
GE_IF_BOOL_EXEC(tensor_desc == nullptr, GELOGE(GRAPH_FAILED, "tensor_desc is null."); return false); | |||
@@ -464,9 +440,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc OpDesc::GetInputDesc | |||
} | |||
GeTensorDesc OpDesc::GetInputDesc(const string &name) const { | |||
auto input_name_idx = GetAllInputName(); | |||
auto it = input_name_idx.find(name); | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), GeTensorDesc()); | |||
auto it = input_name_idx_.find(name); | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), GeTensorDesc()); | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(it->second < inputs_desc_.size(), GeTensorDesc()); | |||
return *(inputs_desc_[it->second].get()); | |||
} | |||
@@ -476,7 +451,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDescPtr OpDesc::MutableIn | |||
if (inputs_desc_[index] == nullptr) { | |||
return nullptr; | |||
} | |||
GE_CHK_BOOL_RET_STATUS(inputs_desc_[index]->IsValid() == GRAPH_SUCCESS, nullptr, "input desc is invalid"); | |||
if (inputs_desc_[index]->IsValid() != GRAPH_SUCCESS) { | |||
GELOGW("input desc is invalid"); | |||
return nullptr; | |||
} | |||
return inputs_desc_[index]; | |||
} | |||
@@ -491,12 +469,11 @@ GeTensorDescPtr OpDesc::MutableInputDesc(const string &name) const { | |||
} | |||
GE_FUNC_HOST_VISIBILITY OpDesc::Vistor<string> OpDesc::GetAllInputNames() const { | |||
auto input_name_idx = GetAllInputName(); | |||
vector<string> names; | |||
if (input_name_idx.empty()) { | |||
if (input_name_idx_.empty()) { | |||
return OpDesc::Vistor<string>(shared_from_this(), names); | |||
} | |||
for (std::pair<string, uint32_t> input : input_name_idx) { | |||
for (std::pair<string, uint32_t> input : input_name_idx_) { | |||
names.push_back(input.first); | |||
} | |||
return OpDesc::Vistor<string>(shared_from_this(), names); | |||
@@ -672,9 +649,8 @@ OpDesc::GetInputDescPtrDfault(uint32_t index) const { | |||
} | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ConstGeTensorDescPtr OpDesc::GetInputDescPtr(const string &name) const { | |||
auto input_name_idx = GetAllInputName(); | |||
auto it = input_name_idx.find(name); | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), shared_ptr<const GeTensorDesc>()); | |||
auto it = input_name_idx_.find(name); | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), shared_ptr<const GeTensorDesc>()); | |||
return inputs_desc_[it->second]; | |||
} | |||
@@ -708,45 +684,12 @@ graphStatus OpDesc::AddDynamicOutputDesc(const string &name, const unsigned int | |||
} | |||
bool OpDesc::IsOptionalInput(const string &name) const { | |||
vector<string> optional_input_names; | |||
(void)AttrUtils::GetListStr(this, ATTR_NAME_OPT_INPUT, optional_input_names); | |||
for (auto &item : optional_input_names) { | |||
if (item == name) { | |||
return true; | |||
} | |||
} | |||
return false; | |||
return optional_input_names_.find(name) != optional_input_names_.end(); | |||
} | |||
bool OpDesc::IsOptionalInput(uint32_t index) const { return IsOptionalInput(GetInputNameByIndex(index)); } | |||
std::map<string, uint32_t> OpDesc::GetAllInputName() const { | |||
std::map<string, uint32_t> input_name_idx; | |||
std::vector<string> key; | |||
std::vector<uint32_t> value; | |||
(void)AttrUtils::GetListStr(this, ATTR_NAME_INPUT_NAME_IDX_KEY, key); | |||
(void)AttrUtils::GetListInt(this, ATTR_NAME_INPUT_NAME_IDX_VALUE, value); | |||
if (key.size() != value.size()) { | |||
GE_LOGE("twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size()); | |||
} else { | |||
for (uint32_t i = 0; i < key.size(); ++i) { | |||
input_name_idx.insert(std::pair<string, uint32_t>(key.at(i), value.at(i))); | |||
} | |||
} | |||
return input_name_idx; | |||
} | |||
void OpDesc::SetAllInputName(const std::map<string, uint32_t> &input_name_idx) { | |||
std::vector<string> key; | |||
std::vector<uint32_t> value; | |||
for (auto &item : input_name_idx) { | |||
key.emplace_back(item.first); | |||
value.emplace_back(item.second); | |||
} | |||
(void)AttrUtils::SetListStr(this, ATTR_NAME_INPUT_NAME_IDX_KEY, key); | |||
(void)AttrUtils::SetListInt(this, ATTR_NAME_INPUT_NAME_IDX_VALUE, value); | |||
} | |||
std::map<string, uint32_t> OpDesc::GetAllInputName() const { return input_name_idx_; } | |||
std::map<string, uint32_t> OpDesc::GetAllOutputName() { return output_name_idx_; } | |||
@@ -757,7 +700,6 @@ bool OpDesc::UpdateInputName(std::map<string, uint32_t> input_name_idx) { | |||
auto factory_map_size = input_name_idx.size(); | |||
// It indicates that some inputs have no optionalname. | |||
// The redundant optionalname of factory needs to be deleted and then assigned | |||
auto all_input_name_idx = GetAllInputName(); | |||
if (input_map_size < factory_map_size) { | |||
GELOGI("UpdateInputName org inputname map size: %zu, factory inputname map size: %zu", input_map_size, | |||
factory_map_size); | |||
@@ -770,18 +712,17 @@ bool OpDesc::UpdateInputName(std::map<string, uint32_t> input_name_idx) { | |||
} | |||
if (input_name_idx.size() == input_map_size) { | |||
GELOGI("UpdateInputName"); | |||
all_input_name_idx = input_name_idx; | |||
input_name_idx_ = input_name_idx; | |||
} else { | |||
ret = false; | |||
GELOGW("after UpdateInputName factoryName map size : %zu", input_name_idx.size()); | |||
} | |||
} else if (input_map_size == factory_map_size) { | |||
all_input_name_idx = input_name_idx; | |||
input_name_idx_ = input_name_idx; | |||
} else { | |||
ret = false; | |||
GELOGW("org inputname map size: %zu, factory inputname map size: %zu", input_map_size, factory_map_size); | |||
} | |||
SetAllInputName(all_input_name_idx); | |||
return ret; | |||
} | |||
@@ -924,21 +865,19 @@ graphStatus OpDesc::CommonVerify() const { | |||
} | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetInputNameByIndex(uint32_t index) const { | |||
auto input_name_idx = GetAllInputName(); | |||
auto it = input_name_idx.begin(); | |||
for (; it != input_name_idx.end(); ++it) { | |||
auto it = input_name_idx_.begin(); | |||
for (; it != input_name_idx_.end(); ++it) { | |||
if (it->second == index) { | |||
break; | |||
} | |||
} | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx.end(), ""); | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(it != input_name_idx_.end(), ""); | |||
return it->first; | |||
} | |||
int OpDesc::GetInputIndexByName(const string &name) const { | |||
auto input_name_idx = GetAllInputName(); | |||
auto it_find = input_name_idx.find(name); | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != input_name_idx.end(), -1); | |||
auto it_find = input_name_idx_.find(name); | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != input_name_idx_.end(), -1); | |||
return static_cast<int>(it_find->second); | |||
} | |||
@@ -1231,12 +1170,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY vector<bool> OpDesc::GetIsInputCo | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDesc::RestoreInputNameIdx(const string &name, | |||
const int &index) { | |||
auto input_name_idx = GetAllInputName(); | |||
if (input_name_idx.find(name) != input_name_idx.end()) { | |||
if (input_name_idx_.find(name) != input_name_idx_.end()) { | |||
GELOGI("Restore input name index is existed. name[%s]", name.c_str()); | |||
} | |||
(void)input_name_idx.insert(make_pair(name, index)); | |||
SetAllInputName(input_name_idx); | |||
(void)input_name_idx_.insert(make_pair(name, index)); | |||
return GRAPH_SUCCESS; | |||
} | |||
@@ -170,6 +170,7 @@ graphStatus RefRelations::Impl::BuildRefRelationsForWhile( | |||
// data_nodes has been sorted | |||
// for while, input num must be same as output num | |||
auto input_num = root_node->GetAllInDataAnchorsSize(); | |||
NodePtr netoutput = nullptr; | |||
size_t ref_i = 0; | |||
while (ref_i < input_num) { | |||
@@ -212,10 +213,44 @@ graphStatus RefRelations::Impl::BuildRefRelationsForWhile( | |||
cell_netoutput_in.in_out = NODE_IN; | |||
cell_netoutput_in.in_out_idx = ele.second; | |||
ref_i_all_refs.emplace_back(cell_netoutput_in); | |||
netoutput = ele.first; | |||
} | |||
node_refs.emplace_back(ref_i_all_refs); | |||
ref_i++; | |||
} | |||
/* There exist scene like the follows, it means data0 data1 netoutput 0'th | |||
* and 1'th tensor should be the same addr. | |||
* Data0 Data1 | |||
* \/ | |||
* /\ | |||
* netoutput | |||
*/ | |||
if (netoutput == nullptr) { | |||
return GRAPH_SUCCESS; | |||
} | |||
for (const auto &in_anchor : netoutput->GetAllInDataAnchors()) { | |||
auto peer_out_data_anchor = in_anchor->GetPeerOutAnchor(); | |||
if (peer_out_data_anchor == nullptr) { | |||
continue; | |||
} | |||
auto peer_out_data_node = peer_out_data_anchor->GetOwnerNode(); | |||
if (peer_out_data_node == nullptr || peer_out_data_node->GetOpDesc() == nullptr) { | |||
GELOGW("Node[%s]\'s peer_out_data_node or peer_out_data_node desc is null", (netoutput->GetName()).c_str()); | |||
continue; | |||
} | |||
if (peer_out_data_node->GetType() != DATA) { | |||
continue; | |||
} | |||
auto in_data_anchor_idx = in_anchor->GetIdx(); | |||
auto net_in_desc = netoutput->GetOpDesc()->MutableInputDesc(static_cast<uint32_t>(in_data_anchor_idx)); | |||
int ref_d; | |||
int ref_n; | |||
(void)AttrUtils::GetInt(peer_out_data_node->GetOpDesc(), kRefIndex, ref_d); | |||
(void)AttrUtils::GetInt(net_in_desc, kRefIndex, ref_n); | |||
node_refs[ref_d].insert(node_refs[ref_d].end(), node_refs[ref_n].begin(), node_refs[ref_n].end()); | |||
node_refs[ref_n].insert(node_refs[ref_n].end(), node_refs[ref_d].begin(), node_refs[ref_d].end()); | |||
} | |||
return GRAPH_SUCCESS; | |||
} | |||
@@ -49,10 +49,6 @@ graphStatus ReverseBrushWhileBodySubGraph(const ConstNodePtr &node) { | |||
} | |||
for (const auto &node_sub : sub_graph_body->GetAllNodes()) { | |||
if (node_sub->GetInDataNodes().size() == 0) { | |||
continue; | |||
} | |||
for (size_t i = 0; i < node_sub->GetAllInDataAnchorsSize(); i++) { | |||
auto input_desc = node_sub->GetOpDesc()->MutableInputDesc(i); | |||
(void)input_desc->SetUnknownDimNumShape(); | |||
@@ -303,11 +299,11 @@ graphStatus UpdateParentNodeOutTensor(const ConstNodePtr &node) { | |||
} | |||
} // namespace | |||
void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase) { | |||
if (node == nullptr) { | |||
GELOGE(GRAPH_FAILED, "node is null"); | |||
if (!IsLogEnable(GE, DLOG_DEBUG)) { | |||
return; | |||
} | |||
if (!IsLogEnable(GE, DLOG_DEBUG)) { | |||
if (node == nullptr) { | |||
GELOGE(GRAPH_FAILED, "node is null"); | |||
return; | |||
} | |||
ge::OpDescPtr op_desc = node->GetOpDesc(); | |||
@@ -325,6 +321,18 @@ void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::str | |||
TypeUtils::FormatToSerialString(input_desc->GetFormat()) + " "; | |||
} | |||
str += input_desc_str; | |||
input_desc_str = "input origin shape: "; | |||
for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { | |||
input_desc_str += "["; | |||
for (int64_t dim : input_desc->GetOriginShape().GetDims()) { | |||
input_desc_str += std::to_string(dim) + " "; | |||
} | |||
input_desc_str += "]"; | |||
input_desc_str += ":" + TypeUtils::DataTypeToSerialString(input_desc->GetOriginDataType()) + ":" + | |||
TypeUtils::FormatToSerialString(input_desc->GetOriginFormat()) + " "; | |||
} | |||
str += input_desc_str; | |||
} | |||
if (op_desc->GetAllOutputsDescSize() != 0) { | |||
@@ -342,6 +350,21 @@ void ShapeRefiner::PrintInOutTensorShape(const ge::NodePtr &node, const std::str | |||
TypeUtils::FormatToSerialString(output_desc->GetFormat()) + " "; | |||
} | |||
str += output_desc_str; | |||
output_desc_str = "output origin shape: "; | |||
for (const auto &output_desc : op_desc->GetAllOutputsDescPtr()) { | |||
if (output_desc == nullptr) { | |||
continue; | |||
} | |||
output_desc_str += "["; | |||
for (int64_t dim : output_desc->GetOriginShape().GetDims()) { | |||
output_desc_str += std::to_string(dim) + " "; | |||
} | |||
output_desc_str += "]"; | |||
output_desc_str += ":" + TypeUtils::DataTypeToSerialString(output_desc->GetOriginDataType()) + ":" + | |||
TypeUtils::FormatToSerialString(output_desc->GetOriginFormat()) + " "; | |||
} | |||
str += output_desc_str; | |||
} | |||
GELOGD("Shape dump [%s], Node name: [%s]. %s", phase.c_str(), node->GetName().c_str(), str.c_str()); | |||
} | |||
@@ -362,7 +385,6 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator & | |||
return ret; | |||
} | |||
} | |||
// Get infer func and execute | |||
ret = op_desc->CallInferFunc(op); | |||
if (ret == GRAPH_PARAM_INVALID) { | |||
@@ -479,19 +501,20 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh | |||
GELOGE(GRAPH_FAILED, "Verifying %s failed.", node->GetName().c_str()); | |||
return GRAPH_FAILED; | |||
} | |||
PrintInOutTensorShape(node, "before_infershape"); | |||
Operator op = OpDescUtils::CreateOperatorFromNode(node); | |||
auto inference_context = CreateInferenceContext(context_map, node); | |||
if (inference_context == nullptr) { | |||
GELOGE(GRAPH_FAILED, "inference context is null"); | |||
return GRAPH_FAILED; | |||
bool is_unknown_graph = node->GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||
if (!is_unknown_graph) { | |||
auto inference_context = CreateInferenceContext(context_map, node); | |||
if (inference_context == nullptr) { | |||
GELOGE(GRAPH_FAILED, "inference context is null"); | |||
return GRAPH_FAILED; | |||
} | |||
GELOGD("create context for node:%s, marks %zu", node->GetName().c_str(), inference_context->GetMarks().size()); | |||
op.SetInferenceContext(inference_context); | |||
} | |||
GELOGD("create context for node:%s, marks %zu", node->GetName().c_str(), inference_context->GetMarks().size()); | |||
PrintInOutTensorShape(node, "before_infershape"); | |||
Operator op = OpDescUtils::CreateOperatorFromNode(node); | |||
op.SetInferenceContext(inference_context); | |||
graphStatus status = InferShapeAndType(node, op, before_subgraph); | |||
if (status == GRAPH_PARAM_INVALID || status == GRAPH_SUCCESS) { | |||
(void)ge::NodeUtils::UpdatePeerNodeInputDesc(node); | |||
@@ -499,16 +522,17 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferSh | |||
GELOGE(GRAPH_FAILED, "%s call infer function failed.", node->GetName().c_str()); | |||
return GRAPH_FAILED; | |||
} | |||
auto ctx_after_infer = op.GetInferenceContext(); | |||
if (ctx_after_infer != nullptr) { | |||
GELOGD("[%s] after infershape. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size()); | |||
if (!ctx_after_infer->GetOutputHandleShapesAndTypes().empty() || !ctx_after_infer->GetMarks().empty()) { | |||
GELOGD("[%s] set inference context after. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size()); | |||
(void)context_map.emplace(node, ctx_after_infer); | |||
if (!is_unknown_graph) { | |||
auto ctx_after_infer = op.GetInferenceContext(); | |||
if (ctx_after_infer != nullptr) { | |||
GELOGD("[%s] after infershape. mark:%zu", node->GetName().c_str(), ctx_after_infer->GetMarks().size()); | |||
if (!ctx_after_infer->GetOutputHandleShapesAndTypes().empty() || !ctx_after_infer->GetMarks().empty()) { | |||
GELOGD("[%s] set inference context after. mark:%zu", node->GetName().c_str(), | |||
ctx_after_infer->GetMarks().size()); | |||
(void)context_map.emplace(node, ctx_after_infer); | |||
} | |||
} | |||
} | |||
PrintInOutTensorShape(node, "after_infershape"); | |||
return GRAPH_SUCCESS; | |||
@@ -1,18 +1,18 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
*/ | |||
#ifndef COMMON_GRAPH_UTILS_GE_IR_UTILS_H_ | |||
#define COMMON_GRAPH_UTILS_GE_IR_UTILS_H_ | |||
@@ -295,14 +295,16 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||
if (op_desc == nullptr) { | |||
return GRAPH_FAILED; | |||
} | |||
bool is_unknown_graph = node_ptr->GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||
if (is_unknown_graph) { | |||
return GRAPH_SUCCESS; | |||
} | |||
for (const auto &out_anchor : node_ptr->GetAllOutDataAnchors()) { | |||
auto output_tensor = op_desc->MutableOutputDesc(out_anchor->GetIdx()); | |||
ge::TensorUtils::SetRealDimCnt(*output_tensor, static_cast<uint32_t>(output_tensor->GetShape().GetDims().size())); | |||
bool is_unknown_graph = node_ptr->GetOwnerComputeGraph()->GetGraphUnknownFlag(); | |||
if (!is_unknown_graph) { | |||
output_tensor->SetOriginShape(output_tensor->GetShape()); | |||
output_tensor->SetOriginDataType(output_tensor->GetDataType()); | |||
} | |||
output_tensor->SetOriginShape(output_tensor->GetShape()); | |||
output_tensor->SetOriginDataType(output_tensor->GetDataType()); | |||
GELOGD("node name is %s, origin shape is %ld, origin format is %s, origin data type is %s", | |||
node_ptr->GetName().c_str(), output_tensor->GetOriginShape().GetShapeSize(), | |||
TypeUtils::FormatToSerialString(output_tensor->GetOriginFormat()).c_str(), | |||
@@ -321,8 +323,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||
GELOGI("Peer input opdesc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d", | |||
peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), output_tensor->GetShape().GetDimNum(), | |||
output_tensor->GetDataType(), output_tensor->GetOriginDataType()); | |||
peer_input_desc->SetShape(output_tensor->GetShape()); | |||
peer_input_desc->SetOriginShape(output_tensor->GetOriginShape()); | |||
peer_input_desc->SetShape(output_tensor->GetShape()); | |||
peer_input_desc->SetDataType(output_tensor->GetDataType()); | |||
peer_input_desc->SetOriginDataType(output_tensor->GetOriginDataType()); | |||
std::vector<std::pair<int64_t, int64_t>> shape_range; | |||
@@ -337,6 +339,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus NodeUtils::UpdatePeer | |||
} | |||
return GRAPH_SUCCESS; | |||
} | |||
bool NodeUtils::IsInNodesEmpty(const Node &node) { | |||
for (const auto &in_anchor : node.in_data_anchors_) { | |||
if (in_anchor != nullptr) { | |||
@@ -446,6 +449,7 @@ std::string NodeUtils::GetNodeType(const Node &node) { | |||
(void)AttrUtils::GetStr(node.GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type); | |||
return type; | |||
} | |||
ComputeGraphPtr NodeUtils::GetSubgraph(const Node &node, uint32_t index) { | |||
auto op_desc = node.GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
@@ -498,6 +502,14 @@ bool NodeUtils::IsSubgraphInput(const NodePtr &node) { | |||
return false; | |||
} | |||
if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | |||
bool is_unknown_shape = false; | |||
(void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); | |||
if (is_unknown_shape) return false; | |||
} | |||
if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) && | |||
kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 && | |||
kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) { | |||
return false; | |||
} | |||
@@ -519,7 +531,16 @@ bool NodeUtils::IsSubgraphOutput(const NodePtr &node) { | |||
if (parent_op_desc == nullptr) { | |||
return false; | |||
} | |||
if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE)) { | |||
bool is_unknown_shape = false; | |||
(void)AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape); | |||
if (is_unknown_shape) return false; | |||
} | |||
if (AttrUtils::HasAttr(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE) && | |||
kCaseOpTypes.count(parent_op_desc->GetType()) == 0 && kWhileOpTypes.count(parent_op_desc->GetType()) == 0 && | |||
kForOpTypes.count(parent_op_desc->GetType()) == 0 && kIfOpTypes.count(parent_op_desc->GetType()) == 0) { | |||
return false; | |||
} | |||
@@ -95,7 +95,18 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | |||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
"graph/load/new_model_manager/task_info/task_info.cc" | |||
"graph/manager/*.cc" | |||
"graph/manager/graph_caching_allocator.cc" | |||
"graph/manager/graph_context.cc" | |||
"graph/manager/graph_manager.cc" | |||
"graph/manager/graph_manager_utils.cc" | |||
"graph/manager/graph_mem_allocator.cc" | |||
"graph/manager/graph_var_manager.cc" | |||
"graph/manager/model_manager/event_manager.cc" | |||
"graph/manager/trans_var_data_utils.cc" | |||
"graph/manager/util/debug.cc" | |||
"graph/manager/util/hcom_util.cc" | |||
"graph/manager/util/rt_context_util.cc" | |||
"graph/manager/util/variable_accelerate_ctrl.cc" | |||
"graph/manager/model_manager/event_manager.cc" | |||
"graph/manager/util/debug.cc" | |||
"graph/manager/util/hcom_util.cc" | |||
@@ -240,7 +251,17 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc" | |||
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc" | |||
"graph/load/new_model_manager/task_info/task_info.cc" | |||
"graph/manager/*.cc" | |||
"graph/manager/graph_caching_allocator.cc" | |||
"graph/manager/graph_context.cc" | |||
"graph/manager/graph_manager.cc" | |||
"graph/manager/graph_manager_utils.cc" | |||
"graph/manager/graph_mem_allocator.cc" | |||
"graph/manager/graph_var_manager.cc" | |||
"graph/manager/model_manager/event_manager.cc" | |||
"graph/manager/trans_var_data_utils.cc" | |||
"graph/manager/util/debug.cc" | |||
"graph/manager/util/rt_context_util.cc" | |||
"graph/manager/util/variable_accelerate_ctrl.cc" | |||
"graph/manager/model_manager/event_manager.cc" | |||
"graph/manager/util/debug.cc" | |||
"graph/manager/util/rt_context_util.cc" | |||
@@ -54,6 +54,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"helper/om_file_helper.cc" | |||
"math/fp16_math.cc" | |||
"model_parser/base.cc" | |||
# "model_parser/graph_parser_util.cc" | |||
"model_saver.cc" | |||
"op/attr_value_util.cc" | |||
"op/ge_op_utils.cc" | |||
@@ -21,7 +21,6 @@ | |||
#include <sstream> | |||
#include <string> | |||
#include <vector> | |||
#include "external/graph/types.h" | |||
#include "graph/ge_tensor.h" | |||
@@ -182,7 +182,7 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { | |||
} | |||
void TBEPluginManager::LoadCustomOpLib() { | |||
LoadPluginSo(); | |||
LoadPluginSo(options_); | |||
std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | |||
GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | |||
@@ -193,10 +193,13 @@ void TBEPluginManager::LoadCustomOpLib() { | |||
} | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo() { | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo( | |||
const std::map<string, string> &options) { | |||
vector<string> file_list; | |||
string caffe_parser_path; | |||
std::string plugin_path; | |||
options_ = options; | |||
GetCustomOpPath(plugin_path); | |||
// Whether there are files in the plugin so path | |||
@@ -48,7 +48,7 @@ class TBEPluginManager { | |||
static void InitPreparation(const std::map<string, string> &options); | |||
void LoadPluginSo(); | |||
void LoadPluginSo(const std::map<string, string> &options); | |||
private: | |||
TBEPluginManager() = default; | |||
@@ -36,6 +36,7 @@ GE_COMMON_LOCAL_SRC_FILES := \ | |||
properties_manager.cc \ | |||
types.cc\ | |||
model_parser/base.cc \ | |||
model_parser/graph_parser_util.cc \ | |||
tbe_kernel_store.cc \ | |||
op/attr_value_util.cc \ | |||
op/ge_op_utils.cc \ | |||
@@ -91,9 +91,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||
} | |||
auto ge_model_weight = ge_model->GetWeight(); | |||
GELOGI("WEIGHTS_DATA size is %zu , %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); | |||
if (SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, ge_model_weight.GetData(), | |||
ge_model_weight.GetSize()) != SUCCESS) { | |||
GELOGW("Add weight partition failed"); // weight is not necessary | |||
// weight is not necessary | |||
if (ge_model_weight.GetSize() > 0) { | |||
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, | |||
ge_model_weight.GetData(), ge_model_weight.GetSize()), | |||
"Add weight partition failed"); | |||
} | |||
TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); | |||
@@ -239,45 +241,48 @@ ModelHelper::SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::strin | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(const ge::ModelData &model_data) { | |||
if (model_data.model_data == nullptr || model_data.model_len == 0) { | |||
GELOGE(FAILED, "Model_data is nullptr, or model_data_size is 0"); | |||
return FAILED; | |||
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "Model_data is nullptr, or model_data_size is 0"); | |||
return GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||
} | |||
if (is_assign_model_) { | |||
GELOGE(FAILED, "Model helper has already loaded!"); | |||
return FAILED; | |||
GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); | |||
return GE_EXEC_LOAD_MODEL_REPEATED; | |||
} | |||
if (ReleaseLocalModelData() != SUCCESS) { | |||
GELOGE(FAILED, "ReleaseLocalModelData failed."); | |||
return FAILED; | |||
GELOGE(INTERNAL_ERROR, "ReleaseLocalModelData failed."); | |||
return INTERNAL_ERROR; | |||
} | |||
Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | |||
if (ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_) != SUCCESS) { | |||
GELOGE(FAILED, "Parse model content failed!"); | |||
return FAILED; | |||
GELOGE(status, "Parse model content failed!"); | |||
return status; | |||
} | |||
file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data); | |||
OmFileLoadHelper om_load_helper; | |||
if (om_load_helper.Init(model_addr_tmp_, model_len_tmp_) != SUCCESS) { | |||
GELOGE(FAILED, "Om_load_helper init failed"); | |||
status = om_load_helper.Init(model_addr_tmp_, model_len_tmp_); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "Om_load_helper init failed"); | |||
model_addr_tmp_ = nullptr; | |||
return FAILED; | |||
return status; | |||
} | |||
auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_addr_tmp_); | |||
if (partition_table->num == kOriginalOmPartitionNum) { | |||
model_addr_tmp_ = nullptr; | |||
GELOGE(FAILED, "om model is error,please use executable om model"); | |||
return FAILED; | |||
GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "om model is error,please use executable om model"); | |||
return GE_EXEC_MODEL_PARTITION_NUM_INVALID; | |||
} | |||
// Encrypt model need to del temp model/no encrypt model don't need to del model | |||
model_addr_tmp_ = nullptr; | |||
if (GenerateGeModel(om_load_helper) != SUCCESS) { | |||
GELOGE(FAILED, "GenerateGeModel failed"); | |||
return FAILED; | |||
status = GenerateGeModel(om_load_helper); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "GenerateGeModel failed"); | |||
return status; | |||
} | |||
is_assign_model_ = true; | |||
@@ -289,19 +294,19 @@ Status ModelHelper::GenerateGeModel(OmFileLoadHelper &om_load_helper) { | |||
GE_CHECK_NOTNULL(model_); | |||
Status ret = LoadModelData(om_load_helper); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
return GE_EXEC_LOAD_MODEL_PARTITION_FAILED; | |||
} | |||
ret = LoadWeights(om_load_helper); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; | |||
} | |||
ret = LoadTask(om_load_helper); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
return GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||
} | |||
ret = LoadTBEKernelStore(om_load_helper); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -41,8 +41,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(c | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(uint8_t *model_data, | |||
const uint32_t model_data_size) { | |||
if (LoadModelPartitionTable(model_data, model_data_size) != SUCCESS) { | |||
return FAILED; | |||
Status status = LoadModelPartitionTable(model_data, model_data_size); | |||
if (status != SUCCESS) { | |||
return status; | |||
} | |||
is_inited_ = true; | |||
return SUCCESS; | |||
@@ -66,7 +67,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod | |||
} | |||
if (!found) { | |||
if (type != ModelPartitionType::TBE_KERNELS) { | |||
if (type != ModelPartitionType::TBE_KERNELS && type != ModelPartitionType::WEIGHTS_DATA) { | |||
GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast<int>(type)); | |||
return FAILED; | |||
} | |||
@@ -83,7 +84,9 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { | |||
// Model length too small | |||
if (model.model_len < (sizeof(ModelFileHeader) + sizeof(ModelPartitionTable))) { | |||
GELOGE(PARAM_INVALID, "Invalid model. length < sizeof(ModelFileHeader) + sizeof(ModelPartitionTable)."); | |||
GELOGE(PARAM_INVALID, | |||
"Invalid model. length[%u] < sizeof(ModelFileHeader)[%zu] + sizeof(ModelPartitionTable)[%zu].", | |||
model.model_len, sizeof(ModelFileHeader), sizeof(ModelPartitionTable)); | |||
return PARAM_INVALID; | |||
} | |||
@@ -93,9 +96,9 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { | |||
if ((model_header->length != model.model_len - sizeof(ModelFileHeader)) || | |||
(MODEL_FILE_MAGIC_NUM != model_header->magic)) { | |||
GELOGE(PARAM_INVALID, | |||
"Invalid model. file_header->length(%u) + sizeof(ModelFileHeader)(%zu) != model->model_len(%u) || " | |||
"MODEL_FILE_MAGIC_NUM != file_header->magic", | |||
model_header->length, sizeof(ModelFileHeader), model.model_len); | |||
"Invalid model. file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || " | |||
"MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", | |||
model_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, model_header->magic); | |||
return PARAM_INVALID; | |||
} | |||
return SUCCESS; | |||
@@ -112,16 +115,16 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||
// Original model partition include graph-info | |||
if ((partition_table->num != PARTITION_SIZE) && (partition_table->num != (PARTITION_SIZE - 1)) && | |||
(partition_table->num != 1)) { | |||
GELOGE(PARAM_INVALID, "Invalid partition_table->num:%u", partition_table->num); | |||
return PARAM_INVALID; | |||
GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "Invalid partition_table->num:%u", partition_table->num); | |||
return GE_EXEC_MODEL_PARTITION_NUM_INVALID; | |||
} | |||
size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | |||
GELOGI("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | |||
partition_table->num, sizeof(ModelFileHeader), mem_offset); | |||
if (model_data_size <= mem_offset) { | |||
GELOGE(PARAM_INVALID, "invalid model data, partition_table->num:%u, model data size %u", partition_table->num, | |||
model_data_size); | |||
return PARAM_INVALID; | |||
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | |||
partition_table->num, model_data_size); | |||
return GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||
} | |||
for (uint32_t i = 0; i < partition_table->num; i++) { | |||
ModelPartition partition; | |||
@@ -131,9 +134,9 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||
context_.partition_datas_.push_back(partition); | |||
if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) { | |||
GELOGE(PARAM_INVALID, "The partition size %zu is greater than the model data size %u.", | |||
GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.", | |||
partition.size + mem_offset, model_data_size); | |||
return PARAM_INVALID; | |||
return GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||
} | |||
mem_offset += partition.size; | |||
GELOGI("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | |||
@@ -35,15 +35,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro | |||
ge::ModelData &model_data) { | |||
std::string real_path = RealPath(model_path); | |||
if (real_path.empty()) { | |||
GELOGE(PARAM_INVALID, "Model file path '%s' is invalid", model_path); | |||
return PARAM_INVALID; | |||
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); | |||
return GE_EXEC_MODEL_PATH_INVALID; | |||
} | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(model_path) == -1, return FAILED, "File size not valid."); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(model_path) == -1, return GE_EXEC_READ_MODEL_FILE_FAILED, | |||
"File size not valid."); | |||
std::ifstream fs(real_path.c_str(), std::ifstream::binary); | |||
GE_CHK_BOOL_RET_STATUS(fs.is_open(), FAILED, "Open file failed! path:%s", model_path); | |||
GE_CHK_BOOL_RET_STATUS(fs.is_open(), GE_EXEC_READ_MODEL_FILE_FAILED, "Open file failed! path:%s", model_path); | |||
// get length of file: | |||
(void)fs.seekg(0, std::ifstream::end); | |||
@@ -55,7 +56,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro | |||
char *data = new (std::nothrow) char[len]; | |||
if (data == nullptr) { | |||
GELOGE(MEMALLOC_FAILED, "Load model From file failed, bad memory allocation occur. (need:%ld)", len); | |||
GELOGE(MEMALLOC_FAILED, "Load model From file failed, bad memory allocation occur. (need:%u)", len); | |||
return MEMALLOC_FAILED; | |||
} | |||
@@ -79,31 +80,33 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::ParseMo | |||
GE_CHECK_NOTNULL(model.model_data); | |||
// Model length too small | |||
GE_CHK_BOOL_RET_STATUS(model.model_len >= sizeof(ModelFileHeader), PARAM_INVALID, | |||
"Invalid model. length < sizeof(ModelFileHeader)."); | |||
GE_CHK_BOOL_RET_STATUS(model.model_len >= sizeof(ModelFileHeader), GE_EXEC_MODEL_DATA_SIZE_INVALID, | |||
"Invalid model. Model data size %u must be greater than or equal to %zu.", model.model_len, | |||
sizeof(ModelFileHeader)); | |||
// Get file header | |||
auto file_header = reinterpret_cast<ModelFileHeader *>(model.model_data); | |||
// Determine whether the file length and magic number match | |||
GE_CHK_BOOL_RET_STATUS( | |||
file_header->length == model.model_len - sizeof(ModelFileHeader) && file_header->magic == MODEL_FILE_MAGIC_NUM, | |||
PARAM_INVALID, | |||
"Invalid model. file_header->length + sizeof(ModelFileHeader) != model->model_len || MODEL_FILE_MAGIC_NUM != " | |||
"file_header->magic"); | |||
GE_EXEC_MODEL_DATA_SIZE_INVALID, | |||
"Invalid model. file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || " | |||
"MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", | |||
file_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, file_header->magic); | |||
Status res = SUCCESS; | |||
// Get data address | |||
uint8_t *data = reinterpret_cast<uint8_t *>(model.model_data) + sizeof(ModelFileHeader); | |||
if (file_header->is_encrypt == ModelEncryptType::UNENCRYPTED) { // Unencrypted model | |||
GE_CHK_BOOL_RET_STATUS(model.key.empty(), PARAM_INVALID, | |||
GE_CHK_BOOL_RET_STATUS(model.key.empty(), GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, | |||
"Invalid param. model is unencrypted, but key is not empty."); | |||
model_data = data; | |||
model_len = file_header->length; | |||
GELOGI("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader)); | |||
} else { | |||
GELOGE(PARAM_INVALID, "Invalid model. ModelEncryptType not supported."); | |||
res = PARAM_INVALID; | |||
GELOGE(GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, "Invalid model. ModelEncryptType not supported."); | |||
res = GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION; | |||
} | |||
return res; | |||
@@ -0,0 +1,483 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "graph_parser_util.h" | |||
#include <memory> | |||
#include "common/auth/file_saver.h" | |||
#include "common/convert/pb2json.h" | |||
#include "common/debug/log.h" | |||
#include "common/debug/memory_dumper.h" | |||
#include "common/model_parser/base.h" | |||
#include "common/model_saver.h" | |||
#include "common/properties_manager.h" | |||
#include "common/string_util.h" | |||
#include "common/types.h" | |||
#include "common/util.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/omg/parser/parser_inner_ctx.h" | |||
#include "graph/compute_graph.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/optimize/common/params.h" | |||
#include "graph/utils/type_utils.h" | |||
#include "omg/omg_inner_types.h" | |||
#include "omg/parser/model_parser.h" | |||
#include "omg/parser/parser_factory.h" | |||
#include "omg/parser/weights_parser.h" | |||
#include "parser/common/pre_checker.h" | |||
#include "proto/ge_ir.pb.h" | |||
#include "register/op_registry.h" | |||
#include "external/register/register_types.h" | |||
namespace ge { | |||
namespace { | |||
// The function is incomplete. Currently, only l2_optimize, off_optimize is supported. | |||
const char *const kInputShapeSample1 = "\"input_name1:n1,c1,h1,w1\""; | |||
const char *const kInputShapeSample2 = "\"input_name1:1,3,224,224\""; | |||
const char *const kSplitError1 = "size not equal to 2 split by \":\""; | |||
const char *const kEmptyError = "can not be empty"; | |||
const char *const kFloatNumError = "exist float number"; | |||
const char *const kDigitError = "is not digit"; | |||
// Splits an "--input_shape" entry of the form "node_name:d1,d2,..." at the
// LAST ':' (so node names containing ':' are kept intact).
// Returns {name, shape_string}, or an empty vector when no ':' is present.
std::vector<std::string> SplitInputShape(const std::string &input_shape) {
  std::vector<std::string> parts;
  const size_t sep = input_shape.rfind(':');
  if (sep == std::string::npos) {
    return parts;
  }
  parts.emplace_back(input_shape.substr(0, sep));
  parts.emplace_back(input_shape.substr(sep + 1));
  return parts;
}
// Maps the textual values accepted by the --output_type option to GE data
// types. Only these three entries are supported; ParseOutputType rejects
// anything else with error E10042.
static std::map<std::string, ge::DataType> output_type_str_to_datatype = {
    {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}};
// Validates that a boolean command-line option value is exactly the literal
// "true" or "false". On any other value, reports ATC error E10033 (naming
// the offending parameter) and logs before returning false.
static bool CheckInputTrueOrFalse(const std::string &s, const std::string &atc_param) {
  const bool is_valid = (s == "true") || (s == "false");
  if (!is_valid) {
    ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"parameter", "value"}, {atc_param, s});
    GELOGE(PARAM_INVALID, "Input parameter[--%s]'s value[%s] must be true or false.", atc_param.c_str(), s.c_str());
  }
  return is_valid;
}
bool CheckDigitStr(std::string &str) { | |||
for (char c : str) { | |||
if (!isdigit(c)) { | |||
GELOGE(domi::FAILED, "value[%s] is not positive integer", str.c_str()); | |||
return false; | |||
} | |||
} | |||
return true; | |||
} | |||
// Converts a decimal digit string from the --output_type option into an
// int32_t. On failure logs and reports an ATC error — E10014 for a
// non-numeric/empty value, E10013 when the value is out of int range —
// and returns PARAM_INVALID. On success stores the result in |value| and
// returns SUCCESS.
Status StringToInt(std::string &str, int32_t &value) {
  try {
    if (!CheckDigitStr(str)) {
      GELOGE(PARAM_INVALID, "Invalid of digit string: %s ", str.c_str());
      ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str});
      return PARAM_INVALID;
    }
    // stoi can still throw even after the digit check: invalid_argument for
    // an empty string, out_of_range when the number exceeds int range.
    value = stoi(str);
  } catch (std::invalid_argument &) {
    GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch invalid_argument.", str.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"output_type", str});
    return PARAM_INVALID;
  } catch (std::out_of_range &) {
    GELOGE(PARAM_INVALID, "Invalid of digit string: %s, catch out_of_range.", str.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"output_type", str});
    return PARAM_INVALID;
  }
  return SUCCESS;
}
// Verifies every "name:index" entry collected from --output_type also
// appears in the --out_nodes list held by the global omg context.
// Reports ATC error E10059 and returns domi::FAILED on the first entry
// that is missing; domi::SUCCESS otherwise.
Status VerifyOutputTypeAndOutNodes(std::vector<std::string> &out_type_vec) {
  std::vector<std::pair<std::string, int32_t>> user_out_nodes = domi::GetContext().user_out_nodes;
  std::set<std::string> out_nodes_info;
  // out_nodes set should include output_type and output_format
  for (const auto &user_node : user_out_nodes) {
    out_nodes_info.emplace(user_node.first + ":" + to_string(user_node.second));
  }
  for (const auto &type_entry : out_type_vec) {
    if (out_nodes_info.count(type_entry) == 0) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10059", {"value"}, {type_entry});
      GELOGE(domi::FAILED, "Can not find this node (%s) in out_nodes.", type_entry.c_str());
      return domi::FAILED;
    }
  }
  return domi::SUCCESS;
}
// Parses the --output_type option.
// Two accepted forms:
//   1. A bare dtype ("FP32"/"FP16"/"UINT8", no ':') applying to all output
//      nodes — only validated here, the maps are left untouched.
//   2. A ';'-separated list of "opname:index:dtype" triples.
// For form 2 fills, per node name:
//   out_type_index_map: output indices whose dtype is overridden,
//   out_type_dt_map:    the target data types (parallel vector),
// then checks every "opname:index" pair exists in --out_nodes.
// Returns domi::SUCCESS, or domi::FAILED after reporting an ATC error
// (E10042 bad dtype, E10058 malformed triple, or errors from StringToInt /
// VerifyOutputTypeAndOutNodes).
Status ParseOutputType(const std::string &output_type, std::map<std::string, vector<uint32_t>> &out_type_index_map,
                       std::map<std::string, vector<ge::DataType>> &out_type_dt_map) {
  if (output_type.find(':') == std::string::npos) {
    GELOGI("output_type is not multiple nodes, means all out nodes");
    auto it = output_type_str_to_datatype.find(output_type);
    if (it == output_type_str_to_datatype.end()) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10042", {"value"}, {output_type});
      GELOGE(ge::PARAM_INVALID, "Invalid value for --output_type[%s], only support DT_FLOAT, DT_FLOAT16, DT_UINT8!!",
             output_type.c_str());
      return domi::FAILED;
    }
    return domi::SUCCESS;
  }
  std::vector<std::string> out_type_vec;
  vector<string> nodes_v = StringUtils::Split(output_type, ';');
  for (const string &node : nodes_v) {
    vector<string> node_index_type_v = StringUtils::Split(node, ':');
    if (node_index_type_v.size() != 3) {  // The size must be 3.
      ErrorManager::GetInstance().ATCReportErrMessage("E10058", {"value"}, {node});
      GELOGE(PARAM_INVALID,
             "The param of output_type is invalid, the correct format is [opname:index:dtype],"
             "while the actual input is %s.",
             node.c_str());
      return domi::FAILED;
    }
    ge::DataType tmp_dt;
    std::string node_name = StringUtils::Trim(node_index_type_v[0]);
    std::string index_str = StringUtils::Trim(node_index_type_v[1]);
    int32_t index;
    if (StringToInt(index_str, index) != SUCCESS) {
      GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s.", index_str.c_str());
      return domi::FAILED;
    }
    std::string dt_value = StringUtils::Trim(node_index_type_v[2]);
    auto it = output_type_str_to_datatype.find(dt_value);
    if (it == output_type_str_to_datatype.end()) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10042", {"value"}, {dt_value});
      GELOGE(ge::PARAM_INVALID, "output_type [%s] is invalid.", dt_value.c_str());
      return domi::FAILED;
    } else {
      tmp_dt = it->second;
    }
    // Record "name:index" so the pair can be verified against --out_nodes.
    out_type_vec.push_back(node_name + ":" + index_str);
    auto it_index = out_type_index_map.find(node_name);
    if (it_index == out_type_index_map.end()) {
      vector<uint32_t> tmp_vec;
      tmp_vec.push_back(index);
      out_type_index_map.emplace(node_name, tmp_vec);
    } else {
      it_index->second.push_back(index);
    }
    auto it_dt = out_type_dt_map.find(node_name);
    if (it_dt == out_type_dt_map.end()) {
      vector<ge::DataType> tmp_vec;
      tmp_vec.push_back(tmp_dt);
      out_type_dt_map.emplace(node_name, tmp_vec);
    } else {
      it_dt->second.push_back(tmp_dt);
    }
  }
  return VerifyOutputTypeAndOutNodes(out_type_vec);
}
// Validates one --out_nodes entry against the graph.
// The target must not be an input Data op, and |index| must address an
// existing output of the node. Reports ATC error E10068 (Data op) or
// E10060 (index out of range) and returns domi::FAILED on violation;
// domi::SUCCESS otherwise.
// Takes the OpDescPtr by const reference: copying a shared_ptr per call
// costs an atomic refcount update for no benefit.
Status CheckOutNode(const ge::OpDescPtr &op_desc, int32_t index) {
  if (op_desc->GetType() == DATA) {
    GELOGE(domi::FAILED, "out_nodes [%s] can not be set input data, please check", op_desc->GetName().c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10068", {"parameter", "value", "reason"},
                                                    {"out_nodes", op_desc->GetName(), "it can not be set input data"});
    return domi::FAILED;
  }
  // Explicit cast: GetOutputsSize() is unsigned, the option index is int32.
  const int32_t out_size = static_cast<int32_t>(op_desc->GetOutputsSize());
  if (index < 0 || index >= out_size) {
    GELOGE(domi::FAILED,
           "out_node [%s] output index:%d must be smaller "
           "than node output size:%d and can not be negative!",
           op_desc->GetName().c_str(), index, out_size);
    std::string fail_reason = "output index:" + to_string(index) +
                              " must be smaller than output size:" + to_string(out_size) + " and can not be negative!";
    ErrorManager::GetInstance().ATCReportErrMessage("E10060", {"parameter", "value", "reason"},
                                                    {"out_nodes", op_desc->GetName(), fail_reason});
    return domi::FAILED;
  }
  return domi::SUCCESS;
}
// Collects the output anchors reachable from |node|.
// For a NETOUTPUT node, records the real producer feeding each of its input
// anchors; for any other node, records every output anchor of the node
// itself. Appends (node, output-index) pairs to |output_nodes_info| and the
// producer names to |output_nodes_name|.
// Returns domi::FAILED when the op desc or a peer anchor is missing.
// Takes NodePtr by const reference to avoid a shared_ptr refcount copy.
Status GetOutputLeaf(const NodePtr &node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
                     std::vector<std::string> &output_nodes_name) {
  ge::OpDescPtr tmpDescPtr = node->GetOpDesc();
  if (tmpDescPtr == nullptr) {
    GELOGE(domi::FAILED, "Get outnode op desc fail.");
    return domi::FAILED;
  }
  size_t size = tmpDescPtr->GetOutputsSize();
  if (node->GetType() != NETOUTPUT) {
    for (size_t index = 0; index < size; ++index) {
      // Explicit cast: the pair stores the output index as int32_t.
      output_nodes_info.push_back(std::make_pair(node, static_cast<int32_t>(index)));
      output_nodes_name.push_back(node->GetName());
    }
  } else {
    const auto in_anchors = node->GetAllInDataAnchors();
    // Iterate by const reference: each anchor is a shared_ptr and iterating
    // by value would copy (atomic refcount) every element.
    for (const auto &in_anchor : in_anchors) {
      auto out_anchor = in_anchor->GetPeerOutAnchor();
      if (out_anchor == nullptr) {
        GELOGE(domi::FAILED, "Get leaf node op desc fail.");
        return domi::FAILED;
      }
      auto out_node = out_anchor->GetOwnerNode();
      output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx()));
      output_nodes_name.push_back(out_node->GetName());
    }
  }
  return SUCCESS;
}
} // namespace | |||
// Parse the comma-separated true/false list that selects, per output, whether
// the output is emitted as NC1HWC0 ("true") or ND ("false"), and store the
// resulting format list in the global omg context. An empty option string is
// a no-op; any token other than true/false is rejected as PARAM_INVALID.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputFp16NodesFormat(const string &is_output_fp16) {
  if (is_output_fp16.empty()) {
    return SUCCESS;
  }
  vector<domiTensorFormat_t> &output_formats = domi::GetContext().output_formats;
  output_formats.clear();
  for (auto &flag : StringUtils::Split(is_output_fp16, ',')) {
    StringUtils::Trim(flag);
    if (!CheckInputTrueOrFalse(flag, "is_output_adjust_hw_layout")) {
      GELOGE(PARAM_INVALID, "Invalid Param, is_output_adjust_hw_layout only support true/false: but is [%s]",
             is_output_fp16.c_str());
      return PARAM_INVALID;
    }
    if (flag == "true") {
      output_formats.push_back(domi::DOMI_TENSOR_NC1HWC0);
    } else if (flag == "false") {
      output_formats.push_back(DOMI_TENSOR_ND);
    }
  }
  return SUCCESS;
}
// Record the graph's output nodes both in the compute graph and in the global
// omg context (net_out_nodes).
// If the user supplied "--out_nodes", each requested (node, index) pair is
// validated, optionally tagged for fp16/NC1HWC0 output (per output_formats)
// and for "--output_type" data types; otherwise every leaf node (has data
// inputs but no data outputs) is collected via GetOutputLeaf.
// Returns domi::FAILED when a requested node is missing or invalid.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph,
                                                                          const std::string &output_type,
                                                                          const std::string &output) {
  ge::ComputeGraphPtr compute_graph = ge::GraphUtils::GetComputeGraph(graph);
  GE_CHECK_NOTNULL(compute_graph);
  // Bind const references instead of copying the context vectors on every call;
  // neither vector is modified below.
  const std::vector<std::pair<std::string, int32_t>> &user_out_nodes = domi::GetContext().user_out_nodes;
  const std::vector<domiTensorFormat_t> &output_formats = domi::GetContext().output_formats;
  std::vector<std::pair<ge::NodePtr, int32_t>> output_nodes_info;
  std::vector<std::string> output_nodes_name;
  std::map<std::string, vector<uint32_t>> out_type_index_map;
  std::map<std::string, vector<ge::DataType>> out_type_dt_map;
  if (!output_type.empty()) {
    if (ParseOutputType(output_type, out_type_index_map, out_type_dt_map) != SUCCESS) {
      GELOGE(domi::FAILED, "Parse output_type failed.");
      return domi::FAILED;
    }
  }
  // User declared outputs
  for (size_t i = 0; i < user_out_nodes.size(); ++i) {
    ge::NodePtr out_node = compute_graph->FindNode(user_out_nodes[i].first);
    if (out_node == nullptr) {
      GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str());
      return domi::FAILED;
    }
    auto op_desc = out_node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    if (CheckOutNode(op_desc, user_out_nodes[i].second) != SUCCESS) {
      GELOGE(domi::FAILED, "Check out node (%s) fail.", user_out_nodes[i].first.c_str());
      return domi::FAILED;
    }
    // Optional per-output fp16/NC1HWC0 marking; output_formats may be shorter
    // than user_out_nodes, so guard the index.
    if (i < output_formats.size()) {
      if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) {
        GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str());
        if (!ge::AttrUtils::SetBool(op_desc, "output_set_fp16_nc1hwc0", true)) {
          GELOGW("The output node [%s] set NC1HWC0 failed", user_out_nodes[i].first.c_str());
        }
      }
    }
    // Attach requested output data types, if "--output_type" named this node.
    auto it_index = out_type_index_map.find(user_out_nodes[i].first);
    auto it_dt = out_type_dt_map.find(user_out_nodes[i].first);
    if ((it_index != out_type_index_map.end()) && (it_dt != out_type_dt_map.end())) {
      GELOGI("The output node [%s] need to be set output_type", user_out_nodes[i].first.c_str());
      (void)ge::AttrUtils::SetListDataType(op_desc, "_output_dt_list", it_dt->second);
      (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second);
    }
    output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second));
    output_nodes_name.push_back(out_node->GetName());
  }
  // default output node (leaf)
  if (user_out_nodes.empty()) {
    for (ge::NodePtr node : compute_graph->GetDirectNode()) {
      if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) {
        Status ret = GetOutputLeaf(node, output_nodes_info, output_nodes_name);
        GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail.");
      }
    }
  }
  compute_graph->SetGraphOutNodesInfo(output_nodes_info);
  domi::GetContext().net_out_nodes = output_nodes_name;
  return domi::SUCCESS;
}
// Parse the "--input_shape" option string, e.g. "data:1,3,224,224;label:1",
// into both a lookup map (shape_map) and an ordered list (user_shape_map).
// Each ';'-separated entry must split into exactly "name:dims"; every dim must
// be an integer (no '.'), and when is_dynamic_input is false it must also be
// strictly positive. Returns false after reporting an ATC error message on the
// first malformed entry; on success both containers hold one record per entry.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ParseInputShape(
    const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map,
    vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) {
  vector<string> shape_vec = StringUtils::Split(input_shape, ';');
  const int DEFAULT_SHAPE_PAIR_SIZE = 2;  // each entry must split into "name" and "dims"
  for (const auto &shape : shape_vec) {
    vector<string> shape_pair_vec = SplitInputShape(shape);
    if (shape_pair_vec.size() != DEFAULT_SHAPE_PAIR_SIZE) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                      {shape, kSplitError1, kInputShapeSample1});
      GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
             shape.c_str(), kSplitError1, kInputShapeSample1);
      return false;
    }
    if (shape_pair_vec[1].empty()) {
      ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                      {shape, kEmptyError, kInputShapeSample1});
      GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
             shape.c_str(), kEmptyError, kInputShapeSample1);
      return false;
    }
    vector<string> shape_value_strs = StringUtils::Split(shape_pair_vec[1], ',');
    vector<int64_t> shape_values;
    for (auto &shape_value_str : shape_value_strs) {
      // stol: The method may throw an exception: invalid_argument/out_of_range
      // Reject floating-point dims before attempting the integer conversion.
      if (std::string::npos != shape_value_str.find('.')) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                        {shape, kFloatNumError, kInputShapeSample2});
        GELOGW("Parse input parameter [--input_shape]'s shape[%s] failed, reason: %s, correct sample is %s.",
               shape.c_str(), kFloatNumError, kInputShapeSample2);
        return false;
      }
      long left_result = 0;
      try {
        left_result = stol(StringUtils::Trim(shape_value_str));
        if (!shape_value_str.empty() && (shape_value_str.front() == '-')) {
          // The value maybe dynamic shape [-1], need substr it and verify isdigit.
          shape_value_str = shape_value_str.substr(1);
        }
        // After stripping an optional leading '-', every remaining character
        // must be a digit (catches e.g. "1x2" that stol would partially parse).
        for (char c : shape_value_str) {
          if (!isdigit(c)) {
            ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
                                                            {shape, kDigitError, kInputShapeSample2});
            GELOGE(PARAM_INVALID, "--input_shape's shape value[%s] is not digit", shape_value_str.c_str());
            return false;
          }
        }
      } catch (const std::out_of_range &) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        GELOGW("Input parameter[--input_shape]’s value[%s] cause out of range execption!", shape_value_str.c_str());
        return false;
      } catch (const std::invalid_argument &) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        GELOGW("Input parameter[--input_shape]’s value[%s] cause invalid argument!", shape_value_str.c_str());
        return false;
      } catch (...) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10015", {"parameter", "value"},
                                                        {"input_shape", shape_value_str});
        GELOGW("Input parameter[--input_shape]’s value[%s] cause unkown execption!", shape_value_str.c_str());
        return false;
      }
      int64_t result = left_result;
      // - 1 is not currently supported: non-positive dims are only accepted
      // when the caller flagged the input as dynamic.
      if (!is_dynamic_input && result <= 0) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10011", {"shape", "result"}, {shape, std::to_string(result)});
        GELOGW(
            "Input parameter[--input_shape]’s shape value[%s] is invalid, "
            "expect positive integer, but value is %ld.",
            shape.c_str(), result);
        return false;
      }
      shape_values.push_back(result);
    }
    shape_map.emplace(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values));
    user_shape_map.push_back(make_pair(StringUtils::Trim(shape_pair_vec[0]), shape_values));
  }
  return true;
}
// Parse the "--out_nodes" option string of the form
// "node_name1:0;node_name1:1;node_name2:0" into the global omg context:
// out_nodes_map groups the indices per node name, user_out_nodes keeps the
// (name, index) pairs in user order. Any malformed entry (missing ':' or a
// non-digit index) is reported and rejected with PARAM_INVALID.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOutputNodes(const string &out_nodes) {
  try {
    // parse output node
    if (out_nodes.empty()) {
      return SUCCESS;
    }
    domi::GetContext().out_nodes_map.clear();
    domi::GetContext().user_out_nodes.clear();
    for (const string &node : StringUtils::Split(out_nodes, ';')) {
      vector<string> parts = StringUtils::Split(node, ':');
      if (parts.size() != 2) {  // The size must be 2.
        ErrorManager::GetInstance().ATCReportErrMessage(
            "E10068", {"parameter", "value", "reason"},
            {"out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""});
        GELOGE(PARAM_INVALID,
               "The input format of --out_nodes is invalid, the correct format is "
               "\"node_name1:0;node_name1:1;node_name2:0\", while the actual input is %s.",
               node.c_str());
        return PARAM_INVALID;
      }
      // stoi: The method may throw an exception: invalid_argument/out_of_range
      if (!CheckDigitStr(parts[1])) {
        ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes});
        GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str());
        return PARAM_INVALID;
      }
      const int32_t index = stoi(StringUtils::Trim(parts[1]));
      auto &out_nodes_map = domi::GetContext().out_nodes_map;
      auto iter = out_nodes_map.find(parts[0]);
      if (iter == out_nodes_map.end()) {
        out_nodes_map.emplace(parts[0], std::vector<int32_t>{index});
      } else {
        iter->second.emplace_back(index);
      }
      domi::GetContext().user_out_nodes.push_back(std::make_pair(parts[0], index));
    }
  } catch (std::invalid_argument &) {
    GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10014", {"parameter", "value"}, {"out_nodes", out_nodes});
    return PARAM_INVALID;
  } catch (std::out_of_range &) {
    GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str());
    ErrorManager::GetInstance().ATCReportErrMessage("E10013", {"parameter", "value"}, {"out_nodes", out_nodes});
    return PARAM_INVALID;
  }
  return SUCCESS;
}
// Load the op-name mapping configuration file (if any) into the global omg
// context. A null or empty path is a no-op; a file that fails to parse yields
// FAILED.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ParseOpConf(const char *op_conf) {
  if (op_conf == nullptr || *op_conf == '\0') {
    return SUCCESS;
  }
  // divided by ":"
  PropertiesManager::Instance().SetPropertyDelimiter(OP_CONF_DELIMITER);
  // Parsing the op_conf configuration item file
  if (!PropertiesManager::Instance().Init(op_conf)) {
    GELOGE(FAILED, "op_name_map init failed!");
    return FAILED;
  }
  // Return map and put it into ATC global variable
  domi::GetContext().op_conf_map = PropertiesManager::Instance().GetPropertyMap();
  return SUCCESS;
}
} // namespace ge |
@@ -0,0 +1,68 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef GE_COMMON_GRAPH_PARSER_UTIL_H_ | |||
#define GE_COMMON_GRAPH_PARSER_UTIL_H_ | |||
#include <google/protobuf/message.h> | |||
#include <string> | |||
#include <unordered_map> | |||
#include <vector> | |||
#include "framework/common/types.h" | |||
#include "framework/omg/omg_inner_types.h" | |||
#include "framework/omg/parser/parser_inner_ctx.h" | |||
#include "proto/ge_ir.pb.h" | |||
#include "proto/om.pb.h" | |||
#include "graph/compute_graph.h" | |||
#include "graph/graph.h" | |||
#include "graph/model.h" | |||
#include "runtime/kernel.h" | |||
using domi::Status; | |||
using std::pair; | |||
using std::string; | |||
using std::unordered_map; | |||
using std::vector; | |||
namespace ge {
/**
 * @ingroup domi_omg
 * @brief set the graph's output node info from user options
 *        (--out_nodes / --output_type context already parsed into omg context)
 * @return Status SUCCESS on success, FAILED otherwise
 */
Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format);
/**
 * @ingroup domi_omg
 * @brief parse the comma-separated true/false list selecting fp16/NC1HWC0
 *        output format per output, into the omg context
 * @return Status SUCCESS on success, PARAM_INVALID on a bad token
 */
Status ParseOutputFp16NodesFormat(const string &is_output_fp16);
/**
 * @ingroup domi_omg
 * @brief parse the "--out_nodes" option ("name1:0;name1:1;name2:0") into the
 *        omg context
 * @return Status SUCCESS on success, PARAM_INVALID on a malformed entry
 */
Status ParseOutputNodes(const string &out_nodes);
/**
 * @ingroup domi_omg
 * @brief parse the "--input_shape" option ("name1:1,3,224,224;name2:...")
 *        into shape_map and user_shape_map
 * @return bool true on success, false on a malformed entry
 */
bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map,
                     vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input);
/**
 * @ingroup domi_omg
 * @brief load the op-name mapping configuration file into the omg context
 * @return Status SUCCESS on success (or empty path), FAILED on parse failure
 */
Status ParseOpConf(const char *op_conf);
}  // namespace ge
namespace domi {
/**
 * @ingroup domi_omg
 * @brief get the process-wide omg context singleton
 * @return reference of OmgContext
 */
ge::OmgContext &GetContext();
}  // namespace domi
#endif // GE_COMMON_GRAPH_PARSER_UTIL_H_ |
@@ -16,15 +16,12 @@ | |||
#include "common/profiling/profiling_manager.h" | |||
#include <nlohmann/json.hpp> | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/debug/log.h" | |||
#include "framework/common/string_util.h" | |||
#include "graph/ge_context.h" | |||
#include "runtime/base.h" | |||
using Json = nlohmann::json; | |||
namespace { | |||
const char *const kJobID = "jobID"; | |||
const char *const kDeviceID = "deviceID"; | |||
@@ -35,6 +32,7 @@ const char *const kEvents = "events"; | |||
const char *const kAiCoreEvents = "ai_core_events"; | |||
const char *const kName = "name"; | |||
const char *const kTraceID = "traceId"; | |||
const char *const kProfDir = "resultPath"; | |||
const size_t kReportMaxLen = 2048; | |||
} // namespace | |||
@@ -100,6 +98,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
Json start_prof_conf = Json::parse(config); | |||
Json &prof_conf = start_prof_conf[kStartCfg][0]; | |||
job_id_ = prof_conf[kJobID]; | |||
auto iter = prof_conf.find(kProfDir); | |||
if (iter != prof_conf.end()) { | |||
prof_dir_ = prof_conf[kProfDir]; | |||
} | |||
Json &device_id = prof_conf[kDeviceID]; | |||
if (device_id.size() != 0) { | |||
vector<int32_t>().swap(device_id_); | |||
@@ -126,23 +128,36 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
} | |||
} | |||
GELOGI("Profiling json config from acl:%s", config.c_str()); | |||
Json &features = prof_conf[kFeatures]; | |||
if (ParseFeaturesFromAclCfg(features) != SUCCESS) { | |||
GELOGE(FAILED, "Parse feature from acl cfg failed."); | |||
return FAILED; | |||
} | |||
is_profiling_ = true; | |||
} catch (...) { | |||
GELOGE(FAILED, "Json conf is not invalid !"); | |||
return ge::PARAM_INVALID; | |||
} | |||
#endif | |||
return ge::SUCCESS; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg( | |||
const Json &features) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
try { | |||
for (size_t i = 0; i < features.size(); ++i) { | |||
Json &feature = features[i]; | |||
const Json &feature = features[i]; | |||
if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) { | |||
continue; | |||
} | |||
const std::string &name = feature[kName]; | |||
if (name == "op_trace") { | |||
GELOGI("Op trace config from acl"); | |||
Json &conf = feature[kConf]; | |||
Json &events = conf[0][kEvents]; | |||
const Json &conf = feature[kConf]; | |||
const Json &events = conf[0][kEvents]; | |||
const std::string &ai_core_events = events[0][kAiCoreEvents]; | |||
GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str()); | |||
is_op_trace_ = true; | |||
// op trace get conf | |||
ProfMgrConf prof_mgr_conf; | |||
int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf); | |||
if (result != 0) { | |||
@@ -154,10 +169,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_); | |||
} else if (name == "task_trace") { | |||
is_op_trace_ = false; | |||
if (feature.find(kConf) != feature.end()) { | |||
const Json &conf = feature[kConf]; | |||
std::stringstream task_trace_conf; | |||
task_trace_conf << conf; | |||
task_trace_conf_ = task_trace_conf.str(); | |||
} | |||
GELOGI("Task trace config from acl"); | |||
} else if (name == "system_trace") { | |||
is_op_trace_ = false; | |||
Json &conf = feature[kConf]; | |||
const Json &conf = feature[kConf]; | |||
std::stringstream system_trace_conf; | |||
system_trace_conf << conf; | |||
system_trace_conf_ = system_trace_conf.str(); | |||
@@ -165,10 +186,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
} | |||
profiling_opts_.push_back(name); | |||
} | |||
is_profiling_ = true; | |||
} catch (...) { | |||
GELOGE(FAILED, "Json conf is not invalid !"); | |||
GELOGE(ge::PARAM_INVALID, "Json conf feature is not invalid !"); | |||
return ge::PARAM_INVALID; | |||
} | |||
#endif | |||
@@ -235,6 +254,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||
p_device[kDeviceID] = std::to_string(device_id); | |||
p_device[kJobID] = job_id_; | |||
p_device[kTraceID] = std::to_string(GetContext().TraceId()); | |||
if (!prof_dir_.empty()) { | |||
p_device[kProfDir] = prof_dir_; | |||
GELOGI("Prof dir: %s.", prof_dir_.c_str()); | |||
} | |||
Json features; | |||
if (is_op_trace_) { | |||
@@ -258,6 +281,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||
Json f; | |||
if (profiling_opts_[i] == "system_trace") { | |||
f[kConf] = nlohmann::json::parse(system_trace_conf_); | |||
} else if (profiling_opts_[i] == "task_trace") { | |||
if (!task_trace_conf_.empty()) { | |||
f[kConf] = nlohmann::json::parse(task_trace_conf_); | |||
} | |||
} | |||
f[kName] = profiling_opts_[i]; | |||
features[i] = f; | |||
@@ -17,6 +17,7 @@ | |||
#ifndef GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | |||
#define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | |||
#include <nlohmann/json.hpp> | |||
#include <map> | |||
#include <string> | |||
#include <vector> | |||
@@ -30,6 +31,7 @@ | |||
using std::map; | |||
using std::string; | |||
using std::vector; | |||
using Json = nlohmann::json; | |||
namespace ge { | |||
const std::string GE_PROFILING_MODULE = "Framework"; | |||
@@ -84,11 +86,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
void PluginUnInit(const std::string &module) const; | |||
private: | |||
ge::Status ParseFeaturesFromAclCfg(const Json &feature); | |||
bool is_profiling_ = false; | |||
bool is_op_trace_ = false; | |||
bool is_load_ = false; | |||
int32_t op_trace_iter_num_ = 0; | |||
string job_id_; | |||
string prof_dir_; | |||
vector<int32_t> device_id_; | |||
vector<string> op_trace_conf_; | |||
vector<string> profiling_opts_; | |||
@@ -96,6 +100,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
string recv_profiling_config_; | |||
string send_profiling_config_; | |||
string system_trace_conf_; | |||
string task_trace_conf_; | |||
const ProfilingEngineImpl engine_; | |||
}; | |||
} // namespace ge | |||
@@ -292,6 +292,7 @@ REGISTER_OPTYPE_DEFINE(BASICLSTMCELL, "BasicLSTMCell"); | |||
REGISTER_OPTYPE_DEFINE(GETNEXT, "GetNext"); | |||
REGISTER_OPTYPE_DEFINE(INITDATA, "InitData"); | |||
REGISTER_OPTYPE_DEFINE(REFIDENTITY, "RefIdentity"); | |||
REGISTER_OPTYPE_DEFINE(BITCAST, "Bitcast"); | |||
/***************Ann special operator*************************/ | |||
REGISTER_OPTYPE_DEFINE(ANN_MEAN, "AnnMean"); | |||
@@ -382,6 +383,8 @@ REGISTER_OPTYPE_DEFINE(HCOMALLREDUCE, "HcomAllReduce"); | |||
REGISTER_OPTYPE_DEFINE(HCOMREDUCESCATTER, "HcomReduceScatter"); | |||
REGISTER_OPTYPE_DEFINE(HCOMSEND, "HcomSend"); | |||
REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); | |||
REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead"); | |||
REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite"); | |||
REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); | |||
REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp"); | |||
@@ -363,7 +363,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const | |||
std::map<std::string, std::string> args_map; | |||
if (file_path.empty()) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {atc_param}); | |||
GELOGW("Input parameter's value is empty."); | |||
GELOGW("Input parameter %s is empty.", file_path.c_str()); | |||
return false; | |||
} | |||
std::string real_path = RealPath(file_path.c_str()); | |||
@@ -181,13 +181,12 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||
GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str()); | |||
return ""; | |||
} | |||
string ge_core_type; | |||
std::string ge_core_type; | |||
Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type); | |||
if (ret != SUCCESS) { | |||
GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE"); | |||
} | |||
string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine; | |||
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE")); | |||
std::string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine; | |||
GELOGD("engine type will exclude: %s", exclude_core_Type.c_str()); | |||
std::map<std::string, std::string> unsupported_reasons; | |||
for (const auto &it : op_infos) { | |||
if (it.engine == exclude_core_Type) { | |||
@@ -204,7 +203,7 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||
checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time; | |||
op_desc->SetOpEngineName(it.engine); | |||
op_desc->SetOpKernelLibName(kernel_name); | |||
GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s into op_desc %s", kernel_name.c_str(), | |||
GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s to op_desc %s", kernel_name.c_str(), | |||
it.engine.c_str(), op_desc->GetName().c_str()); | |||
return it.engine; | |||
} else { | |||
@@ -222,6 +221,9 @@ std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) { | |||
unsupported_reasons.emplace(kernel_name, unsupported_reason); | |||
GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s", | |||
kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); | |||
if (!op_desc->HasAttr("_is_ge_op")) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("W11001", {"opname"}, {op_desc->GetName()}); | |||
} | |||
} | |||
} else { | |||
GELOGW( | |||
@@ -371,7 +373,7 @@ Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std: | |||
} | |||
Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle handle) { | |||
GELOGI("Begin to read json file"); | |||
GELOGD("Begin to read json file"); | |||
if (file_path.empty()) { | |||
GELOGE(FAILED, "Json path %s is not valid", file_path.c_str()); | |||
return FAILED; | |||
@@ -406,12 +408,12 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h | |||
return FAILED; | |||
} | |||
ifs.close(); | |||
GELOGI("Read json file success"); | |||
GELOGD("Read json file success"); | |||
return SUCCESS; | |||
} | |||
Status DNNEngineManager::CheckJsonFile() { | |||
GELOGI("Begin to check json file"); | |||
GELOGD("Begin to check json file"); | |||
for (auto &it : engines_map_) { | |||
std::string engine_name = it.first; | |||
int count = 0; | |||
@@ -431,7 +433,7 @@ Status DNNEngineManager::CheckJsonFile() { | |||
return FAILED; | |||
} | |||
} | |||
GELOGI("Check json file success"); | |||
GELOGD("Check json file success"); | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -60,6 +60,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} | |||
"../graph/load/new_model_manager/task_info/task_info.cc" | |||
"../graph/load/new_model_manager/tbe_handle_store.cc" | |||
"../graph/load/new_model_manager/zero_copy_task.cc" | |||
"../graph/load/new_model_manager/zero_copy_offset.cc" | |||
"../graph/manager/graph_caching_allocator.cc" | |||
"../graph/manager/graph_manager_utils.cc" | |||
"../graph/manager/graph_mem_allocator.cc" | |||
@@ -36,6 +36,9 @@ | |||
#include "mmpa/mmpa_api.h" | |||
#include "single_op/single_op_manager.h" | |||
using std::string; | |||
using std::vector; | |||
namespace { | |||
const size_t kDynamicBatchSizeVecSize = 1; | |||
const size_t kStaticBatchInfoSize = 1; | |||
@@ -102,20 +105,36 @@ void SetDynamicInputDataFlag(const ge::RunModelData &input_data, const std::vect | |||
ge::InputData &inputs) { | |||
inputs.is_dynamic_batch = true; | |||
std::string batch_label; | |||
size_t match_idx = 0; | |||
for (size_t i = 0; i < batch_info.size(); ++i) { | |||
if (batch_info[i].size() == kDynamicBatchSizeVecSize && | |||
batch_info[i][0] == static_cast<int64_t>(input_data.dynamic_batch_size)) { | |||
batch_label = kBatchLabel + std::to_string(i); | |||
inputs.batch_label = batch_label; | |||
// dynamic_dims | |||
if (input_data.dynamic_dims.size() != 0) { | |||
bool is_match = true; | |||
for (size_t j = 0; j < static_cast<size_t>(input_data.dynamic_dims.size()); ++j) { | |||
if (static_cast<uint64_t>(batch_info[i][j]) != input_data.dynamic_dims[j]) { | |||
is_match = false; | |||
break; | |||
} | |||
} | |||
if (is_match) { | |||
match_idx = i; | |||
break; | |||
} | |||
// dynamic_batch_size | |||
} else if (batch_info[i].size() == kDynamicBatchSizeVecSize && | |||
batch_info[i][0] == static_cast<int64_t>(input_data.dynamic_batch_size)) { | |||
match_idx = i; | |||
break; | |||
// dynamic_image_size | |||
} else if (batch_info[i].size() == kDynamicImageSizeVecSize && | |||
batch_info[i][0] == static_cast<int64_t>(input_data.dynamic_image_height) && | |||
batch_info[i][1] == static_cast<int64_t>(input_data.dynamic_image_width)) { | |||
batch_label = kBatchLabel + std::to_string(i); | |||
inputs.batch_label = batch_label; | |||
match_idx = i; | |||
break; | |||
} | |||
} | |||
batch_label = kBatchLabel + std::to_string(match_idx); | |||
inputs.batch_label = batch_label; | |||
GELOGI("current batch label:%s", batch_label.c_str()); | |||
} | |||
@@ -225,39 +244,41 @@ Status GeExecutor::Finalize() { | |||
Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
uint64_t batch_size) { | |||
if (dynamic_input_addr == nullptr) { | |||
GELOGE(FAILED, "Dynamic input addr is nullptr!"); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "Dynamic input addr is nullptr!"); | |||
return PARAM_INVALID; | |||
} | |||
uint64_t size = sizeof(uint64_t); | |||
if (length < size) { | |||
GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, size); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, size); | |||
return PARAM_INVALID; | |||
} | |||
// Verify whether the input dynamic batch matches the model gear | |||
std::vector<std::vector<int64_t>> batch_info; | |||
std::vector<uint64_t> batch_num{batch_size}; | |||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | |||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Get dynamic input info failed."); | |||
return FAILED; | |||
GELOGE(ret, "Get dynamic input info failed."); | |||
return ret; | |||
} | |||
if (!IsDynamicBatchSizeMatchModel(batch_size, batch_info)) { | |||
GELOGE(FAILED, "The current dynamic input does not match the gear of the model."); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "The current dynamic input does not match the gear of the model."); | |||
return PARAM_INVALID; | |||
} | |||
ret = GraphExecutor::SetDynamicSize(model_id, batch_num); | |||
ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast<int32_t>(DYNAMIC_BATCH)); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Set dynamic size failed"); | |||
return FAILED; | |||
GELOGE(ret, "Set dynamic size failed"); | |||
return ret; | |||
} | |||
// memcpy dynamic_batch_size from host to device | |||
if (rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "memcpy dynamic batch input data failed!"); | |||
return FAILED; | |||
rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "memcpy dynamic batch input data failed! ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -265,40 +286,42 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad | |||
Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
uint64_t image_height, uint64_t image_width) { | |||
if (dynamic_input_addr == nullptr) { | |||
GELOGE(FAILED, "Dynamic input addr is nullptr!"); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "Dynamic input addr is nullptr!"); | |||
return PARAM_INVALID; | |||
} | |||
uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint64_t); | |||
if (length < dynamic_input_size) { | |||
GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | |||
return PARAM_INVALID; | |||
} | |||
// Verify whether the input dynamic resolution matches the model gear | |||
std::vector<std::vector<int64_t>> batch_info; | |||
std::vector<uint64_t> batch_num{image_height, image_width}; | |||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | |||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Get dynamic input info failed."); | |||
return FAILED; | |||
GELOGE(ret, "Get dynamic input info failed."); | |||
return ret; | |||
} | |||
if (!IsDynamicImageSizeMatchModel(image_height, image_width, batch_info)) { | |||
GELOGE(FAILED, "The current dynamic input does not match the gear of the model."); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "The current dynamic input does not match the gear of the model."); | |||
return PARAM_INVALID; | |||
} | |||
ret = GraphExecutor::SetDynamicSize(model_id, batch_num); | |||
ret = GraphExecutor::SetDynamicSize(model_id, batch_num, static_cast<int32_t>(DYNAMIC_IMAGE)); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Set dynamic size failed"); | |||
return FAILED; | |||
GELOGE(ret, "Set dynamic size failed"); | |||
return ret; | |||
} | |||
// Memcpy dynamic resolution height from host to device | |||
if (rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) != | |||
RT_ERROR_NONE) { | |||
GELOGE(FAILED, "memcpy dynamic resolution input data failed!"); | |||
return FAILED; | |||
rtError_t rt_ret = | |||
rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "memcpy dynamic resolution input data failed! ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
uint64_t remain_size = length - sizeof(uint64_t); | |||
@@ -311,16 +334,109 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad | |||
return SUCCESS; | |||
} | |||
Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) { | |||
Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
const vector<uint64_t> &dynamic_dims) { | |||
if (dynamic_input_addr == nullptr) { | |||
GELOGE(FAILED, "Dynamic input addr is nullptr!"); | |||
return FAILED; | |||
} | |||
Status ret = GraphExecutor::SetDynamicSize(model_id, dynamic_dims, static_cast<int32_t>(DYNAMIC_DIMS)); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Set dynamic size failed"); | |||
return FAILED; | |||
} | |||
vector<uint64_t> cur_dynamic_dims; | |||
if (GetCurDynamicDims(model_id, dynamic_dims, cur_dynamic_dims) != SUCCESS) { | |||
GELOGE(FAILED, "GetCurDynamicDims failed."); | |||
return FAILED; | |||
} | |||
size_t dynamic_dim_num = cur_dynamic_dims.size(); | |||
uint64_t dynamic_input_size = static_cast<uint64_t>(dynamic_dim_num * sizeof(uint64_t)); | |||
if (length < dynamic_input_size) { | |||
GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); | |||
return FAILED; | |||
} | |||
for (uint32_t i = 0; i < dynamic_dim_num; ++i) { | |||
// Memcpy dynamic dim[i] from host to device | |||
if (rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(dynamic_input_addr) + sizeof(uint64_t) * i), | |||
length - sizeof(uint64_t) * i, &cur_dynamic_dims[i], sizeof(uint64_t), | |||
RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "memcpy dynamic resolution input data failed!"); | |||
return FAILED; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> &combined_dims, | |||
vector<uint64_t> &cur_dynamic_dims) { | |||
vector<vector<int64_t>> combined_batch; | |||
if (GraphExecutor::GetCombinedDynamicDims(model_id, combined_batch) != SUCCESS) { | |||
GELOGE(FAILED, "Get combined dynamic dims info failed."); | |||
return FAILED; | |||
} | |||
if (combined_batch.empty()) { | |||
GELOGE(FAILED, "Combined dynamic dims is empty."); | |||
return FAILED; | |||
} | |||
if (combined_dims.size() != combined_batch[0].size()) { | |||
GELOGE(FAILED, "Input dynamic dims's dimension size[%zu] is different from model[%zu].", combined_dims.size(), | |||
combined_batch[0].size()); | |||
return FAILED; | |||
} | |||
bool matched = false; | |||
size_t idx = 0; | |||
for (size_t i = 0; i < combined_batch.size(); i++) { | |||
bool is_match = true; | |||
for (size_t j = 0; j < combined_dims.size(); j++) { | |||
if (combined_dims[j] != static_cast<uint64_t>(combined_batch[i][j])) { | |||
is_match = false; | |||
break; | |||
} | |||
} | |||
if (is_match) { | |||
idx = i; | |||
matched = true; | |||
break; | |||
} | |||
} | |||
if (!matched) { | |||
GELOGE(FAILED, "Input dynamic dims can not match model."); | |||
return FAILED; | |||
} | |||
// batch_info save the dynamic info of combined_dims | |||
vector<vector<int64_t>> batch_info; | |||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
if (GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type) != SUCCESS) { | |||
GELOGE(FAILED, "Get dynamic input info failed."); | |||
return FAILED; | |||
} | |||
cur_dynamic_dims.clear(); | |||
for (size_t i = 0; i < batch_info[idx].size(); i++) { | |||
cur_dynamic_dims.emplace_back(static_cast<uint64_t>(batch_info[idx][i])); | |||
} | |||
return SUCCESS; | |||
} | |||
Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||
GELOGI("Begin to get current shape"); | |||
if (!isInit_) { | |||
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
return GE_EXEC_NOT_INIT; | |||
} | |||
Status ret = GraphExecutor::GetCurShape(model_id, batch_info); | |||
Status ret = GraphExecutor::GetCurShape(model_id, batch_info, dynamic_type); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Get current shape failed"); | |||
return FAILED; | |||
GELOGE(ret, "Get current shape failed"); | |||
return ret; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -330,12 +446,12 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||
const kAippDynamicPara &aippParms) { | |||
GELOGI("Enter to SetDynamicAippData."); | |||
if (dynamic_input_addr == nullptr) { | |||
GELOGE(FAILED, "Dynamic aipp input addr is nullptr!"); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "Dynamic aipp input addr is nullptr!"); | |||
return PARAM_INVALID; | |||
} | |||
if (aippBatchPara.empty()) { | |||
GELOGE(FAILED, "aippBatchPara is empty."); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "aippBatchPara is empty."); | |||
return PARAM_INVALID; | |||
} | |||
uint64_t batch_num = aippBatchPara.size(); | |||
uint64_t real_aippParms_size = sizeof(kAippDynamicPara) - sizeof(kAippDynamicBatchPara); | |||
@@ -345,24 +461,25 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||
"batch num is %lu, struct_len is %lu", | |||
model_id, length, batch_num, struct_len); | |||
if (struct_len > length) { | |||
GELOGE(FAILED, "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length); | |||
return PARAM_INVALID; | |||
} | |||
// Memcpy real kAippDynamicBatchPara from host to device | |||
if (rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE) != | |||
RT_ERROR_NONE) { | |||
GELOGE(FAILED, "memcpy real_aippParms_size failed!"); | |||
return FAILED; | |||
rtError_t rt_ret = rtMemcpy(dynamic_input_addr, length, &aippParms, real_aippParms_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "memcpy real_aippParms_size failed! ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
uint64_t remain_len = length - real_aippParms_size; | |||
uint8_t *aipp_batch_para_dev = reinterpret_cast<uint8_t *>(dynamic_input_addr) + real_aippParms_size; | |||
for (uint64_t i = 0; i < batch_num; ++i) { | |||
if (rtMemcpy(reinterpret_cast<void *>(aipp_batch_para_dev + i * sizeof(kAippDynamicBatchPara)), | |||
(remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]), sizeof(kAippDynamicBatchPara), | |||
RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "memcpy kAippDynamicBatchPara input data failed!"); | |||
return FAILED; | |||
rt_ret = rtMemcpy(reinterpret_cast<void *>(aipp_batch_para_dev + i * sizeof(kAippDynamicBatchPara)), | |||
(remain_len - i * sizeof(kAippDynamicBatchPara)), &(aippBatchPara[i]), | |||
sizeof(kAippDynamicBatchPara), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "memcpy kAippDynamicBatchPara input data failed! ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
} | |||
return SUCCESS; | |||
@@ -429,7 +546,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||
} | |||
Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed."); | |||
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id); | |||
return FAILED; | |||
} | |||
return GraphLoader::UnloadModel(model_id); | |||
@@ -468,17 +585,19 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||
output_formats, new_model_desc); | |||
if (ret != domi::SUCCESS) { | |||
GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); | |||
return TransferDomiErrorCode(ret); | |||
return ret; | |||
} | |||
if (input_formats.size() != input_desc_infos.size()) { | |||
GELOGE(ge::FAILED, "input_formats.size() != input_desc_infos.size()."); | |||
return ge::FAILED; | |||
GELOGE(ge::PARAM_INVALID, "input_formats size %zu is not equal to input_desc_infos size %zu.", input_formats.size(), | |||
input_desc_infos.size()); | |||
return ge::PARAM_INVALID; | |||
} | |||
if (output_formats.size() != output_desc_infos.size()) { | |||
GELOGE(ge::FAILED, "output_formats.size() != output_desc_infos.size()."); | |||
return ge::FAILED; | |||
GELOGE(ge::PARAM_INVALID, "output_formats size %zu is not equal to output_desc_infos size %zu.", | |||
output_formats.size(), output_desc_infos.size()); | |||
return ge::PARAM_INVALID; | |||
} | |||
// Transfer data to TensorDesc | |||
@@ -494,16 +613,18 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||
/// @brief Get dynamic batch_info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @param [out] dynamic_type | |||
/// @return execute result | |||
/// | |||
Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) { | |||
Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
int32_t &dynamic_type) { | |||
GELOGI("Begin to get dynamic batch info."); | |||
if (!isInit_) { | |||
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
return GE_EXEC_NOT_INIT; | |||
} | |||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | |||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "GetDynamicBatchInfo failed."); | |||
return ret; | |||
@@ -515,6 +636,30 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get combined dynamic dims info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @return execute result | |||
/// | |||
Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64_t>> &batch_info) { | |||
GELOGI("Begin to get combined dynamic dims info."); | |||
if (!isInit_) { | |||
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
return GE_EXEC_NOT_INIT; | |||
} | |||
Status ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "GetCombinedDynamicDims failed."); | |||
return ret; | |||
} | |||
GELOGI("Get combined dynamic dims succ."); | |||
return SUCCESS; | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get AIPP input format | |||
/// @param [in] model_id | |||
/// @param [in] index | |||
@@ -628,8 +773,8 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da | |||
string filePath = RealPath(path.c_str()); | |||
if (filePath.empty()) { | |||
GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str()); | |||
return ge::FAILED; | |||
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "File path is invalid. please check your text file '%s'.", path.c_str()); | |||
return GE_EXEC_MODEL_PATH_INVALID; | |||
} | |||
GELOGI("load modelData from file: %s.", path.c_str()); | |||
std::string key_path; | |||
@@ -710,12 +855,20 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel | |||
GetDomiOutputData(run_output_data, output_data); | |||
if ((run_input_data.dynamic_batch_size != 0) || (run_input_data.dynamic_image_width != 0) || | |||
(run_input_data.dynamic_image_height != 0)) { | |||
(run_input_data.dynamic_image_height != 0) || (run_input_data.dynamic_dims.size() != 0)) { | |||
std::vector<std::vector<int64_t>> batch_info; | |||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); | |||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Get dynamic input info failed."); | |||
return FAILED; | |||
GELOGE(ret, "Get dynamic input info failed."); | |||
return ret; | |||
} | |||
if (dynamic_type == static_cast<int32_t>(DYNAMIC_DIMS)) { | |||
ret = GraphExecutor::GetCombinedDynamicDims(model_id, batch_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Get dynamic input info failed."); | |||
return FAILED; | |||
} | |||
} | |||
if (!batch_info.empty()) { | |||
SetDynamicInputDataFlag(run_input_data, batch_info, input_data); | |||
@@ -790,6 +943,11 @@ Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelDa | |||
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op); | |||
} | |||
Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, | |||
DynamicSingleOp **single_op) { | |||
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op); | |||
} | |||
Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | |||
std::vector<DataBuffer> &outputs) { | |||
if (executor == nullptr) { | |||
@@ -800,13 +958,21 @@ Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer | |||
return executor->ExecuteAsync(inputs, outputs); | |||
} | |||
ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector<GeTensorDesc> &input_desc, | |||
const vector<DataBuffer> &inputs, vector<GeTensorDesc> &output_desc, | |||
vector<DataBuffer> &outputs) { | |||
GE_CHECK_NOTNULL(executor); | |||
return executor->ExecuteAsync(input_desc, inputs, output_desc, outputs); | |||
} | |||
Status GeExecutor::ReleaseSingleOpResource(void *stream) { | |||
return SingleOpManager::GetInstance().ReleaseResource(stream); | |||
} | |||
Status GeExecutor::GetBatchInfoSize(uint32_t model_id, size_t &shape_count) { | |||
std::vector<std::vector<int64_t>> batch_info; | |||
Status ret = GetDynamicBatchInfo(model_id, batch_info); | |||
int32_t dynamic_type = static_cast<int32_t>(FIXED); | |||
Status ret = GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Calc batch info size failed. ret = %d", ret); | |||
return ret; | |||
@@ -26,6 +26,7 @@ local_ge_executor_src_files := \ | |||
../graph/load/new_model_manager/data_inputer.cc \ | |||
../graph/load/new_model_manager/data_dumper.cc \ | |||
../graph/load/new_model_manager/zero_copy_task.cc \ | |||
../graph/load/new_model_manager/zero_copy_offset.cc \ | |||
../graph/load/new_model_manager/task_info/task_info.cc \ | |||
../graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||
../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||
@@ -79,6 +80,7 @@ local_ge_executor_shared_library := \ | |||
libslog \ | |||
libmmpa \ | |||
libgraph \ | |||
libregister \ | |||
libmsprof \ | |||
local_ge_executor_ldflags := -lrt -ldl \ | |||
@@ -128,6 +130,7 @@ LOCAL_SHARED_LIBRARIES := \ | |||
libslog \ | |||
libmmpa \ | |||
libgraph \ | |||
libregister \ | |||
libmsprof \ | |||
LOCAL_LDFLAGS += $(local_ge_executor_ldflags) | |||
@@ -153,6 +156,7 @@ LOCAL_C_INCLUDES := $(local_ge_executor_c_include) | |||
LOCAL_STATIC_LIBRARIES := \ | |||
libge_common \ | |||
libgraph \ | |||
libregister \ | |||
libprotobuf \ | |||
LOCAL_SHARED_LIBRARIES := \ | |||
@@ -184,6 +188,7 @@ LOCAL_C_INCLUDES := $(local_ge_executor_c_include) | |||
LOCAL_STATIC_LIBRARIES := \ | |||
libge_common \ | |||
libgraph \ | |||
libregister \ | |||
libprotobuf \ | |||
LOCAL_SHARED_LIBRARIES := \ | |||
@@ -70,6 +70,7 @@ OMG_HOST_SRC_FILES := \ | |||
graph/passes/resource_pair_remove_control_pass.cc \ | |||
graph/passes/pass_utils.cc \ | |||
graph/passes/base_pass.cc \ | |||
graph/passes/bitcast_pass.cc \ | |||
graph/passes/constant_folding_pass.cc \ | |||
graph/passes/aicpu_constant_folding_pass.cc \ | |||
graph/passes/reshape_remove_pass.cc \ | |||
@@ -91,8 +92,10 @@ OMG_HOST_SRC_FILES := \ | |||
graph/passes/print_op_pass.cc \ | |||
graph/passes/no_use_reshape_remove_pass.cc \ | |||
graph/passes/iterator_op_pass.cc \ | |||
graph/passes/input_output_connection_identify_pass.cc \ | |||
graph/passes/atomic_addr_clean_pass.cc \ | |||
graph/passes/mark_same_addr_pass.cc \ | |||
graph/passes/mark_graph_unknown_status_pass.cc \ | |||
graph/common/omg_util.cc \ | |||
graph/common/bcast.cc \ | |||
graph/passes/dimension_compute_pass.cc \ | |||
@@ -107,6 +110,7 @@ OMG_HOST_SRC_FILES := \ | |||
graph/passes/isolated_op_remove_pass.cc \ | |||
graph/passes/permute_pass.cc \ | |||
graph/passes/ctrl_edge_transfer_pass.cc \ | |||
graph/passes/end_of_sequence_add_control_pass.cc \ | |||
host_kernels/broadcast_gradient_args_kernel.cc \ | |||
host_kernels/greater_kernel.cc \ | |||
host_kernels/gather_v2_kernel.cc \ | |||
@@ -185,6 +189,8 @@ OMG_HOST_SRC_FILES := \ | |||
graph/passes/hccl_group_pass.cc \ | |||
graph/passes/switch_fusion_pass.cc \ | |||
graph/passes/switch_split_pass.cc \ | |||
graph/passes/memcpy_addr_async_pass.cc \ | |||
graph/passes/set_input_output_offset_pass.cc \ | |||
OMG_DEVICE_SRC_FILES := $(OMG_HOST_SRC_FILES) | |||
@@ -203,6 +209,7 @@ OME_HOST_SRC_FILES := \ | |||
graph/load/new_model_manager/tbe_handle_store.cc \ | |||
graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||
graph/load/new_model_manager/zero_copy_task.cc \ | |||
graph/load/new_model_manager/zero_copy_offset.cc \ | |||
graph/load/new_model_manager/data_dumper.cc \ | |||
graph/load/new_model_manager/task_info/task_info.cc \ | |||
graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -13,7 +13,6 @@ | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | |||
#define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | |||
@@ -61,5 +61,6 @@ REGISTER_OP_CREATOR(SwitchN, GeDeletedOp); | |||
REGISTER_OP_CREATOR(RefMerge, GeDeletedOp); | |||
REGISTER_OP_CREATOR(RefSwitch, GeDeletedOp); | |||
REGISTER_OP_CREATOR(TransShape, GeDeletedOp); | |||
REGISTER_OP_CREATOR(Bitcast, GeDeletedOp); | |||
} // namespace ge_local | |||
} // namespace ge |
@@ -78,6 +78,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
graph/load/new_model_manager/task_info/task_info.cc \ | |||
graph/load/new_model_manager/tbe_handle_store.cc \ | |||
graph/load/new_model_manager/zero_copy_task.cc \ | |||
graph/load/new_model_manager/zero_copy_offset.cc \ | |||
graph/manager/graph_context.cc \ | |||
graph/manager/graph_manager.cc \ | |||
graph/manager/graph_manager_utils.cc \ | |||
@@ -98,10 +99,13 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
graph/passes/addn_pass.cc \ | |||
graph/passes/aicpu_constant_folding_pass.cc \ | |||
graph/passes/assert_pass.cc \ | |||
graph/passes/input_output_connection_identify_pass.cc \ | |||
graph/passes/atomic_addr_clean_pass.cc \ | |||
graph/passes/mark_same_addr_pass.cc \ | |||
graph/passes/mark_graph_unknown_status_pass.cc \ | |||
graph/partition/dynamic_shape_partition.cc \ | |||
graph/passes/base_pass.cc \ | |||
graph/passes/bitcast_pass.cc \ | |||
graph/passes/cast_remove_pass.cc \ | |||
graph/passes/cast_translate_pass.cc \ | |||
graph/passes/common_subexpression_elimination_pass.cc \ | |||
@@ -214,6 +218,9 @@ LIBGE_LOCAL_SRC_FILES := \ | |||
graph/passes/variable_prepare_op_pass.cc \ | |||
graph/passes/variable_ref_delete_op_pass.cc \ | |||
graph/passes/variable_ref_useless_control_out_delete_pass.cc \ | |||
graph/passes/end_of_sequence_add_control_pass.cc \ | |||
graph/passes/memcpy_addr_async_pass.cc \ | |||
graph/passes/set_input_output_offset_pass.cc \ | |||
graph/preprocess/graph_preprocess.cc \ | |||
graph/preprocess/insert_op/ge_aipp_op.cc \ | |||
graph/preprocess/insert_op/util_insert_aipp_op.cc \ | |||
@@ -49,15 +49,6 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint | |||
return true; | |||
} | |||
bool ModelRunner::DistributeTask(uint32_t model_id) { | |||
auto model_iter = runtime_models_.find(model_id); | |||
if (model_iter == runtime_models_.end()) { | |||
GELOGE(PARAM_INVALID, "Model id %u not found.", model_id); | |||
return false; | |||
} | |||
return model_iter->second->DistributeTask(); | |||
} | |||
bool ModelRunner::LoadModelComplete(uint32_t model_id) { | |||
auto model_iter = runtime_models_.find(model_id); | |||
if (model_iter == runtime_models_.end()) { | |||
@@ -100,16 +91,6 @@ const std::map<std::string, std::shared_ptr<RuntimeInfo>> &ModelRunner::GetRunti | |||
return model_iter->second->GetRuntimeInfoMap(); | |||
} | |||
void *ModelRunner::GetModelHandle(uint32_t model_id) const { | |||
auto model_iter = runtime_models_.find(model_id); | |||
if (model_iter == runtime_models_.end()) { | |||
GELOGW("Model id %u not found.", model_id); | |||
return nullptr; | |||
} | |||
return model_iter->second->GetModelHandle(); | |||
} | |||
bool ModelRunner::UnloadModel(uint32_t model_id) { | |||
auto iter = runtime_models_.find(model_id); | |||
if (iter != runtime_models_.end()) { | |||
@@ -283,16 +283,14 @@ bool RuntimeModel::Load(uint32_t device_id, uint64_t session_id, std::shared_ptr | |||
} | |||
GenerateTask(device_id, session_id, davinci_model); | |||
return status; | |||
} | |||
bool RuntimeModel::DistributeTask() { | |||
bool status = LoadTask(); | |||
status = LoadTask(); | |||
if (!status) { | |||
GELOGE(FAILED, "DistributeTask failed"); | |||
return false; | |||
return status; | |||
} | |||
return true; | |||
return status; | |||
} | |||
bool RuntimeModel::Run() { | |||
@@ -35,12 +35,10 @@ class RuntimeModel { | |||
~RuntimeModel(); | |||
bool Load(uint32_t device_id, uint64_t session_id, std::shared_ptr<DavinciModel> &davinci_model); | |||
bool DistributeTask(); | |||
bool LoadComplete(); | |||
const std::vector<uint32_t> &GetTaskIdList() const; | |||
const std::vector<uint32_t> &GetStreamIdList() const; | |||
const std::map<std::string, std::shared_ptr<RuntimeInfo>> &GetRuntimeInfoMap() const { return runtime_info_map_; } | |||
rtModel_t GetModelHandle() const { return rt_model_handle_; } | |||
bool Run(); | |||
bool CopyInputData(const InputData &input_data); | |||
bool GetInputOutputDescInfo(bool zero_copy, std::vector<InputOutputDescInfo> *input_desc, | |||
@@ -23,15 +23,15 @@ | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "ge/ge_api.h" | |||
#include "graph/ge_context.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/ge_context.h" | |||
#include "graph/manager/graph_manager.h" | |||
#include "graph/manager/util/rt_context_util.h" | |||
#include "graph/opsproto_manager.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/type_utils.h" | |||
#include "model/ge_model.h" | |||
#include "init/gelib.h" | |||
#include "model/ge_model.h" | |||
using std::map; | |||
using std::string; | |||
@@ -46,6 +46,16 @@ const char *const kFileNameSuffix = "online"; | |||
std::map<ge::OpEngineType, std::string> engine_type_map{ | |||
{ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; | |||
bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { | |||
for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) { | |||
if (tensor_desc->MutableShape().IsUnknownShape()) { | |||
GELOGI("Contains unknown shape input. set is_dynamic_input to true."); | |||
return true; | |||
} | |||
} | |||
return false; | |||
} | |||
} // namespace | |||
namespace ge { | |||
@@ -55,6 +65,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi | |||
GELOGI("CheckEngineType: use default engine."); | |||
return SUCCESS; | |||
} | |||
// get op engine name | |||
string op_engine_name; | |||
auto iter = engine_type_map.find(engine_type); | |||
@@ -65,6 +76,12 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi | |||
GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast<int>(engine_type)); | |||
return FAILED; | |||
} | |||
if (op_desc->HasAttr(ATTR_NAME_UNREGST_OPPATH)) { | |||
op_desc->SetOpEngineName(op_engine_name); | |||
op_desc->SetOpKernelLibName(op_engine_name); | |||
return SUCCESS; | |||
} | |||
// set op engine name and opkernelLib. when engine support | |||
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance(); | |||
if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { | |||
@@ -195,18 +212,19 @@ static void GetOpsProtoPath(string &opsproto_path) { | |||
class GeGenerator::Impl { | |||
public: | |||
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models); | |||
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models); | |||
Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); | |||
Status SaveParams(GeModelPtr &ge_model, const string &type, const map<string, GeAttrValue> &attrs, | |||
const vector<GeTensor> &inputs, const vector<GeTensor> &outputs); | |||
Status GenerateInfershapeGraph(const Graph &graph, GraphId &graph_id); | |||
Status GenerateInfershapeGraph(const Graph &graph); | |||
GraphManager graph_manager_; | |||
SaveParam save_param_; | |||
bool is_offline_ = true; | |||
bool is_singleop_unregistered_ = false; | |||
private: | |||
static std::string Trim(const std::string &str); | |||
@@ -280,10 +298,9 @@ Status GeGenerator::GenerateOnlineModel(const Graph &graph, const vector<GeTenso | |||
} | |||
Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) { | |||
GraphId graph_id; | |||
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | |||
Status ret = impl_->GenerateInfershapeGraph(graph, graph_id); | |||
Status ret = impl_->GenerateInfershapeGraph(graph); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Dump infershape json failed"); | |||
if (impl_->graph_manager_.Finalize() != SUCCESS) { | |||
@@ -422,11 +439,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||
} else { | |||
ge::RtContextUtil::GetInstance().SetNormalModeContext(ctx); | |||
} | |||
GraphId graph_id; | |||
GeRootModelPtr ge_root_model = nullptr; | |||
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | |||
impl_->is_offline_ = is_offline; | |||
Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model); | |||
Status ret = impl_->BuildModel(graph, inputs, ge_root_model); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Build model failed."); | |||
if (impl_->graph_manager_.Finalize() != SUCCESS) { | |||
@@ -478,6 +495,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
return PARAM_INVALID; | |||
} | |||
domi::GetContext().is_dynamic_input = ContainsDynamicInpus(*op_desc); | |||
if (op_desc->HasAttr(ATTR_NAME_UNREGST_OPPATH)) { | |||
impl_->is_singleop_unregistered_ = true; | |||
} | |||
// 0. Save original attributes. | |||
OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); | |||
GE_CHECK_NOTNULL(op_desc_tmp); | |||
@@ -494,9 +517,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
// 2. Create ComputeGraph. | |||
string name = ge::CurrentTimeInStr() + "_" + model_file_name; | |||
ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(name); | |||
if (compute_graph == nullptr) { | |||
return INTERNAL_ERROR; | |||
} | |||
GE_CHECK_NOTNULL_EXEC(compute_graph, return INTERNAL_ERROR); | |||
// 3. Add Node to ComputeGraph. | |||
@@ -529,16 +549,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); | |||
GELOGI("ATC parser success in single op build."); | |||
GraphId graph_id; | |||
GeRootModelPtr ge_root_model = nullptr; | |||
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | |||
impl_->is_offline_ = is_offline; | |||
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, graph_id, ge_root_model)); | |||
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model)); | |||
map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs(); | |||
GE_CHECK_NOTNULL(ge_root_model); | |||
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | |||
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||
if (name_to_ge_model.empty()) { | |||
GELOGE(PARAM_INVALID, "GetSubgraphInstanceNameToModel is empty."); | |||
return PARAM_INVALID; | |||
} | |||
GeModelPtr &ge_model = name_to_ge_model.begin()->second; | |||
GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | |||
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); | |||
GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); | |||
@@ -608,7 +631,7 @@ Status GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr & | |||
return SUCCESS; | |||
} | |||
Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, | |||
Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs, | |||
GeRootModelPtr &ge_root_model) { | |||
static GraphId id = 0; | |||
const std::map<std::string, std::string> options; | |||
@@ -627,19 +650,22 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||
return INTERNAL_ERROR; | |||
} | |||
uint64_t session_id = static_cast<uint64_t>(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us | |||
ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id); | |||
if (is_singleop_unregistered_) { | |||
ret = graph_manager_.BuildGraphForUnregisteredOp(id, inputs, ge_root_model, session_id); | |||
} else { | |||
ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id); | |||
} | |||
if (ret != SUCCESS) { | |||
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id); | |||
return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | |||
} | |||
graph_id = id; | |||
id += 1; | |||
return SUCCESS; | |||
} | |||
Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &graph_id) { | |||
Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph) { | |||
static GraphId id = 0; | |||
const std::map<std::string, std::string> options; | |||
Status ret = graph_manager_.AddGraph(id, graph, options); | |||
@@ -654,8 +680,6 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &g | |||
GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager generate graph failed"); | |||
return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; | |||
} | |||
graph_id = id; | |||
id += 1; | |||
return SUCCESS; | |||
@@ -37,21 +37,6 @@ const int32_t kInvalidPerfLevel = -1; | |||
namespace ge { | |||
GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {} | |||
Status GraphBuilder::MarkGraph(ComputeGraphPtr &graph) { | |||
GE_CHECK_NOTNULL(graph); | |||
bool is_unknown_shape = false; | |||
for (const auto &node : graph->GetDirectNode()) { | |||
GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), | |||
"Get node[%s] shape status failed!", node->GetName().c_str()); | |||
if (is_unknown_shape) { | |||
break; | |||
} | |||
} | |||
graph->SetGraphUnknownFlag(is_unknown_shape); | |||
GELOGD("mark graph [%s] unknown status success! value is %d", graph->GetName().c_str(), is_unknown_shape); | |||
return SUCCESS; | |||
} | |||
void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { | |||
stream_max_parallel_num_ = options.stream_max_parallel_num; | |||
hcom_parallel_ = options.hcom_parallel; | |||
@@ -277,14 +262,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | |||
uint64_t session_id) { | |||
GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | |||
// mark unknown shape attr | |||
for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { | |||
auto status = MarkGraph(sub_graph); | |||
if (status != SUCCESS) { | |||
GELOGE(FAILED, "mark graph failed!"); | |||
return status; | |||
} | |||
} | |||
// Update Root Graph Data size | |||
for (auto &node : comp_graph->GetDirectNode()) { | |||
auto op_desc = node->GetOpDesc(); | |||
@@ -297,11 +274,22 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||
} | |||
// | |||
for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { | |||
// exclude functional subgraph in known subgraph | |||
if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { | |||
continue; | |||
} | |||
if (sub_graph->GetGraphUnknownFlag()) { | |||
// unknown shape build flow | |||
GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id), | |||
"Build for unknown shape graph failed."); | |||
} else { | |||
// reset functional subgraph parent graph as known subgraph | |||
for (const auto &node : sub_graph->GetDirectNode()) { | |||
for (const auto &sub_graph_name : node->GetOpDesc()->GetSubgraphInstanceNames()) { | |||
auto sub_sub_graph = comp_graph->GetSubgraph(sub_graph_name); | |||
GE_CHK_STATUS_RET(sub_graph->AddSubgraph(sub_sub_graph), "Failed add subgraph to known graph."); | |||
} | |||
} | |||
// known shape build flow | |||
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), | |||
"Build for known shape graph failed."); | |||
@@ -450,6 +438,11 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) | |||
GELOGI("Begin to calc dynamic shape graph data[%s] size.", op_desc->GetName().c_str()); | |||
// data op only has one output anchor | |||
ge::GeTensorDesc output_desc = op_desc->GetOutputDesc(0); | |||
if (output_desc.MutableShape().IsUnknownShape()) { | |||
GELOGI("No need to update dynamic shape graph data output size for unknown shape data."); | |||
return SUCCESS; | |||
} | |||
int64_t output_size = 0; | |||
if (ge::TensorUtils::GetSize(output_desc, output_size) != SUCCESS) { | |||
GELOGW("Get size failed!"); | |||
@@ -67,7 +67,6 @@ class GraphBuilder { | |||
GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | |||
Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | |||
uint64_t session_id = INVALID_SESSION_ID); | |||
Status MarkGraph(ComputeGraphPtr &graph); | |||
int build_mode_; | |||
std::map<std::string, int> stream_max_parallel_num_; | |||
@@ -55,6 +55,13 @@ using std::unordered_map; | |||
using std::unordered_set; | |||
using std::vector; | |||
void AlignMemOffset(size_t &mem_align_size) { | |||
if (mem_align_size <= 0) { | |||
return; | |||
} | |||
mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; | |||
} | |||
void MemoryBlock::SetHeadOffset(size_t offset) { | |||
head_offset_ = offset; | |||
size_t child_offset = head_offset_; | |||
@@ -92,7 +99,7 @@ void MemoryBlock::Resize() { | |||
} else { | |||
size_t block_size = (child_block_size > *iter) ? child_block_size : *iter; | |||
if ((block_size > 0) && (block_size % MEM_ALIGN_SIZE != 0)) { | |||
block_size = (block_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; | |||
AlignMemOffset(block_size); | |||
} | |||
block_size_ = block_size; | |||
if (last_continuous_block_) { | |||
@@ -101,6 +108,20 @@ void MemoryBlock::Resize() { | |||
} | |||
} | |||
size_t MemoryBlock::AlignSize() const { | |||
size_t align_block_size = 0; | |||
auto iter = std::max_element(real_size_list_.begin(), real_size_list_.end()); | |||
if (iter == real_size_list_.end()) { | |||
GELOGW("real_size_list_ is empty"); | |||
} else { | |||
align_block_size = *iter; | |||
if ((align_block_size > 0) && (align_block_size % MEM_ALIGN_SIZE != 0)) { | |||
AlignMemOffset(align_block_size); | |||
} | |||
} | |||
return align_block_size; | |||
} | |||
bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { | |||
if (node_type_index_list_.empty()) { | |||
return false; | |||
@@ -133,31 +154,69 @@ bool MemoryBlock::IsSameLabel(std::string &first_batch_label) { | |||
} | |||
bool CanNotLifeReuse(MemoryBlock *block) { | |||
if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_ || block->continuous_block_) { | |||
if ((block == nullptr) || !block->reuse_mem_ || block->deleted_block_) { | |||
return true; | |||
} | |||
return false; | |||
} | |||
void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { | |||
// continuous memory case:only real_size is maximum can be reused and only one continuous memory in one block | |||
auto it_block = std::max_element(std::begin(block->NoAlignSizeList()), std::end(block->NoAlignSizeList())); | |||
auto it_this = std::max_element(std::begin(NoAlignSizeList()), std::end(NoAlignSizeList())); | |||
if (it_block != std::end(block->NoAlignSizeList()) && it_this != std::end(NoAlignSizeList())) { | |||
if ((continuous_block_ && block->continuous_block_) || (continuous_block_ && (*it_this < *it_block)) || | |||
(block->continuous_block_ && (*it_this > *it_block))) { | |||
GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", *it_this, | |||
continuous_block_, *it_block, block->continuous_block_); | |||
return; | |||
} | |||
} | |||
MemoryBlock *parent = nullptr; | |||
MemoryBlock *child = nullptr; | |||
// merge small block to large block | |||
if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { | |||
if ((block->child_offset_ + AlignSize()) <= *it_block) { | |||
parent = block; | |||
child = this; | |||
} | |||
} | |||
if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) { | |||
parent->child_blocks_.emplace_back(child); | |||
parent->child_offset_ += child->AlignSize(); | |||
child->deleted_block_ = true; | |||
GELOGI( | |||
"Add continuous block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" | |||
" block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", | |||
child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, | |||
parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); | |||
} | |||
} | |||
void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life) { | |||
if (CanNotLifeReuse(this) || CanNotLifeReuse(block)) { | |||
return; | |||
} | |||
if (block->continuous_block_) { | |||
AddContinuousLifeReuseBlock(block, total_node_depend_stream_life); | |||
return; | |||
} | |||
MemoryBlock *parent = nullptr; | |||
MemoryBlock *child = nullptr; | |||
// merge small block to large block | |||
if (block->GetDependLifeBegin(stream_id_, total_node_depend_stream_life) > GetLifeEnd()) { | |||
if ((child_offset_ + block->block_size_) <= block_size_) { | |||
if ((child_offset_ + block->AlignSize()) <= AlignSize()) { | |||
parent = this; | |||
child = block; | |||
} else if ((block->child_offset_ + block_size_) <= block->block_size_) { | |||
} else if ((block->child_offset_ + AlignSize()) <= block->AlignSize()) { | |||
parent = block; | |||
child = this; | |||
} | |||
} | |||
if ((parent != nullptr) && (child != nullptr) && child->child_blocks_.empty()) { | |||
parent->child_blocks_.emplace_back(child); | |||
parent->child_offset_ += child->block_size_; | |||
parent->child_offset_ += child->AlignSize(); | |||
child->deleted_block_ = true; | |||
GELOGI( | |||
"Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" | |||
@@ -431,7 +490,7 @@ size_t GetBlockSize(size_t size, const vector<int64_t> &ranges) { | |||
} | |||
GELOGW("Memory needed size:%zu is beyond the biggest block in memory ranges.", size); | |||
return 0; | |||
return size; | |||
} | |||
bool IsDirectOutputNode(const NodePtr &node, int idx) { | |||
@@ -465,34 +524,8 @@ void ReduceReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t | |||
} | |||
bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const MemoryBlock &reusable_block, | |||
size_t block_size, size_t real_size, bool continuous, int64_t atomic_addr_clean_id) { | |||
size_t block_size, size_t real_size, bool continuous) { | |||
bool can_reuse = false; | |||
// If node is before atomic_addr_clean node, the continus memory can't be reused. | |||
if (!reusable_block.NodeTypeIndexList().empty()) { | |||
auto node = reusable_block.NodeTypeIndexList()[0].node; | |||
if (node != nullptr) { | |||
auto op_desc = node->GetOpDesc(); | |||
if (op_desc != nullptr) { | |||
if ((op_desc->GetId() < atomic_addr_clean_id) && continuous) { | |||
return false; | |||
} | |||
} | |||
} | |||
} | |||
// continuous memory case:only real_size is maximum can be reused and only one continuous memory in one block | |||
if (continuous || reusable_block.continuous_block_) { | |||
auto it = | |||
std::max_element(std::begin(reusable_block.NoAlignSizeList()), std::end(reusable_block.NoAlignSizeList())); | |||
if (it != std::end(reusable_block.NoAlignSizeList())) { | |||
GE_IF_BOOL_EXEC((continuous && reusable_block.continuous_block_) || (continuous && (real_size < *it)) || | |||
(reusable_block.continuous_block_ && (real_size > *it)), | |||
GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", | |||
real_size, continuous, *it, reusable_block.continuous_block_); | |||
return false;); | |||
} | |||
} | |||
if (reusable_block.Size() == block_size) { | |||
can_reuse = true; | |||
} else { | |||
@@ -683,6 +716,34 @@ void BlockMemAssigner::PrintSymbolMap() { | |||
} | |||
} | |||
bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { | |||
if (n == nullptr) { | |||
GELOGE(FAILED, "Node is null."); | |||
return false; | |||
} | |||
// Get the continuous output type of the node, default is false | |||
bool is_output_continuous = false; | |||
auto node_desc = n->GetOpDesc(); | |||
if (node_desc == nullptr) { | |||
GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); | |||
return false; | |||
} | |||
// If GetBool fail, is_output_continuous is false. | |||
(void)ge::AttrUtils::GetBool(node_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); | |||
if (is_output_continuous) { | |||
if (n->GetOwnerComputeGraph() != nullptr) { | |||
string graph_name = n->GetOwnerComputeGraph()->GetName(); | |||
GELOGI("%s name[%s] set continuous, output size[%u].", graph_name.c_str(), n->GetName().c_str(), | |||
n->GetAllOutDataAnchorsSize()); | |||
return true; | |||
} | |||
} | |||
return false; | |||
} | |||
MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, | |||
MemoryType mem_type, const NodePtr &n, uint32_t out_index, | |||
const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | |||
@@ -699,7 +760,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||
is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && | |||
reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); | |||
auto stream_id = node_op_desc->GetStreamId(); | |||
if (is_reuse_memory) { | |||
if (is_reuse_memory && !continuous) { | |||
for (auto it = reusable_blocks_[stream_id].begin(); it != reusable_blocks_[stream_id].end(); ++it) { | |||
MemoryBlock *reusable_block = *it; | |||
if (!IsPostReuse(reusable_block)) { | |||
@@ -709,8 +770,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||
} | |||
// A node can reuse blocks of the same stream and preorder streams | |||
auto id = GetAtomicAddrCleanId(); | |||
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous, id)) { | |||
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { | |||
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); | |||
if (mem_type == kOutput) { | |||
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | |||
@@ -750,6 +810,47 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||
return block; | |||
} | |||
MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | |||
const bool is_op_reuse_mem) { | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); | |||
auto node_op_desc = n->GetOpDesc(); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); | |||
MemoryBlock *block = nullptr; | |||
int64_t total_size = 0; | |||
for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | |||
auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | |||
if (output_op_desc == nullptr) { | |||
return nullptr; | |||
} | |||
int64_t size = 0; | |||
if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | |||
GELOGI("Get size failed"); | |||
return nullptr; | |||
} | |||
size_t align_size = static_cast<size_t>(size); | |||
AlignMemOffset(align_size); | |||
total_size += align_size; | |||
// only apply total size in first block | |||
if (index != 0) { | |||
zero_memory_list_.emplace_back(n, kOutput, index); | |||
} | |||
} | |||
auto block_size = GetBlockSize(total_size, ranges); | |||
GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", node_op_desc->GetName().c_str(), total_size, | |||
block_size); | |||
vector<bool> workspace_reuse_flag; | |||
block = ApplyMemory(block_size, total_size, total_size, kOutput, n, 0, workspace_reuse_flag, is_op_reuse_mem, true); | |||
if (block != nullptr) { | |||
// hccl task need align header and tail | |||
block->first_continuous_block_ = true; | |||
block->last_continuous_block_ = true; | |||
} | |||
return block; | |||
} | |||
MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | |||
const bool is_op_reuse_mem, const bool continuous) { | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); | |||
@@ -991,6 +1092,10 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
// Allocate memory for the current node and release node memory of the same size in the workspace | |||
GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | |||
ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id]);) | |||
if (IsContinuousOutput(node)) { | |||
(void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | |||
return SUCCESS; | |||
} | |||
for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | |||
int64_t size = 0; | |||
auto output_op_desc = op_desc->GetOutputDescPtr(i); | |||
@@ -1017,7 +1122,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
continue; | |||
} | |||
// atomic can't be reused | |||
if (is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic) { | |||
bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic; | |||
if (need_change) { | |||
is_op_reuse_mem_ = false; | |||
} | |||
MemoryBlock *mem_block = ApplyOutMemory(node, i, ranges, is_op_reuse_mem_, out_node_set_continuous_input); | |||
@@ -1225,10 +1331,12 @@ static bool CompareBlockIndex(MemoryBlock *left, MemoryBlock *right) { | |||
/// @param [in] input blocks need continuous | |||
/// @param [out] blocks after continuous order | |||
/// @param [in/out] blocks ordered | |||
/// @param [in] input or output | |||
/// | |||
void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks, | |||
const std::map<MemoryBlock *, uint32_t> block_map, | |||
std::vector<MemoryBlock *> &dest_blocks, std::vector<MemoryBlock *> &continuous_blocks) { | |||
std::vector<MemoryBlock *> &dest_blocks, std::vector<MemoryBlock *> &continuous_blocks, | |||
const std::string &type) { | |||
for (auto &memory_block : org_blocks) { | |||
if (memory_block == nullptr || memory_block->deleted_block_) { | |||
continue; | |||
@@ -1245,7 +1353,7 @@ void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks, | |||
for (auto &memory_block : continuous_blocks) { | |||
GE_IF_BOOL_EXEC(memory_block == nullptr, continue); | |||
GELOGI("Block continuous input index:%d", memory_block->input_index_); | |||
GELOGI("Block continuous %s index:%d", type.c_str(), memory_block->input_index_); | |||
count++; | |||
if (count == 1) { | |||
memory_block->first_continuous_block_ = true; | |||
@@ -1280,7 +1388,7 @@ void BlockMemAssigner::AssignContinuousBlocks() { | |||
continuous_block_map.size(), continuous_blocks.size()); | |||
continue; | |||
} | |||
ReAssignContinuousBlocks(memory_blocks_, continuous_block_map, dest_memory_blocks, continuous_blocks); | |||
ReAssignContinuousBlocks(memory_blocks_, continuous_block_map, dest_memory_blocks, continuous_blocks, "input"); | |||
memory_blocks_.swap(dest_memory_blocks); | |||
} | |||
} | |||
@@ -1292,14 +1400,25 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { | |||
} | |||
for (size_t i = 0; i < memory_blocks_.size(); ++i) { | |||
auto parent = memory_blocks_[i]; | |||
if (parent == nullptr || parent->deleted_block_) { | |||
if (parent == nullptr || parent->deleted_block_ || parent->continuous_block_) { | |||
continue; | |||
} | |||
if (parent->reuse_mem_ && !IsPostReuse(parent)) { | |||
parent->reuse_mem_ = false; | |||
} | |||
for (size_t j = i + 1; j < memory_blocks_.size(); ++j) { | |||
parent->AddLifeReuseBlock(memory_blocks_[j], total_node_depend_stream_life_); | |||
auto child = memory_blocks_[j]; | |||
if (child == nullptr) { | |||
continue; | |||
} | |||
// If node is before atomic_addr_clean node, the continus memory can't be reused. | |||
if (!parent->NodeTypeIndexList().empty() && child->continuous_block_) { | |||
auto node = parent->NodeTypeIndexList()[0].node; | |||
if (node == nullptr || node->GetOpDesc() == nullptr || (node->GetOpDesc()->GetId() < GetAtomicAddrCleanId())) { | |||
continue; | |||
} | |||
} | |||
parent->AddLifeReuseBlock(child, total_node_depend_stream_life_); | |||
} | |||
} | |||
} | |||
@@ -1450,8 +1569,8 @@ Status BlockMemAssigner::Assign() { | |||
bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | |||
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | |||
(node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) || | |||
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || | |||
(node_type == HVDCALLBACKBROADCAST) || (node_type == HVDCALLBACKALLREDUCE); | |||
(node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || | |||
(node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || | |||
(node_type == HVDCALLBACKBROADCAST); | |||
} | |||
} // namespace ge |
@@ -90,6 +90,8 @@ class MemoryBlock { | |||
} | |||
size_t Size() const { return block_size_; } | |||
size_t AlignSize() const; | |||
void SetHeadOffset(size_t offset); | |||
void SetTailOffset(size_t offset); | |||
@@ -118,6 +120,8 @@ class MemoryBlock { | |||
bool IsSameLabel(std::string &first_batch_label); | |||
void AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life); | |||
void AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &node_depend_stream_life); | |||
void SetLifeTimeEnd(size_t time); | |||
@@ -362,6 +366,10 @@ class BlockMemAssigner : public MemAssigner { | |||
/// | |||
void ReuseBlocksByLifeTime(size_t range_size); | |||
bool IsContinuousOutput(const NodePtr &n); | |||
MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); | |||
std::unordered_map<int64_t, std::vector<MemoryBlock *>> reusable_blocks_; | |||
std::map<std::string, uint64_t> reusable_block_counts_; | |||
@@ -293,7 +293,8 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
} else if (is_loop_graph) { | |||
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start)); | |||
} else { | |||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, mem_clean_start, mem_clean_size), "SetAtomicCleanAttr failed."); | |||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}), | |||
"SetAtomicCleanAttr failed."); | |||
} | |||
} | |||
} | |||
@@ -441,35 +442,33 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node | |||
GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); | |||
vector<int64_t> output_list = out_op_desc->GetOutputOffset(); | |||
if (out_op_desc->GetOutputsSize() > output_list.size()) { | |||
if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { | |||
GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", | |||
out_op_desc->GetOutputsSize(), output_list.size()); | |||
return ge::FAILED; | |||
} | |||
memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; | |||
size_t mem_offset = output_list[0]; | |||
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | |||
output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_; | |||
size_t pre_mem_offset = memory_offset_[0].mem_offset_; | |||
output_list[out_data_anchor->GetIdx()] = mem_offset; | |||
int64_t tensor_desc_size = 0; | |||
if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) != | |||
ge::SUCCESS) { | |||
GELOGE(FAILED, "GetSize failed."); | |||
return FAILED; | |||
} | |||
memory_offset_[0].mem_offset_ += tensor_desc_size; | |||
AlignMemOffset(MEM_ALIGN_SIZE); | |||
mem_offset += tensor_desc_size; | |||
if (mem_offset <= 0) { | |||
return FAILED; | |||
} | |||
mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; | |||
GELOGI( | |||
"[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " | |||
"[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " | |||
"real_size[%ld].", | |||
node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), | |||
pre_mem_offset, out_op_desc->GetStreamId(), (memory_offset_[0].mem_offset_ - pre_mem_offset), tensor_desc_size); | |||
output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); | |||
} | |||
out_op_desc->SetOutputOffset(output_list); | |||
memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; | |||
return ge::SUCCESS; | |||
} | |||
@@ -809,14 +808,12 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt | |||
} | |||
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||
if (compute_graph_ == nullptr) { | |||
GELOGE(ge::PARAM_INVALID, "Graph must not be null."); | |||
return ge::PARAM_INVALID; | |||
} | |||
GE_CHECK_NOTNULL(compute_graph_); | |||
// Atomic op memory start addr | |||
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||
GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_); | |||
vector<NodePtr> connect_netoutput_nodes; | |||
for (auto &node : compute_graph_->GetAllNodes()) { | |||
auto node_op_desc = node->GetOpDesc(); | |||
if (node_op_desc == nullptr) { | |||
@@ -839,36 +836,20 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||
return ge::PARAM_INVALID; | |||
} | |||
// Atomic op memory start addr of loop graph | |||
int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||
// Reassign atomic node output memory | |||
Status ret = AssignAtomicOutputMemory(node); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Assign atomic output memory failed, node is %s.", node_op_desc->GetName().c_str()); | |||
return ret; | |||
vector<int> is_connect_netoutput; | |||
// If GetBool fail, attr is_connect_netoutput is an empty vector. | |||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); | |||
if (!is_connect_netoutput.empty()) { | |||
connect_netoutput_nodes.emplace_back(node); | |||
continue; | |||
} | |||
// Check atomic workspace | |||
map<string, map<int64_t, int64_t>> sub_node_workspace_info; | |||
sub_node_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, sub_node_workspace_info); | |||
if (!sub_node_workspace_info.empty()) { | |||
bool is_fusion_node = false; | |||
// If GetBool fail, is_fusion_node is false. | |||
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); | |||
if (is_fusion_node) { | |||
// Assign fusion atomic node workspace memory | |||
ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, sub_node_workspace_info); | |||
} else { | |||
// Assign single ordinary atomic node workspace memory, not include fusion node | |||
ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, sub_node_workspace_info); | |||
} | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); | |||
return ret; | |||
} | |||
// Atomic op memory start addr of loop graph | |||
int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||
vector<int64_t> mem_offset_end; | |||
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { | |||
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
/// In networks with loop op, atomic op uses atomic_addr_clean op independently, | |||
@@ -883,13 +864,80 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||
// Set the address attr of atomic clean operator | |||
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | |||
if (atomic_mem_size != 0) { | |||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, atomic_mem_start, atomic_mem_size), "SetAtomicCleanAttr failed."); | |||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}), | |||
"SetAtomicCleanAttr failed."); | |||
} | |||
} | |||
if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) { | |||
GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); | |||
return FAILED; | |||
} | |||
return SUCCESS; | |||
} | |||
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, | |||
vector<int64_t> &mem_offset_end) { | |||
auto node_op_desc = node->GetOpDesc(); | |||
// Assign atomic node output memory | |||
Status ret = AssignAtomicOutputMemory(node, mem_offset_end); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str()); | |||
return ret; | |||
} | |||
// Check and assign atomic node workspace memory | |||
map<string, map<int64_t, int64_t>> atomic_workspace_info; | |||
atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info); | |||
if (!atomic_workspace_info.empty()) { | |||
bool is_fusion_node = false; | |||
// If GetBool fail, is_fusion_node is false. | |||
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); | |||
if (is_fusion_node) { | |||
// Assign fusion atomic node workspace memory | |||
ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end); | |||
} else { | |||
// Assign single ordinary atomic node workspace memory, not include fusion node | |||
ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end); | |||
} | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str()); | |||
return ret; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) { | |||
for (auto &node : connect_netoutput_nodes) { | |||
GE_CHECK_NOTNULL(node); | |||
if (node->GetOpDesc() == nullptr) { | |||
GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str()); | |||
continue; | |||
} | |||
// Atomic memory start addr | |||
int64_t original_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||
GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.", | |||
node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start); | |||
vector<int64_t> mem_offset_end; | |||
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { | |||
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
// All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately. | |||
if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) { | |||
GELOGE(FAILED, "Failed to set atomic attr separately."); | |||
return FAILED; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
Status GraphMemoryAssigner::AssignReferenceMemory() { | |||
for (auto &node : compute_graph_->GetDirectNode()) { | |||
// Get the reference type of the node, default is false | |||
@@ -971,9 +1019,10 @@ bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) { | |||
return true; | |||
} | |||
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { | |||
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) { | |||
auto op_desc = node->GetOpDesc(); | |||
GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED); | |||
mem_offset_end.clear(); | |||
GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str()); | |||
vector<int64_t> atomic_output_index; | |||
@@ -996,24 +1045,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { | |||
// If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here | |||
bool is_assigned_mem = false; | |||
if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) { | |||
GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index); | |||
return ge::PARAM_INVALID; | |||
} | |||
auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index); | |||
GE_CHECK_NOTNULL(out_data_anchor); | |||
auto input_anchors = out_data_anchor->GetPeerInDataAnchors(); | |||
for (auto &input_anchor : input_anchors) { | |||
auto output_node = input_anchor->GetOwnerNode(); | |||
/// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address | |||
/// has been assigned | |||
vector<int64_t> atomic_input_index; | |||
(void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index); | |||
if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) { | |||
is_assigned_mem = true; | |||
break; | |||
} | |||
if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) { | |||
GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str()); | |||
return ge::FAILED; | |||
} | |||
// If you have already assigned an atomic address, skip it, and you don't need to reassign it. | |||
@@ -1038,6 +1072,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { | |||
memory_offset_[0].mem_offset_ += size; | |||
AlignMemOffset(MEM_ALIGN_SIZE); | |||
mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); | |||
} | |||
op_desc->SetOutputOffset(output_list); | |||
@@ -1045,8 +1080,33 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { | |||
return ge::SUCCESS; | |||
} | |||
Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index, | |||
bool &is_mem_assigned) { | |||
if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) { | |||
GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index); | |||
return ge::PARAM_INVALID; | |||
} | |||
auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index); | |||
GE_CHECK_NOTNULL(out_data_anchor); | |||
auto input_anchors = out_data_anchor->GetPeerInDataAnchors(); | |||
for (auto &input_anchor : input_anchors) { | |||
auto output_node = input_anchor->GetOwnerNode(); | |||
/// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address | |||
/// has been assigned | |||
vector<int64_t> atomic_input_index; | |||
(void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index); | |||
if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) { | |||
is_mem_assigned = true; | |||
break; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | |||
map<string, map<int64_t, int64_t>> &workspace_info) { | |||
map<string, map<int64_t, int64_t>> &workspace_info, | |||
vector<int64_t> &mem_offset_end) { | |||
GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str()); | |||
vector<int64_t> workspace_vector = op_desc->GetWorkspace(); | |||
@@ -1078,6 +1138,7 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc | |||
op_desc->GetStreamId(), workspace_size, workspace_size); | |||
memory_offset_[0].mem_offset_ += workspace_size; | |||
mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); | |||
} | |||
} | |||
op_desc->SetWorkspace(workspace_vector); | |||
@@ -1086,7 +1147,8 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc | |||
} | |||
Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | |||
map<string, map<int64_t, int64_t>> &workspace_info) { | |||
map<string, map<int64_t, int64_t>> &workspace_info, | |||
vector<int64_t> &mem_offset_end) { | |||
GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str()); | |||
map<string, map<int64_t, int64_t>> sub_node_workspace_offset; | |||
@@ -1108,6 +1170,7 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt | |||
op_desc->GetStreamId(), workspace_size, workspace_size); | |||
memory_offset_[0].mem_offset_ += workspace_size; | |||
mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); | |||
index_offset.insert(std::make_pair(workspace_index, workspace_offset)); | |||
} | |||
sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset)); | |||
@@ -1287,6 +1350,47 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const { | |||
return SUCCESS; | |||
} | |||
Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | |||
const vector<int64_t> &mem_offset_end) { | |||
GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start); | |||
// Parsing offset and size vectors | |||
vector<int64_t> memory_offset_start; | |||
vector<int64_t> memory_offset_size; | |||
memory_offset_start.emplace_back(atomic_mem_start); | |||
for (size_t i = 0; i < mem_offset_end.size(); ++i) { | |||
memory_offset_start.emplace_back(mem_offset_end[i]); | |||
// Number 1 means element index | |||
auto size = memory_offset_start[i + 1] - memory_offset_start[i]; | |||
memory_offset_size.emplace_back(size); | |||
} | |||
memory_offset_start.pop_back(); | |||
const auto &in_control_anchor = node->GetInControlAnchor(); | |||
if (!memory_offset_size.empty() && in_control_anchor != nullptr) { | |||
for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||
if (peer_out_control_anchor == nullptr) { | |||
continue; | |||
} | |||
auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); | |||
auto peer_out_node_desc = peer_out_node->GetOpDesc(); | |||
if (peer_out_node_desc == nullptr) { | |||
continue; | |||
} | |||
GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(), | |||
peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); | |||
if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { | |||
if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) { | |||
GELOGE(FAILED, "Set atomic clean attr failed."); | |||
return FAILED; | |||
} | |||
} | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) { | |||
// set the address attr of atomic clean operator for loop graph | |||
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | |||
@@ -1308,7 +1412,7 @@ Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int6 | |||
peer_out_node_desc->GetType().c_str()); | |||
if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { | |||
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, atomic_mem_start, atomic_mem_size), | |||
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}), | |||
GELOGE(FAILED, "SetAtomicCleanAttr failed."); | |||
return FAILED); | |||
} | |||
@@ -1317,8 +1421,8 @@ Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int6 | |||
return SUCCESS; | |||
} | |||
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t atomic_mem_start, | |||
int64_t atomic_mem_size) { | |||
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector<int64_t> &atomic_mem_start, | |||
const vector<int64_t> &atomic_mem_size) { | |||
for (ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||
auto node_op_desc = node->GetOpDesc(); | |||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | |||
@@ -1327,15 +1431,15 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t ato | |||
((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { | |||
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace(); | |||
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); | |||
workspace_vector.emplace_back(atomic_mem_start); | |||
workspace_byte_vector.emplace_back(atomic_mem_size); | |||
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||
node_op_desc->SetWorkspace(workspace_vector); | |||
node_op_desc->SetWorkspaceBytes(workspace_byte_vector); | |||
std::vector<int64_t> mem_start_vector; | |||
// If GetListInt fail, mem_start_vector is empty. | |||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | |||
mem_start_vector.emplace_back(atomic_mem_start); | |||
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | |||
GELOGE(FAILED, "SetListInt failed."); | |||
return FAILED); | |||
@@ -1343,16 +1447,26 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, int64_t ato | |||
std::vector<int64_t> mem_size_vector; | |||
// If GetListInt fail, mem_size_vector is empty. | |||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | |||
mem_size_vector.emplace_back(atomic_mem_size); | |||
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | |||
GELOGE(FAILED, "SetListInt failed."); | |||
return FAILED); | |||
GELOGI( | |||
"[IMAS]SetAtomicCleanAttr : Set %s name[%s] output[%d] offset to [%ld] streamid[%ld] size[%ld] " | |||
"realsize[%ld].", | |||
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), 0, atomic_mem_start, | |||
node->GetOpDesc()->GetStreamId(), atomic_mem_size, atomic_mem_size); | |||
std::stringstream ss; | |||
for (auto iter : atomic_mem_start) { | |||
ss << iter << " "; | |||
} | |||
string atomic_mem_start_str = ss.str(); | |||
ss.clear(); | |||
ss.str(""); | |||
for (auto iter : atomic_mem_size) { | |||
ss << iter << " "; | |||
} | |||
string atomic_mem_size_str = ss.str(); | |||
GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", | |||
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), | |||
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); | |||
} | |||
} | |||
return SUCCESS; | |||
@@ -147,22 +147,33 @@ class GraphMemoryAssigner { | |||
/// | |||
bool CheckInputIsSupportAtomic(const ge::NodePtr &node); | |||
ge::Status AssignAtomicOutputMemory(const ge::NodePtr &node); | |||
ge::Status GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index, bool &is_mem_assigned); | |||
ge::Status AssignAtomicOutputMemory(const ge::NodePtr &node, std::vector<int64_t> &mem_offset_end); | |||
ge::Status AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | |||
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info); | |||
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info, | |||
std::vector<int64_t> &mem_offset_end); | |||
ge::Status AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc, | |||
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info); | |||
std::map<std::string, std::map<int64_t, int64_t>> &workspace_info, | |||
std::vector<int64_t> &mem_offset_end); | |||
ge::Status AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, std::vector<int64_t> &mem_offset_end); | |||
ge::Status AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes); | |||
ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | |||
const std::vector<int64_t> &mem_offset_end); | |||
/// | |||
/// @brief set loop graph atomic attr | |||
/// @param node | |||
/// @param node, atomic memory assignment start offset | |||
/// @param atomic_mem_start: atomic op memory start address | |||
/// | |||
ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start); | |||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, int64_t atomic_mem_start, int64_t atomic_mem_size); | |||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector<int64_t> &atomic_mem_start, | |||
const std::vector<int64_t> &atomic_mem_size); | |||
void AlignMemOffset(const int64_t &mem_align_size); | |||
@@ -42,10 +42,12 @@ | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/utils/tensor_utils.h" | |||
#include "graph/utils/type_utils.h" | |||
#include "graph/passes/memcpy_addr_async_pass.h" | |||
#include "init/gelib.h" | |||
#include "memory/memory_assigner.h" | |||
#include "omg/version.h" | |||
#include "register/op_registry.h" | |||
#include "graph/passes/set_input_output_offset_pass.h" | |||
using std::map; | |||
using std::set; | |||
@@ -668,12 +670,36 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { | |||
GE_CHK_STATUS_RET(label_allocator.AssignFunctionalLabels(label_num_), "Assign label failed."); | |||
GE_TIMESTAMP_END(AssignFunctionalLabels, "ModelBuilder::AssignFunctionalLabels"); | |||
// Add memcpy_addr_async node. | |||
rtFeatureType_t feature_type = FEATURE_TYPE_MEMCPY; | |||
int32_t feature_info = MEMCPY_INFO_SUPPORT_ZEROCOPY; | |||
int64_t value = 0; | |||
rtError_t rt_ret = rtGetRtCapability(feature_type, feature_info, &value); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "rtGetRtCapability failed."); | |||
return RT_FAILED; | |||
} else { | |||
if (value == RT_CAPABILITY_SUPPORT) { | |||
GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode); | |||
MemcpyAddrAsyncPass memcpy_addr; | |||
GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed."); | |||
GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run."); | |||
} else { | |||
GELOGW("rtGetRtCapability not support memcpy_addr_async."); | |||
} | |||
} | |||
GE_TIMESTAMP_START(AssignMemory); | |||
MemoryAssigner mem_assigner(compute_graph_); | |||
GE_CHK_STATUS_RET(mem_assigner.AssignMemory(is_loop_graph_, mem_offset_, zero_copy_mem_size_), | |||
"Assign Memory Failed!"); | |||
GE_TIMESTAMP_END(AssignMemory, "GraphBuilder::AssignMemory"); | |||
GE_TIMESTAMP_START(SetInputOutputOffset); | |||
SetInputOutputOffsetPass input_output_offset; | |||
GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed."); | |||
GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run."); | |||
// Compile single op in graph build stage | |||
GE_TIMESTAMP_START(CompileSingleOp); | |||
GE_CHK_STATUS_RET(CompileSingleOp(), "ATC builder CompileSingleOp() return fail."); | |||
@@ -612,6 +612,33 @@ bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr | |||
AttrUtils::HasAttr(activate_stream_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE)) { | |||
return false; | |||
} | |||
/// | |||
/// stream_0 --> stream_2 --> stream_3 --> stream_4 | |||
/// /\ | | |||
/// | \/ | |||
/// | stream_1 --> stream_5 --> stream_6 --> stream_7 | |||
/// | /\ | | | |||
/// | | \/ | | |||
/// | |---------- stream_8 | | |||
/// | | | |||
/// |-----------------------------------------------------------| | |||
/// | |||
/// Exit1(S7) Exit2(S7) Exit3(S7) | |||
/// \ / | | |||
/// AddN(S1) NextIteration(S7) | |||
/// | | | |||
/// NextIteration(S1) / | |||
/// | / | |||
/// | / | |||
/// StreamActive(S7) | |||
/// | |||
/// Event between Exit1/Exit2 and AddN should not be optimized | |||
/// | |||
if (IsActiveAfterNextIteration(activate_stream_node)) { | |||
continue; | |||
} | |||
visited_nodes.insert(activate_stream_node); | |||
// nodes in stream link to streamActivate no need to add event before activated node | |||
for (const auto &pre_activate_stream_node : activate_stream_node->GetInNodes()) { | |||
@@ -639,6 +666,18 @@ bool StreamAllocator::IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr | |||
return false; | |||
} | |||
bool StreamAllocator::IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const { | |||
if ((active_node_ptr == nullptr) || active_node_ptr->GetInControlNodes().empty()) { | |||
return false; | |||
} | |||
for (const auto &in_node : active_node_ptr->GetInControlNodes()) { | |||
if ((in_node->GetType() != NEXTITERATION) && (in_node->GetType() != REFNEXTITERATION)) { | |||
return false; | |||
} | |||
} | |||
return true; | |||
} | |||
// Split the stream according to the maximum number of nodes in the stream. | |||
Status StreamAllocator::SplitStreams(vector<set<int64_t>> &split_streams) { | |||
if (enable_single_stream_ || stream_num_ == 0) { | |||
@@ -55,6 +55,7 @@ class StreamAllocator { | |||
Status OptimizeByStreamActivate(); | |||
// Determine if the successor node of RecvNode is directly or indirectly activated by the SendNode precursor node | |||
bool IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr, const NodePtr &recv_node_ptr) const; | |||
bool IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const; | |||
Status SplitStreams(std::vector<std::set<int64_t>> &split_streams); | |||
bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const; | |||
@@ -86,10 +86,10 @@ Status GraphExecutor::SetGraphContext(GraphContextPtr graph_context_ptr) { | |||
return SUCCESS; | |||
} | |||
Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) { | |||
Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, int32_t dynamic_type) { | |||
auto model_manager = ge::ModelManager::GetInstance(); | |||
GE_CHECK_NOTNULL(model_manager); | |||
Status ret = model_manager->SetDynamicSize(model_id, batch_num); | |||
Status ret = model_manager->SetDynamicSize(model_id, batch_num, dynamic_type); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "SetDynamicSize failed"); | |||
return ret; | |||
@@ -486,12 +486,14 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<Inp | |||
/// @brief Get dynamic batch_info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @param [out] dynamic_type | |||
/// @return execute result | |||
/// | |||
Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) { | |||
Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
int32_t &dynamic_type) { | |||
auto model_manager = ge::ModelManager::GetInstance(); | |||
GE_CHECK_NOTNULL(model_manager); | |||
Status ret = model_manager->GetDynamicBatchInfo(model_id, batch_info); | |||
Status ret = model_manager->GetDynamicBatchInfo(model_id, batch_info, dynamic_type); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "GetDynamicBatchInfo failed."); | |||
return ret; | |||
@@ -499,12 +501,30 @@ Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::ve | |||
return SUCCESS; | |||
} | |||
Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) { | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get combined dynamic dims info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @return execute result | |||
/// | |||
Status GraphExecutor::GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) { | |||
auto model_manager = ge::ModelManager::GetInstance(); | |||
GE_CHECK_NOTNULL(model_manager); | |||
Status ret = model_manager->GetCombinedDynamicDims(model_id, batch_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "GetCombinedDynamicDims failed."); | |||
return ret; | |||
} | |||
return SUCCESS; | |||
} | |||
Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||
auto model_manager = ge::ModelManager::GetInstance(); | |||
GE_CHECK_NOTNULL(model_manager); | |||
Status ret = model_manager->GetCurShape(model_id, batch_info); | |||
Status ret = model_manager->GetCurShape(model_id, batch_info, dynamic_type); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "GetCurShape failed"); | |||
GELOGE(ret, "GetCurShape failed"); | |||
return ret; | |||
} | |||
return SUCCESS; | |||
@@ -56,7 +56,7 @@ class GraphExecutor { | |||
Status SetGraphContext(GraphContextPtr graph_context_ptr); | |||
static Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num); | |||
static Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, int32_t dynamic_type); | |||
void SetTrainFlag(bool is_train_graph); | |||
@@ -80,11 +80,22 @@ class GraphExecutor { | |||
/// @brief Get dynamic batch_info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @param [out] dynamic_type | |||
/// @return execute result | |||
/// | |||
static Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
static Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
int32_t &dynamic_type); | |||
static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get combined dynamic dims info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @return execute result | |||
/// | |||
static Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
static Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
static Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | |||
@@ -104,12 +104,11 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { | |||
GE_CHECK_NOTNULL(cond_out_desc); | |||
GeTensorDesc pred_desc = cond_out_desc->GetInputDesc(kCondOutputIndex); | |||
GeTensorDesc cond_desc(GeShape(pred_desc.GetShape().GetDims()), pred_desc.GetFormat(), DT_INT32); | |||
// false ==> 0 ==> switch_labels[0] ==> body_leave_index | |||
// true ==> 1 ==> switch_labels[1] ==> body_enter_name | |||
const std::vector<uint32_t> switch_labels = {body_leave_index, body_enter_index}; | |||
NodePtr switch_node = AddLabelSwitchLeave(cond_graph, cond_leave_name, cond_desc, switch_labels); | |||
NodePtr switch_node = AddLabelSwitchLeave(cond_graph, cond_leave_name, pred_desc, switch_labels); | |||
if (switch_node == nullptr) { | |||
GELOGE(INTERNAL_ERROR, "Subgraph: %s add label switch failed.", cond_graph->GetName().c_str()); | |||
return FAILED; | |||
@@ -36,20 +36,20 @@ GraphLoader::~GraphLoader() = default; | |||
Status GraphLoader::UnloadModel(uint32_t model_id) { | |||
auto model_manager = ModelManager::GetInstance(); | |||
GE_CHECK_NOTNULL(model_manager); | |||
GELOGI("UnLoad model begin, model_id:%u.", model_id); | |||
GELOGI("UnLoad model begin, model id:%u.", model_id); | |||
Status ret = model_manager->Stop(model_id); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "UnloadModel: Stop failed."); | |||
GELOGE(ret, "UnloadModel: Stop failed. model id:%u", model_id); | |||
} | |||
ret = model_manager->Unload(model_id); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "UnloadModel: Unload failed."); | |||
GELOGE(ret, "UnloadModel: Unload failed. model id:%u", model_id); | |||
CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_UNLOAD); | |||
return ret; | |||
} | |||
GELOGI("UnLoad model success, model_id:%u.", model_id); | |||
GELOGI("UnLoad model success, model id:%u.", model_id); | |||
return SUCCESS; | |||
} | |||
@@ -123,14 +123,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string | |||
Status ret; | |||
try { | |||
if (!CheckInputPathValid(path)) { | |||
GELOGE(PARAM_INVALID, "model path is invalid: %s", path.c_str()); | |||
return PARAM_INVALID; | |||
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); | |||
return GE_EXEC_MODEL_PATH_INVALID; | |||
} | |||
GELOGI("Load model begin, model path is: %s", path.c_str()); | |||
if (!key_path.empty() && !CheckInputPathValid(key_path)) { | |||
GELOGE(PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); | |||
return PARAM_INVALID; | |||
GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); | |||
return GE_EXEC_MODEL_KEY_PATH_INVALID; | |||
} | |||
ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); | |||
@@ -16,6 +16,7 @@ | |||
#include "graph/load/new_model_manager/cpu_queue_schedule.h" | |||
#include "common/debug/ge_log.h" | |||
#include "common/debug/log.h" | |||
namespace { | |||
const uint32_t kCoreDim = 1; // for rtCpuKernelLaunch | |||
@@ -58,7 +59,7 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) { | |||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
in_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(MbufQueueInfo); | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | |||
@@ -69,7 +70,7 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) { | |||
status = rtMemcpy(args_, args_size_, &queue_info, sizeof(MbufQueueInfo), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
return SUCCESS; | |||
@@ -84,7 +85,7 @@ Status CpuTaskModelDequeue::Distribute() { | |||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelDequeue, kCoreDim, args_, args_size_, nullptr, stream_); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelDequeue failed, status: 0x%X", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
GELOGI("Cpu kernel launch model dequeue task success."); | |||
@@ -98,24 +99,24 @@ Status CpuTaskModelDequeue::Distribute() { | |||
/// @param [in] outside_addrs: model input/output memory addr | |||
/// @return: 0 for success / others for failed | |||
/// | |||
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, | |||
std::map<const void *, std::vector<void *>> &outside_addrs) { | |||
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) { | |||
if ((args_ != nullptr) || (args_size_ > 0)) { | |||
GELOGE(FAILED, "Task already initialized, size: %u", args_size_); | |||
return FAILED; | |||
} | |||
args_size_ = sizeof(AddrMapInfo); | |||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
} | |||
GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM)); | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | |||
AddrMapInfo addr_map_info; | |||
for (const auto &addrs : outside_addrs) { | |||
addr_map_info.addr_num += addrs.second.size(); | |||
for (auto &addrs : outside_addrs) { | |||
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | |||
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | |||
for (const auto &virtual_args_addr : virtual_args_addrs) { | |||
addr_map_info.addr_num += virtual_args_addr.second.size(); | |||
} | |||
} | |||
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); | |||
@@ -123,38 +124,31 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, | |||
size_t index = 0; | |||
vector<uint64_t> src_addrs; | |||
vector<uint64_t> dst_addrs; | |||
for (const auto &addrs : outside_addrs) { | |||
for (size_t i = 0; i < addrs.second.size(); ++i) { | |||
src_addrs.push_back(mbuf_list.at(index)); | |||
dst_addrs.push_back(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(addrs.second.at(i)))); | |||
for (auto &addrs : outside_addrs) { | |||
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | |||
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | |||
for (const auto &virtual_args_addr : virtual_args_addrs) { | |||
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { | |||
src_addrs.push_back(mbuf_list.at(index)); | |||
dst_addrs.push_back(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | |||
} | |||
} | |||
index++; | |||
} | |||
// malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs | |||
status = rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
} | |||
status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(), | |||
src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
} | |||
GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); | |||
rtError_t status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(), | |||
src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status); | |||
return RT_ERROR_TO_GE_STATUS(status);) | |||
status = rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
} | |||
GE_CHK_RT_RET(rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); | |||
status = rtMemcpy(dst_addr_, dst_addrs.size() * sizeof(uint64_t), dst_addrs.data(), | |||
dst_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
} | |||
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status); | |||
return RT_ERROR_TO_GE_STATUS(status);) | |||
// src_addr_list is init to src_addr, which is the point to src_addrs | |||
if (!src_addrs.empty() && !dst_addrs.empty()) { | |||
@@ -164,10 +158,8 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, | |||
} | |||
status = rtMemcpy(args_, args_size_, &addr_map_info, sizeof(AddrMapInfo), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
} | |||
GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status); | |||
return RT_ERROR_TO_GE_STATUS(status);) | |||
return SUCCESS; | |||
} | |||
@@ -180,7 +172,7 @@ Status CpuTaskZeroCopy::Distribute() { | |||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskZeroCopy, kCoreDim, args_, args_size_, nullptr, stream_); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ZeroCopy failed, status: 0x%X", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
GELOGI("Cpu kernel launch zero copy task success."); | |||
@@ -225,7 +217,7 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb | |||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
out_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(PrepareOutputInfo); | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | |||
@@ -239,7 +231,7 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb | |||
status = rtMemcpy(args_, args_size_, &prepare, sizeof(PrepareOutputInfo), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
return SUCCESS; | |||
@@ -254,7 +246,7 @@ Status CpuTaskPrepareOutput::Distribute() { | |||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskPrepareOutput, kCoreDim, args_, args_size_, nullptr, stream_); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch PrepareOutput failed, status: 0x%X", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
GELOGI("Cpu kernel launch prepare output task success."); | |||
@@ -279,7 +271,7 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) { | |||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | |||
@@ -289,7 +281,7 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) { | |||
status = rtMemcpy(args_, args_size_, &queue_info, args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
return SUCCESS; | |||
@@ -304,7 +296,7 @@ Status CpuTaskModelEnqueue::Distribute() { | |||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelEnqueue, kCoreDim, args_, args_size_, nullptr, stream_); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelEnqueue failed, status: 0x%X", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
GELOGI("Cpu kernel launch model enqueue task success."); | |||
@@ -336,7 +328,7 @@ Status CpuTaskActiveEntry::Distribute() { | |||
rtError_t ret = rtStreamActive(active_stream_, stream_); | |||
if (ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt StreamActive failed, ret: 0x%X", ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(ret); | |||
} | |||
GELOGI("Cpu kernel launch active entry task success."); | |||
@@ -359,14 +351,14 @@ Status CpuTaskWaitEndGraph::Init(uint32_t model_id) { | |||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | |||
status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
return SUCCESS; | |||
@@ -381,7 +373,7 @@ Status CpuTaskWaitEndGraph::Distribute() { | |||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch WaitEndGraph failed, status: 0x%X", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
GELOGI("Cpu kernel launch wait end task success."); | |||
@@ -404,14 +396,14 @@ Status CpuTaskModelRepeat::Init(uint32_t model_id) { | |||
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | |||
status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
return SUCCESS; | |||
@@ -426,7 +418,7 @@ Status CpuTaskModelRepeat::Distribute() { | |||
rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelRepeat, kCoreDim, args_, args_size_, nullptr, stream_); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelRepeat failed, status: 0x%x", status); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
GELOGI("Cpu kernel launch repeat task success."); | |||
@@ -22,6 +22,7 @@ | |||
#include "common/ge_inner_error_codes.h" | |||
#include "graph/load/new_model_manager/task_info/task_info.h" | |||
#include "graph/load/new_model_manager/zero_copy_offset.h" | |||
#include "runtime/kernel.h" | |||
namespace ge { | |||
@@ -93,7 +94,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo { | |||
~CpuTaskZeroCopy() override; | |||
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } | |||
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, std::vector<void *>> &outside_addrs); | |||
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs); | |||
Status Distribute() override; | |||
@@ -487,8 +487,8 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in | |||
size_t proto_size = op_mapping_info.ByteSizeLong(); | |||
bool ret = op_mapping_info.SerializeToString(&proto_str); | |||
if (!ret || proto_size == 0) { | |||
GELOGE(FAILED, "Protobuf SerializeToString failed, proto size %zu.", proto_size); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size); | |||
return PARAM_INVALID; | |||
} | |||
if (dev_mem_load_ != nullptr) { | |||
@@ -499,20 +499,20 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in | |||
rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "load dump information.", proto_size) | |||
rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
load_flag_ = true; | |||
@@ -525,8 +525,8 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_ | |||
size_t proto_size = op_mapping_info.ByteSizeLong(); | |||
bool ret = op_mapping_info.SerializeToString(&proto_str); | |||
if (!ret || proto_size == 0) { | |||
GELOGE(FAILED, "Protobuf SerializeToString failed, proto size %zu.", proto_size); | |||
return FAILED; | |||
GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size); | |||
return PARAM_INVALID; | |||
} | |||
if (dev_mem_unload_ != nullptr) { | |||
@@ -537,20 +537,20 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_ | |||
rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "unload dump information.", proto_size) | |||
rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
load_flag_ = false; | |||
GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size); | |||
@@ -588,18 +588,20 @@ Status DataDumper::LoadDumpInfo() { | |||
task.mutable_op()->set_op_type(op_desc->GetType()); | |||
if (dump_properties_.GetDumpMode() == kDumpOutput) { | |||
if (DumpOutput(op_iter, task) != SUCCESS) { | |||
GELOGE(FAILED, "Dump output failed"); | |||
return FAILED; | |||
Status ret = DumpOutput(op_iter, task); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Dump output failed"); | |||
return ret; | |||
} | |||
op_mapping_info.mutable_task()->Add(std::move(task)); | |||
continue; | |||
} | |||
if (dump_properties_.GetDumpMode() == kDumpInput) { | |||
if (op_iter.is_task) { | |||
if (DumpInput(op_iter, task) != SUCCESS) { | |||
GELOGE(FAILED, "Dump input failed"); | |||
return FAILED; | |||
Status ret = DumpInput(op_iter, task); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Dump input failed"); | |||
return ret; | |||
} | |||
} | |||
op_mapping_info.mutable_task()->Add(std::move(task)); | |||
@@ -608,14 +610,14 @@ Status DataDumper::LoadDumpInfo() { | |||
if (dump_properties_.GetDumpMode() == kDumpAll) { | |||
auto ret = DumpOutput(op_iter, task); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Dump output failed when in dumping all"); | |||
return FAILED; | |||
GELOGE(ret, "Dump output failed when in dumping all"); | |||
return ret; | |||
} | |||
if (op_iter.is_task) { | |||
ret = DumpInput(op_iter, task); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Dump input failed when in dumping all"); | |||
return FAILED; | |||
GELOGE(ret, "Dump input failed when in dumping all"); | |||
return ret; | |||
} | |||
} | |||
op_mapping_info.mutable_task()->Add(std::move(task)); | |||
@@ -630,8 +632,8 @@ Status DataDumper::LoadDumpInfo() { | |||
if (!op_list_.empty() || is_op_debug_) { | |||
auto ret = ExecuteLoadDumpInfo(op_mapping_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Execute load dump info failed"); | |||
return FAILED; | |||
GELOGE(ret, "Execute load dump info failed"); | |||
return ret; | |||
} | |||
} | |||
return SUCCESS; | |||
@@ -702,8 +704,8 @@ Status DataDumper::UnloadDumpInfo() { | |||
} | |||
auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Execute unload dump info failed"); | |||
return FAILED; | |||
GELOGE(ret, "Execute unload dump info failed"); | |||
return ret; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -28,14 +28,15 @@ | |||
#include "common/helper/model_helper.h" | |||
#include "common/helper/om_file_helper.h" | |||
#include "common/opskernel/ge_task_info.h" | |||
#include "common/types.h" | |||
#include "common/properties_manager.h" | |||
#include "common/types.h" | |||
#include "framework/common/util.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/load/new_model_manager/aipp_utils.h" | |||
#include "graph/load/new_model_manager/data_dumper.h" | |||
#include "graph/load/new_model_manager/data_inputer.h" | |||
#include "graph/load/new_model_manager/model_utils.h" | |||
#include "graph/load/new_model_manager/aipp_utils.h" | |||
#include "graph/load/new_model_manager/zero_copy_offset.h" | |||
#include "graph/load/new_model_manager/zero_copy_task.h" | |||
#include "graph/model.h" | |||
#include "graph/node.h" | |||
@@ -285,11 +286,20 @@ class DavinciModel { | |||
/// @ingroup ge | |||
/// @brief Get dynamic batch_info | |||
/// @param [out] batch_info | |||
/// @param [out] dynamic_type | |||
/// @return execute result | |||
/// | |||
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info) const; | |||
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const; | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get combined dynamic dims info | |||
/// @param [out] batch_info | |||
/// @return None | |||
/// | |||
void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const; | |||
void GetCurShape(std::vector<int64_t> &batch_info); | |||
void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | |||
@@ -416,7 +426,7 @@ class DavinciModel { | |||
void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args, | |||
size_t size, size_t offset); | |||
void SetDynamicSize(const std::vector<uint64_t> &batch_num); | |||
void SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type); | |||
bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } | |||
@@ -456,6 +466,9 @@ class DavinciModel { | |||
void *cur_args = static_cast<char *>(args_) + offset; | |||
return cur_args; | |||
} | |||
void SetTotalIOAddrs(vector<void *> &io_addrs) { | |||
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); | |||
} | |||
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | |||
int64_t GetFixedAddrsSize(string tensor_name); | |||
void *GetCurrentFixedAddr(int64_t offset) const { | |||
@@ -474,7 +487,8 @@ class DavinciModel { | |||
Status MallocKnownArgs(); | |||
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | |||
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | |||
Status UpdateKnownZeroCopyAddr(vector<void *> &io_addrs, uint32_t args_offset); | |||
Status UpdateKnownZeroCopyAddr(); | |||
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | |||
Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | |||
Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | |||
@@ -513,22 +527,6 @@ class DavinciModel { | |||
/// | |||
/// @ingroup ge | |||
/// @brief Save Data address info for ZeroCopy. | |||
/// @param [in] const std::vector<void *> &outside_addrs | |||
/// @return None. | |||
/// | |||
void SetInputOutsideAddr(const std::vector<void *> &outside_addrs); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Save NetOutput address info for ZeroCopy. | |||
/// @param [in] const std::vector<void *> &outside_addrs | |||
/// @return None. | |||
/// | |||
void SetOutputOutsideAddr(const std::vector<void *> &outside_addrs); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Copy Check input size and model op size. | |||
/// @param [in] const int64_t &input_size: input size. | |||
/// @param [in] const int64_t &op_size: model op size. | |||
@@ -564,7 +562,7 @@ class DavinciModel { | |||
/// @param [in] batch_label: batch label for multi-batch scenes | |||
/// @return SUCCESS handle successfully / others handle failed | |||
/// | |||
Status UpdateIoTaskArgs(const map<uint32_t, pair<int64_t, void *>> &data_info, bool is_input, | |||
Status UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | |||
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label); | |||
Status CopyInputData(const InputData &input_data, bool device_data = false); | |||
@@ -706,8 +704,7 @@ class DavinciModel { | |||
/// | |||
Status BindInputQueue(); | |||
Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, | |||
std::map<const void *, std::vector<void *>> &outside_addrs); | |||
Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs); | |||
/// | |||
/// @ingroup ge | |||
@@ -816,8 +813,12 @@ class DavinciModel { | |||
vector<OpDescPtr> variable_op_list_; | |||
std::map<uint32_t, std::pair<int64_t, void *>> input_data_info_; // Virtual address from Data output. | |||
std::map<uint32_t, std::pair<int64_t, void *>> output_data_info_; // Virtual address from NetOutput input. | |||
std::map<uint32_t, ZeroCopyOffset> new_input_data_info_; | |||
std::map<uint32_t, ZeroCopyOffset> new_output_data_info_; | |||
std::map<const void *, ZeroCopyOffset> new_input_outside_addrs_; | |||
std::map<const void *, ZeroCopyOffset> new_output_outside_addrs_; | |||
std::vector<void *> real_virtual_addrs_; | |||
// output op: save cce op actual needed memory size | |||
vector<int64_t> output_memory_size_list_; | |||
@@ -849,9 +850,7 @@ class DavinciModel { | |||
std::mutex outside_addrs_mutex_; | |||
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | |||
std::set<const void *> copy_only_addrs_; // Address need copy to original place. | |||
// {node_addr, {addr_in_task_args}} | |||
std::map<const void *, std::vector<void *>> input_outside_addrs_; // Key is virtual address from Data. | |||
std::map<const void *, std::vector<void *>> output_outside_addrs_; // Key is virtual address from NetOutput. | |||
// {op_id, batch_label} | |||
std::map<int64_t, std::string> zero_copy_op_id_batch_label_; | |||
// {batch_label, addrs} | |||
@@ -920,8 +919,13 @@ class DavinciModel { | |||
int64_t total_fixed_addr_size_ = 0; | |||
std::map<const void *, void *> knonw_input_data_info_; | |||
std::map<const void *, void *> knonw_output_data_info_; | |||
vector<void *> total_io_addrs_; | |||
vector<void *> orig_total_io_addrs_; | |||
bool base_addr_not_changed_ = false; | |||
vector<vector<int64_t>> batch_info_; | |||
std::vector<std::vector<int64_t>> combined_batch_info_; | |||
int32_t dynamic_type_ = 0; | |||
vector<uint64_t> batch_size_; | |||
// key: input tensor name, generally rts op; | |||
@@ -70,11 +70,11 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u | |||
auto kernel_size = sizeof(uint64_t) * (v_aicpu_kernel.size()); | |||
rtError_t rt_ret = rtMalloc(&aicpu_kernel_addr, kernel_size, RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||
return RT_FAILED;) | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
rt_ret = rtMemcpy(aicpu_kernel_addr, kernel_size, v_aicpu_kernel.data(), kernel_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||
GE_CHK_RT(rtFree(aicpu_kernel_addr)); return FAILED;) | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||
GE_CHK_RT(rtFree(aicpu_kernel_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
uint64_t kernel_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(aicpu_kernel_addr)); | |||
param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr; | |||
// In the scene of loading once and running many times, the kernel needs to be destroyed many times, | |||
@@ -84,64 +84,64 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u | |||
rtError_t rt_ret = rtMalloc(&(devicebase), sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "malloc device memory failed."); | |||
GELOGE(RT_FAILED, "malloc device memory failed. ret: 0x%X", rt_ret); | |||
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = | |||
rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "memory copy to device failed."); | |||
GELOGE(RT_FAILED, "memory copy to device failed. ret: 0x%X", rt_ret); | |||
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | |||
GE_CHK_RT(rtFree(devicebase)); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rtStream_t stream = nullptr; | |||
rt_ret = rtStreamCreate(&stream, 0); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "create stream failed."); | |||
GELOGE(RT_FAILED, "create stream failed. ret: 0x%X", rt_ret); | |||
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | |||
GE_CHK_RT(rtFree(devicebase)); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtKernelLaunchEx(devicebase, sizeof(STR_FWK_OP_KERNEL), 0, stream); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "rtKernelLaunchEx failed."); | |||
GELOGE(RT_FAILED, "rtKernelLaunchEx failed. ret: 0x%X", rt_ret); | |||
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | |||
GE_CHK_RT(rtFree(devicebase)); | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtStreamSynchronize(stream); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "rtStreamSynchronize failed."); | |||
GELOGE(RT_FAILED, "rtStreamSynchronize failed. ret: 0x%X", rt_ret); | |||
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); | |||
GE_CHK_RT(rtFree(devicebase)); | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
if (aicpu_kernel_addr != nullptr) { | |||
rt_ret = rtFree(aicpu_kernel_addr); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "free memory failed."); | |||
GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret); | |||
GE_CHK_RT(rtFree(devicebase)); | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
} | |||
rt_ret = rtFree(devicebase); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "free memory failed."); | |||
GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret); | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtStreamDestroy(stream); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "rtStreamDestroy failed."); | |||
return FAILED; | |||
GELOGE(RT_FAILED, "rtStreamDestroy failed. ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -168,8 +168,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||
std::lock_guard<std::mutex> lock(map_mutex_); | |||
auto it = model_map_.find(model_id); | |||
if (it == model_map_.end()) { | |||
GELOGE(PARAM_INVALID, "model id %u does not exists.", model_id); | |||
return PARAM_INVALID; | |||
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); | |||
return GE_EXEC_MODEL_ID_INVALID; | |||
} | |||
uint64_t session_id = it->second->GetSessionId(); | |||
GELOGI("Destroy aicpu session for infer, session id is %u.", session_id); | |||
@@ -223,10 +223,11 @@ Status ModelManager::SetDevice(int32_t deviceId) const { | |||
return SUCCESS; | |||
} | |||
ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num) { | |||
ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, | |||
int32_t dynamic_type) { | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHECK_NOTNULL(davinci_model); | |||
davinci_model->SetDynamicSize(batch_num); | |||
davinci_model->SetDynamicSize(batch_num, dynamic_type); | |||
return SUCCESS; | |||
} | |||
@@ -332,8 +333,8 @@ Status ModelManager::DeleteModel(uint32_t id) { | |||
} else if (hybrid_model_it != hybrid_model_map_.end()) { | |||
(void)hybrid_model_map_.erase(hybrid_model_it); | |||
} else { | |||
GELOGE(PARAM_INVALID, "model id %u does not exists.", id); | |||
return PARAM_INVALID; | |||
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); | |||
return GE_EXEC_MODEL_ID_INVALID; | |||
} | |||
return SUCCESS; | |||
@@ -386,7 +387,7 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d | |||
std::shared_ptr<DavinciModel> model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid Model ID %u in InputData! ", model_id); | |||
GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! ", model_id); | |||
GE_IF_BOOL_EXEC(model->GetDataInputTid() == 0, model->SetDataInputTid(mmGetTid())); | |||
@@ -422,7 +423,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT | |||
for (size_t i = 0; i < inputs.size(); ++i) { | |||
DataBuffer data; | |||
data.data = inputs[i].data; | |||
data.length = static_cast<uint32_t>(inputs[i].length); | |||
data.length = inputs[i].length; | |||
input_data.blobs.push_back(data); | |||
} | |||
@@ -442,7 +443,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT | |||
return SUCCESS; | |||
} | |||
GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid Model ID %u in InputData! ", model_id); | |||
GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! ", model_id); | |||
DataInputer *inputer = model->GetDataInputer(); | |||
GE_CHECK_NOTNULL(inputer); | |||
@@ -472,7 +473,7 @@ Status ModelManager::Start(uint32_t model_id) { | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to start! ", model_id); | |||
Status status = davinci_model->ModelRunStart(); | |||
if (status == SUCCESS) { | |||
@@ -499,7 +500,7 @@ Status ModelManager::Stop(uint32_t model_id) { | |||
} | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to stop!", model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to stop!", model_id); | |||
Status status = davinci_model->ModelRunStop(); | |||
if (status == SUCCESS) { | |||
@@ -661,7 +662,7 @@ Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_siz | |||
} | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid Model ID %u !", | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid model id %u!", | |||
model_id); | |||
max_size = davinci_model->TotalMemSize(); | |||
@@ -671,8 +672,8 @@ Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_siz | |||
Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | |||
vector<InputOutputDescInfo> &output_desc) { | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | |||
"GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", | |||
model_id); | |||
return davinci_model->GetInputOutputDescInfo(input_desc, output_desc); | |||
} | |||
@@ -682,8 +683,8 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats, | |||
bool new_model_desc) { | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | |||
"GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, | |||
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | |||
davinci_model->SetModelDescVersion(new_model_desc); | |||
@@ -697,18 +698,35 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||
/// @param [out] batch_info | |||
/// @return execute result | |||
/// | |||
Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) { | |||
Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
int32_t &dynamic_type) { | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, | |||
"GetDynamicBatchInfo failed, Invalid model id %u!", model_id); | |||
return davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type); | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get combined dynamic dims info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @return execute result | |||
/// | |||
Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vector<int64_t>> &batch_info) { | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetDynamicBatchInfo Failed, Invalid Model ID %u !", | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetCombinedDynamicDims Failed, Invalid Model ID %u!", | |||
model_id); | |||
return davinci_model->GetDynamicBatchInfo(batch_info); | |||
davinci_model->GetCombinedDynamicDims(batch_info); | |||
return SUCCESS; | |||
} | |||
Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info) { | |||
Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHECK_NOTNULL(davinci_model); | |||
davinci_model->GetCurShape(batch_info); | |||
davinci_model->GetCurShape(batch_info, dynamic_type); | |||
return SUCCESS; | |||
} | |||
@@ -724,8 +742,8 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, | |||
std::vector<uint32_t> &inputFormats, | |||
std::vector<uint32_t> &outputFormats) { | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | |||
"GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", | |||
model_id); | |||
return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); | |||
} | |||
@@ -767,8 +785,8 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { | |||
Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | |||
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | |||
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID, | |||
"input key file path is not valid, %s", strerror(errno)); | |||
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, GE_EXEC_MODEL_KEY_PATH_INVALID, | |||
"input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); | |||
GenModelId(&model_id); | |||
shared_ptr<DavinciModel> davinci_model = nullptr; | |||
@@ -786,11 +804,11 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
try { | |||
davinci_model = std::make_shared<DavinciModel>(model.priority, listener); | |||
} catch (std::bad_alloc &) { | |||
GELOGE(FAILED, "Make shared failed"); | |||
return FAILED; | |||
GELOGE(MEMALLOC_FAILED, "Make shared failed"); | |||
return MEMALLOC_FAILED; | |||
} catch (...) { | |||
GELOGE(FAILED, "Make shared failed since other exception raise"); | |||
return FAILED; | |||
GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise"); | |||
return INTERNAL_ERROR; | |||
} | |||
ret = davinci_model->Assign(ge_model); | |||
if (ret != SUCCESS) { | |||
@@ -803,7 +821,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
rtError_t rt_ret = rtGetDevice(&device_id); | |||
if (rt_ret != RT_ERROR_NONE || device_id < 0) { | |||
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
davinci_model->SetDeviceId(device_id); | |||
davinci_model->SetOmName(model.om_name); | |||
@@ -851,8 +869,9 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, | |||
const std::vector<uint32_t> &input_queue_ids, | |||
const std::vector<uint32_t> &output_queue_ids) { | |||
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0, PARAM_INVALID, | |||
"input key file path is not valid, %s", strerror(errno)); | |||
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0, | |||
GE_EXEC_MODEL_KEY_PATH_INVALID, "input key file path %s is not valid, %s", | |||
model_data.key.c_str(), strerror(errno)); | |||
ModelHelper model_helper; | |||
Status ret = model_helper.LoadModel(model_data); | |||
@@ -863,8 +882,8 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d | |||
shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(model_data.priority, nullptr); | |||
if (davinci_model == nullptr) { | |||
GELOGE(FAILED, "create model failed."); | |||
return FAILED; | |||
GELOGE(MEMALLOC_FAILED, "create model failed."); | |||
return MEMALLOC_FAILED; | |||
} | |||
ret = davinci_model->Assign(model_helper.GetGeModel()); | |||
@@ -916,7 +935,7 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d | |||
Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, | |||
OutputData &output_data) { | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! ", model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); | |||
if (davinci_model->NeedDestroyAicpuKernel()) { | |||
GELOGI("Start to destroy specified aicpu kernel."); | |||
@@ -973,29 +992,30 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me | |||
auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_data); | |||
if (partition_table->num == 1) { | |||
GELOGE(FAILED, "om model is error,please use executable om model"); | |||
return FAILED; | |||
GELOGE(GE_EXEC_MODEL_PARTITION_NUM_INVALID, "om model is error,please use executable om model"); | |||
return GE_EXEC_MODEL_PARTITION_NUM_INVALID; | |||
} | |||
ModelPartition task_partition; | |||
if (om_file_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition) != SUCCESS) { | |||
GELOGE(FAILED, "get task model partition failed."); | |||
return FAILED; | |||
GELOGE(GE_EXEC_LOAD_TASK_PARTITION_FAILED, "get task model partition failed."); | |||
return GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||
} | |||
std::shared_ptr<domi::ModelTaskDef> model_task_def = MakeShared<domi::ModelTaskDef>(); | |||
if (model_task_def == nullptr) { | |||
return FAILED; | |||
return MEMALLOC_FAILED; | |||
} | |||
if (task_partition.size != 0) { | |||
if (!ReadProtoFromArray(task_partition.data, static_cast<int>(task_partition.size), model_task_def.get())) { | |||
GELOGE(FAILED, "ReadProtoFromArray failed."); | |||
return FAILED; | |||
GELOGE(GE_EXEC_LOAD_TASK_PARTITION_FAILED, "ReadProtoFromArray failed."); | |||
return GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||
} | |||
} | |||
ModelPartition partition_weight; | |||
ret = om_file_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition_weight); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Get weight partition failed. ret = %u", ret); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, | |||
"Get weight partition failed. ret = %u", ret); | |||
mem_size = model_task_def->memory_size(); | |||
weight_size = partition_weight.size; | |||
@@ -187,9 +187,19 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
/// @brief Get dynamic batch_info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @param [out] dynamic_type | |||
/// @return execute result | |||
/// | |||
ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
int32_t &dynamic_type); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get combined dynamic dims info | |||
/// @param [in] model_id | |||
/// @param [out] batch_info | |||
/// @return execute result | |||
/// | |||
ge::Status GetCombinedDynamicDims(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
/// | |||
/// @ingroup ge | |||
@@ -215,13 +225,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
std::vector<uint32_t> &inputFormats, | |||
std::vector<uint32_t> &outputFormats); | |||
ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info); | |||
ge::Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
ge::Status GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info); | |||
ge::Status SetDevice(int32_t deviceId) const; | |||
ge::Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num); | |||
ge::Status SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, int32_t dynamic_type); | |||
/// | |||
/// @ingroup domi_ome | |||
@@ -56,6 +56,7 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { | |||
if (tensor_size) { | |||
v_input_size.push_back(tensor_size); | |||
} | |||
GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); | |||
continue; | |||
} | |||
@@ -64,6 +65,8 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { | |||
GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); | |||
continue); | |||
GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); | |||
v_input_size.push_back(tensor_size); | |||
} | |||
@@ -34,7 +34,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "SetStream fail, stream_id:%u", task_def.stream_id()); | |||
return FAILED; | |||
return ret; | |||
} | |||
model_ = davinci_model->GetRtModelHandle(); | |||
@@ -53,14 +53,14 @@ Status EndGraphTaskInfo::Distribute() { | |||
rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rtEndGraphEx failed, ret: 0x%x", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
} else { | |||
GELOGI("Start to call rtEndGraph"); | |||
rtError_t rt_ret = rtEndGraph(model_, stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rtEndGraph failed, ret: 0x%x", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
} | |||
@@ -69,7 +69,7 @@ Status EndGraphTaskInfo::Distribute() { | |||
rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
task_id_ = task_id; | |||
stream_id_ = stream_id; | |||
@@ -49,7 +49,7 @@ Status EventRecordTaskInfo::Distribute() { | |||
rtError_t rt_ret = rtEventRecord(event_, stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
return SUCCESS; | |||
@@ -51,13 +51,13 @@ Status EventWaitTaskInfo::Distribute() { | |||
rtError_t rt_ret = rtStreamWaitEvent(stream_, event_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtEventReset(event_, stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
return SUCCESS; | |||
@@ -40,7 +40,7 @@ Status FusionStartTaskInfo::Distribute() { | |||
rtError_t rt_ret = rtKernelFusionStart(stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GELOGI("FusionStartTaskInfo Distribute Success."); | |||
@@ -40,7 +40,7 @@ Status FusionStopTaskInfo::Distribute() { | |||
rtError_t rt_ret = rtKernelFusionEnd(stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GELOGI("FusionStopTaskInfo Distribute Success."); | |||
@@ -73,24 +73,24 @@ Status HcclTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_m | |||
// Only in Horovod scenario should get the inputName and GeShape | |||
ret = HcomOmeUtil::GetHorovodInputs(op_desc_, kernel_hccl_infos_); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "davinci_model: GetHorovodInputs fail! domi error: %u", ret); | |||
return FAILED; | |||
GELOGE(ret, "davinci_model: GetHorovodInputs fail! domi error: %u", ret); | |||
return ret; | |||
} | |||
Status dmrt = HcomOmeUtil::GetHcclDataType(op_desc_, kernel_hccl_infos_); | |||
if (dmrt != SUCCESS) { | |||
GELOGE(FAILED, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt); | |||
return FAILED; | |||
GELOGE(dmrt, "davinci_model: GetHcomDataType fail! domi error: %u", dmrt); | |||
return dmrt; | |||
} | |||
dmrt = HcomOmeUtil::GetHcclCount(op_desc_, kernel_hccl_infos_); | |||
if (dmrt != SUCCESS) { | |||
GELOGE(FAILED, "davinci_model: GetHcomCount fail! domi error: %u", dmrt); | |||
return FAILED; | |||
GELOGE(dmrt, "davinci_model: GetHcomCount fail! domi error: %u", dmrt); | |||
return dmrt; | |||
} | |||
// Only HCOMBROADCAST and HVDCALLBACKBROADCAST need to get the rootId | |||
dmrt = HcomOmeUtil::GetAllRootId(op_desc_, kernel_hccl_infos_); | |||
if (dmrt != SUCCESS) { | |||
GELOGE(FAILED, "davinci_model: Get rootId fail! domi error: %u", dmrt); | |||
return FAILED; | |||
GELOGE(dmrt, "davinci_model: Get rootId fail! domi error: %u", dmrt); | |||
return dmrt; | |||
} | |||
// GE's new process: hccl declares the number of streams required, creates a stream by GE, and sends it to hccl | |||
@@ -138,8 +138,8 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM | |||
uint32_t max_task_count; | |||
ret = rtGetMaxStreamAndTask(RT_NORMAL_STREAM, &max_stream_count, &max_task_count); | |||
if (ret != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "Get max stream and task count by rts failed."); | |||
return FAILED; | |||
GELOGE(RT_FAILED, "Get max stream and task count by rts failed."); | |||
return RT_ERROR_TO_GE_STATUS(ret); | |||
} | |||
max_node_of_hccl_stream_ = max_task_count / kMaxTaskOfStream; | |||
} | |||
@@ -153,8 +153,8 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM | |||
ReuseStream(created_stream_num, davinci_model); | |||
ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Create hccl stream failed."); | |||
return FAILED; | |||
GELOGE(RT_FAILED, "Create hccl stream failed."); | |||
return RT_ERROR_TO_GE_STATUS(ret); | |||
} | |||
} | |||
GELOGI("Initialize hccl slave stream success, hcclStreamNum =%ld", hccl_stream_num); | |||
@@ -179,14 +179,14 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode | |||
rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
// Create slave stream, inactive by default, activated by hccl | |||
rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
(void)rtStreamDestroy(stream); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GELOGD("hccl_stream addr is=%p", stream); | |||
int64_t remain_cap = max_node_of_hccl_stream_ - 1; | |||
@@ -250,8 +250,7 @@ Status HcclTaskInfo::UpdateArgs() { | |||
io_addrs.insert(io_addrs.end(), output_data_addrs_.begin(), output_data_addrs_.end()); | |||
io_addrs.insert(io_addrs.end(), workspace_data_addrs_.begin(), workspace_data_addrs_.end()); | |||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); | |||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||
GELOGI("HcclTaskInfo::UpdateArgs success."); | |||
return SUCCESS; | |||
@@ -72,11 +72,11 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
return FAILED;) | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
return FAILED;) | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
} | |||
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||
@@ -113,7 +113,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(input_output_addr)); | |||
void *workspace_base_addr = nullptr; | |||
rtError_t rt_ret = rtMalloc(&workspace_base_addr, kernel_ex_def.task_info_size(), RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error, ret: Ox%X", rt_ret); return FAILED;); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: Ox%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);); | |||
rt_ret = rtMemcpy(workspace_base_addr, kernel_ex_def.task_info_size(), kernel_ex_def.task_info().data(), | |||
kernel_ex_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = | |||
@@ -123,20 +124,23 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast<uintptr_t>(ext_info_addr_); | |||
rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
rt_ret = rtMemcpy(kernel_buf_, kernel_buf_size_, static_cast<void *>(&fwk_op_kernel), kernel_buf_size_, | |||
RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
GELOGI("KernelExTaskInfo knonw node Init Success."); | |||
return SUCCESS; | |||
} | |||
// 3. Set workspaceaddr, inputOutputDataAddr | |||
if (CopyTaskInfo(kernel_ex_def, rts_param, op_desc) != SUCCESS) { | |||
GELOGE(FAILED, "copy task info to workspace failed."); | |||
return FAILED; | |||
Status ge_ret = CopyTaskInfo(kernel_ex_def, rts_param, op_desc); | |||
if (ge_ret != SUCCESS) { | |||
GELOGE(ge_ret, "copy task info to workspace failed."); | |||
return ge_ret; | |||
} | |||
const vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc); | |||
@@ -155,11 +159,12 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
auto addrs_size = sizeof(uint64_t) * (io_addrs.size()); | |||
if (addrs_size > 0) { | |||
rtError_t rt_ret = rtMalloc(&input_output_addr_, addrs_size, RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); return RT_FAILED;) | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
rt_ret = rtMemcpy(input_output_addr_, addrs_size, io_addrs.data(), addrs_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||
return FAILED;) | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
@@ -177,11 +182,13 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
// 4. Return result | |||
rtError_t rt_ret = rtMalloc(&kernel_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
rt_ret = rtMemcpy(kernel_buf_, sizeof(STR_FWK_OP_KERNEL), static_cast<void *>(&fwk_op_kernel), | |||
sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
davinci_model_->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), input_output_addr_, addrs_size, 0); | |||
@@ -254,9 +261,7 @@ Status KernelExTaskInfo::UpdateArgs() { | |||
} | |||
} | |||
} | |||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); | |||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||
GELOGI("KernelExTaskInfo::UpdateArgs success."); | |||
return SUCCESS; | |||
} | |||
@@ -286,8 +291,8 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const | |||
rtError_t rt_ret = rtMemcpy(workspace_data_addrs[0], kernel_def.task_info_size(), kernel_def.task_info().data(), | |||
kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "rtMemcpy error: 0x%X", rt_ret); | |||
return FAILED; | |||
GELOGE(RT_FAILED, "rtMemcpy error: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
return SUCCESS; | |||
@@ -298,7 +303,7 @@ Status KernelExTaskInfo::Distribute() { | |||
rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
if (davinci_model_ == nullptr) { | |||
@@ -311,7 +316,7 @@ Status KernelExTaskInfo::Distribute() { | |||
rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
task_id_ = task_id; | |||
stream_id_ = stream_id; | |||
@@ -326,7 +331,7 @@ Status KernelExTaskInfo::Release() { | |||
rtError_t rt_ret = rtFree(kernel_buf_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGW("rtFree error, ret: 0x%X", rt_ret); | |||
ret = FAILED; | |||
ret = RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} else { | |||
kernel_buf_ = nullptr; | |||
} | |||
@@ -335,7 +340,7 @@ Status KernelExTaskInfo::Release() { | |||
rtError_t rt_ret = rtFree(input_output_addr_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGW("rtFree error, ret: 0x%X", rt_ret); | |||
ret = FAILED; | |||
ret = RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} else { | |||
input_output_addr_ = nullptr; | |||
} | |||
@@ -344,7 +349,7 @@ Status KernelExTaskInfo::Release() { | |||
rtError_t rt_ret = rtFree(ext_info_addr_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGW("rtFree ext_info_addr[%p] error, ret: 0x%X", ext_info_addr_, rt_ret); | |||
ret = FAILED; | |||
ret = RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} else { | |||
ext_info_addr_ = nullptr; | |||
} | |||
@@ -99,13 +99,13 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||
rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", | |||
kernel_def.stub_func().c_str()); | |||
return RT_FAILED;); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);); | |||
} else if (kernel_type_ != cce::ccKernelType::AI_CPU) { | |||
rtError_t rt_ret; | |||
rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); | |||
return RT_FAILED;); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);); | |||
} | |||
if (context.origin_op_index_size() > CC_FUSION_OP_MAX) { | |||
@@ -232,7 +232,7 @@ Status KernelTaskInfo::SuperKernelLaunch() { | |||
skt_info_.last_dump_flag); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "SuperKernelLaunch: Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
call_save_dump_ = true; | |||
GE_CHK_STATUS_RET(SKTFinalize(), "Skt finalize failed"); | |||
@@ -241,21 +241,24 @@ Status KernelTaskInfo::SuperKernelLaunch() { | |||
// Create super kernel factory | |||
skt::SuperKernelFactory *factory = &skt::SuperKernelFactory::GetInstance(); | |||
// Init super kernel factory | |||
if (factory->Init() != SUCCESS) { | |||
GELOGE(RT_FAILED, "SuperKernelLaunch: SuperKernelFactory init failed"); | |||
return RT_FAILED; | |||
Status ge_ret = factory->Init(); | |||
if (ge_ret != SUCCESS) { | |||
GELOGE(ge_ret, "SuperKernelLaunch: SuperKernelFactory init failed"); | |||
return ge_ret; | |||
} | |||
// Call the fuse API | |||
std::unique_ptr<skt::SuperKernel> superKernel = nullptr; | |||
if (factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel) != SUCCESS) { | |||
GELOGE(RT_FAILED, "SuperKernelLaunch: fuse call failed"); | |||
return RT_FAILED; | |||
ge_ret = factory->FuseKernels(skt_kernel_list, skt_arg_list, skt_info_.last_block_dim, superKernel); | |||
if (ge_ret != SUCCESS) { | |||
GELOGE(ge_ret, "SuperKernelLaunch: fuse call failed"); | |||
return ge_ret; | |||
} | |||
// Launch a super kernel | |||
skt_dump_flag_ = GetDumpFlag(); | |||
if (superKernel->Launch(skt_info_.last_stream, skt_dump_flag_) != SUCCESS) { | |||
GELOGE(RT_FAILED, "SuperKernelLaunch: launch failed"); | |||
return RT_FAILED; | |||
ge_ret = superKernel->Launch(skt_info_.last_stream, skt_dump_flag_); | |||
if (ge_ret != SUCCESS) { | |||
GELOGE(ge_ret, "SuperKernelLaunch: launch failed"); | |||
return ge_ret; | |||
} | |||
GELOGI("SuperKernelLaunch: success[skt_kernel_list size[%zu] skt_arg_list[%zu]]", skt_kernel_list.size(), | |||
skt_arg_list.size()); | |||
@@ -276,9 +279,9 @@ Status KernelTaskInfo::SaveSuperKernelInfo() { | |||
skt_info_.last_dump_flag = dump_flag_; | |||
skt_info_.dump_flag_list.push_back(dump_flag_); | |||
skt_info_.op_desc_list.push_back(op_desc_); | |||
skt_info_.dump_args_list.push_back(reinterpret_cast<uintptr_t>(dump_args_)); | |||
skt_info_.dump_args_list.push_back(reinterpret_cast<uintptr_t>(skt_dump_args_)); | |||
skt_info_.last_group_key = group_key_; | |||
skt_info_.last_dump_args = reinterpret_cast<uintptr_t>(dump_args_); | |||
skt_info_.last_dump_args = reinterpret_cast<uintptr_t>(skt_dump_args_); | |||
skt_info_.last_op = op_desc_; | |||
// last node in a stream, just launch | |||
if (IsMarkedLastNode()) { | |||
@@ -345,15 +348,15 @@ Status KernelTaskInfo::SuperKernelDistribute() { | |||
// 1.launch before | |||
ret = SuperKernelLaunch(); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Call SuperKernelLaunch failed!"); | |||
return FAILED; | |||
GELOGE(ret, "Call SuperKernelLaunch failed!"); | |||
return ret; | |||
} | |||
// 2.launch current | |||
rtError_t rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_, | |||
static_cast<rtSmDesc_t *>(sm_desc_), stream_, dump_flag_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return FAILED; | |||
return rt_ret; | |||
} | |||
call_save_dump_ = true; | |||
UpdateTaskId(); | |||
@@ -361,8 +364,8 @@ Status KernelTaskInfo::SuperKernelDistribute() { | |||
} else { | |||
ret = SaveSuperKernelInfo(); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "Call SuperKernelLaunch failed!"); | |||
return FAILED; | |||
GELOGE(ret, "Call SuperKernelLaunch failed!"); | |||
return ret; | |||
} | |||
GELOGI("Save Current task [block_dim:%u, size:%zu].", block_dim_, skt_info_.kernel_list.size()); | |||
} | |||
@@ -403,7 +406,7 @@ Status KernelTaskInfo::Distribute() { | |||
} | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
// set for task_id_ | |||
UpdateTaskId(); | |||
@@ -448,9 +451,7 @@ Status KernelTaskInfo::UpdateArgs() { | |||
} | |||
} | |||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||
"update known node %s zero copy addr failed.", op_desc_->GetName().c_str()); | |||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||
GELOGI("KernelTaskInfo::UpdateArgs success."); | |||
return SUCCESS; | |||
} | |||
@@ -459,26 +460,31 @@ Status KernelTaskInfo::Release() { | |||
if (davinci_model_ != nullptr && davinci_model_->IsKnownNode()) { | |||
return SUCCESS; | |||
} | |||
FreeRtMem(&args_); | |||
FreeRtMem(&superkernel_device_args_addr_); | |||
FreeRtMem(&superkernel_dev_nav_table_); | |||
FreeRtMem(&flowtable_); | |||
FreeRtMem(&custom_info_.input_descs); | |||
FreeRtMem(&custom_info_.input_addrs); | |||
FreeRtMem(&custom_info_.output_descs); | |||
FreeRtMem(&custom_info_.output_addrs); | |||
FreeRtMem(&custom_info_.attr_handle); | |||
FreeRtMem(&aicpu_ext_info_addr_); | |||
rtContext_t ctx = nullptr; | |||
rtError_t ret = rtCtxGetCurrent(&ctx); | |||
if (ret == RT_ERROR_NONE) { | |||
FreeRtMem(&args_); | |||
FreeRtMem(&superkernel_device_args_addr_); | |||
FreeRtMem(&superkernel_dev_nav_table_); | |||
FreeRtMem(&flowtable_); | |||
FreeRtMem(&custom_info_.input_descs); | |||
FreeRtMem(&custom_info_.input_addrs); | |||
FreeRtMem(&custom_info_.output_descs); | |||
FreeRtMem(&custom_info_.output_addrs); | |||
FreeRtMem(&custom_info_.attr_handle); | |||
FreeRtMem(&aicpu_ext_info_addr_); | |||
} | |||
if (ctx_.argsOffset != nullptr) { | |||
delete[] ctx_.argsOffset; | |||
ctx_.argsOffset = nullptr; | |||
} | |||
rtError_t ret = (sm_desc_ != nullptr) ? rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE; | |||
ret = (sm_desc_ != nullptr) ? rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE; | |||
if (ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", static_cast<int>(ret)); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(ret); | |||
} | |||
sm_desc_ = nullptr; | |||
@@ -508,13 +514,13 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||
rtError_t rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
return SUCCESS; | |||
@@ -591,14 +597,14 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
// copy orign args | |||
rt_ret = rtMemcpy(args_, args_size_, kernel_def.args().data(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
vector<uint8_t> args_info(args_size_); | |||
errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_); | |||
@@ -617,7 +623,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
kAddrLen * tensor_device_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(), | |||
kAddrLen * tensor_device_addrs.size()); | |||
@@ -625,16 +631,17 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
return FAILED; | |||
} | |||
skt_dump_args_ = static_cast<char *>(args_) + offset; | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
dump_args_ = static_cast<char *>(args_) + offset; | |||
} | |||
Status ge_ret = UpdateL2Data(kernel_def); | |||
// update origin l2 data | |||
if (UpdateL2Data(kernel_def) != SUCCESS) { | |||
return RT_FAILED; | |||
if (ge_ret != SUCCESS) { | |||
return ge_ret; | |||
} | |||
vector<void *> virtual_io_addrs; // use virtual address for zero copy key. | |||
@@ -698,13 +705,13 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel | |||
rtError_t rt_ret = rtMalloc(&custom_info_.attr_handle, op_attr_size, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtMemcpy(custom_info_.attr_handle, op_attr_size, buffer.GetData(), op_attr_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
// args | |||
@@ -731,14 +738,14 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel | |||
rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = | |||
rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
davinci_model_->SetZeroCopyAddr(op_desc, input_data_addrs, input_data_addrs.data(), custom_info_.input_addrs, | |||
@@ -784,7 +791,8 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { | |||
ctx_.genVariableBaseSize = davinci_model_->TotalVarMemSize(); | |||
ctx_.l2ctrlSize = sm_contrl_size; | |||
if (UpdateCceArgs(sm_desc, flowtable, kernel_def) != SUCCESS) { | |||
ret = UpdateCceArgs(sm_desc, flowtable, kernel_def); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "update cce args fail"); | |||
return ret; | |||
} | |||
@@ -800,7 +808,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { | |||
rtError_t rt_ret = rtMalloc(&args_, kernel_def.args_size(), RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", kernel_def.args_size()) | |||
@@ -808,7 +816,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { | |||
rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
// L2 | |||
@@ -816,13 +824,13 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { | |||
rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
} | |||
return SUCCESS; | |||
@@ -883,7 +891,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api(rtMalloc) failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", args_size_) | |||
@@ -891,7 +899,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
@@ -912,12 +920,12 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
return FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
return SUCCESS; | |||
@@ -934,7 +942,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||
rtError_t rt_ret = rtMalloc(&custom_info_.input_descs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
for (std::size_t i = 0; i < input_size; ++i) { | |||
@@ -942,7 +950,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||
const_cast<tagOpTensor *>(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
} | |||
@@ -950,7 +958,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||
rt_ret = rtMalloc(&custom_info_.input_addrs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
if (!input_data_addrs.empty()) { | |||
@@ -958,7 +966,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||
RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
} | |||
@@ -966,14 +974,14 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||
rt_ret = rtMalloc(&custom_info_.output_descs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
for (std::size_t i = 0; i < output_size; ++i) { | |||
rt_ret = rtMemcpy(static_cast<opTensor_t *>(custom_info_.output_descs) + i, sizeof(opTensor_t), | |||
const_cast<tagOpTensor *>(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
} | |||
@@ -981,7 +989,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||
rt_ret = rtMalloc(&custom_info_.output_addrs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
if (!output_data_addrs.empty()) { | |||
@@ -989,7 +997,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d | |||
RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
} | |||
@@ -1051,8 +1059,8 @@ Status KernelTaskInfo::UpdateCceArgs(std::string &sm_desc, std::string &flowtabl | |||
Status status = | |||
CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def); | |||
if (status != SUCCESS) { | |||
GELOGE(FAILED, "Call cce api failed"); | |||
return FAILED; | |||
GELOGE(status, "Call cce api failed"); | |||
return status; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -1118,14 +1126,14 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe | |||
rtError_t rt_ret = rtMalloc(&flowtable_, flowtable.size(), RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "flowtable refresh of cce scence.", flowtable.size()) | |||
rt_ret = rtMemcpy(flowtable_, flowtable.size(), flowtable.data(), flowtable.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
// modify flowtable addr in args | |||
@@ -165,6 +165,7 @@ class KernelTaskInfo : public TaskInfo { | |||
void *aicpu_ext_info_addr_ = nullptr; | |||
// For super kernel | |||
void *skt_dump_args_ = nullptr; | |||
uint32_t skt_id_; | |||
std::string stub_func_name_; | |||
bool is_l1_fusion_enable_; | |||
@@ -59,7 +59,7 @@ Status LabelGotoExTaskInfo::Distribute() { | |||
rtError_t rt_ret = rtLabelGotoEx(label_, stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GELOGI("LabelGotoExTaskInfo Distribute Success."); | |||
@@ -59,7 +59,7 @@ Status LabelSetTaskInfo::Distribute() { | |||
rtError_t rt_ret = rtLabelSet(label_, stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GELOGI("LabelSetTaskInfo Distribute Success."); | |||
@@ -98,13 +98,13 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo | |||
rtError_t rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtLabelListCpy(label_list_.data(), label_list_.size(), args_, args_size_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GELOGI("LabelSwitchByIndexTaskInfo Init success, branch max: %u.", branch_max_); | |||
@@ -19,6 +19,10 @@ | |||
#include "framework/common/debug/ge_log.h" | |||
#include "graph/load/new_model_manager/davinci_model.h" | |||
namespace { | |||
const uint32_t kAlignBytes = 64; | |||
} | |||
namespace ge { | |||
Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
GELOGI("MemcpyAddrAsyncTaskInfo Init Start"); | |||
@@ -55,39 +59,40 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel | |||
// malloc args memory | |||
size_t args_size = sizeof(void *) * io_addrs.size(); | |||
rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM); | |||
rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
args_align_ = reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(args_) / kAlignBytes + 1) * kAlignBytes); | |||
// copy orign src/dst | |||
GELOGI("src_args:%p, destMax:%zu, src_:%p, dst_args:%p, dst_:%p, count=%zu", args_, args_size, src_, | |||
static_cast<uint8_t *>(args_) + args_size, dst_, io_addrs.size()); | |||
rt_ret = rtMemcpy(args_, args_size, io_addrs.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
GELOGI("src_args:%p, destMax:%zu, src_:%p, dst_args:%p, dst_:%p, count=%zu", args_align_, args_size, src_, | |||
static_cast<uint8_t *>(args_align_) + args_size, dst_, io_addrs.size()); | |||
rt_ret = rtMemcpy(args_align_, args_size, io_addrs.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api for src failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
count_ = memcpy_async.count(); | |||
kind_ = memcpy_async.kind(); | |||
dst_max_ = memcpy_async.dst_max(); | |||
GELOGI("InitMemcpyAddrAsyncTaskInfo, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu, args:%p, size:%zu", | |||
memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_, args_, args_size); | |||
memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_, args_align_, args_size); | |||
davinci_model->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), args_, args_size, 0); | |||
davinci_model->SetZeroCopyAddr(op_desc, io_addrs, io_addrs.data(), args_align_, args_size, 0); | |||
return SUCCESS; | |||
} | |||
Status MemcpyAddrAsyncTaskInfo::Distribute() { | |||
GELOGI("MemcpyAddrAsyncTaskInfo Distribute Start, dst_max:%lu, count:%lu, kind:%u", dst_max_, count_, kind_); | |||
rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_) + sizeof(void *)), | |||
dst_max_, args_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); | |||
rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_align_) + sizeof(void *)), | |||
dst_max_, args_align_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
return SUCCESS; | |||
@@ -22,7 +22,8 @@ | |||
namespace ge { | |||
class MemcpyAddrAsyncTaskInfo : public TaskInfo { | |||
public: | |||
MemcpyAddrAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), args_(nullptr), count_(0), kind_(0) {} | |||
MemcpyAddrAsyncTaskInfo() | |||
: dst_(nullptr), dst_max_(0), src_(nullptr), args_(nullptr), args_align_(nullptr), count_(0), kind_(0) {} | |||
~MemcpyAddrAsyncTaskInfo() override { | |||
src_ = nullptr; | |||
@@ -46,6 +47,7 @@ class MemcpyAddrAsyncTaskInfo : public TaskInfo { | |||
uint64_t dst_max_; | |||
uint8_t *src_; | |||
void *args_; | |||
void *args_align_; | |||
uint64_t count_; | |||
uint32_t kind_; | |||
}; | |||
@@ -68,7 +68,7 @@ Status MemcpyAsyncTaskInfo::Distribute() { | |||
rtError_t rt_ret = rtMemcpyAsync(dst_, dst_max_, src_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return RT_FAILED; | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
GELOGI("MemcpyAsyncTaskInfo Distribute Success"); | |||
@@ -102,8 +102,7 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { | |||
io_addrs.emplace_back(reinterpret_cast<void *>(src_)); | |||
io_addrs.emplace_back(reinterpret_cast<void *>(dst_)); | |||
GE_CHK_STATUS_RET(davinci_model_->UpdateKnownZeroCopyAddr(io_addrs, args_offset_), | |||
"update memcpyasync in known node zero copy addr failed."); | |||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||
GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success."); | |||
return SUCCESS; | |||