@@ -68,11 +68,12 @@ elseif(DEFINED ENV{D_LINK_PATH}) | |||
find_library(slog libslog.so ${GE_LIB_PATH}) | |||
find_library(mmpa libmmpa.so ${GE_LIB_PATH}) | |||
find_library(runtime libruntime.so ${GE_LIB_PATH}) | |||
find_library(msprof libmsprof.so ${GE_LIB_PATH}) | |||
find_library(msprof libmsprofiler.a ${GE_LIB_PATH}) | |||
find_library(register libregister.so ${GE_LIB_PATH}) | |||
find_library(hccl libhccl.so ${GE_LIB_PATH}) | |||
find_library(resource libresource.so ${GE_LIB_PATH}) | |||
find_library(error_manager liberror_manager.so ${GE_LIB_PATH}) | |||
find_library(adump_server libadump_server.a ${GE_LIB_PATH}) | |||
else() | |||
# Ascend mode | |||
if(DEFINED ENV{ASCEND_CUSTOM_PATH}) | |||
@@ -84,7 +85,7 @@ else() | |||
set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64) | |||
find_library(slog libslog.so ${ASCEND_DRIVER_DIR}) | |||
find_library(mmpa libmmpa.so ${ASCEND_DRIVER_DIR}) | |||
find_library(msprof libmsprof.so ${ASCEND_DRIVER_DIR}) | |||
find_library(msprof libmsprofiler.a ${ASCEND_RUNTIME_DIR}) | |||
find_library(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | |||
find_library(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | |||
@@ -1,7 +1,7 @@ | |||
graphengine_add_pkg(securec | |||
VER 1.1.10 | |||
URL https://gitee.com/openeuler/bounds_checking_function/repository/archive/v1.1.10.tar.gz | |||
MD5 0782dd2351fde6920d31a599b23d8c91 | |||
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz | |||
MD5 193f0ca5246c1dd84920db34d2d8249f | |||
LIBS c_sec | |||
PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001 | |||
CMAKE_OPTION "-DCMAKE_BUILD_TYPE=Release" | |||
@@ -61,8 +61,10 @@ class StringUtils { | |||
/// @param [in] delim separator | |||
/// @return string array after segmentation | |||
/// | |||
/*lint -e1077*/ | |||
static std::vector<std::string> Split(const std::string &str, char delim) { | |||
std::vector<std::string> elems; | |||
/*lint +e1077*/ | |||
if (str.empty()) { | |||
elems.emplace_back(""); | |||
@@ -92,6 +92,9 @@ struct OmgContext { | |||
std::map<std::string, std::vector<int32_t>> out_nodes_map; | |||
// user-designated out nodes (this is used for determining the order) | |||
std::vector<std::pair<std::string, int32_t>> user_out_nodes; | |||
// save the output node of the network, value = topName, | |||
// topName indicates the output name of the operator. | |||
std::vector<std::string> user_out_nodes_top_vec; | |||
// net out nodes (where user_out_nodes or leaf nodes) | |||
std::vector<std::string> net_out_nodes; | |||
// net out nodes top names(only caffe has top) | |||
@@ -1052,6 +1052,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE; | |||
// op dynamic input | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_START; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_END; | |||
// functional ops attr | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_ELSE_BRANCH; | |||
@@ -235,7 +235,8 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder { | |||
vector<string> GetOpInferDepends() const; | |||
string GetInputNameByIndex(uint32_t index) const; | |||
string GetValidInputNameByIndex(uint32_t index) const; | |||
int GetValidInputIndexByName(const string &name) const; | |||
int GetInputIndexByName(const string &name) const; | |||
string GetOutputNameByIndex(uint32_t index) const; | |||
@@ -22,8 +22,10 @@ | |||
template <class E, class O> | |||
class RangeVistor { | |||
public: | |||
/*lint -e151*/ | |||
using Iterator = typename std::vector<E>::iterator; | |||
using ConstIterator = typename std::vector<E>::const_iterator; | |||
/*lint +e151*/ | |||
RangeVistor(O owner, const std::vector<E> &vs) : owner_(owner), elements_(vs) {} | |||
@@ -41,7 +43,9 @@ class RangeVistor { | |||
bool empty() const { return elements_.empty(); } | |||
/*lint -e659*/ | |||
E &at(std::size_t index) { return elements_.at(index); } | |||
/*lint +e659*/ | |||
const E &at(std::size_t index) const { return elements_.at(index); } | |||
@@ -53,6 +53,7 @@ class OpDescUtils { | |||
static vector<GeTensorPtr> MutableWeights(const ge::NodePtr node); | |||
static graphStatus SetWeights(ge::Node& node, const vector<ge::GeTensorPtr>& weights); | |||
static graphStatus SetWeights(ge::NodePtr node, const vector<ge::GeTensorPtr>& weights); | |||
static graphStatus SetWeights(ge::Node& node, const map<int, ge::GeTensorPtr>& weights_map); | |||
static graphStatus ClearWeights(ge::NodePtr node); | |||
static bool ClearInputDesc(ge::OpDescPtr op_desc, uint32_t index); | |||
@@ -28,7 +28,7 @@ using std::unordered_set; | |||
/// @brief Fill this holder's attribute map from another holder by handing the
///        other holder's attr map to CopyValueFrom() on our mutable attr map.
/// @param [in] holder source of the attributes
void AttrHolder::CopyAttrsFrom(const AttrHolder &holder) {
  MutableAttrMap().CopyValueFrom(holder.GetAttrMap());
}
graphStatus AttrHolder::SetAttr(const std::string &name, const GeAttrValue &value) { | |||
if (value.IsEmpty()) { | |||
GELOGE(GRAPH_FAILED, "value is empty, key %s", name.c_str()); | |||
GELOGE(GRAPH_FAILED, "value is empty, key of the attr is %s", name.c_str()); | |||
return GRAPH_FAILED; | |||
} | |||
auto proto_map = MutableAttrMap().GetProtoMsg(); | |||
@@ -1060,6 +1060,10 @@ const std::string ATTR_NAME_HCCL_FUSED_FLAG = "_hccl_fused_node"; | |||
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr"; | |||
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index"; | |||
// op dynamic input | |||
const std::string ATTR_NAME_DYNAMIC_INPUT_START = "_dynamic_input_index_start"; | |||
const std::string ATTR_NAME_DYNAMIC_INPUT_END = "_dynamic_input_index_end"; | |||
// atc user def dtype&format | |||
const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type"; | |||
const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format"; | |||
@@ -762,9 +762,10 @@ graphStatus Node::Verify() const { | |||
if (!is_unknown_graph) { | |||
for (const auto &in_anchor_ptr : GetAllInDataAnchors()) { | |||
GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue); | |||
bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type || | |||
op_->GetType() == const_type || op_->GetType() == variable_type || | |||
op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0; | |||
bool valid_anchor = | |||
op_->GetType() == data_type || op_->GetType() == aipp_data_type || op_->GetType() == const_type || | |||
op_->GetType() == variable_type || op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || | |||
op_->MutableInputDesc(in_anchor_ptr->GetIdx()) == nullptr || in_anchor_ptr->GetPeerAnchors().size() > 0; | |||
if (!valid_anchor) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"}, | |||
{GetName(), std::to_string(in_anchor_ptr->GetIdx())}); | |||
@@ -347,7 +347,10 @@ graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorD | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus | |||
OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { | |||
GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. index[%u]", index); | |||
if (index >= inputs_desc_.size()) { | |||
GELOGW("The index is invalid. index[%u]", index); | |||
return GRAPH_FAILED; | |||
} | |||
inputs_desc_[index] = ComGraphMakeShared<GeTensorDesc>(tensor_Desc); | |||
if (inputs_desc_[index] == nullptr) { | |||
@@ -949,6 +952,43 @@ int OpDesc::GetInputIndexByName(const string &name) const { | |||
return static_cast<int>(it_find->second); | |||
} | |||
int OpDesc::GetValidInputIndexByName(const string &name) const { | |||
map<string, uint32_t> valid_input_name_idx{}; | |||
uint32_t j = 0; | |||
for (size_t i = 0; i < GetAllInputsSize(); i++) { | |||
if (MutableInputDesc(static_cast<uint32_t>(i)) != nullptr) { | |||
auto valid_name = GetInputNameByIndex(static_cast<uint32_t>(i)); | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), -1); | |||
valid_input_name_idx.insert({valid_name, j}); | |||
j++; | |||
} | |||
} | |||
auto it_find = valid_input_name_idx.find(name); | |||
GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != valid_input_name_idx.end(), -1); | |||
return static_cast<int>(it_find->second); | |||
} | |||
/// @brief Get the name of the `index`-th valid input.
///        An input is treated as valid when MutableInputDesc() returns a non-null
///        descriptor for it; valid inputs are numbered from 0 in ascending input order.
/// @param [in] index position among the valid inputs
/// @return name of that input; an empty string when `index` is past the last valid
///         input, or when any valid input reports an empty name
string OpDesc::GetValidInputNameByIndex(uint32_t index) const {
  bool found = false;
  string matched_name;
  uint32_t valid_count = 0;
  const size_t total_inputs = GetAllInputsSize();
  for (size_t i = 0; i < total_inputs; ++i) {
    const auto input_idx = static_cast<uint32_t>(i);
    if (MutableInputDesc(input_idx) == nullptr) {
      continue;
    }
    const auto valid_name = GetInputNameByIndex(input_idx);
    // Keep scanning after a match: an empty name on any valid input fails the whole lookup.
    GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), "");
    // Match by position directly, so the result does not depend on a name-keyed
    // container silently dropping an entry when a name repeats.
    if (!found && valid_count == index) {
      matched_name = valid_name;
      found = true;
    }
    ++valid_count;
  }
  GE_CHK_BOOL_RET_STATUS_NOLOG(found, "");
  return matched_name;
}
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetOutputNameByIndex(uint32_t index) const { | |||
auto it = output_name_idx_.begin(); | |||
for (; it != output_name_idx_.end(); ++it) { | |||
@@ -56,7 +56,7 @@ class RefRelations::Impl { | |||
} | |||
return GRAPH_SUCCESS; | |||
} | |||
GELOGW("can not find any relations! key value is %s", lookup_key.c_str()); | |||
GELOGW("can not find any relations! key value of dest relation is %s", lookup_key.c_str()); | |||
return GRAPH_SUCCESS; | |||
}; | |||
graphStatus BuildRefRelations(ge::ComputeGraph &root_graph); | |||
@@ -560,6 +560,53 @@ OpDescUtils::SetWeights(ge::Node &node, const vector<ge::GeTensorPtr> &weights) | |||
return GRAPH_SUCCESS; | |||
} | |||
/// @brief Set weights on `node`, keyed by input index.
///        - If `node` itself is of type CONSTANT, `weights_map` must hold exactly
///          CONST_OP_NORMAL_WEIGHT_SIZE entries and the first tensor is set on the node.
///        - Otherwise, for every (input index, tensor) pair: when that input is already
///          linked to a peer node, the peer must be of type CONSTANT and its weight is
///          replaced; when the input is not linked, a new const op is created from the
///          tensor, added to the front of the owner graph and linked to that input.
///        Entries handled before a failing one are not rolled back.
/// @param [in] node node whose inputs receive the weights
/// @param [in] weights_map input index -> weight tensor
/// @return GRAPH_SUCCESS on success; GRAPH_PARAM_INVALID / GRAPH_FAILED (or the status
///         returned by the per-op SetWeights) on failure
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
OpDescUtils::SetWeights(ge::Node &node, const map<int, ge::GeTensorPtr> &weights_map) {
  GE_CHECK_NOTNULL(node.GetOpDesc());
  // 1. node is const
  if (node.GetOpDesc()->GetType() == CONSTANT) {
    if (weights_map.size() == CONST_OP_NORMAL_WEIGHT_SIZE) {
      return SetWeights(node.GetOpDesc(), weights_map.begin()->second);
    }
    GELOGE(GRAPH_PARAM_INVALID, "const op %s weight size %zu should be 1", node.GetName().c_str(), weights_map.size());
    return GRAPH_PARAM_INVALID;
  }
  // 2. node is not const
  for (const auto &pair : weights_map) {
    auto in_data_anchor = node.GetInDataAnchor(pair.first);
    if (in_data_anchor != nullptr && in_data_anchor->GetPeerOutAnchor() != nullptr) {
      // a. update const input node
      auto out_anchor = in_data_anchor->GetPeerOutAnchor();
      auto peer_node = out_anchor->GetOwnerNode();
      if (peer_node == nullptr) {
        GELOGE(GRAPH_PARAM_INVALID, "op %s [%d]'s input node is null", node.GetName().c_str(), pair.first);
        return GRAPH_PARAM_INVALID;
      }
      if (peer_node->GetType() != CONSTANT) {
        GELOGE(GRAPH_PARAM_INVALID, " op %s [%d]'s input node should be const, but is %s type:%s ",
               node.GetName().c_str(), pair.first, peer_node->GetName().c_str(), peer_node->GetType().c_str());
        // Stop here instead of writing a weight into a node that was just reported as invalid.
        // NOTE(review): previously execution fell through and still set the weight; confirm no
        // caller relies on that for peers of other const-like types.
        return GRAPH_PARAM_INVALID;
      }
      const graphStatus set_ret = SetWeights(peer_node->GetOpDesc(), pair.second);
      if (set_ret != GRAPH_SUCCESS) {
        GELOGE(set_ret, "op %s [%d]'s const input node %s set weights failed", node.GetName().c_str(), pair.first,
               peer_node->GetName().c_str());
        return set_ret;
      }
    } else {
      // b. create new const input node
      auto const_opdesc = CreateConstOp(pair.second);
      GE_CHECK_NOTNULL(const_opdesc);
      auto owner_graph = node.GetOwnerComputeGraph();
      if (owner_graph == nullptr) {
        GELOGE(GRAPH_PARAM_INVALID, "node's graph is empty, name: %s", node.GetName().c_str());
        return GRAPH_PARAM_INVALID;
      }
      auto const_node = owner_graph->AddNodeFront(const_opdesc);
      if (const_node == nullptr) {
        GELOGE(GRAPH_FAILED, "op %s add const node for input index[%d] to graph failed", node.GetName().c_str(),
               pair.first);
        return GRAPH_FAILED;
      }
      if (node.AddLinkFrom(static_cast<uint32_t>(pair.first), const_node) != GRAPH_SUCCESS) {
        GELOGE(GRAPH_FAILED, "op %s add const to input index[%d] failed", node.GetName().c_str(), pair.first);
        return GRAPH_FAILED;
      }
    }
  }
  NodeUtils::UpdateIsInputConst(node);
  return GRAPH_SUCCESS;
}
OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) { | |||
GE_CHK_BOOL_EXEC(tensor_ptr != nullptr, return nullptr, "tensor_ptr is nullptr!"); | |||
shared_ptr<OpDesc> const_opdesc = ComGraphMakeShared<OpDesc>(); | |||
@@ -229,6 +229,7 @@ target_link_libraries(ge_runner | |||
${resouce} | |||
${ascend_hal} | |||
${adump_server} | |||
${msprofiler} | |||
rt | |||
dl) | |||
@@ -358,7 +359,10 @@ add_library(ge_compiler SHARED ${INFER_SRC_LIST} ${PROTO_SRCS} ${PROTO_HEADER_HD | |||
target_compile_definitions(ge_compiler PRIVATE | |||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
REUSE_MEMORY=1 | |||
FMK_HOST_INFER) | |||
FMK_HOST_INFER | |||
FMK_SUPPORT_DUMP | |||
COMPILE_OMG_PACKAGE | |||
REUSE_MEMORY=1) | |||
target_link_libraries(ge_compiler | |||
graph | |||
ge_common | |||
@@ -68,5 +68,7 @@ target_link_libraries(ge_client | |||
${mmpa} | |||
${runtime} | |||
${msprof} | |||
${msprofiler} | |||
${ascend_hal} | |||
rt | |||
dl) |
@@ -16,6 +16,7 @@ | |||
#include "ge/ge_api.h" | |||
#include <iostream> | |||
#include <malloc.h> | |||
#include "common/debug/log.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "common/ge/datatype_util.h" | |||
@@ -163,6 +164,9 @@ Status GEFinalize() { | |||
g_ge_initialized = false; | |||
} | |||
// to reduce memory fragmentation, call malloc_trim to return freed heap memory to the system | |||
malloc_trim(0); | |||
GELOGT(TRACE_STOP, "GEFinalize finished"); | |||
return ret; | |||
} | |||
@@ -70,9 +70,10 @@ LOCAL_SHARED_LIBRARIES := \ | |||
libregister \ | |||
libge_compiler \ | |||
libge_common \ | |||
libmsprof | |||
libmsprof \ | |||
stub/libascend_hal | |||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||
LOCAL_LDFLAGS := -lrt -ldl | |||
@@ -107,6 +108,7 @@ LOCAL_SHARED_LIBRARIES := \ | |||
libge_common \ | |||
libmsprof | |||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||
LOCAL_LDFLAGS := -lrt -ldl | |||
LOCAL_CFLAGS += \ | |||
@@ -172,18 +172,18 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||
return RT_FAILED; | |||
} | |||
constexpr int32_t ioAddrNum = 2; | |||
constexpr uint32_t argsSize = sizeof(aicpu::AicpuParamHead) + ioAddrNum * sizeof(uint64_t); | |||
char args[argsSize] = {0}; | |||
auto paramHead = reinterpret_cast<aicpu::AicpuParamHead *>(args); | |||
paramHead->length = argsSize; | |||
paramHead->ioAddrNum = ioAddrNum; | |||
auto ioAddr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead)); | |||
ioAddr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_); | |||
ioAddr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_); | |||
constexpr int32_t io_addr_num = 2; | |||
constexpr uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addr_num * sizeof(uint64_t); | |||
char args[args_size] = {0}; | |||
auto param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args); | |||
param_head->length = args_size; | |||
param_head->ioAddrNum = io_addr_num; | |||
auto io_addr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead)); | |||
io_addr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_); | |||
io_addr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_); | |||
rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp, | |||
1, // blockDim default 1 | |||
args, argsSize, | |||
args, args_size, | |||
nullptr, // no need smDesc | |||
stream_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
@@ -34,7 +34,7 @@ std::map<ge::DataType, std::vector<ge::DataType>> g_reverse_translatable_data_ty | |||
{ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, | |||
{ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; | |||
static const std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = { | |||
std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = { | |||
// key:ge datatype,value:proto datatype | |||
{ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, | |||
{ge::DT_FLOAT, ge::proto::DT_FLOAT}, | |||
@@ -51,12 +51,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana | |||
return profiling_manager; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options, | |||
bool convert_2_phy_device_id) { | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
vector<int32_t>().swap(device_id_); | |||
job_id_ = options.job_id; | |||
GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str()); | |||
Status ret; | |||
if (!recv_profiling_config_.empty()) { | |||
GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str()); | |||
@@ -64,18 +65,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
} else { | |||
ret = InitFromOptions(options); | |||
if (ret == SUCCESS && is_load_profiling_) { | |||
// profiling need phy device id | |||
if (!convert_2_phy_device_id) { | |||
device_id_.push_back(options.device_id); | |||
} else { | |||
uint32_t phy_device_id = 0; | |||
rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(options.device_id), &phy_device_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); | |||
return FAILED; | |||
} | |||
device_id_.push_back(phy_device_id); | |||
} | |||
device_id_.push_back(options.device_id); | |||
} | |||
} | |||
if (ret != SUCCESS) { | |||
@@ -557,25 +547,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||
return; | |||
} | |||
GELOGI("current logic_device_id:%d", logic_device_id); | |||
uint32_t phy_device_id = 0; | |||
rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); | |||
return; | |||
} | |||
GELOGI("current phy_device_id:%d", phy_device_id); | |||
if (!is_acl_api_mode_) { | |||
auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||
auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | |||
if (ret == device_id_.end()) { | |||
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | |||
return; | |||
} | |||
} | |||
GELOGI("start ProfilingTaskDescInfo."); | |||
ProfilingTaskDescInfo(task_desc_info, phy_device_id); | |||
ProfilingTaskDescInfo(task_desc_info, logic_device_id); | |||
GELOGI("start ProfilingGraphDescInfo."); | |||
ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id); | |||
ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id); | |||
GELOGI("Report profiling data for GE end."); | |||
#endif | |||
} | |||
@@ -69,7 +69,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
ProfilingManager(); | |||
virtual ~ProfilingManager(); | |||
static ProfilingManager &Instance(); | |||
ge::Status Init(const Options &options, bool convert_2_phy_device_id = false); | |||
ge::Status Init(const Options &options); | |||
ge::Status InitFromOptions(const Options &options); | |||
ge::Status InitFromAclCfg(const std::string &config); | |||
ge::Status StartProfiling(int32_t iter, int32_t device_id); | |||
@@ -472,7 +472,7 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str | |||
return true; | |||
} | |||
ret = regexec(®, str.c_str(), 0, nullptr, 0); | |||
ret = regexec(®, str.c_str(), 0, NULL, 0); | |||
if (ret) { | |||
regerror(ret, ®, ebuff, kMaxBuffSize); | |||
GELOGE(ge::PARAM_INVALID, "regexec failed, reason: %s", ebuff); | |||
@@ -120,6 +120,7 @@ target_link_libraries(ge_executor | |||
${mmpa} | |||
${msprof} | |||
${error_manager} | |||
${ascend_hal} | |||
rt | |||
dl) | |||
@@ -89,6 +89,7 @@ local_ge_executor_shared_library := \ | |||
libregister \ | |||
libmsprof \ | |||
liberror_manager \ | |||
libascend_hal | |||
local_ge_executor_ldflags := -lrt -ldl \ | |||
@@ -104,6 +105,7 @@ LOCAL_SRC_FILES := $(local_ge_executor_src_files) | |||
LOCAL_C_INCLUDES := $(local_ge_executor_c_include) | |||
LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library) | |||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||
ifeq ($(device_os),android) | |||
LOCAL_LDFLAGS += -ldl | |||
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog | |||
@@ -140,6 +142,9 @@ LOCAL_SHARED_LIBRARIES := \ | |||
libregister \ | |||
libmsprof \ | |||
liberror_manager \ | |||
stub/libascend_hal | |||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||
LOCAL_LDFLAGS += $(local_ge_executor_ldflags) | |||
@@ -355,7 +355,7 @@ LOCAL_MODULE := libge_compiler | |||
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | |||
# from ome_inference.mk | |||
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP | |||
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE | |||
ifeq ($(DEBUG), 1) | |||
LOCAL_CFLAGS += -g -O0 | |||
endif | |||
@@ -418,7 +418,7 @@ include $(CLEAR_VARS) | |||
LOCAL_MODULE := libge_compiler | |||
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE | |||
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 | |||
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP | |||
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE | |||
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION | |||
LOCAL_CFLAGS += -O2 | |||
LOCAL_MODULE_CLASS := SHARED_LIBRARIES | |||
@@ -42,7 +42,7 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge) | |||
######### libge_local_engine.so ############# | |||
add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||
target_compile_definitions(ge_local_engine PRIVATE Werror) | |||
target_compile_definitions(ge_local_engine PRIVATE Werror COMPILE_OMG_PACKAGE) | |||
target_link_libraries(ge_local_engine | |||
graph | |||
${PROTOBUF_LIBRARY} | |||
@@ -42,7 +42,7 @@ include ${BUILD_HOST_SHARED_LIBRARY} | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := atclib/libge_local_engine | |||
LOCAL_CFLAGS += -Werror | |||
LOCAL_CFLAGS += -std=c++11 | |||
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE | |||
LOCAL_LDFLAGS := | |||
LOCAL_STATIC_LIBRARIES := | |||
@@ -356,6 +356,7 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | |||
LOCAL_STATIC_LIBRARIES := libge_memory \ | |||
libadump_server \ | |||
libmsprofiler \ | |||
LOCAL_SHARED_LIBRARIES := \ | |||
libc_sec \ | |||
@@ -136,6 +136,13 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen | |||
bool attr) { | |||
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | |||
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | |||
auto format = tensor.GetFormat(); | |||
auto data_type = tensor.GetDataType(); | |||
if (format == FORMAT_RESERVED && data_type == DT_UNDEFINED) { | |||
return SUCCESS; | |||
} | |||
string op_type; | |||
if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { | |||
op_type = DATA; | |||
@@ -521,8 +528,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | |||
bool is_offline) { | |||
GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | |||
if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) { | |||
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize()); | |||
if (!inputs.empty() && (inputs.size() != op_desc->GetAllInputsSize())) { | |||
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetAllInputsSize()); | |||
return PARAM_INVALID; | |||
} | |||
if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { | |||
@@ -322,11 +322,19 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
GELOGE(ge::FAILED, | |||
"There is an atomic conflict between the current node and the peer out node, not supported!"); | |||
return ge::FAILED; | |||
} else if (is_loop_graph) { | |||
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start)); | |||
} else { | |||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}), | |||
"SetAtomicCleanAttr failed."); | |||
} | |||
const auto &in_control_anchor = node->GetInControlAnchor(); | |||
GE_CHECK_NOTNULL(in_control_anchor); | |||
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||
auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); | |||
if (peer_out_node->GetType() == ATOMICADDRCLEAN) { | |||
ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); | |||
return ret; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
@@ -840,68 +848,37 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt | |||
} | |||
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||
GE_CHECK_NOTNULL(compute_graph_); | |||
// Atomic op memory start addr | |||
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||
GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_); | |||
vector<NodePtr> connect_netoutput_nodes; | |||
for (auto &node : compute_graph_->GetAllNodes()) { | |||
auto node_op_desc = node->GetOpDesc(); | |||
if (node_op_desc == nullptr) { | |||
continue; | |||
} | |||
bool is_atomic = false; | |||
// If GetBool fail, is_atomic is false. | |||
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | |||
if (!is_atomic) { | |||
continue; | |||
} | |||
bool is_ref = false; | |||
// If GetBool fail, is_ref is false. | |||
(void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_REFERENCE, is_ref); | |||
if (is_ref) { | |||
GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and ref attribute.", | |||
node_op_desc->GetName().c_str()); | |||
return ge::PARAM_INVALID; | |||
} | |||
vector<int> is_connect_netoutput; | |||
// If GetBool fail, attr is_connect_netoutput is an empty vector. | |||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); | |||
if (!is_connect_netoutput.empty()) { | |||
connect_netoutput_nodes.emplace_back(node); | |||
continue; | |||
} | |||
map<NodePtr, vector<NodePtr>> normal_atomic_and_clean_nodes_map; | |||
vector<NodePtr> connecting_output_atomic_nodes; | |||
Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "Failed to filter atomic nodes for memory assignment."); | |||
return status; | |||
} | |||
// Atomic op memory start addr of loop graph | |||
int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||
vector<int64_t> mem_offset_end; | |||
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { | |||
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
for (auto &iter : normal_atomic_and_clean_nodes_map) { | |||
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||
GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start); | |||
/// In networks with loop op, atomic op uses atomic_addr_clean op independently, | |||
/// so we need to set the attr separately. | |||
if (is_loop_graph) { | |||
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, loop_graph_atomic_mem_start)); | |||
for (auto &atomic_node : iter.second) { | |||
vector<int64_t> mem_offset_end; | |||
status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.", | |||
atomic_node->GetName().c_str()); | |||
return status; | |||
} | |||
} | |||
} | |||
// In networks without loop op, the same atomic addr clean op is used for atomic op | |||
if (!is_loop_graph) { | |||
// Set the address attr of atomic clean operator | |||
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | |||
if (atomic_mem_size != 0) { | |||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}), | |||
"SetAtomicCleanAttr failed."); | |||
int64_t atomic_mem_size = static_cast<int64_t>(memory_offset_[0].mem_offset_) - atomic_mem_start; | |||
status = SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}); | |||
if (status != SUCCESS) { | |||
GELOGE(status, "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); | |||
return status; | |||
} | |||
} | |||
if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) { | |||
if (AssignConnectNetOutputAtomicMemory(connecting_output_atomic_nodes) != SUCCESS) { | |||
GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); | |||
return FAILED; | |||
} | |||
@@ -909,6 +886,55 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||
return SUCCESS; | |||
} | |||
/// @brief Walk every ATOMICADDRCLEAN node of compute_graph_ and classify the atomic nodes
///        reached through its out control anchor.
///        - Peers without the ATOMIC_ATTR_IS_ATOMIC_NODE attribute set to true are ignored.
///        - As soon as one atomic peer carries a non-empty ATTR_NAME_NODE_CONNECT_OUTPUT list,
///          that peer is appended to `connecting_output_atomic_nodes`, the peers collected so
///          far for this clean node are discarded, and the remaining peers are not visited.
///        - Otherwise the atomic peers are stored in `normal_atomic_nodes_map` under the
///          clean node (no entry is written when there are none).
/// @param [out] normal_atomic_nodes_map atomic-addr-clean node -> its ordinary atomic nodes
/// @param [out] connecting_output_atomic_nodes atomic nodes marked as connecting to the output
/// @return SUCCESS, or ge::PARAM_INVALID when an atomic node also has ATTR_NAME_REFERENCE set
Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map,
                                                             vector<NodePtr> &connecting_output_atomic_nodes) {
  GE_CHECK_NOTNULL(compute_graph_);
  for (const auto &node : compute_graph_->GetAllNodes()) {
    if (node->GetType() != ATOMICADDRCLEAN) {
      continue;
    }

    vector<NodePtr> tmp_normal_atomic_nodes;
    const auto &out_control_anchor = node->GetOutControlAnchor();
    GE_CHECK_NOTNULL(out_control_anchor);
    for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
      if (peer_in_control_anchor == nullptr) {
        continue;
      }
      auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
      if (peer_in_node == nullptr) {
        // Skip an anchor without an owner node rather than dereferencing it.
        GELOGW("owner node of peer in control anchor is null");
        continue;
      }
      auto peer_in_node_desc = peer_in_node->GetOpDesc();
      if (peer_in_node_desc == nullptr) {
        continue;
      }

      bool is_atomic_node = false;
      // If GetBool fail, is_atomic_node is false.
      (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
      if (!is_atomic_node) {
        continue;
      }

      bool is_reference = false;
      // If GetBool fail, is_reference is false.
      (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
      if (is_reference) {
        GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and is_reference attribute.",
               peer_in_node_desc->GetName().c_str());
        return ge::PARAM_INVALID;
      }

      vector<int> is_connecting_output;
      // If GetListInt fail, is_connecting_output stays an empty vector.
      (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
      if (!is_connecting_output.empty()) {
        // One output-connected atomic peer takes this clean node out of the normal map entirely.
        connecting_output_atomic_nodes.emplace_back(peer_in_node);
        tmp_normal_atomic_nodes.clear();
        break;
      }
      tmp_normal_atomic_nodes.emplace_back(peer_in_node);
    }

    if (!tmp_normal_atomic_nodes.empty()) {
      normal_atomic_nodes_map[node] = tmp_normal_atomic_nodes;
    }
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, | |||
vector<int64_t> &mem_offset_end) { | |||
auto node_op_desc = node->GetOpDesc(); | |||
@@ -1331,6 +1357,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||
vector<int64_t> memory_type; | |||
auto tmp_op_desc = node->GetOpDesc(); | |||
origin_input_list = tmp_op_desc->GetInputOffset(); | |||
int64_t valid_input_index = 0; | |||
bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); | |||
for (const auto &anchor : node->GetAllInDataAnchors()) { | |||
vector<int64_t> output_list; | |||
@@ -1344,8 +1371,9 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||
auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(last_peer_out_op_desc); | |||
output_list = last_peer_out_op_desc->GetOutputOffset(); | |||
if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) { | |||
auto input_index = anchor->GetIdx(); | |||
auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx()); | |||
if (output_list.size() > static_cast<size_t>(out_index)) { | |||
int64_t input_offset = output_list.at(out_index); | |||
if (has_mem_type_attr) { | |||
auto input_size = tmp_op_desc->GetInputsSize(); | |||
auto ori_input_offset_list_size = origin_input_list.size(); | |||
@@ -1359,26 +1387,21 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||
} | |||
// non-HBM (L1) memory: keep the original input offset | |||
// HBM memory: input offset = original input offset + output offset | |||
input_list.emplace_back(memory_type[input_index] == RT_MEMORY_L1 | |||
? origin_input_list[input_index] | |||
: origin_input_list[input_index] + output_list.at(peer_out_anchor->GetIdx())); | |||
GELOGI("fuison: node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", | |||
tmp_op_desc->GetName().c_str(), input_index, | |||
peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | |||
input_list.back()); | |||
} else { | |||
int64_t output_offset = output_list.at(peer_out_anchor->GetIdx()); | |||
const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); | |||
if (in_node->GetType() == CONSTANT) { | |||
GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index); | |||
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset)); | |||
} | |||
GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(), | |||
input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | |||
output_offset); | |||
input_list.emplace_back(output_offset); | |||
input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 | |||
? origin_input_list[valid_input_index] | |||
: origin_input_list[valid_input_index] + output_list.at(out_index)); | |||
} | |||
const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); | |||
if (in_node->GetType() == CONSTANT) { | |||
GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx())); | |||
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); | |||
} | |||
GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", | |||
has_mem_type_attr == true ? "Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index, | |||
peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index, input_offset); | |||
input_list.emplace_back(input_offset); | |||
valid_input_index++; | |||
} | |||
} | |||
return ge::SUCCESS; | |||
@@ -1473,125 +1496,49 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in | |||
return SUCCESS; | |||
} | |||
///
/// @brief Set the atomic-clean address attributes for a loop graph: every ATOMICADDRCLEAN node that
///        reaches `node` through an in-control edge receives the range starting at atomic_mem_start.
/// @param [in] node              node whose in-control peers are inspected
/// @param [in] atomic_mem_start  start offset of the atomic memory range
/// @return SUCCESS, or FAILED when SetAtomicCleanAttr reports an error
///
Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) {
  // The range runs from atomic_mem_start up to the current offset stored in memory_offset_[0].
  int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
  GELOGI("SetLoopGraphAtomicAttr beign, atomic_addr_clean start size is %ld, mem_size is %ld, mem_offset is %zu.",
         atomic_mem_start, atomic_mem_size, memory_offset_[0].mem_offset_);

  const auto &in_control_anchor = node->GetInControlAnchor();
  // Nothing to attach when the range is empty or the node has no in-control anchor.
  if ((atomic_mem_size == 0) || (in_control_anchor == nullptr)) {
    return SUCCESS;
  }

  for (auto &ctrl_src_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
    if (ctrl_src_anchor == nullptr) {
      continue;
    }
    auto ctrl_src_node = ctrl_src_anchor->GetOwnerNode();
    auto ctrl_src_desc = ctrl_src_node->GetOpDesc();
    if (ctrl_src_desc == nullptr) {
      continue;
    }
    GELOGD("SetLoopGraphAtomicAttr, node is %s, op type is %s.", ctrl_src_desc->GetName().c_str(),
           ctrl_src_desc->GetType().c_str());
    // Only atomic-address-clean operators carry the attribute.
    if (ctrl_src_desc->GetType() != ATOMICADDRCLEAN) {
      continue;
    }
    GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(ctrl_src_node, {atomic_mem_start}, {atomic_mem_size}),
                       GELOGE(FAILED, "SetAtomicCleanAttr failed.");
                       return FAILED);
  }
  return SUCCESS;
}
///
/// @brief Check whether `node` controls at least one atomic node that carries a non-empty
///        ATTR_NAME_NODE_CONNECT_OUTPUT list.
/// @param [in]  node                              node whose out-control peers are scanned
/// @param [out] is_independent_atomic_clean_node  set to true when such a peer is found;
///                                                left untouched otherwise
/// @return SUCCESS; the GE_CHECK_NOTNULL guards return early when `node` or its out-control anchor is null
///
ge::Status GraphMemoryAssigner::IsIndependentAtomicClean(const ge::NodePtr &node,
                                                         bool &is_independent_atomic_clean_node) {
  GE_CHECK_NOTNULL(node);
  const auto &out_control_anchor = node->GetOutControlAnchor();
  GE_CHECK_NOTNULL(out_control_anchor);

  for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
    if (peer_in_control_anchor == nullptr) {
      continue;
    }
    auto target_node = peer_in_control_anchor->GetOwnerNode();
    auto target_desc = target_node->GetOpDesc();
    if (target_desc == nullptr) {
      continue;
    }

    // A failed GetBool leaves the flag at false, i.e. the peer is treated as non-atomic.
    bool atomic_flag = false;
    (void)ge::AttrUtils::GetBool(target_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, atomic_flag);
    if (!atomic_flag) {
      continue;
    }

    // A failed GetListInt leaves the list empty, i.e. the peer is treated as not connected to output.
    vector<int> connect_output;
    (void)ge::AttrUtils::GetListInt(target_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, connect_output);
    if (connect_output.empty()) {
      continue;
    }

    GELOGD("Peer in node %s is independent atomic clean node", target_node->GetName().c_str());
    is_independent_atomic_clean_node = true;
    break;  // one match is enough
  }
  return SUCCESS;
}
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector<int64_t> &atomic_mem_start, | |||
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start, | |||
const vector<int64_t> &atomic_mem_size) { | |||
for (ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||
auto node_op_desc = node->GetOpDesc(); | |||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | |||
bool is_valid_atomic_clean_node = (n != nullptr) && (node->GetName() == n->GetName()); | |||
if (((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { | |||
bool is_independent_atomic_clean = false; | |||
if (IsIndependentAtomicClean(node, is_independent_atomic_clean) != SUCCESS) { | |||
GELOGE(FAILED, "Failed to determine the connection relationship of atomic addr clean node."); | |||
return PARAM_INVALID; | |||
} | |||
is_valid_atomic_clean_node = is_valid_atomic_clean_node || (!is_independent_atomic_clean); | |||
auto node_op_desc = node->GetOpDesc(); | |||
if (node_op_desc != nullptr) { | |||
GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); | |||
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace(); | |||
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); | |||
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||
node_op_desc->SetWorkspace(workspace_vector); | |||
node_op_desc->SetWorkspaceBytes(workspace_byte_vector); | |||
std::vector<int64_t> mem_start_vector; | |||
// If GetListInt fail, mem_start_vector is empty. | |||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | |||
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | |||
GELOGE(FAILED, "SetListInt failed."); | |||
return FAILED); | |||
std::vector<int64_t> mem_size_vector; | |||
// If GetListInt fail, mem_size_vector is empty. | |||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | |||
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | |||
GELOGE(FAILED, "SetListInt failed."); | |||
return FAILED); | |||
std::stringstream ss; | |||
for (auto iter : atomic_mem_start) { | |||
ss << iter << " "; | |||
} | |||
if (is_valid_atomic_clean_node) { | |||
GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); | |||
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace(); | |||
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); | |||
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||
node_op_desc->SetWorkspace(workspace_vector); | |||
node_op_desc->SetWorkspaceBytes(workspace_byte_vector); | |||
std::vector<int64_t> mem_start_vector; | |||
// If GetListInt fail, mem_start_vector is empty. | |||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | |||
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | |||
GELOGE(FAILED, "SetListInt failed."); | |||
return FAILED); | |||
std::vector<int64_t> mem_size_vector; | |||
// If GetListInt fail, mem_size_vector is empty. | |||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | |||
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | |||
GELOGE(FAILED, "SetListInt failed."); | |||
return FAILED); | |||
std::stringstream ss; | |||
for (auto iter : atomic_mem_start) { | |||
ss << iter << " "; | |||
} | |||
string atomic_mem_start_str = ss.str(); | |||
ss.clear(); | |||
ss.str(""); | |||
for (auto iter : atomic_mem_size) { | |||
ss << iter << " "; | |||
} | |||
string atomic_mem_size_str = ss.str(); | |||
GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", | |||
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), | |||
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); | |||
string atomic_mem_start_str = ss.str(); | |||
ss.clear(); | |||
ss.str(""); | |||
for (auto iter : atomic_mem_size) { | |||
ss << iter << " "; | |||
} | |||
string atomic_mem_size_str = ss.str(); | |||
GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", | |||
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), | |||
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -135,6 +135,9 @@ class GraphMemoryAssigner { | |||
ge::Status ReAssignAtomicMemory(bool is_loop_graph); | |||
ge::Status FilterAtomicNodesForMemoryAssign(std::map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map, | |||
std::vector<NodePtr> &connecting_output_atomic_nodes); | |||
ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | |||
int64_t &continuous_mem_size); | |||
@@ -165,14 +168,8 @@ class GraphMemoryAssigner { | |||
ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | |||
const std::vector<int64_t> &mem_offset_end); | |||
/// | |||
/// @brief set loop graph atomic attr | |||
/// @param node, atomic memory assignment start offset | |||
/// @param atomic_mem_start: atomic op memory start address | |||
/// | |||
ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start); | |||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector<int64_t> &atomic_mem_start, | |||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start, | |||
const std::vector<int64_t> &atomic_mem_size); | |||
ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); | |||
@@ -695,11 +695,7 @@ Status DataDumper::LoadDumpInfo() { | |||
} | |||
if (dump_properties_.GetDumpMode() == kDumpInput) { | |||
if (op_iter.is_task) { | |||
Status ret = DumpInput(op_iter, task); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Dump input failed"); | |||
return ret; | |||
} | |||
GE_CHK_STATUS_RET(DumpInput(op_iter, task), "Dump input failed"); | |||
} | |||
op_mapping_info.mutable_task()->Add(std::move(task)); | |||
continue; | |||
@@ -726,7 +722,7 @@ Status DataDumper::LoadDumpInfo() { | |||
SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); | |||
if (!op_list_.empty() || is_op_debug_) { | |||
if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { | |||
auto ret = ExecuteLoadDumpInfo(op_mapping_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Execute load dump info failed"); | |||
@@ -740,7 +736,6 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, | |||
aicpu::dump::OpMappingInfo &op_mapping_info) { | |||
if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput || | |||
dump_properties_.GetDumpMode() == kDumpAll) { | |||
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||
aicpu::dump::Task task; | |||
task.set_end_graph(true); | |||
task.set_task_id(end_graph_task_id_); | |||
@@ -748,6 +743,14 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, | |||
task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH); | |||
task.mutable_op()->set_op_type(ENDGRAPH); | |||
op_mapping_info.mutable_task()->Add(std::move(task)); | |||
is_end_graph_ = true; | |||
if (op_mapping_info.model_name_param_case() == aicpu::dump::OpMappingInfo::kModelName) { | |||
GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u", | |||
op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_); | |||
return; | |||
} | |||
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||
} | |||
} | |||
@@ -116,6 +116,7 @@ class DataDumper { | |||
std::vector<InnerDumpInfo> op_list_; | |||
uint32_t end_graph_task_id_ = 0; | |||
uint32_t end_graph_stream_id_ = 0; | |||
bool is_end_graph_ = false; | |||
std::multimap<std::string, InnerInputMapping> input_map_; | |||
bool load_flag_; | |||
uint32_t device_id_; | |||
@@ -1928,13 +1928,7 @@ Status DavinciModel::SinkModelProfile() { | |||
name = name_; | |||
} | |||
size_t name_len = name.size(); | |||
// phy device id | |||
uint32_t phy_device_id = 0; | |||
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); | |||
return FAILED); | |||
reporter_data.deviceId = phy_device_id; | |||
reporter_data.deviceId = device_id_; | |||
reporter_data.data = (unsigned char *)&name_len; | |||
reporter_data.dataLen = sizeof(int32_t); | |||
GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", | |||
@@ -2103,12 +2097,7 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | |||
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | |||
return FAILED, "Sink model tag memcpy error."); | |||
// device id | |||
uint32_t phy_device_id = 0; | |||
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); | |||
return FAILED); | |||
reporter_data.deviceId = phy_device_id; | |||
reporter_data.deviceId = device_id_; | |||
// Model Header | |||
string name; | |||
@@ -236,7 +236,6 @@ ModelManager::~ModelManager() { | |||
std::lock_guard<std::mutex> lock(map_mutex_); | |||
model_map_.clear(); | |||
model_aicpu_kernel_.clear(); | |||
cust_aicpu_so_.clear(); | |||
GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); | |||
} | |||
@@ -400,6 +399,7 @@ Status ModelManager::Unload(uint32_t model_id) { | |||
} | |||
std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | |||
exception_infos_.clear(); | |||
cust_aicpu_so_.clear(); | |||
return SUCCESS; | |||
} | |||
@@ -328,15 +328,14 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
op_desc->GetName().c_str(), v_memory_type.size(), inputs_size); | |||
return v_input_data_addr; | |||
} | |||
for (size_t i = 0; i < inputs_size; ++i) { | |||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||
if (tensor_desc == nullptr) { | |||
GELOGD("Op: %s, Index: %zu, has no input", op_desc->GetName().c_str(), i); | |||
continue; | |||
} | |||
if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { | |||
// TBE: add weights address to input | |||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); | |||
if (tensor_desc == nullptr) { | |||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||
continue; | |||
} | |||
int64_t tensor_size = 0; | |||
GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); | |||
if (tensor_size) { | |||
@@ -89,16 +89,13 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { | |||
nodes.push(node); | |||
static const std::set<std::string> end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE}; | |||
bool merge_flag = false; | |||
bool exit_flag = false; | |||
bool net_output_flag = false; | |||
while (!nodes.empty()) { | |||
NodePtr cur_node = nodes.top(); | |||
nodes.pop(); | |||
if (visited.count(cur_node) > 0) { | |||
continue; | |||
} | |||
if (AttachFlag(cur_node, stream_label, merge_flag, exit_flag, net_output_flag) != SUCCESS) { | |||
if (AttachFlag(cur_node, stream_label) != SUCCESS) { | |||
GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
@@ -122,12 +119,6 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { | |||
GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); | |||
} | |||
bool attach_flag = (merge_flag || exit_flag) && net_output_flag; | |||
if (attach_flag) { | |||
GELOGI("No need to keep on attaching label."); | |||
return SUCCESS; | |||
} | |||
for (const NodePtr &tmp_node : branch_nodes) { | |||
GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str()); | |||
GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed."); | |||
@@ -140,13 +131,9 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { | |||
/// @brief attach flag | |||
/// @param [in] node | |||
/// @param [out] stream_label | |||
/// @param [out] merge_flag | |||
/// @param [out] exit_flag | |||
/// @param [out] net_output_flag | |||
/// @return Status | |||
/// | |||
Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, | |||
bool &exit_flag, bool &net_output_flag) { | |||
Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label) { | |||
const std::string &type = node->GetType(); | |||
if (type == STREAMSWITCH) { | |||
if (node->GetInDataNodes().empty()) { | |||
@@ -164,12 +151,8 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea | |||
} else if (type == STREAMMERGE) { | |||
stream_label = node->GetName(); | |||
GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); | |||
merge_flag = true; | |||
} else if ((type == EXIT) || (type == REFEXIT)) { | |||
GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); | |||
exit_flag = true; | |||
} else if (type == NETOUTPUT) { | |||
net_output_flag = true; | |||
} | |||
return SUCCESS; | |||
@@ -50,13 +50,9 @@ class AttachStreamLabelPass : public GraphPass { | |||
/// @brief attach flag | |||
/// @param [in] node | |||
/// @param [out] stream_label | |||
/// @param [out] merge_flag | |||
/// @param [out] exit_flag | |||
/// @param [out] net_output_flag | |||
/// @return Status | |||
/// | |||
static Status AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, bool &exit_flag, | |||
bool &net_output_flag); | |||
static Status AttachFlag(const NodePtr &node, std::string &stream_label); | |||
/// | |||
/// @brief Update stream_label for loop_branch | |||
@@ -20,13 +20,14 @@ | |||
#include "framework/common/debug/log.h" | |||
#include "graph/utils/graph_utils.h" | |||
namespace { | |||
const size_t kOutNodesNum = 1; | |||
} | |||
namespace ge { | |||
Status EnterPass::Run(NodePtr &node) { | |||
GELOGD("EnterPass running"); | |||
if (node == nullptr) { | |||
GELOGE(PARAM_INVALID, "param [node] must not be null."); | |||
return PARAM_INVALID; | |||
} | |||
GE_CHECK_NOTNULL(node); | |||
if ((node->GetType() != ENTER) && (node->GetType() != REFENTER)) { | |||
return SUCCESS; | |||
@@ -38,18 +39,17 @@ Status EnterPass::Run(NodePtr &node) { | |||
return PARAM_INVALID; | |||
} | |||
NodePtr in_node = node->GetInDataNodes().at(0); | |||
if (in_node == nullptr) { | |||
GELOGE(PARAM_INVALID, "param [in_node] must not be null"); | |||
return PARAM_INVALID; | |||
} | |||
GE_CHECK_NOTNULL(in_node); | |||
if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP)) { | |||
return SUCCESS; | |||
} | |||
bool need_remove_flag = | |||
in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty() && node->GetOutDataNodes().empty(); | |||
if (need_remove_flag) { | |||
bool need_remove_flag = in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty(); | |||
if (!need_remove_flag) { | |||
return SUCCESS; | |||
} | |||
if (node->GetOutDataNodes().empty()) { | |||
for (auto &out_ctrl_node : node->GetOutControlNodes()) { | |||
if (out_ctrl_node == nullptr) { | |||
continue; | |||
@@ -60,9 +60,47 @@ Status EnterPass::Run(NodePtr &node) { | |||
return FAILED; | |||
} | |||
} | |||
} else { | |||
if (OptimizeEnter(node, in_node) != SUCCESS) { | |||
GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
} | |||
GELOGD("EnterPass success"); | |||
return SUCCESS; | |||
} | |||
///
/// @brief Bypass `node`: link output 0 of `in_node` directly to every data consumer of `node`,
///        then remove `node` from its owner graph.
///        The rewrite is skipped (SUCCESS is returned, graph untouched) when
///          - `in_node` does not have exactly kOutNodesNum successors,
///          - `node` has outgoing control edges, or
///          - any data consumer of `node` is a MERGE node.
/// @param [in] node     node to be bypassed and removed
/// @param [in] in_node  producer feeding data input 0 of `node`
/// @return SUCCESS when optimized or skipped; the failing status of the first graph operation otherwise
///
Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) {
  auto out_nodes_of_in_node = in_node->GetOutAllNodes();
  if (out_nodes_of_in_node.size() != kOutNodesNum) {
    return SUCCESS;
  }

  if (!node->GetOutControlNodes().empty()) {
    return SUCCESS;
  }

  for (const auto &out_node : node->GetOutDataNodes()) {
    GE_CHECK_NOTNULL(out_node);
    if (out_node->GetType() == MERGE) {
      return SUCCESS;
    }
  }

  // Fetch each anchor once and validate it before use, instead of re-querying the node at every step.
  auto in_node_out_anchor = in_node->GetOutDataAnchor(0);
  GE_CHECK_NOTNULL(in_node_out_anchor);
  auto node_in_anchor = node->GetInDataAnchor(0);
  GE_CHECK_NOTNULL(node_in_anchor);
  auto out_data_anchor = node->GetOutDataAnchor(0);
  GE_CHECK_NOTNULL(out_data_anchor);

  // Detach node from its producer, then move every consumer edge over to the producer.
  GE_CHK_STATUS_RET(in_node_out_anchor->Unlink(node_in_anchor));
  for (auto peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
    GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor));
    GE_CHK_STATUS_RET(in_node_out_anchor->LinkTo(peer_in_data_anchor));
  }

  auto graph = node->GetOwnerComputeGraph();
  GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, node));
  // Schedule in_node and its neighbours for another pass now that the topology changed.
  AddRePassNodesWithInOut(in_node);

  return SUCCESS;
}
} // namespace ge |
@@ -23,6 +23,9 @@ namespace ge { | |||
/// Node pass applied to a single graph node at a time through Run().
class EnterPass : public BaseNodePass { | |||
public: | |||
/// @brief Pass entry point, overriding BaseNodePass::Run.
/// @param [in] node node currently visited by the pass
/// @return Status
Status Run(NodePtr &node) override; | |||
private: | |||
/// @brief Helper that bypasses `node` by relinking `in_node` to the data consumers of `node`.
/// @param [in] node node to be bypassed
/// @param [in] in_node producer feeding `node`
/// @return Status
Status OptimizeEnter(NodePtr &node, NodePtr &in_node); | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_PASSES_ENTER_PASS_H_ |
@@ -41,7 +41,6 @@ | |||
#include "inc/pass_manager.h" | |||
#include "graph/common/local_context.h" | |||
using std::map; | |||
using std::set; | |||
using std::string; | |||
using std::vector; | |||
@@ -266,24 +265,27 @@ Status MultiBatchGraphCopyer::Init() { | |||
} | |||
Status MultiBatchGraphCopyer::LabelStatus() { | |||
map<string, vector<NodePtr>> frame_enters; | |||
InitStatus(frame_enters); | |||
for (const auto &data : origin_data_nodes_) { | |||
auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); | |||
if (!IsAllDimsPositive(data_shape.GetDims())) { | |||
origin_nodes_status_[data.get()] = kNodeInBatchBranch; | |||
} | |||
} | |||
bool changed = true; | |||
// If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch | |||
while (changed) { | |||
changed = false; | |||
for (const auto &node : origin_all_nodes_) { | |||
auto iter = origin_nodes_status_.find(node.get()); | |||
if (iter != origin_nodes_status_.end()) { | |||
continue; | |||
} | |||
for (auto &in_node : node->GetInAllNodes()) { | |||
bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && | |||
origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; | |||
if (is_in_batch) { | |||
if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() || | |||
origin_nodes_status_[node.get()] != kNodeInBatchBranch) { | |||
origin_nodes_status_[node.get()] = kNodeInBatchBranch; | |||
ResetEnterStatus(frame_enters, node); | |||
changed = true; | |||
} | |||
origin_nodes_status_[node.get()] = kNodeInBatchBranch; | |||
changed = true; | |||
break; | |||
} | |||
} | |||
@@ -314,45 +316,6 @@ Status MultiBatchGraphCopyer::LabelStatus() { | |||
return SUCCESS; | |||
} | |||
///
/// @brief Prepare the inputs of the labelling step:
///        1. group every ENTER / REFENTER node of origin_all_nodes_ by its ENTER_ATTR_FRAME_NAME attribute;
///        2. mark every data node whose output shape has a non-positive dim as kNodeInBatchBranch.
/// @param [out] frame_enters  frame name -> Enter nodes carrying that frame name
///
void MultiBatchGraphCopyer::InitStatus(map<string, vector<NodePtr>> &frame_enters) {
  for (const auto &node : origin_all_nodes_) {
    const bool is_enter_node = (node->GetType() == ENTER) || (node->GetType() == REFENTER);
    if (!is_enter_node) {
      continue;
    }
    auto op_desc = node->GetOpDesc();
    if (op_desc == nullptr) {
      continue;
    }
    // Enter nodes without a readable frame name are left out of the grouping.
    string frame_name;
    if (!AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) {
      continue;
    }
    frame_enters[frame_name].emplace_back(node);
  }

  for (const auto &data : origin_data_nodes_) {
    auto out_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
    if (IsAllDimsPositive(out_shape.GetDims())) {
      continue;
    }
    origin_nodes_status_[data.get()] = kNodeInBatchBranch;
  }
}
///
/// @brief When `node` is an ENTER / REFENTER node, mark every Enter node of the first frame group
///        that contains it as kNodeInBatchBranch. Other node types are ignored.
/// @param [in] frame_enters  frame name -> Enter nodes carrying that frame name
/// @param [in] node          node whose status has just been changed
///
void MultiBatchGraphCopyer::ResetEnterStatus(map<string, vector<NodePtr>> &frame_enters, const NodePtr &node) {
  const bool is_enter_node = (node->GetType() == ENTER) || (node->GetType() == REFENTER);
  if (!is_enter_node) {
    return;
  }

  for (const auto &frame_enter : frame_enters) {
    auto &group = frame_enter.second;
    if (std::find(group.begin(), group.end(), node) == group.end()) {
      continue;
    }
    for (const auto &sibling : group) {
      origin_nodes_status_[sibling.get()] = kNodeInBatchBranch;
    }
    // Only the first group containing the node is processed.
    return;
  }
}
Status MultiBatchGraphCopyer::CreateNewNodes() { | |||
shape_data_ = InsertShapeDataNode(); | |||
if (shape_data_ == nullptr) { | |||
@@ -1200,7 +1163,7 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, s | |||
} | |||
} | |||
// Connect NetOutput directly: DTS2020070612498 | |||
// Connect NetOutput directly | |||
void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set<size_t> &dynamic_output_index, | |||
vector<string> &dynamic_output_dims) { | |||
GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); | |||
@@ -68,8 +68,6 @@ class MultiBatchGraphCopyer { | |||
// label status for origin_all_nodes_ | |||
Status LabelStatus(); | |||
void InitStatus(std::map<string, vector<NodePtr>> &frame_enters); | |||
void ResetEnterStatus(std::map<string, vector<NodePtr>> &frame_enters, const NodePtr &node); | |||
// add nodes functions | |||
Status CreateNewNodes(); | |||
@@ -40,7 +40,7 @@ include ${BUILD_HOST_SHARED_LIBRARY} | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := atclib/libhost_cpu_engine | |||
LOCAL_CFLAGS += -Werror | |||
LOCAL_CFLAGS += -std=c++11 | |||
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE | |||
LOCAL_LDFLAGS := | |||
LOCAL_STATIC_LIBRARIES := | |||
@@ -165,8 +165,10 @@ Status GELib::SystemInitialize(const map<string, string> &options) { | |||
} | |||
} | |||
// In train and infer, profiling is always needed. | |||
InitOptions(options); | |||
// In train and infer, profiling is always needed. | |||
InitProfiling(this->options_); | |||
auto model_manager = ModelManager::GetInstance(); | |||
GE_CHECK_NOTNULL(model_manager); | |||
GE_IF_BOOL_EXEC(model_manager->EnableExceptionDump(options) != SUCCESS, | |||
@@ -176,21 +178,19 @@ Status GELib::SystemInitialize(const map<string, string> &options) { | |||
// 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer | |||
// these two case with logical device id | |||
if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { | |||
InitProfiling(this->options_, true); | |||
status = InitSystemWithOptions(this->options_); | |||
} else { | |||
InitProfiling(this->options_); | |||
status = InitSystemWithoutOptions(); | |||
} | |||
return status; | |||
} | |||
void GELib::InitProfiling(Options &options, bool convert_2_phy_device_id) { | |||
void GELib::InitProfiling(Options &options) { | |||
GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id); | |||
std::lock_guard<std::mutex> lock(status_mutex_); | |||
GetContext().Init(); | |||
// Profiling init | |||
if (ProfilingManager::Instance().Init(options, convert_2_phy_device_id) != SUCCESS) { | |||
if (ProfilingManager::Instance().Init(options) != SUCCESS) { | |||
GELOGW("Profiling init failed."); | |||
} | |||
} | |||
@@ -68,7 +68,7 @@ class GELib { | |||
// get incre build cache path | |||
const std::string &GetIncreBuildCachePath() const { return incre_build_cache_path_; } | |||
void InitProfiling(Options &options, bool convert_2_phy_device_id = false); | |||
void InitProfiling(Options &options); | |||
void ShutDownProfiling(); | |||
Status InitSystemWithoutOptions(); | |||
@@ -522,7 +522,7 @@ void PrintOptionMap(std::map<std::string, std::string> &options, std::string tip | |||
for (auto iter = options.begin(); iter != options.end(); iter++) { | |||
std::string key = iter->first; | |||
std::string option_name = iter->second; | |||
GELOGI("%s set successfully, key=%s, value=%s", tips.c_str(), key.c_str(), option_name.c_str()); | |||
GELOGI("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str()); | |||
} | |||
} | |||
@@ -96,6 +96,12 @@ static graphStatus CheckGlobalOptions(std::map<std::string, std::string> &global | |||
return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!"); | |||
global_options[ge::ir_option::OP_SELECT_IMPL_MODE] = op_select_implmode; | |||
// set precision mode default value | |||
std::string precision_mode = global_options.find(ge::ir_option::PRECISION_MODE) == global_options.end() | |||
? "force_fp16" | |||
: global_options[ge::ir_option::PRECISION_MODE]; | |||
global_options[ge::ir_option::PRECISION_MODE] = precision_mode; | |||
return GRAPH_SUCCESS; | |||
} | |||
@@ -175,25 +175,25 @@ Status OpsKernelManager::ParsePluginOptions(const map<string, string> &options, | |||
} else if (flag == 1) { | |||
enable_flag = true; | |||
} else { | |||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), | |||
iter->second.c_str()); | |||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", | |||
plugin_name.c_str(), iter->second.c_str()); | |||
return GE_GRAPH_OPTIONS_INVALID; | |||
} | |||
} catch (std::invalid_argument &) { | |||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.", | |||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.", | |||
iter->second.c_str()); | |||
return GE_GRAPH_OPTIONS_INVALID; | |||
} catch (std::out_of_range &) { | |||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is out of range, it must be 0 or 1.", | |||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.", | |||
iter->second.c_str()); | |||
return GE_GRAPH_OPTIONS_INVALID; | |||
} catch (...) { | |||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), | |||
iter->second.c_str()); | |||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", | |||
plugin_name.c_str(), iter->second.c_str()); | |||
return GE_GRAPH_OPTIONS_INVALID; | |||
} | |||
} else { | |||
GELOGI("Not find key %s, set to default value false.", plugin_name.c_str()); | |||
GELOGI("Not find option_key %s, set to default value false.", plugin_name.c_str()); | |||
enable_flag = false; | |||
} | |||
@@ -618,11 +618,16 @@ Status ParseOutNodes(const string &out_nodes) { | |||
if (!out_nodes.empty()) { | |||
domi::GetContext().out_nodes_map.clear(); | |||
domi::GetContext().user_out_nodes.clear(); | |||
domi::GetContext().user_out_nodes_top_vec.clear(); | |||
vector<string> nodes_v = StringUtils::Split(out_nodes, ';'); | |||
for (const string &node : nodes_v) { | |||
vector<string> key_value_v = StringUtils::Split(node, ':'); | |||
if (key_value_v.size() != 2) { // The size must be 2. | |||
if (key_value_v.size() == 1 && domi::GetContext().type == domi::CAFFE) { | |||
domi::GetContext().user_out_nodes_top_vec.push_back(node); | |||
continue; | |||
} | |||
ErrorManager::GetInstance().ATCReportErrMessage( | |||
"E10001", {"parameter", "value", "reason"}, | |||
{"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); | |||
@@ -632,7 +637,13 @@ Status ParseOutNodes(const string &out_nodes) { | |||
node.c_str()); | |||
return PARAM_INVALID; | |||
} | |||
auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); | |||
if (!domi::GetContext().user_out_nodes_top_vec.empty()) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||
{"--out_nodes", out_nodes, "is not all index or top_name"}); | |||
GELOGE(PARAM_INVALID, "This out_nodes str must be all index or top_name, while the actual input is %s", | |||
out_nodes.c_str()); | |||
return PARAM_INVALID; | |||
} | |||
// stoi: The method may throw an exception: invalid_argument/out_of_range | |||
if (!CheckDigitStr(key_value_v[1])) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||
@@ -640,7 +651,10 @@ Status ParseOutNodes(const string &out_nodes) { | |||
GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str()); | |||
return PARAM_INVALID; | |||
} | |||
auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); | |||
int32_t index = stoi(StringUtils::Trim(key_value_v[1])); | |||
GELOGD("Get output info: node[%s] and index[%ld]", key_value_v[0].c_str(), index); | |||
if (iter != domi::GetContext().out_nodes_map.end()) { | |||
iter->second.emplace_back(index); | |||
} else { | |||
@@ -279,7 +279,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, con | |||
if (op_task_->GetOpTaskType() == OP_TASK_TBE) { | |||
return ExecuteTbeTask(input_desc, inputs, output_desc, outputs); | |||
} else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { | |||
return op_task_->LaunchKernel(input_desc, inputs, output_desc, outputs, stream_); | |||
return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); | |||
} else { | |||
GELOGE(UNSUPPORTED, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", | |||
op_task_->GetOpTaskType()); | |||
@@ -75,8 +75,11 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||
// Conv2D IN[DT_FLOAT16 NC1HWC0[256, 128, 7, 7, 16],DT_FLOAT16 FRACTAL_Z[128, 32, 16, 16]] | |||
// OUT[DT_FLOAT16 NC1HWC0[256, 32, 7, 7, 16]] | |||
ss << op_type << " IN["; | |||
for (uint32_t idx = 0; idx < op_desc->GetInputsSize(); idx++) { | |||
for (uint32_t idx = 0; idx < op_desc->GetAllInputsSize(); idx++) { | |||
const GeTensorDescPtr &input = op_desc->MutableInputDesc(idx); | |||
if (input == nullptr) { | |||
continue; | |||
} | |||
ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; | |||
ss << TypeUtils::FormatToSerialString(input->GetFormat()); | |||
ss << VectorToString(input->GetShape().GetDims()); | |||
@@ -34,6 +34,11 @@ constexpr int kLaunchRetryTimes = 1000; | |||
constexpr int kSleepTime = 10; | |||
constexpr uint64_t kReleaseFlag = 1; | |||
constexpr int kCopyNum = 2; | |||
void FreeHbm(void *var) { | |||
if (var) { | |||
(void)rtFree(var); | |||
} | |||
} | |||
} // namespace | |||
Status OpTask::OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream) { | |||
@@ -336,49 +341,23 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor | |||
} | |||
// Releases the runtime buffers held by the task.
//
// Every pointer is passed to FreeHbm exactly once; FreeHbm skips null pointers, so members that
// were never allocated are ignored. The earlier raw rtFree sequence is not repeated here because
// releasing the same pointer through both paths would free it twice.
AiCpuTask::~AiCpuTask() {
  FreeHbm(args_);
  FreeHbm(io_addr_);
  // The workspace is released only when dynamic_flag_ is set; presumably it is owned elsewhere
  // otherwise - TODO confirm against the task builder.
  if (dynamic_flag_) {
    FreeHbm(workspace_addr_);
  }
  FreeHbm(copy_workspace_buf_);
  FreeHbm(copy_ioaddr_dev_);
  FreeHbm(copy_input_release_flag_dev_);
  FreeHbm(copy_input_data_size_dev_);
  FreeHbm(copy_input_src_dev_);
  FreeHbm(copy_input_dst_dev_);
  FreeHbm(copy_task_args_buf_);
  for (auto summary : output_summary_) {
    FreeHbm(summary);
  }
  // Shape buffers that are still recorded in out_shape_hbm_ are released as well.
  for (auto out_shape : out_shape_hbm_) {
    FreeHbm(out_shape);
  }
}
@@ -405,7 +384,7 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { | |||
return SUCCESS; | |||
} | |||
Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm) { | |||
Status AiCpuTask::PrepareCopyInputs(vector<DataBuffer> &outputs) { | |||
std::vector<uint64_t> copy_input_release_flag; | |||
std::vector<uint64_t> copy_input_data_size; | |||
std::vector<uint64_t> copy_input_src; | |||
@@ -417,11 +396,15 @@ Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<v | |||
summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, summary.raw_data_size); | |||
auto output = outputs[i]; | |||
copy_input_release_flag.emplace_back(kReleaseFlag); | |||
copy_input_data_size.emplace_back(summary.raw_data_size); | |||
if (summary.raw_data_size > 0) { | |||
copy_input_data_size.emplace_back(output.length); | |||
} else { | |||
copy_input_data_size.emplace_back(summary.raw_data_size); | |||
} | |||
copy_input_src.emplace_back(summary.raw_data_ptr); | |||
copy_input_dst.emplace_back(reinterpret_cast<uintptr_t>(output)); | |||
copy_input_dst.emplace_back(reinterpret_cast<uintptr_t>(output.data)); | |||
const auto &shape_buffer = out_shape_hbm[i]; | |||
const auto &shape_buffer = out_shape_hbm_[i]; | |||
copy_input_release_flag.emplace_back(kReleaseFlag); | |||
copy_input_data_size.emplace_back(summary.shape_data_size); | |||
copy_input_src.emplace_back(summary.shape_data_ptr); | |||
@@ -441,7 +424,7 @@ Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<v | |||
return SUCCESS; | |||
} | |||
Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_shape_hbm) { | |||
Status AiCpuTask::ReadResultSummaryAndPrepareMemory() { | |||
for (size_t i = 0; i < num_outputs_; ++i) { | |||
auto &result_summary = output_summary_host_[i]; | |||
@@ -449,36 +432,39 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_sha | |||
sizeof(aicpu::FWKAdapter::ResultSummary), RT_MEMCPY_DEVICE_TO_HOST)); | |||
auto shape_data_size = result_summary.shape_data_size; | |||
void *shape_buffer = nullptr; | |||
GE_MAKE_GUARD_RTMEM(shape_buffer); | |||
GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM)); | |||
out_shape_hbm.emplace_back(shape_buffer); | |||
if (shape_data_size > 0) { | |||
GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM)); | |||
} | |||
out_shape_hbm_.emplace_back(shape_buffer); | |||
} | |||
return SUCCESS; | |||
} | |||
// Copies the op results into the caller's output buffers with the task's copy kernel.
//
// @param outputs  output buffers, forwarded to PrepareCopyInputs to fill the copy-task inputs
// @param stream   stream on which the copy kernel is launched and then synchronised
// @return SUCCESS once the stream has been synchronised; the GE_CHK_* macros presumably return
//         early with the failing status - confirm against their definitions.
Status AiCpuTask::CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream) {
  GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs));

  GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, stream));
  // Block until the launched kernel has finished on this stream before reporting success.
  GE_CHK_RT_RET(rtStreamSynchronize(stream));
  return SUCCESS;
}
Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, const std::vector<void *> &out_shape_hbm) { | |||
Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) { | |||
for (size_t i = 0; i < num_outputs_; ++i) { | |||
const auto &result_summary = output_summary_host_[i]; | |||
std::vector<int64_t> shape_dims; | |||
const auto &shape_hbm = out_shape_hbm[i]; | |||
uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); | |||
std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]()); | |||
GE_CHECK_NOTNULL(shape_addr); | |||
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, result_summary.shape_data_size, | |||
RT_MEMCPY_DEVICE_TO_HOST)); | |||
for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { | |||
shape_dims.emplace_back(shape_addr[dim_idx]); | |||
GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]); | |||
if (result_summary.shape_data_size > 0) { | |||
const auto &shape_hbm = out_shape_hbm_[i]; | |||
uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); | |||
std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]()); | |||
GE_CHECK_NOTNULL(shape_addr); | |||
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, | |||
result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||
for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { | |||
shape_dims.emplace_back(shape_addr[dim_idx]); | |||
GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]); | |||
} | |||
} | |||
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]), | |||
@@ -487,7 +473,7 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, cons | |||
return SUCCESS; | |||
} | |||
Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<void *> &outputs, | |||
Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<DataBuffer> &outputs, | |||
rtStream_t stream) { | |||
if (num_outputs_ == 0) { | |||
GELOGI("Output num is 0, there is no need to update the output and size."); | |||
@@ -496,13 +482,20 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output | |||
GELOGI("Update shape and data by result summary begin."); | |||
std::vector<void *> out_shape_hbm; | |||
GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(out_shape_hbm), | |||
"Read ResultSummary and update output shape failed."); | |||
for (auto out_shape : out_shape_hbm_) { | |||
FreeHbm(out_shape); | |||
} | |||
out_shape_hbm_.clear(); | |||
GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(), "Read ResultSummary and update output shape failed."); | |||
GE_CHK_STATUS_RET(CopyDataToHbm(outputs, stream), "Copy data to output failed."); | |||
GE_CHK_STATUS_RET(CopyDataToHbm(outputs, out_shape_hbm, stream), "Copy data to output failed."); | |||
GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc), "Update shape by hbm buffer failed."); | |||
GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc, out_shape_hbm), "Update shape by hbm buffer failed."); | |||
for (auto out_shape : out_shape_hbm_) { | |||
FreeHbm(out_shape); | |||
} | |||
out_shape_hbm_.clear(); | |||
GELOGI("Update shape and data by result summary end."); | |||
return SUCCESS; | |||
@@ -603,10 +596,18 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { | |||
return SUCCESS; | |||
} | |||
Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs, | |||
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, | |||
rtStream_t stream) { | |||
Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, | |||
const std::vector<DataBuffer> &input_buffers, std::vector<GeTensorDesc> &output_desc, | |||
std::vector<DataBuffer> &output_buffers, rtStream_t stream) { | |||
GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc)); | |||
std::vector<void *> inputs; | |||
std::vector<void *> outputs; | |||
for (auto &buffer : input_buffers) { | |||
inputs.emplace_back(buffer.data); | |||
} | |||
for (auto &buffer : output_buffers) { | |||
outputs.emplace_back(buffer.data); | |||
} | |||
GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs)); | |||
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); | |||
GE_CHK_RT_RET(rtStreamSynchronize(stream)); | |||
@@ -614,7 +615,7 @@ Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, cons | |||
if (unknown_type_ == DEPEND_SHAPE_RANGE) { | |||
GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); | |||
} else if (unknown_type_ == DEPEND_COMPUTE) { | |||
GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, outputs, stream)); | |||
GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, output_buffers, stream)); | |||
} | |||
return SUCCESS; | |||
@@ -658,9 +659,9 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { | |||
return SUCCESS; | |||
} | |||
Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs, | |||
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, | |||
rtStream_t stream) { | |||
Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, | |||
const std::vector<DataBuffer> &input_buffers, std::vector<GeTensorDesc> &output_desc, | |||
std::vector<DataBuffer> &output_buffers, rtStream_t stream) { | |||
GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, | |||
"AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", unknown_type_); | |||
@@ -669,11 +670,11 @@ Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, co | |||
size_t arg_index = 0; | |||
auto *task_io_addr = reinterpret_cast<uintptr_t *>(io_addr_); | |||
GE_CHECK_NOTNULL(task_io_addr); | |||
for (auto &input : inputs) { | |||
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(input); | |||
for (auto &input : input_buffers) { | |||
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(input.data); | |||
} | |||
for (auto &output : outputs) { | |||
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(output); | |||
for (auto &output : output_buffers) { | |||
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(output.data); | |||
} | |||
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); | |||
@@ -57,8 +57,9 @@ class OpTask { | |||
void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes); | |||
const OpDescPtr &GetOpdesc() const { return op_desc_; } | |||
Status OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream); | |||
// Base-class fallback for launching with per-call tensor descriptions and data buffers.
// It ignores every argument and returns UNSUPPORTED; the AiCpuTask and AiCpuCCTask
// declarations in this header override it.
virtual Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers,
                            std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers,
                            rtStream_t stream) {
  return UNSUPPORTED;
}
@@ -138,8 +139,9 @@ class AiCpuTask : public AiCpuBaseTask { | |||
OpTaskType GetOpTaskType() override { return OP_TASK_AICPU; } | |||
const void *GetIOAddr() const override; | |||
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs, | |||
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) override; | |||
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers, | |||
std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers, | |||
rtStream_t stream) override; | |||
Status SetMemCopyTask(const domi::KernelExDef &kernel_def); | |||
private: | |||
@@ -147,14 +149,14 @@ class AiCpuTask : public AiCpuBaseTask { | |||
// for copy task. | |||
Status InitForSummaryAndCopy(); | |||
Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<void *> &outputs, | |||
Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<DataBuffer> &outputs, | |||
rtStream_t stream); | |||
Status ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_shape_hbm); | |||
Status ReadResultSummaryAndPrepareMemory(); | |||
Status CopyDataToHbm(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm, rtStream_t stream); | |||
Status PrepareCopyInputs(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm); | |||
Status CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream); | |||
Status PrepareCopyInputs(vector<DataBuffer> &outputs); | |||
Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, const std::vector<void *> &out_shape_hbm); | |||
Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc); | |||
friend class AiCpuTaskBuilder; | |||
void *workspace_addr_ = nullptr; | |||
@@ -178,6 +180,8 @@ class AiCpuTask : public AiCpuBaseTask { | |||
void *copy_input_data_size_dev_; | |||
void *copy_input_src_dev_; | |||
void *copy_input_dst_dev_; | |||
vector<void *> out_shape_hbm_; | |||
}; | |||
class AiCpuCCTask : public AiCpuBaseTask { | |||
@@ -197,8 +201,9 @@ class AiCpuCCTask : public AiCpuBaseTask { | |||
void SetIoAddr(void *io_addr); | |||
size_t GetArgSize() const; | |||
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs, | |||
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) override; | |||
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers, | |||
std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers, | |||
rtStream_t stream) override; | |||
private: | |||
friend class AiCpuCCTaskBuilder; | |||
@@ -25,16 +25,21 @@ | |||
namespace ge { | |||
/** | |||
*@brief Performs AI pre-processing (AIPP) on images including color space conversion (CSC), | |||
image normalization (by subtracting the mean value or multiplying a factor), image cropping | |||
(by specifying the crop start and cropping the image to the size required by the neural network), and much more. \n | |||
*@brief Performs AI pre-processing (AIPP) on images including color space | |||
conversion (CSC), | |||
image normalization (by subtracting the mean value or multiplying a factor), | |||
image cropping | |||
(by specifying the crop start and cropping the image to the size required by | |||
the neural network), and much more. \n | |||
*@par Inputs: | |||
*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer. | |||
*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the | |||
data layer. | |||
*@li params: Dynamic AIPP configuration parameters of type uint8. \n | |||
*@par Attributes: | |||
*aipp_config_path: A required string, specifying the path of the AIPP configuration file. \n | |||
*aipp_config_path: A required string, specifying the path of the AIPP | |||
configuration file. \n | |||
*@par Outputs: | |||
*features: The AIPP-processed output tensor of type float16 or uint8. | |||
@@ -28,9 +28,10 @@ namespace ge { | |||
*@par Inputs: | |||
*Dynamic inputs, including: | |||
* @li x: A list of Tensor objects, each with same shape and type. The supported types are: | |||
* @li x: A list of Tensor objects, each with same shape and type. The supported | |||
types are: | |||
* float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n | |||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same shape and type as the elements of "x". \n | |||
@@ -121,7 +122,8 @@ REG_OP(MinimumGrad) | |||
*@par Inputs: | |||
*One input: | |||
*x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | |||
*x:A Tensor. Must be one of the following types: bool, float16, float, int8, | |||
int32, uint32, uint8, | |||
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n | |||
*@par Attributes: | |||
@@ -385,7 +387,8 @@ REG_OP(Sign) | |||
*@par Inputs: | |||
*Two inputs, including: \n | |||
*@li x1: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64,complex128 | |||
*@li x1: A Tensor. Must be one of the following types: float16, float32, | |||
float64, int32, int64, complex64, complex128. | |||
*@li x2: A Tensor. Has the same type as "x1". \n | |||
*@par Outputs: | |||
@@ -484,12 +487,16 @@ REG_OP(Equal) | |||
*@par Inputs: | |||
*One input:\n | |||
*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n | |||
*x: A Tensor. Must be one of the following types: float16, float32, double, | |||
complex64, complex128. \n | |||
*@par Attributes: | |||
*@li base: An optional attribute of type float32, specifying the base gamma. Defaults to "-1.0". | |||
*@li scale: An optional attribute of type float32, specifying the scale alpha. Defaults to "1.0". | |||
*@li shift: An optional attribute of type float32, specifying the shift beta. Defaults to "0.0". \n | |||
*@li base: An optional attribute of type float32, specifying the base gamma. | |||
Defaults to "-1.0". | |||
*@li scale: An optional attribute of type float32, specifying the scale alpha. | |||
Defaults to "1.0". | |||
*@li shift: An optional attribute of type float32, specifying the shift beta. | |||
Defaults to "0.0". \n | |||
*@par Outputs: | |||
*y: A Tensor of the same type as "x". \n | |||
@@ -510,7 +517,8 @@ REG_OP(Exp) | |||
*@par Inputs: | |||
*One input: | |||
*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n | |||
*x: A Tensor. Must be one of the following types: float16, float32, double, | |||
complex64, complex128. \n | |||
*@par Outputs: | |||
*y: A Tensor of the same type as "x". \n | |||
@@ -527,7 +535,9 @@ REG_OP(Expm1) | |||
*@brief: Computes the reciprocal of "x". \n | |||
*@par Inputs:\n | |||
*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. \n | |||
*x: A Tensor. Must be one of the following types: float16, float32, | |||
int32, int64, double, | |||
complex64, complex128. \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x". \n | |||
@@ -749,7 +759,8 @@ REG_OP(Xlogy) | |||
*@par Inputs: | |||
*One input: \n | |||
*x: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128 | |||
*x: A Tensor. Must be one of the following types: float16, float32, float64, | |||
int32, int64, complex64, complex128 | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x". \n | |||
@@ -790,7 +801,8 @@ REG_OP(Rsqrt) | |||
* | |||
*@par Inputs: | |||
* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128. | |||
* x: A tensor. Must be one of the following types: float16, float32, float64, | |||
int32, int64, complex64, complex128. | |||
* | |||
*@par Outputs: | |||
* y: A tensor. Has the same type as "x". | |||
@@ -811,7 +823,8 @@ REG_OP(Asin) | |||
* | |||
*@par Inputs: | |||
*@li y: A tensor of type float16, float32, float64, int32, int64, complex64, complex128. | |||
*@li y: A tensor of type float16, float32, float64, | |||
int32, int64, complex64, complex128. | |||
*@li dy: A tensor of the same type as "y". | |||
* | |||
*@attention Constraints: | |||
@@ -838,7 +851,8 @@ REG_OP(AsinGrad) | |||
* | |||
*@par Inputs: | |||
* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128. | |||
* x: A tensor. Must be one of the following types: float16, float32, float64, | |||
int32, int64, complex64, complex128. | |||
* | |||
*@par Outputs: | |||
* y: A tensor. Has the same type as "x". | |||
@@ -883,7 +897,8 @@ REG_OP(AcosGrad) | |||
* | |||
*@par Inputs: | |||
* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. | |||
* x: A tensor. Must be one of the following types: float16, float32, float64, | |||
complex64, complex128. | |||
* | |||
*@attention Constraints: | |||
* Given an input tensor "x", the function computes the inverse hyperbolic cosine of every element.\n | |||
@@ -1160,7 +1175,8 @@ REG_OP(FusedMulAdd) | |||
* | |||
*@par Inputs: | |||
*@li x1: A tensor. Must be one of the following types: float16, float32, float64, uint8, int8, int16, int32, int64, complex64, complex128. | |||
*@li x1: A tensor. Must be one of the following types: float16, float32, float64, | |||
uint8, int8, int16, int32, int64, complex64, complex128. | |||
*@li x2: A tensor of the same type as "x1". | |||
* | |||
*@attention Constraints: | |||
@@ -1189,7 +1205,8 @@ REG_OP(AddV2) | |||
*@brief Updates "ref" by adding "value" to it. \n | |||
*@par Inputs: | |||
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. | |||
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, | |||
int16, int32, int64, uint8, uint16, uint32, uint64. | |||
*@li value: A Tensor of the same type as "ref". \n | |||
*@par Attributes: | |||
@@ -1218,12 +1235,14 @@ REG_OP(AssignAdd) | |||
*@brief Updates "ref" by assigning "value" to it. \n | |||
*@par Inputs: | |||
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. | |||
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, | |||
int32, int64, uint8, uint16, uint32, uint64. | |||
*@li value: A Tensor of the same type as "ref". \n | |||
*@par Attributes: | |||
*@li validate_shape: An optional bool. Defaults to "true". | |||
If "true", the operation will validate that the shape of "value" matches the shape of the Tensor being assigned to. | |||
If "true", the operation will validate that the shape of "value" | |||
matches the shape of the Tensor being assigned to. | |||
* If "false", "ref" will take on the shape of "value". | |||
* This attribute is reserved. | |||
*@li use_locking: An optional bool. Defaults to True. | |||
@@ -1252,7 +1271,8 @@ REG_OP(Assign) | |||
* | |||
*@par Inputs: | |||
*@li var: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64 | |||
*@li var: A tensor. Must be one of the following types: float32, float64, | |||
int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64 | |||
*@li value: A tensor of the same type as "var". | |||
* | |||
*@par Attributes: | |||
@@ -1644,7 +1664,9 @@ REG_OP(Atan2) | |||
* | |||
*@par Inputs: | |||
*@li x1: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 | |||
*@li x1: A tensor. Must be one of the following types: float32, float64, int32, | |||
uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, | |||
float16, uint32, uint64 | |||
*@li x2: A tensor of the same type as "x1". | |||
* | |||
*@par Attributes: | |||
@@ -1666,16 +1688,18 @@ REG_OP(ApproximateEqual) | |||
/** | |||
*@brief Returns the element-wise sum of a list of tensors.\n | |||
* AccumulateNV2 performs the same operation as AddN, but does not wait for all of its inputs | |||
to be ready before beginning to sum.\n This can save memory if inputs are ready at different times, | |||
since minimum temporary storage is proportional to the output size rather than the inputs size. | |||
Returns a Tensor of same shape and type as the elements of inputs. \n | |||
* AccumulateNV2 performs the same operation as AddN, but does not wait for all | |||
of its inputs to be ready before beginning to sum.\n This can save memory if | |||
inputs are ready at different times, since minimum temporary storage is | |||
proportional to the output size rather than the inputs size.\n Returns a Tensor | |||
of same shape and type as the elements of inputs. \n | |||
* | |||
*@par Inputs: | |||
*Dynamic inputs, including: | |||
* x: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, | |||
qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. It's a dynamic input. \n | |||
* x: A tensor. Must be one of the following types: float32, float64, int32, | |||
uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, | |||
complex128, float16, uint32, uint64. | |||
* | |||
*@par Outputs: | |||
* y: A tensor. Has the same type as "x". | |||
@@ -1731,7 +1755,8 @@ REG_OP(FakeQuantWithMinMaxArgs) | |||
*@par Inputs: | |||
*Two inputs, including: \n | |||
*@li gradients: A Tensor of type float32. Backpropagated gradients above the FakeQuantWithMinMaxArgs operation. | |||
*@li gradients: A Tensor of type float32. Backpropagated gradients | |||
above the FakeQuantWithMinMaxArgs operation. | |||
*@li x: A Tensor of type float32. Has the same type and format as "gradients".\n | |||
* This is the input Tensor of the FakeQuantWithMinMaxArgs operator.\n | |||
@@ -2210,9 +2235,13 @@ REG_OP(BiasAdd) | |||
*@par Inputs: | |||
*Two inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64. | |||
*@li x: A Tensor. Must be one of the following types: float32, float64, int32, | |||
uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, | |||
complex128, float16, uint32, uint64. | |||
*format is ND. | |||
*@li dimension: A Tensor. Must be one of the following types: int32, int64. Must be in the range [-rank(input x), rank(input x)]. Describes which dimension of the input Tensor to reduce across. | |||
*@li dimension: A Tensor. Must be one of the following types: int32, int64. | |||
Must be in the range [-rank(input x), rank(input x)]. Describes which dimension | |||
of the input Tensor to reduce across. | |||
* The format is ND. | |||
*@par Attributes: | |||
*dtype: The output type, either "int32" or "int64". Defaults to "int64". \n | |||
@@ -2286,6 +2315,7 @@ REG_OP(ArgMaxV2) | |||
.ATTR(dtype, Type, DT_INT64) | |||
.OP_END_FACTORY_REG(ArgMaxV2) | |||
/** | |||
*@brief Returns the index with the largest value across axes of a tensor. \n | |||
@@ -2298,15 +2328,16 @@ REG_OP(ArgMaxV2) | |||
*@li dtype: The output type, either "int32" or "int64". Defaults to "int64". \n | |||
*@par Outputs: | |||
*y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x". \n | |||
*y: A multi-dimensional Tensor of type int32, specifying the index with the | |||
largest value. The dimension is one less than that of "x". \n | |||
*@attention Constraints: | |||
*@li x: If there are multiple maximum values, the index of the first maximum value is used. | |||
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". \n | |||
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the | |||
dimension length of "x". \n | |||
*@par Third-party framework compatibility | |||
* Compatible with TensorFlow operator ArgMax. | |||
* | |||
* @par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
@@ -2929,9 +2960,13 @@ REG_OP(FusedMulAddN) | |||
*@li bias: An ND tensor of type float16 or float32. \n | |||
*@par Attributes: | |||
*@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1". | |||
*@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1". | |||
*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". \n | |||
*@li axis: An optional int32 used to compute the shape of bias input from the | |||
online bottoms. Defaults to "1". | |||
*@li num_axes: An optional int32 used to compute the shape of bias input from a | |||
Caffe model trained offline. Defaults to "1". | |||
*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe | |||
model trained offline. If "false", bias is input from online bottoms. Defaults | |||
to "true". \n | |||
*@par Outputs: | |||
*y: An ND tensor of type float16 or float32. \n | |||
@@ -2939,13 +2974,25 @@ REG_OP(FusedMulAddN) | |||
*@attention Constraints:\n | |||
* Assume that the shape length of "x" is "n" and that of "bias" is "m". | |||
*@li "axis" is within the range [-n, n-1]. num_axes >= -1. | |||
*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n | |||
* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis). | |||
*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1. | |||
*@li If "bias_from_blob = true", "num_axes > 0", and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n | |||
* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). | |||
*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n | |||
* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). | |||
*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis | |||
of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < | |||
n-axis).\n | |||
* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must | |||
have the same size (0 <= i < -axis). | |||
*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with | |||
shape length 1 and dimension size 1. | |||
*@li If "bias_from_blob = true", "num_axes > 0", and "axis >= 0", "axis + | |||
num_axes" must be less than or equal to "n" and the ith axis of "bias" and the | |||
(i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n | |||
* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and | |||
the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same | |||
size (0 <= i < num_axes). | |||
*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis | |||
+ m" must be less than or equal to "n" and the ith axis of "bias" and the (i | |||
+"axis")th axis of "x" must have the same size (0 <= i < m).\n | |||
* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith | |||
axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= | |||
i < m). | |||
*@par Third-party framework compatibility | |||
* Compatible with the Caffe operator Bias. | |||
*/ | |||
@@ -3023,10 +3070,12 @@ REG_OP(FusedMulAddNL2loss) | |||
*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n | |||
*@par Attributes: | |||
*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n | |||
*@li threshold: A required float32. Defaults to "0.0". "x" is compared with | |||
"threshold", outputs "1" for inputs above threshold; "0" otherwise. \n | |||
*@par Outputs: | |||
*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. | |||
*@li y: A Tensor with any format. Has the same type as the input. Must be one | |||
of the following types: float16, float32. | |||
*@par Third-party framework compatibility | |||
* Compatible with the Caffe operator Threshold. | |||
*/ | |||
@@ -3044,11 +3093,16 @@ REG_OP(FusedMulAddNL2loss) | |||
*@li x: A tensor. Must be one of the following types: float16, float32. \n | |||
*@par Attributes: | |||
*@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000 | |||
*@li out_max_val: An optional bool. Whether to output the maximum value. If it is True, the maximum value and index are output, otherwise only the index is output. | |||
*@li axis: An optional int. Specify the axis to be cut at the input tensor. If | |||
this parameter is not provided, find the topk for each batch. Defaults to 10000 | |||
*@li out_max_val: An optional bool. Whether to output the maximum value. If it | |||
is True, the maximum value and index are output, otherwise only the index is | |||
output. | |||
* Defaults to False | |||
*@li topk: An optional int. It means the number of top k elements in each axis (the value is greater than or equal to 1), and the value range must be in [1,x.shape(axis)]. | |||
* Defaults to 1 | |||
*@li topk: An optional int. It means the number of top k elements in each axis | |||
(the value is greater than or equal to 1), and the value range must be in | |||
[1,x.shape(axis)]. | |||
* Defaults to 1 \n | |||
*@par Outputs: | |||
*@li indices: A tensor of type float16, float32, int32. The index of the maximum value of the output. | |||
@@ -3168,7 +3222,8 @@ REG_OP(Axpy) | |||
.OP_END_FACTORY_REG(Axpy) | |||
/** | |||
*@brief Creates a criterion that measures the loss given input tensors x1 x2 and a Tensor label y with values 1 or -1. \n | |||
*@brief Creates a criterion that measures the loss given input tensors x1 x2 | |||
and a Tensor label y with values 1 or -1. \n | |||
*@par Inputs: | |||
*@li x1: A ND Tensor with one of the following types: int8, uint8, int32, float16, float32. | |||
@@ -36,7 +36,7 @@ namespace ge { | |||
* if "cond" is a numerical scalar, non-zero means True and zero means False; | |||
* if "cond" is a string scalar, non-empty means True and empty means False; | |||
* if "cond" is not a scalar, non-empty means True and empty means False. | |||
*@li input: The input tensors . It's a dynamic input. \n | |||
*@li input: The input tensors . \n | |||
*@par Graphs: | |||
*@li then_branch: A subgraph takes 'input' and returns a list of tensors, | |||
@@ -69,7 +69,7 @@ REG_OP(_If) | |||
* if "cond" is a numerical scalar, non-zero means True and zero means False; | |||
* if "cond" is a string scalar, non-empty means True and empty means False; | |||
* if "cond" is not a scalar, non-empty means True and empty means False. | |||
*@li input: The input tensors . It's a dynamic input. \n | |||
*@li input: The input tensors . \n | |||
*@par Graphs: | |||
*@li then_branch: A subgraph takes 'input' and returns a list of tensors, | |||
@@ -102,7 +102,7 @@ REG_OP(StatelessIf) | |||
* if "cond" is a numerical scalar, non-zero means True and zero means False; | |||
* if "cond" is a string scalar, non-empty means True and empty means False; | |||
* if "cond" is not a scalar, non-empty means True and empty means False. | |||
*@li input: The input tensors . It's a dynamic input. \n | |||
*@li input: The input tensors . \n | |||
*@par Graphs: | |||
*@li then_branch: A subgraph takes 'input' and returns a list of tensors, | |||
@@ -129,7 +129,7 @@ REG_OP(If) | |||
*@par Inputs: | |||
*@li branch_index: An int32 scalar which determines the selected subgraph. | |||
*@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n | |||
*@li input: The input tensors, which will be passed to the subgraph . \n | |||
*@par Graphs: | |||
*branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors, | |||
@@ -152,7 +152,7 @@ REG_OP(Case) | |||
*@brief Cyclically executes the "body" subgraph until the return tensor of "cond" subgraph means False . \n | |||
*@par Inputs: | |||
*input: The input tensors . It's a dynamic input. \n | |||
*input: The input tensors . \n | |||
*@par Graphs: | |||
*@li cond: A subgraph takes 'input' and returns a tensor. | |||
@@ -183,7 +183,7 @@ REG_OP(_While) | |||
*@brief Cyclically executes the "body" subgraph until the return tensor of "cond" subgraph means False . \n | |||
*@par Inputs: | |||
*input: The input tensors . It's a dynamic input. \n | |||
*input: The input tensors . \n | |||
*@par Graphs: | |||
*@li cond: A subgraph takes 'input' and returns a tensor. | |||
@@ -215,7 +215,7 @@ REG_OP(While) | |||
*@brief Cyclically executes the "body" subgraph until the return tensor of "cond" subgraph means False . \n | |||
*@par Inputs: | |||
*input: The input tensors . It's a dynamic input. \n | |||
*input: The input tensors . \n | |||
*@par Graphs: | |||
*@li cond: A subgraph takes 'input' and returns a tensor. | |||
@@ -250,7 +250,7 @@ REG_OP(StatelessWhile) | |||
*@li start: An int32 scalar. The lower bound. | |||
*@li limit: An int32 scalar. The upper bound. | |||
*@li delta: An int32 scalar. The step size. | |||
*@li input: The input tensors, which will be passed to "body" . It's a dynamic input. \n | |||
*@li input: The input tensors, which will be passed to "body" . \n | |||
*@par Graphs: | |||
*body: A subgraph that takes 'input' and returns another list of tensors . \n | |||
@@ -274,7 +274,7 @@ REG_OP(For) | |||
*@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n | |||
*@par Inputs: | |||
*args: The input tensors, which will be passed to "f" . It's a dynamic input. \n | |||
*args: The input tensors, which will be passed to "f" . \n | |||
*@par Graphs: | |||
*f: A subgraph that takes 'args' and returns another list of tensors . \n | |||
@@ -303,7 +303,7 @@ REG_OP(PartitionedCall) | |||
*@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n | |||
*@par Inputs: | |||
*args: The input tensors, which will be passed to "f" . It's a dynamic input. \n | |||
*args: The input tensors, which will be passed to "f" . \n | |||
*@par Graphs: | |||
*f: A subgraph that takes 'args' and returns another list of tensors . \n | |||
@@ -160,8 +160,10 @@ REG_OP(CropAndResize) | |||
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n | |||
*@par Attributes: | |||
*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches are resized to this size. | |||
*@li extrapolation_value: An optional float. Defaults to 0. Value used for extrapolation, when applicable. | |||
*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches | |||
are resized to this size. | |||
*@li extrapolation_value: An optional float. Defaults to 0. Value used for | |||
extrapolation, when applicable. | |||
*@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n | |||
*@par Outputs: | |||
@@ -172,7 +174,6 @@ REG_OP(CropAndResize) | |||
*@par Third-party framework compatibility | |||
*Compatible with tensorflow CropAndResize operator. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use CropAndResize instead. | |||
*/ | |||
@@ -87,39 +87,58 @@ REG_OP(L2NormalizeGrad) | |||
*@par Inputs: | |||
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW | |||
for 4D or NC1HWC0 for 5D. | |||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format | |||
NHWC or NCHW. Must be 5D | |||
if input "x" is with format NC1HWC0. Specifies the scaling factor. | |||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||
if input "x" is with format NC1HWC0. Specifies the offset. | |||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format | |||
NHWC or NCHW. Must be 5D | |||
if input "x" is with format NC1HWC0. Specifies the mean used for inference. | |||
Must be "None" if the | |||
operation is used for training. | |||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be | |||
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" | |||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format | |||
NHWC or NCHW. Must be | |||
5D if input "x" is with format NC1HWC0. Specifies the variance used for | |||
inference. Must be "None" | |||
if the operation is used for training . \n | |||
*@par Attributes: | |||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". | |||
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". | |||
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | |||
*@li epsilon: An optional float32, specifying the small value added to variance | |||
to avoid dividing by zero. Defaults to "0.0001". | |||
*@li data_format: An optional string, specifying the format of "x". Defaults to | |||
"NHWC". | |||
*@li is_training: An optional bool, specifying if the operation is used for | |||
training or inference. Defaults to "True" . \n | |||
*@par Outputs: | |||
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", | |||
with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with | |||
format NHWC or NCHW. Must be 5D | |||
if input "x" is with format NC1HWC0. Specifies the mean of "x". | |||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with | |||
format NHWC or NCHW. | |||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | |||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input | |||
"x" is with format NHWC or NCHW. | |||
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for | |||
gradient computation. Pass "None" to skip this output. | |||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input | |||
"x" is with format NHWC or NCHW. | |||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" | |||
for gradient computation. Pass "None" to skip this output . \n | |||
*@attention Constraints: | |||
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | |||
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". | |||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n | |||
*@li If the operation is used for inference and outputs "reserve_space_1" and | |||
"reserve_space_2" are available, | |||
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has | |||
the same value as "variance". | |||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | |||
root instruction . \n | |||
*@par Third-party framework compatibility | |||
*@li Compatible with the TensorFlow operator fused_batch_norm. | |||
@@ -166,13 +185,17 @@ is used for training or inference. Defaults to "True" . \n | |||
*@li y: A 4D Tensor of type float16 or float32, for the normalized "x". | |||
*@li batch_mean: A 1D Tensor of type float32, for the mean of "x". | |||
*@li batch_variance: A 1D Tensor of type float32, for the variance of "x". | |||
*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for gradient computation. | |||
*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" for gradient computation . \n | |||
*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for | |||
gradient computation. | |||
*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" | |||
for gradient computation . \n | |||
*@attention Constraints: | |||
*@li If the operation is used for inference, then output "reserve_space_1" | |||
has the same value as "mean" and output "reserve_space_2" has the same value as "variance". | |||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n | |||
has the same value as "mean" and output "reserve_space_2" has the same value as | |||
"variance". | |||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | |||
root instruction . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator fused_batch_norm_v2. | |||
@@ -198,23 +221,34 @@ REG_OP(BatchNormExt2) | |||
*@par Inputs: | |||
* Five inputs, including: | |||
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient. | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. | |||
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. | |||
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. | |||
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n | |||
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format | |||
NHWC, NCHW, or NC1HWC0, for the gradient. | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, | |||
or NC1HWC0. | |||
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or | |||
NC1HWC0. | |||
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, | |||
NCHW, or NC1HWC0. It is an output of BatchNorm. | |||
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, | |||
NCHW, or NC1HWC0. It is an output of BatchNorm . \n | |||
*@par Attributes: | |||
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | |||
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number | |||
added to the variance of "x". | |||
*@li data_format: An optional string. Defaults to "NHWC". | |||
*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n | |||
*@par Outputs: | |||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x". | |||
*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "scale". | |||
*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "offset". | |||
*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output. | |||
*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output . \n | |||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, | |||
or NC1HWC0, for the offset of "x". | |||
*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or | |||
NC1HWC0, for the offset of "scale". | |||
*@li offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or | |||
NC1HWC0, for the offset of "offset". | |||
*@li reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or | |||
NC1HWC0. Pass "None" to skip this output. | |||
*@li reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or | |||
NC1HWC0. Pass "None" to skip this output . \n | |||
*@attention Constraints: | |||
* The preceding layer of this operator must be operator BatchNorm . \n | |||
@@ -244,21 +278,28 @@ REG_OP(BatchNormGrad) | |||
*@par Inputs: | |||
* Five inputs, including: | |||
*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. | |||
*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or | |||
NCHW, for the gradient. | |||
*@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW. | |||
*@li scale: A 4D Tensor of type float32, with format NHWC or NCHW. | |||
*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2. | |||
*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2 . \n | |||
*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It | |||
is an output of BatchNormExt2. | |||
*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It | |||
is an output of BatchNormExt2 . \n | |||
*@par Attributes: | |||
*@li epsilon: A required float32. A small float number added to the variance of "x". | |||
*@li data_format: A required string for the format. | |||
*@li is_training: A required bool for specifying the operation is for training (true) or inference (false) . \n | |||
*@li is_training: A required bool for specifying the operation is for training | |||
(true) or inference (false) . \n | |||
*@par Outputs: | |||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, for the offset of "x". | |||
*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "scale". | |||
*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "offset". | |||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, | |||
for the offset of "x". | |||
*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for | |||
the offset of "scale". | |||
*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for | |||
the offset of "offset". | |||
*@li reserve_space_3: A Tensor of type float32, with format NHWC or NCHW. | |||
*@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW . \n | |||
@@ -290,14 +331,18 @@ REG_OP(BatchNormGradExt2) | |||
*@brief Performs batch normalization . \n | |||
*@par Inputs: | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | |||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW | |||
for 4D or NC1HWC0 for 5D. | |||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" | |||
Specifies the mean used for inference. | |||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" | |||
Specifies the variance used for inference. | |||
*@li momentum: A Tensor, represents the mean and the variance's scale factor. | |||
*@li scale: An optional tensor of type float16 or float32, not used. | |||
*@li offset: An optional tensor of type float16 or float32, not used. | |||
*@par Attributes: | |||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | |||
*@li epsilon: An optional float32, specifying the small value added to variance | |||
to avoid dividing by zero. Defaults to "0.00001". | |||
*@li use_global_stats: Indicates inference mode. Can only be "True". | |||
*@li mode: An optional input, not used. | |||
*@par Outputs: | |||
@@ -315,16 +360,20 @@ REG_OP(BNInference) | |||
.ATTR(use_global_stats, Bool,true) | |||
.ATTR(mode, Int,1) | |||
.OP_END_FACTORY_REG(BNInference) | |||
/** | |||
*@brief aicpu batch normalization host . \n | |||
*@par Inputs: | |||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | |||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" | |||
Specifies the mean used for inference. | |||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" | |||
Specifies the variance used for inference. | |||
*@li momentum: An optional float, the scale factor of the mean and the variance. | |||
*@par Attributes: | |||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | |||
*@li epsilon: An optional float32, specifying the small value added to variance | |||
to avoid dividing by zero. Defaults to "0.00001". | |||
*@li use_global_stats: Indicates inference mode. Can only be "True". | |||
*@li mode: An optional attr, not used. | |||
*@par Outputs: | |||
@@ -348,14 +397,19 @@ REG_OP(BnHost) | |||
*@brief Performs batch normalization . \n | |||
*@par Inputs: | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | |||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW | |||
for 4D or NC1HWC0 for 5D. | |||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" | |||
Specifies the mean used for inference. | |||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" | |||
Specifies the variance used for inference. | |||
*@li scale: An optional tensor of type float16 or float32, not used. | |||
*@li offset: An optional tensor of type float16 or float32, not used. | |||
*@par Attributes: | |||
*@li momentum: An optional float32 num, represents the mean and the variance's scale factor | |||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | |||
*@li momentum: An optional float32 num, represents the mean and the variance's | |||
scale factor | |||
*@li epsilon: An optional float32, specifying the small value added to variance | |||
to avoid dividing by zero. Defaults to "0.00001". | |||
*@li use_global_stats: Indicates inference mode. Can only be "True". | |||
*@li mode: An optional attr, not used. | |||
*@par Outputs: | |||
@@ -310,9 +310,6 @@ REG_OP(DepthwiseConv2DBackpropInputD) | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the TensorFlow operator DepthwiseConv2D. | |||
* @li Compatible with the Caffe operator DepthwiseConv2D. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(DepthwiseConv2D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | |||
@@ -460,9 +457,9 @@ REG_OP(Conv2DBackpropInputD) | |||
*@par Attributes: | |||
* Six attributes: | |||
* @li strides: A tuple or list of 2 integers. The stride of the sliding window | |||
* for H/W dimension. | |||
* for H/W dimension, defaults to [1,1]. | |||
* @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] | |||
* padding on the feature map. | |||
* padding on the feature map, defaults to [0,0,0,0]. | |||
* @li dilations: A tuple or list of 4 integers. The dilation factor for each | |||
* dimension of input, defaults to [1,1,1,1]. | |||
* @li groups: Number of blocked connections from input channels to | |||
@@ -482,8 +479,8 @@ REG_OP(Deconvolution) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) | |||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(strides, ListInt, {1, 1}) | |||
.ATTR(pads, ListInt, {0, 0, 0, 0}) | |||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||
.ATTR(groups, Int, 1) | |||
.ATTR(data_format, String, "NCHW") | |||
@@ -593,7 +590,7 @@ REG_OP(Conv2DBackpropFilterD) | |||
*@li bias: An optional 1D tensor. Shape is [out_channels]. | |||
*@li offset_w: An optional 1D tensor for quantized convolution. Shape is | |||
* [out_channels]. Reserved. | |||
* [out_channels]. Not supported. | |||
*\n | |||
*\n | |||
* Note that there is a strict data type mapping between the input and output | |||
@@ -622,7 +619,8 @@ REG_OP(Conv2DBackpropFilterD) | |||
* and right padding. | |||
* @li dilations: Optional. A list of 4 integers. Specifying the dilation rate | |||
* to use for dilated convolution. Has the same dimension order and value as | |||
* "strides". Defaults to [1, 1, 1, 1]. | |||
* "strides". Dilation > 1 is not supported for quantized convolution. Defaults | |||
* to [1, 1, 1, 1]. | |||
* @li groups: Optional. An integer of type int32, for the number of blocked | |||
* connections from input channels to output channels. Input channels and output | |||
* channels must both be divisible by "groups". "x" in_channels must be equal to | |||
@@ -704,13 +702,62 @@ REG_OP(Conv2D) | |||
.ATTR(offset_x, Int, 0) | |||
.OP_END_FACTORY_REG(Conv2D) | |||
/** | |||
*@brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors. | |||
*@par Inputs: | |||
* @li x: A 4D tensor of input images. | |||
* @li filter_compress: A 4D tensor of compressed filters. | |||
* @li compress_index: A 1D Tensor dtype of int8. | |||
* @li bias: An optional 1D tensor. | |||
* @li offset_w: An optional 1D tensor for quantized convolution. Reserved. | |||
* | |||
* The input and output tensor attributes are listed as follows: | |||
* @verbatim | |||
|Tensor | x | filter_compress | bias | offset_w | y | |||
-----------|---------|---------|---------|----------|-------- | |||
|Data Type | float16 | float16 | float16 | _ | float16 | |||
| |---------|---------|---------|----------|-------- | |||
| | float32 | float32 | float32 | _ | float32 | |||
| |---------|---------|---------|----------|-------- | |||
| | int8 | int8 | int32 | int8 | int32 | |||
-----------|---------|---------|---------|----------|-------- | |||
|Format | NCHW | NCHW | ND | ND | NCHW | |||
| | NHWC | NHWC | | | NHWC | |||
| | | HWCN | | | | |||
@endverbatim | |||
* It should be noted that the data types must correspond to each other, but the | |||
* format does not need to . \n | |||
*@par Attributes: | |||
* @li strides: A list of 4 integers. Specifying the strides of the | |||
* convolution along the height and width. The dimension order is determined | |||
* by the data format of "x". By default the N and C dimensions are set to 1. | |||
* @li pads: A list of 4 integers. Specifying the top, bottom, left and right | |||
* padding. | |||
* @li dilations: A list of 4 integers. Specifying the dilation rate to use | |||
* for dilated convolution. Has the same dimension order and value as "strides". | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Input channels and output channels must both be divisible by | |||
* "groups". Type is int32. | |||
* @li offset_x: An optional integer for quantized convolution. Type is int32. | |||
* Defaults to "0". | |||
* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the | |||
* data format of the input and output images. Type is string. | |||
* Defaults to "NHWC". Reserved . \n | |||
*@par Outputs: | |||
* @li y: A 4D Tensor of output images . \n | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS DEPRECATED. | |||
*/ | |||
REG_OP(Conv2DCompress) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||
.INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) | |||
.INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) | |||
.INPUT(compress_index, TensorType({DT_INT8})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||
@@ -158,18 +158,25 @@ REG_OP(Iou) | |||
*@par Inputs: | |||
* Three inputs, including: | |||
*@li ydiff: A 5HD gradient input of type float32. | |||
*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs, | |||
the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1". | |||
*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved . \n | |||
*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" | |||
indicates the number of ROIs, | |||
the value "5" indicates the indexes of images where the ROIs are located, "x0", | |||
"x1", "y0", and "y1". | |||
*@li rois_n: An optional input, specifying the number of valid ROIs. This | |||
parameter is reserved . \n | |||
*@par Attributes: | |||
*@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign. | |||
*@li pooled_width: A required attribute of type int, specifying the W dimension. | |||
*@li pooled_height: A required attribute of type int, specifying the H dimension. | |||
*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. | |||
*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical | |||
sampling frequency of each output. If this attribute is set to "0", the sampling frequency is | |||
equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n | |||
*@li spatial_scale: A required attribute of type float, specifying the scaling | |||
ratio of "features" to the original image. | |||
*@li sample_num: An optional attribute of type int, specifying the horizontal | |||
and vertical | |||
sampling frequency of each output. If this attribute is set to "0", the | |||
sampling frequency is | |||
equal to the rounded up value of "rois", which is a floating point number. | |||
Defaults to "2" . \n | |||
*@par Outputs: | |||
*xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". | |||
@@ -876,9 +883,7 @@ REG_OP(YoloV3DetectionOutputV2) | |||
A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. | |||
*@li imginfo: A float16, describing the image information including the required image height and width | |||
and the actual image height and width. | |||
*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. | |||
[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] | |||
is formed for the three Yolo outputs, respectively .It's a dynamic input. \n | |||
*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n | |||
*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n | |||
@@ -896,29 +896,7 @@ REG_OP(InstanceNormV2) | |||
.ATTR(epsilon, Float, 0.00001) | |||
.OP_END_FACTORY_REG(InstanceNormV2) | |||
/** | |||
*@brief Performs instance normalization for inference. | |||
*@par Inputs:\n | |||
* Six inputs, including: (NC1HWC0 supported) | |||
*@li x: A Tensor of type float16 or float32. | |||
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. | |||
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. | |||
*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the mean. | |||
*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. | |||
*@li variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. | |||
*@par Outputs:\n | |||
*y: A Tensor of type float16 or float32 for the normalized "x". | |||
*batch_mean: A Tensor of type float32 for the result mean. | |||
*batch_variance: A Tensor of type float32 for the result variance. | |||
*@attention Constraints: | |||
*For Ascend 310, the result accuracy fails to reach 1 per mille (0.1%) due to the square root instruction. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use INInferV2 instead. | |||
*/ | |||
REG_OP(INInferV2D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT})) | |||
@@ -931,6 +909,20 @@ REG_OP(INInferV2D) | |||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(INInferV2D) | |||
/** | |||
*@brief Performs instance normalization for inference of InHost part. | |||
*@par Inputs:\n | |||
* One input, including: (NC1HWC0 supported) | |||
* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. | |||
*@par Attributes: | |||
* epsilon: An optional float32, specifying the small value added to | |||
variance to avoid dividing by zero. Defaults to "0.00001" . \n | |||
*@par Outputs:\n | |||
* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. | |||
*/ | |||
REG_OP(InHost) | |||
.INPUT(variance, TensorType({DT_FLOAT})) | |||
.OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) | |||
@@ -128,9 +128,6 @@ REG_OP(AvgPool) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator AvgPool3D. | |||
* | |||
* @par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(AvgPool3D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | |||
@@ -111,9 +111,6 @@ REG_OP(ApplyAdaMax) | |||
* | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator ApplyAdaMax. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdaMax instead. | |||
*/ | |||
REG_OP(ApplyAdaMaxD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -352,9 +349,6 @@ REG_OP(ApplyMomentum) | |||
* accum: A mutable tensor. Has the same type as input "accum". | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator ApplyMomentum. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyMomentum instead. | |||
*/ | |||
REG_OP(ApplyMomentumD) | |||
@@ -681,9 +675,6 @@ REG_OP(ApplyPowerSign) | |||
* | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator ApplyPowerSign. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyPowerSign instead. | |||
*/ | |||
REG_OP(ApplyPowerSignD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -804,9 +795,6 @@ REG_OP(ApplyAddSign) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator ApplyAddSign. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAddSign instead. | |||
*/ | |||
REG_OP(ApplyAddSignD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -928,9 +916,6 @@ REG_OP(ApplyCenteredRMSProp) | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator ApplyCenteredRMSPropD. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyCenteredRMSProp instead. | |||
*/ | |||
REG_OP(ApplyCenteredRMSPropD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -1049,9 +1034,6 @@ REG_OP(ApplyAdagrad) | |||
* | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator ApplyAdagrad. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagrad instead. | |||
*/ | |||
REG_OP(ApplyAdagradD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -1236,9 +1218,6 @@ REG_OP(ApplyAdagradDA) | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator ApplyAdagradDA. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagradDA instead. | |||
*/ | |||
REG_OP(ApplyAdagradDAD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -1496,9 +1475,6 @@ REG_OP(ApplyProximalAdagrad) | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator ApplyProximalAdagradD. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyProximalAdagrad instead. | |||
*/ | |||
REG_OP(ApplyProximalAdagradD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -1592,9 +1568,6 @@ REG_OP(SparseApplyProximalAdagrad) | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator SparseApplyProximalAdagrad. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyProximalAdagrad instead. | |||
*/ | |||
REG_OP(SparseApplyProximalAdagradD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -1681,9 +1654,6 @@ REG_OP(ApplyFtrl) | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator ApplyFtrl. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrl instead. | |||
*/ | |||
REG_OP(ApplyFtrlD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -1775,9 +1745,6 @@ REG_OP(ApplyFtrlV2) | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator ApplyFtrlV2. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrlV2 instead. | |||
*/ | |||
REG_OP(ApplyFtrlV2D) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -1890,9 +1857,6 @@ REG_OP(ApplyAdam) | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator ApplyAdam. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdam instead. | |||
*/ | |||
REG_OP(ApplyAdamD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -1981,9 +1945,6 @@ REG_OP(ApplyAdadelta) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator ApplyAdadelta. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdadelta instead. | |||
*/ | |||
REG_OP(ApplyAdadeltaD) | |||
.INPUT(var, TensorType::NumberType()) | |||
@@ -65,9 +65,6 @@ REG_OP(Fill) | |||
* | |||
*@par Outputs: | |||
* y: A tensor. Has the same type as "value". | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Fill instead. | |||
*/ | |||
REG_OP(FillD) | |||
.INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, | |||
@@ -125,9 +122,6 @@ REG_OP(BroadcastTo) | |||
* | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator BroadcastTo. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use BroadcastTo instead. | |||
*/ | |||
REG_OP(BroadcastToD) | |||
.INPUT(x, TensorType::BasicType()) | |||
@@ -175,9 +169,6 @@ REG_OP(Pad) | |||
*@par Third-party framework compatibility: | |||
* Compatible with TensorFlow operator Pad. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. | |||
*/ | |||
REG_OP(PadD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) | |||
@@ -269,9 +260,6 @@ REG_OP(PadV3D) | |||
*@see Diag() | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator Diag. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Diag instead. | |||
*/ | |||
REG_OP(DiagD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
@@ -30,7 +30,7 @@ namespace ge { | |||
*@par Inputs: | |||
*Two inputs, including: | |||
*@li rt_nested_splits: A list of at least 1 Tensor objects with the same type | |||
in: int32, int64. The row_splits for the RaggedTensor. It's a dynamic input. | |||
in: int32, int64. The row_splits for the RaggedTensor. | |||
*@li rt_dense_values: A Tensor. The flat_values for the RaggedTensor | |||
Must be one of the following types: bool, int8, int16, uint16, int32, | |||
int64, double, float, float16 . \n | |||
@@ -66,7 +66,7 @@ REG_OP(RaggedTensorToSparse) | |||
*@li values:A 1D tensor representing the values of the ragged tensor. | |||
*@li default_value:A `Tensor`. Must have the same type as `values`. | |||
*@li row_partition_tensors:A list of at least 1 `Tensor` objects with the same | |||
type in: `int64`, `int32` . It's a dynamic input.\n | |||
type in: `int64`, `int32` .\n | |||
*@par Attributes: | |||
*@li num_row_partition_tensors:Numbers of row partition tensors. | |||
@@ -374,9 +374,6 @@ REG_OP(DropOutGenMask) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator lin_space. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use LinSpace instead. | |||
*/ | |||
REG_OP(LinSpaceD) | |||
.INPUT(assist, TensorType({DT_FLOAT})) | |||
@@ -353,9 +353,6 @@ REG_OP(ReduceSum) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator Sum. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceSum instead. | |||
*/ | |||
REG_OP(ReduceSumD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -381,9 +378,6 @@ REG_OP(ReduceSumD) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator ReduceAll. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAll instead. | |||
*/ | |||
REG_OP(ReduceAllD) | |||
.INPUT(x, TensorType({DT_BOOL})) | |||
@@ -459,9 +453,6 @@ REG_OP(ReduceProd) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator ReduceProd. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceProd instead. | |||
*/ | |||
REG_OP(ReduceProdD) | |||
.INPUT(x,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16})) | |||
@@ -516,9 +507,6 @@ REG_OP(ReduceMean) | |||
*@par Third-party framework compatibility: | |||
* Compatible with the TensorFlow operator ReduceMean. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead. | |||
*/ | |||
REG_OP(ReduceMeanD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -573,9 +561,6 @@ REG_OP(ReduceMax) | |||
*@par Third-party framework compatibility | |||
* Compatible with TensorFlow operator Max. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMax instead. | |||
*/ | |||
REG_OP(ReduceMaxD) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, | |||
@@ -630,9 +615,6 @@ REG_OP(ReduceMin) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator reduce_min. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. | |||
*/ | |||
REG_OP(ReduceMinD) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | |||
@@ -699,9 +681,6 @@ REG_OP(ReduceAny) | |||
* | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator reduce_any. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAny instead. | |||
*/ | |||
REG_OP(ReduceAnyD) | |||
.INPUT(x, TensorType({DT_BOOL})) | |||
@@ -787,9 +766,6 @@ REG_OP(EuclideanNorm) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator EuclideanNorm. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use EuclideanNorm instead. | |||
*/ | |||
REG_OP(EuclideanNormD) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) | |||
@@ -92,6 +92,7 @@ REG_OP(DynamicLSTM) | |||
.OUTPUT(output_h, TensorType({DT_FLOAT32})) | |||
.OP_END_FACTORY_REG(DynamicLSTM) | |||
/** | |||
*@brief: DynamicRNNGrad calculation. | |||
*@par Inputs: | |||
@@ -126,7 +127,7 @@ REG_OP(DynamicLSTM) | |||
*@li keep_prob:A float identifying the keep prob in the op. Defaults to 1. | |||
*@li cell_clip:A float identifying the cell clip in the op. Defaults to -1. | |||
*@li num_proj:An integer identifying the num projection in the op. Defaults to 0. | |||
*@li time_major:A bool identifying the time major in the op. Defaults to false. | |||
*@li time_major:A bool identifying the time major in the op. Defaults to true. | |||
*@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported. | |||
*@li forget_bias:A float identifying the forget bias in the op. Defaults to 0. | |||
*@li is_training:A bool identifying is training in the op. Defaults to true. | |||
@@ -138,6 +139,9 @@ REG_OP(DynamicLSTM) | |||
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||
*/ | |||
REG_OP(DynamicRNNGrad) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
@@ -28,7 +28,7 @@ namespace ge { | |||
/** | |||
*@brief Mark which tensors need to be saved to the ckpt file. | |||
*@par Inputs: | |||
*tensors: A list of input tensor.It's a dynamic input. | |||
*tensors: A list of input tensor. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
@@ -35,16 +35,16 @@ namespace ge { | |||
*rate . \n | |||
*@par Inputs: | |||
*@li sparse_example_indices: a list of vectors which contain example indices.It's a dynamic input. | |||
*@li sparse_feature_indices: a list of vectors which contain feature indices.It's a dynamic input. | |||
*@li sparse_feature_values: a list of vectors which contains feature value associated with each feature group.It's a dynamic input. | |||
*@li dense_features: a list of matrices which contains the dense feature values.It's a dynamic input. | |||
*@li sparse_example_indices: a list of vectors which contain example indices. | |||
*@li sparse_feature_indices: a list of vectors which contain feature indices. | |||
*@li sparse_feature_values: a list of vectors which contains feature value associated with each feature group. | |||
*@li dense_features: a list of matrices which contains the dense feature values. | |||
*@li example_weights: a vector which contains the weight associated with each example. | |||
*@li example_labels: a vector which contains the label/target associated with each example. | |||
*@li sparse_indices: a list of vectors where each value is the indices which has | |||
*corresponding weights in sparse_weights. This field may be omitted for the dense approach. It's a dynamic input. | |||
*corresponding weights in sparse_weights. This field may be omitted for the dense approach. | |||
*@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group. | |||
*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group.It's a dynamic input. | |||
*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group. | |||
*@li example_state_data: a list of vectors containing the example state data. | |||
*@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses. | |||
*@li l1: Symmetric l1 regularization strength. | |||
@@ -61,7 +61,6 @@ namespace ge { | |||
*@par Third-party framework compatibility | |||
* Compatible with tensorflow SdcaOptimizerV2 operator. | |||
*/ | |||
REG_OP(SdcaOptimizerV2) | |||
.DYNAMIC_INPUT(sparse_example_indices, TensorType({DT_INT64})) | |||
.DYNAMIC_INPUT(sparse_feature_indices, TensorType({DT_INT64})) | |||
@@ -79,9 +79,6 @@ REG_OP(Range) | |||
*@see Range() | |||
*@since V100R001C33 | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Range instead. | |||
*/ | |||
REG_OP(RangeD) | |||
.INPUT(x, TensorType({DT_FLOAT,DT_INT32})) | |||
@@ -186,7 +183,8 @@ REG_OP(GatherNd) | |||
* uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16, | |||
* uint16, complex128, float16, uint32, uint64, complex64, complex128. | |||
* @li indices: A Tensor of type int32 or int64. | |||
* @li axis: A Tensor of type as int32 . \n | |||
* @li axis: A Tensor of type int32 or int64. | |||
* Must be in the range [-rank(input_tensor), rank(input_tensor)) . \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x" . \n | |||
@@ -225,9 +223,6 @@ REG_OP(GatherV2) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator GatherV2. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use GatherV2 instead. | |||
*/ | |||
REG_OP(GatherV2D) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT32, DT_INT8, DT_UINT8, | |||
@@ -330,9 +325,6 @@ REG_OP(StridedSlice) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator StridedSlice. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead. | |||
*/ | |||
REG_OP(StridedSliceD) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, | |||
@@ -388,9 +380,6 @@ REG_OP(StridedSliceD) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator StridedSliceGradD. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSliceGrad instead. | |||
*/ | |||
REG_OP(StridedSliceGradD) | |||
.INPUT(dy, TensorType::BasicType()) | |||
@@ -502,9 +491,6 @@ REG_OP(UnsortedSegmentSum) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator UnsortedSegmentSum. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentSum instead. | |||
*/ | |||
REG_OP(UnsortedSegmentSumD) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8})) | |||
@@ -729,9 +715,6 @@ REG_OP(OneHot) | |||
*@par Third-party framework compatibility: | |||
* Compatible with the TensorFlow operator OneHot. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use OneHot instead. | |||
*/ | |||
REG_OP(OneHotD) | |||
.INPUT(x, TensorType({DT_UINT8, DT_INT32})) | |||
@@ -807,7 +790,7 @@ REG_OP(SliceD) | |||
* @li assist_seq: A 1D tensor of type float16. | |||
* with size of 2N, which "N" is the last dimension. | |||
* The first N numbers is indices, and the next N numbers is deviation of casting | |||
* int32 to float16. \n | |||
* float16 to int32 . \n | |||
* @par Attributes: | |||
* @li k: A required int that is at least 0, specifying the number of top elements | |||
@@ -816,7 +799,7 @@ REG_OP(SliceD) | |||
* If true, the resulting "k" elements will be sorted by the values in descending | |||
* order. | |||
* @li dim: An optional int. Defaults to -1. For reserved use. | |||
* @li largest: An optional bool. Defaults to true. For reserved use. \n | |||
* @li largest: An optional bool. Defaults to true. For reserved use. | |||
* @par Outputs: | |||
* @li values: A Tensor, specifying the sorted data. Has the same type as "input". | |||
@@ -1270,9 +1253,6 @@ REG_OP(InplaceUpdate) | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator InplaceUpdate. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceUpdate instead. | |||
*/ | |||
REG_OP(InplaceUpdateD) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
@@ -1325,9 +1305,6 @@ REG_OP(InplaceAdd) | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator InplaceAdd. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceAdd instead. | |||
*/ | |||
REG_OP(InplaceAddD) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
@@ -1379,9 +1356,6 @@ REG_OP(InplaceSub) | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator InplaceSub. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceSub instead. | |||
*/ | |||
REG_OP(InplaceSubD) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
@@ -1433,9 +1407,6 @@ REG_OP(ScatterNonAliasingAdd) | |||
* @par Outputs: | |||
* y: A Tensor of type RealNumberType . \n | |||
* @attention Constraints: | |||
* @li segment_ids must be non-negative tensor. | |||
* @see UnsortedSegmentSum(), UnsortedSegmentProd(), | |||
* @par Third-party framework compatibility | |||
@@ -1463,9 +1434,6 @@ REG_OP(UnsortedSegmentMin) | |||
* @par Outputs: | |||
* y: A Tensor.Must have the same type as input "x" . \n | |||
* @attention Constraints: | |||
* @li segment_ids must be non-negative tensor. | |||
* @see UnsortedSegmentProdD(), UnsortedSegmentSumD(), | |||
* | |||
* @par Restrictions: | |||
@@ -1491,9 +1459,6 @@ REG_OP(UnsortedSegmentMinD) | |||
* @par Outputs: | |||
* y: A Tensor of type RealNumberType . \n | |||
* @attention Constraints: | |||
* @li segment_ids must be non-negative tensor. | |||
* @see UnsortedSegmentSum(), UnsortedSegmentProd(), | |||
* @par Third-party framework compatibility | |||
@@ -1521,9 +1486,6 @@ REG_OP(UnsortedSegmentMax) | |||
* @par Outputs: | |||
* y: A Tensor.Must have the same type as input "x" . \n | |||
* @attention Constraints: | |||
* @li segment_ids must be non-negative tensor. | |||
* @see UnsortedSegmentProdD(), | |||
* | |||
* @par Restrictions: | |||
@@ -1548,9 +1510,6 @@ REG_OP(UnsortedSegmentMaxD) | |||
* @par Outputs: | |||
* y: A Tensor of type NumberType . \n | |||
* @attention Constraints: | |||
* @li segment_ids must be non-negative tensor. | |||
* @see UnsortedSegmentSum(), UnsortedSegmentMin(), | |||
* @par Third-party framework compatibility | |||
@@ -1582,9 +1541,6 @@ REG_OP(UnsortedSegmentProd) | |||
* @li segment_ids must be non-negative tensor. | |||
* @see UnsortedSegmentMinD() | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentProd instead. | |||
*/ | |||
REG_OP(UnsortedSegmentProdD) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) | |||
@@ -1900,9 +1856,6 @@ REG_OP(CumulativeLogsumexp) | |||
*y: A Tensor. Has the same type as "x". | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator Cumsum. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use CumulativeLogsumexp instead. | |||
*/ | |||
REG_OP(CumulativeLogsumexpD) | |||
.INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) | |||
@@ -75,9 +75,6 @@ REG_OP(Split) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator Split. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Split instead. | |||
*/ | |||
REG_OP(SplitD) | |||
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, | |||
@@ -144,9 +141,6 @@ Under the caffe framework, the conversion of slice_point through the cut point t | |||
Under the caffe framework, size_splits or axis is transformed to split_dim. Only one can take effect. | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator SplitV. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use SplitV instead. | |||
*/ | |||
REG_OP(SplitVD) | |||
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, | |||
@@ -164,8 +158,7 @@ REG_OP(SplitVD) | |||
* Two inputs, including: | |||
* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32, | |||
* int64, uint8, uint16, uint32, uint64, float16, float32. | |||
* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. | |||
* It's a dynamic input. | |||
* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. | |||
* @li shape: A Tensor of the same type as "x". | |||
* The final shape of the result. Should be equal to the shapes of any input | |||
* but with the number of input values in the first dimension . \n | |||
@@ -314,7 +307,7 @@ REG_OP(Concat) | |||
*@par Inputs: | |||
* x: A list of N Tensors. Must be one of the following types: int8, int16, int32, | |||
* int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n | |||
* int64, uint8, uint16, uint32, uint64, float16, float32, bool . \n | |||
*@par Attributes: | |||
*@li axis: An optional int, default value is 0. | |||
@@ -340,7 +333,7 @@ REG_OP(Pack) | |||
*@par Inputs: | |||
*Two inputs, including: | |||
* @li concat_dim: A Tensor of type int32. | |||
* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n | |||
* @li x: A list of 1D Tensor objects of type int32 . \n | |||
*@par Attributes: | |||
*N: A required int . \n | |||
@@ -364,7 +357,7 @@ REG_OP(ConcatOffset) | |||
*@par Inputs: | |||
*Two inputs, including: | |||
* @li concat_dim: A Tensor of type int32. | |||
* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n | |||
* @li x: A list of 1D Tensor objects of type int32 . \n | |||
*@par Attributes: | |||
*@li Concat_dim: A required int. Must be within the rank of input "x". | |||
@@ -235,12 +235,8 @@ REG_OP(BatchToSpaceND) | |||
*@par Outputs: | |||
*y: A Tensor with format NC1HWC0. Has the same type as input "x". | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator BatchToSpaceND. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpaceND instead. | |||
*/ | |||
REG_OP(BatchToSpaceNDD) | |||
.INPUT(x, TensorType::BasicType()) | |||
@@ -287,9 +283,6 @@ REG_OP(SpaceToBatchND) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator SpaceToBatchND. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatchND instead. | |||
*/ | |||
REG_OP(SpaceToBatchNDD) | |||
.INPUT(x, TensorType::BasicType()) | |||
@@ -411,9 +404,6 @@ REG_OP(BatchToSpace) | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator BatchToSpace. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead. | |||
*/ | |||
REG_OP(BatchToSpaceD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, | |||
@@ -467,9 +457,6 @@ REG_OP(SpaceToBatch) | |||
*y: A Tensor. Has the same type as input "x". | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator SpaceToBatch. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatch instead. | |||
*/ | |||
REG_OP(SpaceToBatchD) | |||
.INPUT(x, TensorType::BasicType()) | |||
@@ -598,9 +585,6 @@ REG_OP(ExtractVolumePatches) | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x". | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ConfusionTranspose instead. | |||
*/ | |||
REG_OP(ConfusionTransposeD) | |||
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, | |||
@@ -664,11 +648,6 @@ REG_OP(FlattenV2) | |||
.ATTR(end_axis, Int, -1) | |||
.OP_END_FACTORY_REG(FlattenV2) | |||
REG_OP(DeConvTrans) | |||
.INPUT(x, TensorType({DT_INT8})) | |||
.OUTPUT(y, TensorType({DT_INT8})) | |||
.OP_END_FACTORY_REG(DeConvTrans) | |||
/** | |||
*@brief Compress large weight to small one. Usually inserted before Conv2d. | |||
* | |||
@@ -19,7 +19,7 @@ | |||
#include <stdint.h> | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
@@ -580,7 +580,8 @@ RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *d | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -19,7 +19,7 @@ | |||
#include "base.h" | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
@@ -185,7 +185,7 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); | |||
*/ | |||
RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -19,7 +19,7 @@ | |||
#include "base.h" | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
@@ -149,7 +149,7 @@ RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t* groupInfo, uint | |||
*/ | |||
RTS_API rtError_t rtGetGroupCount(uint32_t *count); | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -19,7 +19,7 @@ | |||
#include "base.h" | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
@@ -339,7 +339,7 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -19,7 +19,7 @@ | |||
#include "base.h" | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
@@ -56,7 +56,7 @@ RTS_API rtError_t rtUnsetDvfsProfile(); | |||
*/ | |||
RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -19,7 +19,7 @@ | |||
#include "base.h" | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
@@ -229,7 +229,7 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs | |||
*/ | |||
RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -20,7 +20,7 @@ | |||
#include "base.h" | |||
#include "stream.h" | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
@@ -529,7 +529,7 @@ RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum); | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -24,7 +24,7 @@ | |||
#include "config.h" | |||
#include "stream.h" | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
@@ -491,7 +491,7 @@ RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num); | |||
*/ | |||
RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream); | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -19,7 +19,7 @@ | |||
#include "base.h" | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
@@ -430,7 +430,7 @@ rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, uint | |||
*/ | |||
RTS_API rtError_t rtDebugUnRegister(rtModel_t model); | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -20,7 +20,7 @@ | |||
#include "base.h" | |||
#include "event.h" | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
extern "C" { | |||
#endif | |||
@@ -188,7 +188,7 @@ RTS_API rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream); | |||
*/ | |||
RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr, | |||
uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType); | |||
#ifdef __cplusplus | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -1,12 +1,18 @@ | |||
/** | |||
* @file adx_datadump_server.h | |||
* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
*/ | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef ADX_DATADUMP_SERVER_H | |||
#define ADX_DATADUMP_SERVER_H | |||
@@ -1,5 +1,5 @@ | |||
diff -Npur -x .git bounds_checking_function/CMakeLists.txt securec/CMakeLists.txt | |||
--- bounds_checking_function/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 | |||
diff -Npur -x .git libboundscheck/CMakeLists.txt securec/CMakeLists.txt | |||
--- libboundscheck/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 | |||
+++ securec/CMakeLists.txt 2020-09-19 16:53:48.689460700 +0800 | |||
@@ -0,0 +1,18 @@ | |||
+cmake_minimum_required(VERSION 3.14) | |||