Merge pull request !83 from HW_KK/masterpull/83/MERGE
@@ -68,7 +68,7 @@ elseif(DEFINED ENV{D_LINK_PATH}) | |||||
find_library(slog libslog.so ${GE_LIB_PATH}) | find_library(slog libslog.so ${GE_LIB_PATH}) | ||||
find_library(mmpa libmmpa.so ${GE_LIB_PATH}) | find_library(mmpa libmmpa.so ${GE_LIB_PATH}) | ||||
find_library(runtime libruntime.so ${GE_LIB_PATH}) | find_library(runtime libruntime.so ${GE_LIB_PATH}) | ||||
find_library(msprof libmsprof.so ${GE_LIB_PATH}) | |||||
find_library(msprof libmsprofiler.a ${GE_LIB_PATH}) | |||||
find_library(register libregister.so ${GE_LIB_PATH}) | find_library(register libregister.so ${GE_LIB_PATH}) | ||||
find_library(hccl libhccl.so ${GE_LIB_PATH}) | find_library(hccl libhccl.so ${GE_LIB_PATH}) | ||||
find_library(resource libresource.so ${GE_LIB_PATH}) | find_library(resource libresource.so ${GE_LIB_PATH}) | ||||
@@ -85,7 +85,7 @@ else() | |||||
set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64) | set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64) | ||||
find_library(slog libslog.so ${ASCEND_DRIVER_DIR}) | find_library(slog libslog.so ${ASCEND_DRIVER_DIR}) | ||||
find_library(mmpa libmmpa.so ${ASCEND_DRIVER_DIR}) | find_library(mmpa libmmpa.so ${ASCEND_DRIVER_DIR}) | ||||
find_library(msprof libmsprof.so ${ASCEND_DRIVER_DIR}) | |||||
find_library(msprof libmsprofiler.a ${ASCEND_RUNTIME_DIR}) | |||||
find_library(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | find_library(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | ||||
find_library(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | find_library(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | ||||
@@ -61,8 +61,10 @@ class StringUtils { | |||||
/// @param [in] delim separator | /// @param [in] delim separator | ||||
/// @return string array after segmentation | /// @return string array after segmentation | ||||
/// | /// | ||||
/*lint -e1077*/ | |||||
static std::vector<std::string> Split(const std::string &str, char delim) { | static std::vector<std::string> Split(const std::string &str, char delim) { | ||||
std::vector<std::string> elems; | std::vector<std::string> elems; | ||||
/*lint +e1077*/ | |||||
if (str.empty()) { | if (str.empty()) { | ||||
elems.emplace_back(""); | elems.emplace_back(""); | ||||
@@ -92,6 +92,9 @@ struct OmgContext { | |||||
std::map<std::string, std::vector<int32_t>> out_nodes_map; | std::map<std::string, std::vector<int32_t>> out_nodes_map; | ||||
// user-designated out nodes (this is used for determining the order) | // user-designated out nodes (this is used for determining the order) | ||||
std::vector<std::pair<std::string, int32_t>> user_out_nodes; | std::vector<std::pair<std::string, int32_t>> user_out_nodes; | ||||
// save the output node of the network, value = topName, | |||||
// topName indicates the output name of the operator. | |||||
std::vector<std::string> user_out_nodes_top_vec; | |||||
// net out nodes (where user_out_nodes or leaf nodes) | // net out nodes (where user_out_nodes or leaf nodes) | ||||
std::vector<std::string> net_out_nodes; | std::vector<std::string> net_out_nodes; | ||||
// net out nodes top names(only caffe has top) | // net out nodes top names(only caffe has top) | ||||
@@ -1052,6 +1052,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_FLAG; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_OP_DEBUG_MODE; | ||||
// op dynamic input | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_START; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_INPUT_END; | |||||
// functional ops attr | // functional ops attr | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_THEN_BRANCH; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_ELSE_BRANCH; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_IF_ELSE_BRANCH; | ||||
@@ -235,7 +235,8 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder { | |||||
vector<string> GetOpInferDepends() const; | vector<string> GetOpInferDepends() const; | ||||
string GetInputNameByIndex(uint32_t index) const; | string GetInputNameByIndex(uint32_t index) const; | ||||
string GetValidInputNameByIndex(uint32_t index) const; | |||||
int GetValidInputIndexByName(const string &name) const; | |||||
int GetInputIndexByName(const string &name) const; | int GetInputIndexByName(const string &name) const; | ||||
string GetOutputNameByIndex(uint32_t index) const; | string GetOutputNameByIndex(uint32_t index) const; | ||||
@@ -22,8 +22,10 @@ | |||||
template <class E, class O> | template <class E, class O> | ||||
class RangeVistor { | class RangeVistor { | ||||
public: | public: | ||||
/*lint -e151*/ | |||||
using Iterator = typename std::vector<E>::iterator; | using Iterator = typename std::vector<E>::iterator; | ||||
using ConstIterator = typename std::vector<E>::const_iterator; | using ConstIterator = typename std::vector<E>::const_iterator; | ||||
/*lint +e151*/ | |||||
RangeVistor(O owner, const std::vector<E> &vs) : owner_(owner), elements_(vs) {} | RangeVistor(O owner, const std::vector<E> &vs) : owner_(owner), elements_(vs) {} | ||||
@@ -41,7 +43,9 @@ class RangeVistor { | |||||
bool empty() const { return elements_.empty(); } | bool empty() const { return elements_.empty(); } | ||||
/*lint -e659*/ | |||||
E &at(std::size_t index) { return elements_.at(index); } | E &at(std::size_t index) { return elements_.at(index); } | ||||
/*lint +e659*/ | |||||
const E &at(std::size_t index) const { return elements_.at(index); } | const E &at(std::size_t index) const { return elements_.at(index); } | ||||
@@ -53,6 +53,7 @@ class OpDescUtils { | |||||
static vector<GeTensorPtr> MutableWeights(const ge::NodePtr node); | static vector<GeTensorPtr> MutableWeights(const ge::NodePtr node); | ||||
static graphStatus SetWeights(ge::Node& node, const vector<ge::GeTensorPtr>& weights); | static graphStatus SetWeights(ge::Node& node, const vector<ge::GeTensorPtr>& weights); | ||||
static graphStatus SetWeights(ge::NodePtr node, const vector<ge::GeTensorPtr>& weights); | static graphStatus SetWeights(ge::NodePtr node, const vector<ge::GeTensorPtr>& weights); | ||||
static graphStatus SetWeights(ge::Node& node, const map<int, ge::GeTensorPtr>& weights_map); | |||||
static graphStatus ClearWeights(ge::NodePtr node); | static graphStatus ClearWeights(ge::NodePtr node); | ||||
static bool ClearInputDesc(ge::OpDescPtr op_desc, uint32_t index); | static bool ClearInputDesc(ge::OpDescPtr op_desc, uint32_t index); | ||||
@@ -28,7 +28,7 @@ using std::unordered_set; | |||||
void AttrHolder::CopyAttrsFrom(const AttrHolder &holder) { MutableAttrMap().CopyValueFrom(holder.GetAttrMap()); } | void AttrHolder::CopyAttrsFrom(const AttrHolder &holder) { MutableAttrMap().CopyValueFrom(holder.GetAttrMap()); } | ||||
graphStatus AttrHolder::SetAttr(const std::string &name, const GeAttrValue &value) { | graphStatus AttrHolder::SetAttr(const std::string &name, const GeAttrValue &value) { | ||||
if (value.IsEmpty()) { | if (value.IsEmpty()) { | ||||
GELOGE(GRAPH_FAILED, "value is empty, key %s", name.c_str()); | |||||
GELOGE(GRAPH_FAILED, "value is empty, key of the attr is %s", name.c_str()); | |||||
return GRAPH_FAILED; | return GRAPH_FAILED; | ||||
} | } | ||||
auto proto_map = MutableAttrMap().GetProtoMsg(); | auto proto_map = MutableAttrMap().GetProtoMsg(); | ||||
@@ -1060,6 +1060,10 @@ const std::string ATTR_NAME_HCCL_FUSED_FLAG = "_hccl_fused_node"; | |||||
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr"; | const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR = "_alloc_fixed_addr"; | ||||
const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index"; | const std::string ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = "_alloc_fixed_addr_index"; | ||||
// op dynamic input | |||||
const std::string ATTR_NAME_DYNAMIC_INPUT_START = "_dynamic_input_index_start"; | |||||
const std::string ATTR_NAME_DYNAMIC_INPUT_END = "_dynamic_input_index_end"; | |||||
// atc user def dtype&format | // atc user def dtype&format | ||||
const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type"; | const std::string ATTR_ATC_USER_DEFINE_DATATYPE = "_user_defined_data_type"; | ||||
const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format"; | const std::string ATTR_ATC_USER_DEFINE_FORMAT = "_user_defined_format"; | ||||
@@ -762,9 +762,10 @@ graphStatus Node::Verify() const { | |||||
if (!is_unknown_graph) { | if (!is_unknown_graph) { | ||||
for (const auto &in_anchor_ptr : GetAllInDataAnchors()) { | for (const auto &in_anchor_ptr : GetAllInDataAnchors()) { | ||||
GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue); | GE_IF_BOOL_EXEC(in_anchor_ptr == nullptr, GELOGW("in anchor ptr is null"); continue); | ||||
bool valid_anchor = op_->GetType() == data_type || op_->GetType() == aipp_data_type || | |||||
op_->GetType() == const_type || op_->GetType() == variable_type || | |||||
op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || in_anchor_ptr->GetPeerAnchors().size() > 0; | |||||
bool valid_anchor = | |||||
op_->GetType() == data_type || op_->GetType() == aipp_data_type || op_->GetType() == const_type || | |||||
op_->GetType() == variable_type || op_->IsOptionalInput(in_anchor_ptr->GetIdx()) || | |||||
op_->MutableInputDesc(in_anchor_ptr->GetIdx()) == nullptr || in_anchor_ptr->GetPeerAnchors().size() > 0; | |||||
if (!valid_anchor) { | if (!valid_anchor) { | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"}, | ErrorManager::GetInstance().ATCReportErrMessage("E11019", {"opname", "index"}, | ||||
{GetName(), std::to_string(in_anchor_ptr->GetIdx())}); | {GetName(), std::to_string(in_anchor_ptr->GetIdx())}); | ||||
@@ -347,7 +347,10 @@ graphStatus OpDesc::AddOptionalInputDesc(const string &name, const ge::GeTensorD | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus | ||||
OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { | OpDesc::UpdateInputDesc(uint32_t index, const ge::GeTensorDesc &tensor_Desc) { | ||||
GE_CHK_BOOL_RET_STATUS((index < inputs_desc_.size()), GRAPH_FAILED, "The index is invalid. index[%u]", index); | |||||
if (index >= inputs_desc_.size()) { | |||||
GELOGW("The index is invalid. index[%u]", index); | |||||
return GRAPH_FAILED; | |||||
} | |||||
inputs_desc_[index] = ComGraphMakeShared<GeTensorDesc>(tensor_Desc); | inputs_desc_[index] = ComGraphMakeShared<GeTensorDesc>(tensor_Desc); | ||||
if (inputs_desc_[index] == nullptr) { | if (inputs_desc_[index] == nullptr) { | ||||
@@ -949,6 +952,43 @@ int OpDesc::GetInputIndexByName(const string &name) const { | |||||
return static_cast<int>(it_find->second); | return static_cast<int>(it_find->second); | ||||
} | } | ||||
int OpDesc::GetValidInputIndexByName(const string &name) const { | |||||
map<string, uint32_t> valid_input_name_idx{}; | |||||
uint32_t j = 0; | |||||
for (size_t i = 0; i < GetAllInputsSize(); i++) { | |||||
if (MutableInputDesc(static_cast<uint32_t>(i)) != nullptr) { | |||||
auto valid_name = GetInputNameByIndex(static_cast<uint32_t>(i)); | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), -1); | |||||
valid_input_name_idx.insert({valid_name, j}); | |||||
j++; | |||||
} | |||||
} | |||||
auto it_find = valid_input_name_idx.find(name); | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(it_find != valid_input_name_idx.end(), -1); | |||||
return static_cast<int>(it_find->second); | |||||
} | |||||
/// @brief Return the name of the `index`-th valid input.
///        An input slot is treated as valid when MutableInputDesc() returns a non-null
///        descriptor for it; null slots are skipped and do not consume a position.
/// @param [in] index zero-based position among the valid inputs
/// @return the input name; an empty string when `index` is past the last valid input or
///         when any valid input resolves to an empty name
string OpDesc::GetValidInputNameByIndex(uint32_t index) const {
  // Count valid inputs directly instead of filling a name->position map and then
  // searching that map by value.
  // NOTE(review): this assumes GetInputNameByIndex never yields the same name for two
  // different slots; the old map-based code silently dropped the later slot in that case.
  string found_name;
  uint32_t valid_count = 0;
  for (size_t i = 0; i < GetAllInputsSize(); i++) {
    const auto input_index = static_cast<uint32_t>(i);
    if (MutableInputDesc(input_index) == nullptr) {
      continue;
    }
    const auto valid_name = GetInputNameByIndex(input_index);
    // Keep scanning after the hit: an empty name on any valid input fails the lookup.
    GE_CHK_BOOL_RET_STATUS_NOLOG(!valid_name.empty(), "");
    if (valid_count == index) {
      found_name = valid_name;
    }
    valid_count++;
  }
  // Still empty here means no valid input sits at the requested position.
  return found_name;
}
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetOutputNameByIndex(uint32_t index) const { | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY string OpDesc::GetOutputNameByIndex(uint32_t index) const { | ||||
auto it = output_name_idx_.begin(); | auto it = output_name_idx_.begin(); | ||||
for (; it != output_name_idx_.end(); ++it) { | for (; it != output_name_idx_.end(); ++it) { | ||||
@@ -56,7 +56,7 @@ class RefRelations::Impl { | |||||
} | } | ||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
GELOGW("can not find any relations! key value is %s", lookup_key.c_str()); | |||||
GELOGW("can not find any relations! key value of dest relation is %s", lookup_key.c_str()); | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
}; | }; | ||||
graphStatus BuildRefRelations(ge::ComputeGraph &root_graph); | graphStatus BuildRefRelations(ge::ComputeGraph &root_graph); | ||||
@@ -560,6 +560,53 @@ OpDescUtils::SetWeights(ge::Node &node, const vector<ge::GeTensorPtr> &weights) | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
/// @brief Set weights on `node`, keyed by input index.
///        - If `node` itself is a CONSTANT op, `weights_map` must hold exactly
///          CONST_OP_NORMAL_WEIGHT_SIZE entries and the first one becomes the node's weight.
///        - Otherwise, for every (input index, tensor) pair: when the input is already
///          linked, the peer node must be a CONSTANT op and its weight is replaced; when it
///          is not linked, a new const op is created from the tensor, added to the front of
///          the owner graph and linked to that input.
/// @param [in] node        node whose weights are set
/// @param [in] weights_map input index -> weight tensor
/// @return GRAPH_SUCCESS on success; GRAPH_PARAM_INVALID / GRAPH_FAILED (or the status of
///         the failing helper) otherwise
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus
OpDescUtils::SetWeights(ge::Node &node, const map<int, ge::GeTensorPtr> &weights_map) {
  GE_CHECK_NOTNULL(node.GetOpDesc());
  // 1. node is const
  if (node.GetOpDesc()->GetType() == CONSTANT) {
    if (weights_map.size() == CONST_OP_NORMAL_WEIGHT_SIZE) {
      return SetWeights(node.GetOpDesc(), weights_map.begin()->second);
    }
    GELOGE(GRAPH_PARAM_INVALID, "const op %s weight size %zu should be 1", node.GetName().c_str(), weights_map.size());
    return GRAPH_PARAM_INVALID;
  }
  // 2. node is not const
  for (const auto &pair : weights_map) {
    auto in_data_anchor = node.GetInDataAnchor(pair.first);
    if (in_data_anchor != nullptr && in_data_anchor->GetPeerOutAnchor() != nullptr) {
      // a. update const input node
      auto out_anchor = in_data_anchor->GetPeerOutAnchor();
      auto peer_node = out_anchor->GetOwnerNode();
      if (peer_node == nullptr) {
        GELOGE(GRAPH_PARAM_INVALID, "op %s [%d]'s input node is null", node.GetName().c_str(), pair.first);
        return GRAPH_PARAM_INVALID;
      }
      if (peer_node->GetType() != CONSTANT) {
        GELOGE(GRAPH_PARAM_INVALID, " op %s [%d]'s input node should be const, but is %s type:%s ",
               node.GetName().c_str(), pair.first, peer_node->GetName().c_str(), peer_node->GetType().c_str());
        // Stop here: writing a weight onto a non-const producer would corrupt that node.
        return GRAPH_PARAM_INVALID;
      }
      // Propagate the failure instead of reporting success for a weight that was not set.
      graphStatus ret = SetWeights(peer_node->GetOpDesc(), pair.second);
      if (ret != GRAPH_SUCCESS) {
        GELOGE(ret, "op %s [%d]'s const input node %s set weight failed", node.GetName().c_str(), pair.first,
               peer_node->GetName().c_str());
        return ret;
      }
    } else {
      // b. create new const input node
      auto const_opdesc = CreateConstOp(pair.second);
      GE_CHECK_NOTNULL(const_opdesc);
      auto owner_graph = node.GetOwnerComputeGraph();
      if (owner_graph == nullptr) {
        GELOGE(GRAPH_PARAM_INVALID, "node's graph is empty, name: %s", node.GetName().c_str());
        return GRAPH_PARAM_INVALID;
      }
      auto const_node = owner_graph->AddNodeFront(const_opdesc);
      // Check the new node before linking it to the input.
      GE_CHECK_NOTNULL(const_node);
      if (node.AddLinkFrom(static_cast<uint32_t>(pair.first), const_node) != GRAPH_SUCCESS) {
        GELOGE(GRAPH_FAILED, "op %s add const to input index[%d] failed", node.GetName().c_str(), pair.first);
        return GRAPH_FAILED;
      }
    }
  }
  NodeUtils::UpdateIsInputConst(node);
  return GRAPH_SUCCESS;
}
OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) { | OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) { | ||||
GE_CHK_BOOL_EXEC(tensor_ptr != nullptr, return nullptr, "tensor_ptr is nullptr!"); | GE_CHK_BOOL_EXEC(tensor_ptr != nullptr, return nullptr, "tensor_ptr is nullptr!"); | ||||
shared_ptr<OpDesc> const_opdesc = ComGraphMakeShared<OpDesc>(); | shared_ptr<OpDesc> const_opdesc = ComGraphMakeShared<OpDesc>(); | ||||
@@ -229,6 +229,7 @@ target_link_libraries(ge_runner | |||||
${resouce} | ${resouce} | ||||
${ascend_hal} | ${ascend_hal} | ||||
${adump_server} | ${adump_server} | ||||
${msprofiler} | |||||
rt | rt | ||||
dl) | dl) | ||||
@@ -358,7 +359,10 @@ add_library(ge_compiler SHARED ${INFER_SRC_LIST} ${PROTO_SRCS} ${PROTO_HEADER_HD | |||||
target_compile_definitions(ge_compiler PRIVATE | target_compile_definitions(ge_compiler PRIVATE | ||||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
REUSE_MEMORY=1 | REUSE_MEMORY=1 | ||||
FMK_HOST_INFER) | |||||
FMK_HOST_INFER | |||||
FMK_SUPPORT_DUMP | |||||
COMPILE_OMG_PACKAGE | |||||
REUSE_MEMORY=1) | |||||
target_link_libraries(ge_compiler | target_link_libraries(ge_compiler | ||||
graph | graph | ||||
ge_common | ge_common | ||||
@@ -68,5 +68,7 @@ target_link_libraries(ge_client | |||||
${mmpa} | ${mmpa} | ||||
${runtime} | ${runtime} | ||||
${msprof} | ${msprof} | ||||
${msprofiler} | |||||
${ascend_hal} | |||||
rt | rt | ||||
dl) | dl) |
@@ -16,6 +16,7 @@ | |||||
#include "ge/ge_api.h" | #include "ge/ge_api.h" | ||||
#include <iostream> | #include <iostream> | ||||
#include <malloc.h> | |||||
#include "common/debug/log.h" | #include "common/debug/log.h" | ||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "common/ge/datatype_util.h" | #include "common/ge/datatype_util.h" | ||||
@@ -163,6 +164,9 @@ Status GEFinalize() { | |||||
g_ge_initialized = false; | g_ge_initialized = false; | ||||
} | } | ||||
// to reduce memory fragmentation, use malloc_trim to return freed heap memory to the system | |||||
malloc_trim(0); | |||||
GELOGT(TRACE_STOP, "GEFinalize finished"); | GELOGT(TRACE_STOP, "GEFinalize finished"); | ||||
return ret; | return ret; | ||||
} | } | ||||
@@ -70,9 +70,10 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
libregister \ | libregister \ | ||||
libge_compiler \ | libge_compiler \ | ||||
libge_common \ | libge_common \ | ||||
libmsprof | |||||
libmsprof \ | |||||
stub/libascend_hal | |||||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||||
LOCAL_LDFLAGS := -lrt -ldl | LOCAL_LDFLAGS := -lrt -ldl | ||||
@@ -107,6 +108,7 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
libge_common \ | libge_common \ | ||||
libmsprof | libmsprof | ||||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||||
LOCAL_LDFLAGS := -lrt -ldl | LOCAL_LDFLAGS := -lrt -ldl | ||||
LOCAL_CFLAGS += \ | LOCAL_CFLAGS += \ | ||||
@@ -172,18 +172,18 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||||
return RT_FAILED; | return RT_FAILED; | ||||
} | } | ||||
constexpr int32_t ioAddrNum = 2; | |||||
constexpr uint32_t argsSize = sizeof(aicpu::AicpuParamHead) + ioAddrNum * sizeof(uint64_t); | |||||
char args[argsSize] = {0}; | |||||
auto paramHead = reinterpret_cast<aicpu::AicpuParamHead *>(args); | |||||
paramHead->length = argsSize; | |||||
paramHead->ioAddrNum = ioAddrNum; | |||||
auto ioAddr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead)); | |||||
ioAddr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_); | |||||
ioAddr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_); | |||||
constexpr int32_t io_addr_num = 2; | |||||
constexpr uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addr_num * sizeof(uint64_t); | |||||
char args[args_size] = {0}; | |||||
auto param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args); | |||||
param_head->length = args_size; | |||||
param_head->ioAddrNum = io_addr_num; | |||||
auto io_addr = reinterpret_cast<uint64_t *>(args + sizeof(aicpu::AicpuParamHead)); | |||||
io_addr[0] = reinterpret_cast<uintptr_t>(proto_dev_mem_); | |||||
io_addr[1] = reinterpret_cast<uintptr_t>(proto_size_dev_mem_); | |||||
rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp, | rt_ret = rtCpuKernelLaunch(nullptr, kDumpKernelsDumpOp, | ||||
1, // blockDim default 1 | 1, // blockDim default 1 | ||||
args, argsSize, | |||||
args, args_size, | |||||
nullptr, // no need smDesc | nullptr, // no need smDesc | ||||
stream_); | stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
@@ -34,7 +34,7 @@ std::map<ge::DataType, std::vector<ge::DataType>> g_reverse_translatable_data_ty | |||||
{ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, | {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, | ||||
{ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; | {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; | ||||
static const std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = { | |||||
std::map<ge::DataType, ge::proto::DataType> g_dump_data_type_map = { | |||||
// key:ge datatype,value:proto datatype | // key:ge datatype,value:proto datatype | ||||
{ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, | {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, | ||||
{ge::DT_FLOAT, ge::proto::DT_FLOAT}, | {ge::DT_FLOAT, ge::proto::DT_FLOAT}, | ||||
@@ -51,12 +51,13 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager &ProfilingMana | |||||
return profiling_manager; | return profiling_manager; | ||||
} | } | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options, | |||||
bool convert_2_phy_device_id) { | |||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Init(const Options &options) { | |||||
#ifdef DAVINCI_SUPPORT_PROFILING | #ifdef DAVINCI_SUPPORT_PROFILING | ||||
vector<int32_t>().swap(device_id_); | vector<int32_t>().swap(device_id_); | ||||
job_id_ = options.job_id; | job_id_ = options.job_id; | ||||
GELOGI("ProfilingManager::Init job_id:%s", job_id_.c_str()); | |||||
Status ret; | Status ret; | ||||
if (!recv_profiling_config_.empty()) { | if (!recv_profiling_config_.empty()) { | ||||
GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str()); | GELOGI("Profiling json config from acl:%s", recv_profiling_config_.c_str()); | ||||
@@ -64,18 +65,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||||
} else { | } else { | ||||
ret = InitFromOptions(options); | ret = InitFromOptions(options); | ||||
if (ret == SUCCESS && is_load_profiling_) { | if (ret == SUCCESS && is_load_profiling_) { | ||||
// profiling need phy device id | |||||
if (!convert_2_phy_device_id) { | |||||
device_id_.push_back(options.device_id); | |||||
} else { | |||||
uint32_t phy_device_id = 0; | |||||
rtError_t rt_ret = rtGetDevicePhyIdByIndex(static_cast<uint32_t>(options.device_id), &phy_device_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); | |||||
return FAILED; | |||||
} | |||||
device_id_.push_back(phy_device_id); | |||||
} | |||||
device_id_.push_back(options.device_id); | |||||
} | } | ||||
} | } | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
@@ -557,25 +547,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||||
return; | return; | ||||
} | } | ||||
GELOGI("current logic_device_id:%d", logic_device_id); | GELOGI("current logic_device_id:%d", logic_device_id); | ||||
uint32_t phy_device_id = 0; | |||||
rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); | |||||
return; | |||||
} | |||||
GELOGI("current phy_device_id:%d", phy_device_id); | |||||
if (!is_acl_api_mode_) { | if (!is_acl_api_mode_) { | ||||
auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); | |||||
auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | |||||
if (ret == device_id_.end()) { | if (ret == device_id_.end()) { | ||||
GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); | ||||
return; | return; | ||||
} | } | ||||
} | } | ||||
GELOGI("start ProfilingTaskDescInfo."); | GELOGI("start ProfilingTaskDescInfo."); | ||||
ProfilingTaskDescInfo(task_desc_info, phy_device_id); | |||||
ProfilingTaskDescInfo(task_desc_info, logic_device_id); | |||||
GELOGI("start ProfilingGraphDescInfo."); | GELOGI("start ProfilingGraphDescInfo."); | ||||
ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id); | |||||
ProfilingGraphDescInfo(compute_graph_desc_info, logic_device_id); | |||||
GELOGI("Report profiling data for GE end."); | GELOGI("Report profiling data for GE end."); | ||||
#endif | #endif | ||||
} | } | ||||
@@ -69,7 +69,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
ProfilingManager(); | ProfilingManager(); | ||||
virtual ~ProfilingManager(); | virtual ~ProfilingManager(); | ||||
static ProfilingManager &Instance(); | static ProfilingManager &Instance(); | ||||
ge::Status Init(const Options &options, bool convert_2_phy_device_id = false); | |||||
ge::Status Init(const Options &options); | |||||
ge::Status InitFromOptions(const Options &options); | ge::Status InitFromOptions(const Options &options); | ||||
ge::Status InitFromAclCfg(const std::string &config); | ge::Status InitFromAclCfg(const std::string &config); | ||||
ge::Status StartProfiling(int32_t iter, int32_t device_id); | ge::Status StartProfiling(int32_t iter, int32_t device_id); | ||||
@@ -472,7 +472,7 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str | |||||
return true; | return true; | ||||
} | } | ||||
ret = regexec(®, str.c_str(), 0, nullptr, 0); | |||||
ret = regexec(®, str.c_str(), 0, NULL, 0); | |||||
if (ret) { | if (ret) { | ||||
regerror(ret, ®, ebuff, kMaxBuffSize); | regerror(ret, ®, ebuff, kMaxBuffSize); | ||||
GELOGE(ge::PARAM_INVALID, "regexec failed, reason: %s", ebuff); | GELOGE(ge::PARAM_INVALID, "regexec failed, reason: %s", ebuff); | ||||
@@ -120,6 +120,7 @@ target_link_libraries(ge_executor | |||||
${mmpa} | ${mmpa} | ||||
${msprof} | ${msprof} | ||||
${error_manager} | ${error_manager} | ||||
${ascend_hal} | |||||
rt | rt | ||||
dl) | dl) | ||||
@@ -89,6 +89,7 @@ local_ge_executor_shared_library := \ | |||||
libregister \ | libregister \ | ||||
libmsprof \ | libmsprof \ | ||||
liberror_manager \ | liberror_manager \ | ||||
libascend_hal | |||||
local_ge_executor_ldflags := -lrt -ldl \ | local_ge_executor_ldflags := -lrt -ldl \ | ||||
@@ -104,6 +105,7 @@ LOCAL_SRC_FILES := $(local_ge_executor_src_files) | |||||
LOCAL_C_INCLUDES := $(local_ge_executor_c_include) | LOCAL_C_INCLUDES := $(local_ge_executor_c_include) | ||||
LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library) | LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library) | ||||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||||
ifeq ($(device_os),android) | ifeq ($(device_os),android) | ||||
LOCAL_LDFLAGS += -ldl | LOCAL_LDFLAGS += -ldl | ||||
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog | LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog | ||||
@@ -140,6 +142,9 @@ LOCAL_SHARED_LIBRARIES := \ | |||||
libregister \ | libregister \ | ||||
libmsprof \ | libmsprof \ | ||||
liberror_manager \ | liberror_manager \ | ||||
stub/libascend_hal | |||||
LOCAL_STATIC_LIBRARIES := libmsprofiler | |||||
LOCAL_LDFLAGS += $(local_ge_executor_ldflags) | LOCAL_LDFLAGS += $(local_ge_executor_ldflags) | ||||
@@ -355,7 +355,7 @@ LOCAL_MODULE := libge_compiler | |||||
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | ||||
# from ome_inference.mk | # from ome_inference.mk | ||||
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP | |||||
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE | |||||
ifeq ($(DEBUG), 1) | ifeq ($(DEBUG), 1) | ||||
LOCAL_CFLAGS += -g -O0 | LOCAL_CFLAGS += -g -O0 | ||||
endif | endif | ||||
@@ -418,7 +418,7 @@ include $(CLEAR_VARS) | |||||
LOCAL_MODULE := libge_compiler | LOCAL_MODULE := libge_compiler | ||||
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE | LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE | ||||
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 | LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP | |||||
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE | |||||
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION | LOCAL_CFLAGS += -DOMG_DEVICE_VERSION | ||||
LOCAL_CFLAGS += -O2 | LOCAL_CFLAGS += -O2 | ||||
LOCAL_MODULE_CLASS := SHARED_LIBRARIES | LOCAL_MODULE_CLASS := SHARED_LIBRARIES | ||||
@@ -42,7 +42,7 @@ include_directories(${CMAKE_BINARY_DIR}/proto/ge) | |||||
######### libge_local_engine.so ############# | ######### libge_local_engine.so ############# | ||||
add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | ||||
target_compile_definitions(ge_local_engine PRIVATE Werror) | |||||
target_compile_definitions(ge_local_engine PRIVATE Werror COMPILE_OMG_PACKAGE) | |||||
target_link_libraries(ge_local_engine | target_link_libraries(ge_local_engine | ||||
graph | graph | ||||
${PROTOBUF_LIBRARY} | ${PROTOBUF_LIBRARY} | ||||
@@ -42,7 +42,7 @@ include ${BUILD_HOST_SHARED_LIBRARY} | |||||
include $(CLEAR_VARS) | include $(CLEAR_VARS) | ||||
LOCAL_MODULE := atclib/libge_local_engine | LOCAL_MODULE := atclib/libge_local_engine | ||||
LOCAL_CFLAGS += -Werror | LOCAL_CFLAGS += -Werror | ||||
LOCAL_CFLAGS += -std=c++11 | |||||
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE | |||||
LOCAL_LDFLAGS := | LOCAL_LDFLAGS := | ||||
LOCAL_STATIC_LIBRARIES := | LOCAL_STATIC_LIBRARIES := | ||||
@@ -356,6 +356,7 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | |||||
LOCAL_STATIC_LIBRARIES := libge_memory \ | LOCAL_STATIC_LIBRARIES := libge_memory \ | ||||
libadump_server \ | libadump_server \ | ||||
libmsprofiler \ | |||||
LOCAL_SHARED_LIBRARIES := \ | LOCAL_SHARED_LIBRARIES := \ | ||||
libc_sec \ | libc_sec \ | ||||
@@ -136,6 +136,13 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen | |||||
bool attr) { | bool attr) { | ||||
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); | ||||
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); | ||||
auto format = tensor.GetFormat(); | |||||
auto data_type = tensor.GetDataType(); | |||||
if (format == FORMAT_RESERVED && data_type == DT_UNDEFINED) { | |||||
return SUCCESS; | |||||
} | |||||
string op_type; | string op_type; | ||||
if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { | if (!AttrUtils::GetStr(tensor, kAttrOpType, op_type) || op_type.empty()) { | ||||
op_type = DATA; | op_type = DATA; | ||||
@@ -521,8 +528,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | ||||
bool is_offline) { | bool is_offline) { | ||||
GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | ||||
if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) { | |||||
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize()); | |||||
if (!inputs.empty() && (inputs.size() != op_desc->GetAllInputsSize())) { | |||||
GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetAllInputsSize()); | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { | if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { | ||||
@@ -322,11 +322,19 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||||
GELOGE(ge::FAILED, | GELOGE(ge::FAILED, | ||||
"There is an atomic conflict between the current node and the peer out node, not supported!"); | "There is an atomic conflict between the current node and the peer out node, not supported!"); | ||||
return ge::FAILED; | return ge::FAILED; | ||||
} else if (is_loop_graph) { | |||||
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start)); | |||||
} else { | |||||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}), | |||||
"SetAtomicCleanAttr failed."); | |||||
} | |||||
const auto &in_control_anchor = node->GetInControlAnchor(); | |||||
GE_CHECK_NOTNULL(in_control_anchor); | |||||
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||||
auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); | |||||
if (peer_out_node->GetType() == ATOMICADDRCLEAN) { | |||||
ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); | |||||
return ret; | |||||
} | |||||
} | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -840,68 +848,37 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt | |||||
} | } | ||||
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | ||||
GE_CHECK_NOTNULL(compute_graph_); | |||||
// Atomic op memory start addr | |||||
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||||
GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_); | |||||
vector<NodePtr> connect_netoutput_nodes; | |||||
for (auto &node : compute_graph_->GetAllNodes()) { | |||||
auto node_op_desc = node->GetOpDesc(); | |||||
if (node_op_desc == nullptr) { | |||||
continue; | |||||
} | |||||
bool is_atomic = false; | |||||
// If GetBool fail, is_atomic is false. | |||||
(void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); | |||||
if (!is_atomic) { | |||||
continue; | |||||
} | |||||
bool is_ref = false; | |||||
// If GetBool fail, is_ref is false. | |||||
(void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_REFERENCE, is_ref); | |||||
if (is_ref) { | |||||
GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and ref attribute.", | |||||
node_op_desc->GetName().c_str()); | |||||
return ge::PARAM_INVALID; | |||||
} | |||||
vector<int> is_connect_netoutput; | |||||
// If GetBool fail, attr is_connect_netoutput is an empty vector. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); | |||||
if (!is_connect_netoutput.empty()) { | |||||
connect_netoutput_nodes.emplace_back(node); | |||||
continue; | |||||
} | |||||
map<NodePtr, vector<NodePtr>> normal_atomic_and_clean_nodes_map; | |||||
vector<NodePtr> connecting_output_atomic_nodes; | |||||
Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "Failed to filter atomic nodes for memory assignment."); | |||||
return status; | |||||
} | |||||
// Atomic op memory start addr of loop graph | |||||
int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||||
vector<int64_t> mem_offset_end; | |||||
if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) { | |||||
GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
for (auto &iter : normal_atomic_and_clean_nodes_map) { | |||||
int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_); | |||||
GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start); | |||||
/// In networks with loop op, atomic op uses atomic_addr_clean op independently, | |||||
/// so we need to set the attr separately. | |||||
if (is_loop_graph) { | |||||
GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, loop_graph_atomic_mem_start)); | |||||
for (auto &atomic_node : iter.second) { | |||||
vector<int64_t> mem_offset_end; | |||||
status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.", | |||||
atomic_node->GetName().c_str()); | |||||
return status; | |||||
} | |||||
} | } | ||||
} | |||||
// In networks without loop op, the same atomic addr clean op is used for atomic op | |||||
if (!is_loop_graph) { | |||||
// Set the address attr of atomic clean operator | |||||
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | |||||
if (atomic_mem_size != 0) { | |||||
GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}), | |||||
"SetAtomicCleanAttr failed."); | |||||
int64_t atomic_mem_size = static_cast<int64_t>(memory_offset_[0].mem_offset_) - atomic_mem_start; | |||||
status = SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); | |||||
return status; | |||||
} | } | ||||
} | } | ||||
if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) { | |||||
if (AssignConnectNetOutputAtomicMemory(connecting_output_atomic_nodes) != SUCCESS) { | |||||
GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); | GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput."); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -909,6 +886,55 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map, | |||||
vector<NodePtr> &connecting_output_atomic_nodes) { | |||||
GE_CHECK_NOTNULL(compute_graph_); | |||||
for (const auto &node : compute_graph_->GetAllNodes()) { | |||||
if (node->GetType() == ATOMICADDRCLEAN) { | |||||
vector<NodePtr> tmp_normal_atomic_nodes; | |||||
const auto &out_control_anchor = node->GetOutControlAnchor(); | |||||
GE_CHECK_NOTNULL(out_control_anchor); | |||||
for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) { | |||||
if (peer_in_control_anchor != nullptr) { | |||||
auto peer_in_node = peer_in_control_anchor->GetOwnerNode(); | |||||
auto peer_in_node_desc = peer_in_node->GetOpDesc(); | |||||
if (peer_in_node_desc != nullptr) { | |||||
bool is_atomic_node = false; | |||||
// If GetBool fail, is_atomic_node is false. | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node); | |||||
if (is_atomic_node) { | |||||
bool is_reference = false; | |||||
// If GetBool fail, is_reference is false. | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference); | |||||
if (is_reference) { | |||||
GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and is_reference attribute.", | |||||
peer_in_node_desc->GetName().c_str()); | |||||
return ge::PARAM_INVALID; | |||||
} | |||||
vector<int> is_connecting_output; | |||||
// If GetBool fail, attr is_connecting_output is an empty vector. | |||||
(void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output); | |||||
if (is_connecting_output.empty()) { | |||||
tmp_normal_atomic_nodes.emplace_back(peer_in_node); | |||||
continue; | |||||
} | |||||
connecting_output_atomic_nodes.emplace_back(peer_in_node); | |||||
tmp_normal_atomic_nodes.clear(); | |||||
break; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
if (!tmp_normal_atomic_nodes.empty()) { | |||||
normal_atomic_nodes_map[node] = tmp_normal_atomic_nodes; | |||||
} | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, | Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node, | ||||
vector<int64_t> &mem_offset_end) { | vector<int64_t> &mem_offset_end) { | ||||
auto node_op_desc = node->GetOpDesc(); | auto node_op_desc = node->GetOpDesc(); | ||||
@@ -1331,6 +1357,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||||
vector<int64_t> memory_type; | vector<int64_t> memory_type; | ||||
auto tmp_op_desc = node->GetOpDesc(); | auto tmp_op_desc = node->GetOpDesc(); | ||||
origin_input_list = tmp_op_desc->GetInputOffset(); | origin_input_list = tmp_op_desc->GetInputOffset(); | ||||
int64_t valid_input_index = 0; | |||||
bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); | bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); | ||||
for (const auto &anchor : node->GetAllInDataAnchors()) { | for (const auto &anchor : node->GetAllInDataAnchors()) { | ||||
vector<int64_t> output_list; | vector<int64_t> output_list; | ||||
@@ -1344,8 +1371,9 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||||
auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc(); | auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(last_peer_out_op_desc); | GE_CHECK_NOTNULL(last_peer_out_op_desc); | ||||
output_list = last_peer_out_op_desc->GetOutputOffset(); | output_list = last_peer_out_op_desc->GetOutputOffset(); | ||||
if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) { | |||||
auto input_index = anchor->GetIdx(); | |||||
auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx()); | |||||
if (output_list.size() > static_cast<size_t>(out_index)) { | |||||
int64_t input_offset = output_list.at(out_index); | |||||
if (has_mem_type_attr) { | if (has_mem_type_attr) { | ||||
auto input_size = tmp_op_desc->GetInputsSize(); | auto input_size = tmp_op_desc->GetInputsSize(); | ||||
auto ori_input_offset_list_size = origin_input_list.size(); | auto ori_input_offset_list_size = origin_input_list.size(); | ||||
@@ -1359,26 +1387,21 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||||
} | } | ||||
// not hbm keep orignal inputoffest | // not hbm keep orignal inputoffest | ||||
// hbm inputoffset = original inputoffset + outputoffset | // hbm inputoffset = original inputoffset + outputoffset | ||||
input_list.emplace_back(memory_type[input_index] == RT_MEMORY_L1 | |||||
? origin_input_list[input_index] | |||||
: origin_input_list[input_index] + output_list.at(peer_out_anchor->GetIdx())); | |||||
GELOGI("fuison: node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", | |||||
tmp_op_desc->GetName().c_str(), input_index, | |||||
peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | |||||
input_list.back()); | |||||
} else { | |||||
int64_t output_offset = output_list.at(peer_out_anchor->GetIdx()); | |||||
const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); | |||||
if (in_node->GetType() == CONSTANT) { | |||||
GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index); | |||||
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset)); | |||||
} | |||||
GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(), | |||||
input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(), | |||||
output_offset); | |||||
input_list.emplace_back(output_offset); | |||||
input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 | |||||
? origin_input_list[valid_input_index] | |||||
: origin_input_list[valid_input_index] + output_list.at(out_index)); | |||||
} | |||||
const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); | |||||
if (in_node->GetType() == CONSTANT) { | |||||
GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx())); | |||||
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); | |||||
} | } | ||||
GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", | |||||
has_mem_type_attr == true ? "Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index, | |||||
peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index, input_offset); | |||||
input_list.emplace_back(input_offset); | |||||
valid_input_index++; | |||||
} | } | ||||
} | } | ||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
@@ -1473,125 +1496,49 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) { | |||||
// set the address attr of atomic clean operator for loop graph | |||||
int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start; | |||||
GELOGI("SetLoopGraphAtomicAttr beign, atomic_addr_clean start size is %ld, mem_size is %ld, mem_offset is %zu.", | |||||
atomic_mem_start, atomic_mem_size, memory_offset_[0].mem_offset_); | |||||
const auto &in_control_anchor = node->GetInControlAnchor(); | |||||
if (atomic_mem_size != 0 && in_control_anchor != nullptr) { | |||||
for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { | |||||
if (peer_out_control_anchor == nullptr) { | |||||
continue; | |||||
} | |||||
auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); | |||||
auto peer_out_node_desc = peer_out_node->GetOpDesc(); | |||||
if (peer_out_node_desc == nullptr) { | |||||
continue; | |||||
} | |||||
GELOGD("SetLoopGraphAtomicAttr, node is %s, op type is %s.", peer_out_node_desc->GetName().c_str(), | |||||
peer_out_node_desc->GetType().c_str()); | |||||
if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { | |||||
GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}), | |||||
GELOGE(FAILED, "SetAtomicCleanAttr failed."); | |||||
return FAILED); | |||||
} | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
ge::Status GraphMemoryAssigner::IsIndependentAtomicClean(const ge::NodePtr &node, | |||||
bool &is_independent_atomic_clean_node) { | |||||
GE_CHECK_NOTNULL(node); | |||||
const auto &out_control_anchor = node->GetOutControlAnchor(); | |||||
GE_CHECK_NOTNULL(out_control_anchor); | |||||
for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) { | |||||
if (peer_in_control_anchor != nullptr) { | |||||
auto peer_in_node = peer_in_control_anchor->GetOwnerNode(); | |||||
auto peer_in_node_desc = peer_in_node->GetOpDesc(); | |||||
if (peer_in_node_desc != nullptr) { | |||||
bool is_atomic_node = false; | |||||
// If GetBool fail, is_atomic_node is false. | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node); | |||||
if (is_atomic_node) { | |||||
vector<int> is_connect_netoutput; | |||||
// If GetBool fail, attr is_connect_netoutput is an empty vector. | |||||
(void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput); | |||||
if (!is_connect_netoutput.empty()) { | |||||
GELOGD("Peer in node %s is independent atomic clean node", peer_in_node->GetName().c_str()); | |||||
is_independent_atomic_clean_node = true; | |||||
break; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector<int64_t> &atomic_mem_start, | |||||
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start, | |||||
const vector<int64_t> &atomic_mem_size) { | const vector<int64_t> &atomic_mem_size) { | ||||
for (ge::NodePtr &node : compute_graph_->GetAllNodes()) { | |||||
auto node_op_desc = node->GetOpDesc(); | |||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | |||||
bool is_valid_atomic_clean_node = (n != nullptr) && (node->GetName() == n->GetName()); | |||||
if (((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) { | |||||
bool is_independent_atomic_clean = false; | |||||
if (IsIndependentAtomicClean(node, is_independent_atomic_clean) != SUCCESS) { | |||||
GELOGE(FAILED, "Failed to determine the connection relationship of atomic addr clean node."); | |||||
return PARAM_INVALID; | |||||
} | |||||
is_valid_atomic_clean_node = is_valid_atomic_clean_node || (!is_independent_atomic_clean); | |||||
auto node_op_desc = node->GetOpDesc(); | |||||
if (node_op_desc != nullptr) { | |||||
GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); | |||||
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace(); | |||||
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); | |||||
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||||
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||||
node_op_desc->SetWorkspace(workspace_vector); | |||||
node_op_desc->SetWorkspaceBytes(workspace_byte_vector); | |||||
std::vector<int64_t> mem_start_vector; | |||||
// If GetListInt fail, mem_start_vector is empty. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | |||||
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | |||||
GELOGE(FAILED, "SetListInt failed."); | |||||
return FAILED); | |||||
std::vector<int64_t> mem_size_vector; | |||||
// If GetListInt fail, mem_size_vector is empty. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | |||||
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | |||||
GELOGE(FAILED, "SetListInt failed."); | |||||
return FAILED); | |||||
std::stringstream ss; | |||||
for (auto iter : atomic_mem_start) { | |||||
ss << iter << " "; | |||||
} | } | ||||
if (is_valid_atomic_clean_node) { | |||||
GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); | |||||
vector<int64_t> workspace_vector = node_op_desc->GetWorkspace(); | |||||
vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes(); | |||||
workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||||
workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||||
node_op_desc->SetWorkspace(workspace_vector); | |||||
node_op_desc->SetWorkspaceBytes(workspace_byte_vector); | |||||
std::vector<int64_t> mem_start_vector; | |||||
// If GetListInt fail, mem_start_vector is empty. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); | |||||
mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), | |||||
GELOGE(FAILED, "SetListInt failed."); | |||||
return FAILED); | |||||
std::vector<int64_t> mem_size_vector; | |||||
// If GetListInt fail, mem_size_vector is empty. | |||||
(void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); | |||||
mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), | |||||
GELOGE(FAILED, "SetListInt failed."); | |||||
return FAILED); | |||||
std::stringstream ss; | |||||
for (auto iter : atomic_mem_start) { | |||||
ss << iter << " "; | |||||
} | |||||
string atomic_mem_start_str = ss.str(); | |||||
ss.clear(); | |||||
ss.str(""); | |||||
for (auto iter : atomic_mem_size) { | |||||
ss << iter << " "; | |||||
} | |||||
string atomic_mem_size_str = ss.str(); | |||||
GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", | |||||
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), | |||||
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); | |||||
string atomic_mem_start_str = ss.str(); | |||||
ss.clear(); | |||||
ss.str(""); | |||||
for (auto iter : atomic_mem_size) { | |||||
ss << iter << " "; | |||||
} | } | ||||
string atomic_mem_size_str = ss.str(); | |||||
GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]", | |||||
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), | |||||
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId()); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -135,6 +135,9 @@ class GraphMemoryAssigner { | |||||
ge::Status ReAssignAtomicMemory(bool is_loop_graph); | ge::Status ReAssignAtomicMemory(bool is_loop_graph); | ||||
ge::Status FilterAtomicNodesForMemoryAssign(std::map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map, | |||||
std::vector<NodePtr> &connecting_output_atomic_nodes); | |||||
ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ||||
int64_t &continuous_mem_size); | int64_t &continuous_mem_size); | ||||
@@ -165,14 +168,8 @@ class GraphMemoryAssigner { | |||||
ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | ||||
const std::vector<int64_t> &mem_offset_end); | const std::vector<int64_t> &mem_offset_end); | ||||
/// | |||||
/// @brief set loop graph atomic attr | |||||
/// @param node, atomic memory assignment start offset | |||||
/// @param atomic_mem_start: atomic op memory start address | |||||
/// | |||||
ge::Status SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start); | |||||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &n, const std::vector<int64_t> &atomic_mem_start, | |||||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start, | |||||
const std::vector<int64_t> &atomic_mem_size); | const std::vector<int64_t> &atomic_mem_size); | ||||
ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); | ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); | ||||
@@ -695,11 +695,7 @@ Status DataDumper::LoadDumpInfo() { | |||||
} | } | ||||
if (dump_properties_.GetDumpMode() == kDumpInput) { | if (dump_properties_.GetDumpMode() == kDumpInput) { | ||||
if (op_iter.is_task) { | if (op_iter.is_task) { | ||||
Status ret = DumpInput(op_iter, task); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Dump input failed"); | |||||
return ret; | |||||
} | |||||
GE_CHK_STATUS_RET(DumpInput(op_iter, task), "Dump input failed"); | |||||
} | } | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
continue; | continue; | ||||
@@ -726,7 +722,7 @@ Status DataDumper::LoadDumpInfo() { | |||||
SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); | SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); | ||||
if (!op_list_.empty() || is_op_debug_) { | |||||
if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { | |||||
auto ret = ExecuteLoadDumpInfo(op_mapping_info); | auto ret = ExecuteLoadDumpInfo(op_mapping_info); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Execute load dump info failed"); | GELOGE(ret, "Execute load dump info failed"); | ||||
@@ -740,7 +736,6 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, | |||||
aicpu::dump::OpMappingInfo &op_mapping_info) { | aicpu::dump::OpMappingInfo &op_mapping_info) { | ||||
if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput || | if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput || | ||||
dump_properties_.GetDumpMode() == kDumpAll) { | dump_properties_.GetDumpMode() == kDumpAll) { | ||||
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||||
aicpu::dump::Task task; | aicpu::dump::Task task; | ||||
task.set_end_graph(true); | task.set_end_graph(true); | ||||
task.set_task_id(end_graph_task_id_); | task.set_task_id(end_graph_task_id_); | ||||
@@ -748,6 +743,14 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, | |||||
task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH); | task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH); | ||||
task.mutable_op()->set_op_type(ENDGRAPH); | task.mutable_op()->set_op_type(ENDGRAPH); | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
is_end_graph_ = true; | |||||
if (op_mapping_info.model_name_param_case() == aicpu::dump::OpMappingInfo::kModelName) { | |||||
GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u", | |||||
op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_); | |||||
return; | |||||
} | |||||
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||||
} | } | ||||
} | } | ||||
@@ -116,6 +116,7 @@ class DataDumper { | |||||
std::vector<InnerDumpInfo> op_list_; | std::vector<InnerDumpInfo> op_list_; | ||||
uint32_t end_graph_task_id_ = 0; | uint32_t end_graph_task_id_ = 0; | ||||
uint32_t end_graph_stream_id_ = 0; | uint32_t end_graph_stream_id_ = 0; | ||||
bool is_end_graph_ = false; | |||||
std::multimap<std::string, InnerInputMapping> input_map_; | std::multimap<std::string, InnerInputMapping> input_map_; | ||||
bool load_flag_; | bool load_flag_; | ||||
uint32_t device_id_; | uint32_t device_id_; | ||||
@@ -1928,13 +1928,7 @@ Status DavinciModel::SinkModelProfile() { | |||||
name = name_; | name = name_; | ||||
} | } | ||||
size_t name_len = name.size(); | size_t name_len = name.size(); | ||||
// phy device id | |||||
uint32_t phy_device_id = 0; | |||||
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); | |||||
return FAILED); | |||||
reporter_data.deviceId = phy_device_id; | |||||
reporter_data.deviceId = device_id_; | |||||
reporter_data.data = (unsigned char *)&name_len; | reporter_data.data = (unsigned char *)&name_len; | ||||
reporter_data.dataLen = sizeof(int32_t); | reporter_data.dataLen = sizeof(int32_t); | ||||
GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", | GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", | ||||
@@ -2103,12 +2097,7 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { | |||||
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | ||||
return FAILED, "Sink model tag memcpy error."); | return FAILED, "Sink model tag memcpy error."); | ||||
// device id | // device id | ||||
uint32_t phy_device_id = 0; | |||||
rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%u", phy_device_id); | |||||
return FAILED); | |||||
reporter_data.deviceId = phy_device_id; | |||||
reporter_data.deviceId = device_id_; | |||||
// Model Header | // Model Header | ||||
string name; | string name; | ||||
@@ -236,7 +236,6 @@ ModelManager::~ModelManager() { | |||||
std::lock_guard<std::mutex> lock(map_mutex_); | std::lock_guard<std::mutex> lock(map_mutex_); | ||||
model_map_.clear(); | model_map_.clear(); | ||||
model_aicpu_kernel_.clear(); | model_aicpu_kernel_.clear(); | ||||
cust_aicpu_so_.clear(); | |||||
GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); | GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); | ||||
} | } | ||||
@@ -400,6 +399,7 @@ Status ModelManager::Unload(uint32_t model_id) { | |||||
} | } | ||||
std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | ||||
exception_infos_.clear(); | exception_infos_.clear(); | ||||
cust_aicpu_so_.clear(); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -328,15 +328,14 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||||
op_desc->GetName().c_str(), v_memory_type.size(), inputs_size); | op_desc->GetName().c_str(), v_memory_type.size(), inputs_size); | ||||
return v_input_data_addr; | return v_input_data_addr; | ||||
} | } | ||||
for (size_t i = 0; i < inputs_size; ++i) { | |||||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | |||||
if (tensor_desc == nullptr) { | |||||
GELOGD("Op: %s, Index: %zu, has no input", op_desc->GetName().c_str(), i); | |||||
continue; | |||||
} | |||||
if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { | if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { | ||||
// TBE: add weights address to input | // TBE: add weights address to input | ||||
const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); | |||||
if (tensor_desc == nullptr) { | |||||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||||
continue; | |||||
} | |||||
int64_t tensor_size = 0; | int64_t tensor_size = 0; | ||||
GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); | GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); | ||||
if (tensor_size) { | if (tensor_size) { | ||||
@@ -89,16 +89,13 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { | |||||
nodes.push(node); | nodes.push(node); | ||||
static const std::set<std::string> end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE}; | static const std::set<std::string> end_type_set = {STREAMSWITCH, STREAMMERGE, MERGE}; | ||||
bool merge_flag = false; | |||||
bool exit_flag = false; | |||||
bool net_output_flag = false; | |||||
while (!nodes.empty()) { | while (!nodes.empty()) { | ||||
NodePtr cur_node = nodes.top(); | NodePtr cur_node = nodes.top(); | ||||
nodes.pop(); | nodes.pop(); | ||||
if (visited.count(cur_node) > 0) { | if (visited.count(cur_node) > 0) { | ||||
continue; | continue; | ||||
} | } | ||||
if (AttachFlag(cur_node, stream_label, merge_flag, exit_flag, net_output_flag) != SUCCESS) { | |||||
if (AttachFlag(cur_node, stream_label) != SUCCESS) { | |||||
GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str()); | GELOGE(FAILED, "Attach flag for node %s failed.", cur_node->GetName().c_str()); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -122,12 +119,6 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { | |||||
GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); | GE_CHK_STATUS_RET(SetActiveLabelList(node, {stream_label}), "set active_label_list failed."); | ||||
} | } | ||||
bool attach_flag = (merge_flag || exit_flag) && net_output_flag; | |||||
if (attach_flag) { | |||||
GELOGI("No need to keep on attaching label."); | |||||
return SUCCESS; | |||||
} | |||||
for (const NodePtr &tmp_node : branch_nodes) { | for (const NodePtr &tmp_node : branch_nodes) { | ||||
GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str()); | GELOGD("Attach label %s to node: %s.", stream_label.c_str(), tmp_node->GetName().c_str()); | ||||
GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed."); | GE_CHK_STATUS_RET(SetStreamLabel(tmp_node, stream_label), "Set stream label failed."); | ||||
@@ -140,13 +131,9 @@ Status AttachStreamLabelPass::UpdateCondBranch(const NodePtr &node) { | |||||
/// @brief attach flag | /// @brief attach flag | ||||
/// @param [in] node | /// @param [in] node | ||||
/// @param [out] stream_label | /// @param [out] stream_label | ||||
/// @param [out] merge_flag | |||||
/// @param [out] exit_flag | |||||
/// @param [out] net_output_flag | |||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, | |||||
bool &exit_flag, bool &net_output_flag) { | |||||
Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &stream_label) { | |||||
const std::string &type = node->GetType(); | const std::string &type = node->GetType(); | ||||
if (type == STREAMSWITCH) { | if (type == STREAMSWITCH) { | ||||
if (node->GetInDataNodes().empty()) { | if (node->GetInDataNodes().empty()) { | ||||
@@ -164,12 +151,8 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea | |||||
} else if (type == STREAMMERGE) { | } else if (type == STREAMMERGE) { | ||||
stream_label = node->GetName(); | stream_label = node->GetName(); | ||||
GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); | GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); | ||||
merge_flag = true; | |||||
} else if ((type == EXIT) || (type == REFEXIT)) { | } else if ((type == EXIT) || (type == REFEXIT)) { | ||||
GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); | GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed."); | ||||
exit_flag = true; | |||||
} else if (type == NETOUTPUT) { | |||||
net_output_flag = true; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -50,13 +50,9 @@ class AttachStreamLabelPass : public GraphPass { | |||||
/// @brief attach flag | /// @brief attach flag | ||||
/// @param [in] node | /// @param [in] node | ||||
/// @param [out] stream_label | /// @param [out] stream_label | ||||
/// @param [out] merge_flag | |||||
/// @param [out] exit_flag | |||||
/// @param [out] net_output_flag | |||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
static Status AttachFlag(const NodePtr &node, std::string &stream_label, bool &merge_flag, bool &exit_flag, | |||||
bool &net_output_flag); | |||||
static Status AttachFlag(const NodePtr &node, std::string &stream_label); | |||||
/// | /// | ||||
/// @brief Update stream_label for loop_branch | /// @brief Update stream_label for loop_branch | ||||
@@ -20,13 +20,14 @@ | |||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
namespace { | |||||
const size_t kOutNodesNum = 1; | |||||
} | |||||
namespace ge { | namespace ge { | ||||
Status EnterPass::Run(NodePtr &node) { | Status EnterPass::Run(NodePtr &node) { | ||||
GELOGD("EnterPass running"); | GELOGD("EnterPass running"); | ||||
if (node == nullptr) { | |||||
GELOGE(PARAM_INVALID, "param [node] must not be null."); | |||||
return PARAM_INVALID; | |||||
} | |||||
GE_CHECK_NOTNULL(node); | |||||
if ((node->GetType() != ENTER) && (node->GetType() != REFENTER)) { | if ((node->GetType() != ENTER) && (node->GetType() != REFENTER)) { | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -38,18 +39,17 @@ Status EnterPass::Run(NodePtr &node) { | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
NodePtr in_node = node->GetInDataNodes().at(0); | NodePtr in_node = node->GetInDataNodes().at(0); | ||||
if (in_node == nullptr) { | |||||
GELOGE(PARAM_INVALID, "param [in_node] must not be null"); | |||||
return PARAM_INVALID; | |||||
} | |||||
GE_CHECK_NOTNULL(in_node); | |||||
if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP)) { | if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP)) { | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
bool need_remove_flag = | |||||
in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty() && node->GetOutDataNodes().empty(); | |||||
if (need_remove_flag) { | |||||
bool need_remove_flag = in_node->GetInControlNodes().empty() && node->GetInControlNodes().empty(); | |||||
if (!need_remove_flag) { | |||||
return SUCCESS; | |||||
} | |||||
if (node->GetOutDataNodes().empty()) { | |||||
for (auto &out_ctrl_node : node->GetOutControlNodes()) { | for (auto &out_ctrl_node : node->GetOutControlNodes()) { | ||||
if (out_ctrl_node == nullptr) { | if (out_ctrl_node == nullptr) { | ||||
continue; | continue; | ||||
@@ -60,9 +60,47 @@ Status EnterPass::Run(NodePtr &node) { | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} | } | ||||
} else { | |||||
if (OptimizeEnter(node, in_node) != SUCCESS) { | |||||
GELOGE(FAILED, "Optimize enter node[%s] failed.", node->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
} | } | ||||
GELOGD("EnterPass success"); | GELOGD("EnterPass success"); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status EnterPass::OptimizeEnter(NodePtr &node, NodePtr &in_node) { | |||||
auto out_nodes_of_in_node = in_node->GetOutAllNodes(); | |||||
if (out_nodes_of_in_node.size() != kOutNodesNum) { | |||||
return SUCCESS; | |||||
} | |||||
if (!node->GetOutControlNodes().empty()) { | |||||
return SUCCESS; | |||||
} | |||||
for (const auto &out_node : node->GetOutDataNodes()) { | |||||
GE_CHECK_NOTNULL(out_node); | |||||
if (out_node->GetType() == MERGE) { | |||||
return SUCCESS; | |||||
} | |||||
} | |||||
GE_CHECK_NOTNULL(in_node->GetOutDataAnchor(0)); | |||||
GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->Unlink(node->GetInDataAnchor(0))); | |||||
auto out_data_anchor = node->GetOutDataAnchor(0); | |||||
GE_CHECK_NOTNULL(out_data_anchor); | |||||
for (auto peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||||
GE_CHK_STATUS_RET(out_data_anchor->Unlink(peer_in_data_anchor)); | |||||
GE_CHK_STATUS_RET(in_node->GetOutDataAnchor(0)->LinkTo(peer_in_data_anchor)); | |||||
} | |||||
auto graph = node->GetOwnerComputeGraph(); | |||||
GE_CHK_STATUS_RET(GraphUtils::RemoveNodeWithoutRelink(graph, node)) | |||||
AddRePassNodesWithInOut(in_node); | |||||
return SUCCESS; | |||||
} | |||||
} // namespace ge | } // namespace ge |
@@ -23,6 +23,9 @@ namespace ge { | |||||
class EnterPass : public BaseNodePass { | class EnterPass : public BaseNodePass { | ||||
public: | public: | ||||
Status Run(NodePtr &node) override; | Status Run(NodePtr &node) override; | ||||
private: | |||||
Status OptimizeEnter(NodePtr &node, NodePtr &in_node); | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_PASSES_ENTER_PASS_H_ | #endif // GE_GRAPH_PASSES_ENTER_PASS_H_ |
@@ -41,7 +41,6 @@ | |||||
#include "inc/pass_manager.h" | #include "inc/pass_manager.h" | ||||
#include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
using std::map; | |||||
using std::set; | using std::set; | ||||
using std::string; | using std::string; | ||||
using std::vector; | using std::vector; | ||||
@@ -266,24 +265,27 @@ Status MultiBatchGraphCopyer::Init() { | |||||
} | } | ||||
Status MultiBatchGraphCopyer::LabelStatus() { | Status MultiBatchGraphCopyer::LabelStatus() { | ||||
map<string, vector<NodePtr>> frame_enters; | |||||
InitStatus(frame_enters); | |||||
for (const auto &data : origin_data_nodes_) { | |||||
auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); | |||||
if (!IsAllDimsPositive(data_shape.GetDims())) { | |||||
origin_nodes_status_[data.get()] = kNodeInBatchBranch; | |||||
} | |||||
} | |||||
bool changed = true; | bool changed = true; | ||||
// If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch | // If anyone of in node is kNodeInBatchBranch, it is also kNodeInBatchBranch | ||||
while (changed) { | while (changed) { | ||||
changed = false; | changed = false; | ||||
for (const auto &node : origin_all_nodes_) { | for (const auto &node : origin_all_nodes_) { | ||||
auto iter = origin_nodes_status_.find(node.get()); | |||||
if (iter != origin_nodes_status_.end()) { | |||||
continue; | |||||
} | |||||
for (auto &in_node : node->GetInAllNodes()) { | for (auto &in_node : node->GetInAllNodes()) { | ||||
bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && | bool is_in_batch = origin_nodes_status_.find(in_node.get()) != origin_nodes_status_.end() && | ||||
origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; | origin_nodes_status_[in_node.get()] == kNodeInBatchBranch; | ||||
if (is_in_batch) { | if (is_in_batch) { | ||||
if (origin_nodes_status_.find(node.get()) == origin_nodes_status_.end() || | |||||
origin_nodes_status_[node.get()] != kNodeInBatchBranch) { | |||||
origin_nodes_status_[node.get()] = kNodeInBatchBranch; | |||||
ResetEnterStatus(frame_enters, node); | |||||
changed = true; | |||||
} | |||||
origin_nodes_status_[node.get()] = kNodeInBatchBranch; | |||||
changed = true; | |||||
break; | break; | ||||
} | } | ||||
} | } | ||||
@@ -314,45 +316,6 @@ Status MultiBatchGraphCopyer::LabelStatus() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void MultiBatchGraphCopyer::InitStatus(map<string, vector<NodePtr>> &frame_enters) { | |||||
for (const auto &node : origin_all_nodes_) { | |||||
if (node->GetType() != ENTER && node->GetType() != REFENTER) { | |||||
continue; | |||||
} | |||||
auto op_desc = node->GetOpDesc(); | |||||
if (op_desc == nullptr) { | |||||
continue; | |||||
} | |||||
string frame_name; | |||||
if (AttrUtils::GetStr(op_desc, ENTER_ATTR_FRAME_NAME, frame_name)) { | |||||
frame_enters[frame_name].emplace_back(node); | |||||
} | |||||
} | |||||
for (const auto &data : origin_data_nodes_) { | |||||
auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); | |||||
if (!IsAllDimsPositive(data_shape.GetDims())) { | |||||
origin_nodes_status_[data.get()] = kNodeInBatchBranch; | |||||
} | |||||
} | |||||
} | |||||
void MultiBatchGraphCopyer::ResetEnterStatus(map<string, vector<NodePtr>> &frame_enters, const NodePtr &node) { | |||||
if (node->GetType() != ENTER && node->GetType() != REFENTER) { | |||||
return; | |||||
} | |||||
for (const auto &frame_enter : frame_enters) { | |||||
auto &enters = frame_enter.second; | |||||
if (std::find(enters.begin(), enters.end(), node) != enters.end()) { | |||||
for (const auto &enter : enters) { | |||||
origin_nodes_status_[enter.get()] = kNodeInBatchBranch; | |||||
} | |||||
break; | |||||
} | |||||
} | |||||
} | |||||
Status MultiBatchGraphCopyer::CreateNewNodes() { | Status MultiBatchGraphCopyer::CreateNewNodes() { | ||||
shape_data_ = InsertShapeDataNode(); | shape_data_ = InsertShapeDataNode(); | ||||
if (shape_data_ == nullptr) { | if (shape_data_ == nullptr) { | ||||
@@ -1200,7 +1163,7 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, s | |||||
} | } | ||||
} | } | ||||
// Connect NetOutput directly: DTS2020070612498 | |||||
// Connect NetOutput directly | |||||
void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set<size_t> &dynamic_output_index, | void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set<size_t> &dynamic_output_index, | ||||
vector<string> &dynamic_output_dims) { | vector<string> &dynamic_output_dims) { | ||||
GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); | GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); | ||||
@@ -68,8 +68,6 @@ class MultiBatchGraphCopyer { | |||||
// label status for origin_all_nodes_ | // label status for origin_all_nodes_ | ||||
Status LabelStatus(); | Status LabelStatus(); | ||||
void InitStatus(std::map<string, vector<NodePtr>> &frame_enters); | |||||
void ResetEnterStatus(std::map<string, vector<NodePtr>> &frame_enters, const NodePtr &node); | |||||
// add nodes functions | // add nodes functions | ||||
Status CreateNewNodes(); | Status CreateNewNodes(); | ||||
@@ -40,7 +40,7 @@ include ${BUILD_HOST_SHARED_LIBRARY} | |||||
include $(CLEAR_VARS) | include $(CLEAR_VARS) | ||||
LOCAL_MODULE := atclib/libhost_cpu_engine | LOCAL_MODULE := atclib/libhost_cpu_engine | ||||
LOCAL_CFLAGS += -Werror | LOCAL_CFLAGS += -Werror | ||||
LOCAL_CFLAGS += -std=c++11 | |||||
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE | |||||
LOCAL_LDFLAGS := | LOCAL_LDFLAGS := | ||||
LOCAL_STATIC_LIBRARIES := | LOCAL_STATIC_LIBRARIES := | ||||
@@ -165,8 +165,10 @@ Status GELib::SystemInitialize(const map<string, string> &options) { | |||||
} | } | ||||
} | } | ||||
// In train and infer, profiling is always needed. | |||||
InitOptions(options); | InitOptions(options); | ||||
// In train and infer, profiling is always needed. | |||||
InitProfiling(this->options_); | |||||
auto model_manager = ModelManager::GetInstance(); | auto model_manager = ModelManager::GetInstance(); | ||||
GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
GE_IF_BOOL_EXEC(model_manager->EnableExceptionDump(options) != SUCCESS, | GE_IF_BOOL_EXEC(model_manager->EnableExceptionDump(options) != SUCCESS, | ||||
@@ -176,21 +178,19 @@ Status GELib::SystemInitialize(const map<string, string> &options) { | |||||
// 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer | // 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer | ||||
// these two case with logical device id | // these two case with logical device id | ||||
if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { | if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { | ||||
InitProfiling(this->options_, true); | |||||
status = InitSystemWithOptions(this->options_); | status = InitSystemWithOptions(this->options_); | ||||
} else { | } else { | ||||
InitProfiling(this->options_); | |||||
status = InitSystemWithoutOptions(); | status = InitSystemWithoutOptions(); | ||||
} | } | ||||
return status; | return status; | ||||
} | } | ||||
void GELib::InitProfiling(Options &options, bool convert_2_phy_device_id) { | |||||
void GELib::InitProfiling(Options &options) { | |||||
GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id); | GELOGI("Init Profiling. session Id: %ld, device id:%d ", options.session_id, options.device_id); | ||||
std::lock_guard<std::mutex> lock(status_mutex_); | std::lock_guard<std::mutex> lock(status_mutex_); | ||||
GetContext().Init(); | GetContext().Init(); | ||||
// Profiling init | // Profiling init | ||||
if (ProfilingManager::Instance().Init(options, convert_2_phy_device_id) != SUCCESS) { | |||||
if (ProfilingManager::Instance().Init(options) != SUCCESS) { | |||||
GELOGW("Profiling init failed."); | GELOGW("Profiling init failed."); | ||||
} | } | ||||
} | } | ||||
@@ -68,7 +68,7 @@ class GELib { | |||||
// get incre build cache path | // get incre build cache path | ||||
const std::string &GetIncreBuildCachePath() const { return incre_build_cache_path_; } | const std::string &GetIncreBuildCachePath() const { return incre_build_cache_path_; } | ||||
void InitProfiling(Options &options, bool convert_2_phy_device_id = false); | |||||
void InitProfiling(Options &options); | |||||
void ShutDownProfiling(); | void ShutDownProfiling(); | ||||
Status InitSystemWithoutOptions(); | Status InitSystemWithoutOptions(); | ||||
@@ -522,7 +522,7 @@ void PrintOptionMap(std::map<std::string, std::string> &options, std::string tip | |||||
for (auto iter = options.begin(); iter != options.end(); iter++) { | for (auto iter = options.begin(); iter != options.end(); iter++) { | ||||
std::string key = iter->first; | std::string key = iter->first; | ||||
std::string option_name = iter->second; | std::string option_name = iter->second; | ||||
GELOGI("%s set successfully, key=%s, value=%s", tips.c_str(), key.c_str(), option_name.c_str()); | |||||
GELOGI("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str()); | |||||
} | } | ||||
} | } | ||||
@@ -96,6 +96,12 @@ static graphStatus CheckGlobalOptions(std::map<std::string, std::string> &global | |||||
return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!"); | return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!"); | ||||
global_options[ge::ir_option::OP_SELECT_IMPL_MODE] = op_select_implmode; | global_options[ge::ir_option::OP_SELECT_IMPL_MODE] = op_select_implmode; | ||||
// set precision mode default value | |||||
std::string precision_mode = global_options.find(ge::ir_option::PRECISION_MODE) == global_options.end() | |||||
? "force_fp16" | |||||
: global_options[ge::ir_option::PRECISION_MODE]; | |||||
global_options[ge::ir_option::PRECISION_MODE] = precision_mode; | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
@@ -175,25 +175,25 @@ Status OpsKernelManager::ParsePluginOptions(const map<string, string> &options, | |||||
} else if (flag == 1) { | } else if (flag == 1) { | ||||
enable_flag = true; | enable_flag = true; | ||||
} else { | } else { | ||||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), | |||||
iter->second.c_str()); | |||||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", | |||||
plugin_name.c_str(), iter->second.c_str()); | |||||
return GE_GRAPH_OPTIONS_INVALID; | return GE_GRAPH_OPTIONS_INVALID; | ||||
} | } | ||||
} catch (std::invalid_argument &) { | } catch (std::invalid_argument &) { | ||||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.", | |||||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.", | |||||
iter->second.c_str()); | iter->second.c_str()); | ||||
return GE_GRAPH_OPTIONS_INVALID; | return GE_GRAPH_OPTIONS_INVALID; | ||||
} catch (std::out_of_range &) { | } catch (std::out_of_range &) { | ||||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.feFlag, its value %s is out of range, it must be 0 or 1.", | |||||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.", | |||||
iter->second.c_str()); | iter->second.c_str()); | ||||
return GE_GRAPH_OPTIONS_INVALID; | return GE_GRAPH_OPTIONS_INVALID; | ||||
} catch (...) { | } catch (...) { | ||||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), | |||||
iter->second.c_str()); | |||||
GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", | |||||
plugin_name.c_str(), iter->second.c_str()); | |||||
return GE_GRAPH_OPTIONS_INVALID; | return GE_GRAPH_OPTIONS_INVALID; | ||||
} | } | ||||
} else { | } else { | ||||
GELOGI("Not find key %s, set to default value false.", plugin_name.c_str()); | |||||
GELOGI("Not find option_key %s, set to default value false.", plugin_name.c_str()); | |||||
enable_flag = false; | enable_flag = false; | ||||
} | } | ||||
@@ -618,11 +618,16 @@ Status ParseOutNodes(const string &out_nodes) { | |||||
if (!out_nodes.empty()) { | if (!out_nodes.empty()) { | ||||
domi::GetContext().out_nodes_map.clear(); | domi::GetContext().out_nodes_map.clear(); | ||||
domi::GetContext().user_out_nodes.clear(); | domi::GetContext().user_out_nodes.clear(); | ||||
domi::GetContext().user_out_nodes_top_vec.clear(); | |||||
vector<string> nodes_v = StringUtils::Split(out_nodes, ';'); | vector<string> nodes_v = StringUtils::Split(out_nodes, ';'); | ||||
for (const string &node : nodes_v) { | for (const string &node : nodes_v) { | ||||
vector<string> key_value_v = StringUtils::Split(node, ':'); | vector<string> key_value_v = StringUtils::Split(node, ':'); | ||||
if (key_value_v.size() != 2) { // The size must be 2. | if (key_value_v.size() != 2) { // The size must be 2. | ||||
if (key_value_v.size() == 1 && domi::GetContext().type == domi::CAFFE) { | |||||
domi::GetContext().user_out_nodes_top_vec.push_back(node); | |||||
continue; | |||||
} | |||||
ErrorManager::GetInstance().ATCReportErrMessage( | ErrorManager::GetInstance().ATCReportErrMessage( | ||||
"E10001", {"parameter", "value", "reason"}, | "E10001", {"parameter", "value", "reason"}, | ||||
{"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); | {"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); | ||||
@@ -632,7 +637,13 @@ Status ParseOutNodes(const string &out_nodes) { | |||||
node.c_str()); | node.c_str()); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); | |||||
if (!domi::GetContext().user_out_nodes_top_vec.empty()) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | |||||
{"--out_nodes", out_nodes, "is not all index or top_name"}); | |||||
GELOGE(PARAM_INVALID, "This out_nodes str must be all index or top_name, while the actual input is %s", | |||||
out_nodes.c_str()); | |||||
return PARAM_INVALID; | |||||
} | |||||
// stoi: The method may throw an exception: invalid_argument/out_of_range | // stoi: The method may throw an exception: invalid_argument/out_of_range | ||||
if (!CheckDigitStr(key_value_v[1])) { | if (!CheckDigitStr(key_value_v[1])) { | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, | ||||
@@ -640,7 +651,10 @@ Status ParseOutNodes(const string &out_nodes) { | |||||
GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str()); | GELOGE(PARAM_INVALID, "This str must be digit string, while the actual input is %s", out_nodes.c_str()); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); | |||||
int32_t index = stoi(StringUtils::Trim(key_value_v[1])); | int32_t index = stoi(StringUtils::Trim(key_value_v[1])); | ||||
GELOGD("Get output info: node[%s] and index[%ld]", key_value_v[0].c_str(), index); | |||||
if (iter != domi::GetContext().out_nodes_map.end()) { | if (iter != domi::GetContext().out_nodes_map.end()) { | ||||
iter->second.emplace_back(index); | iter->second.emplace_back(index); | ||||
} else { | } else { | ||||
@@ -279,7 +279,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, con | |||||
if (op_task_->GetOpTaskType() == OP_TASK_TBE) { | if (op_task_->GetOpTaskType() == OP_TASK_TBE) { | ||||
return ExecuteTbeTask(input_desc, inputs, output_desc, outputs); | return ExecuteTbeTask(input_desc, inputs, output_desc, outputs); | ||||
} else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { | } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { | ||||
return op_task_->LaunchKernel(input_desc, inputs, output_desc, outputs, stream_); | |||||
return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); | |||||
} else { | } else { | ||||
GELOGE(UNSUPPORTED, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", | GELOGE(UNSUPPORTED, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", | ||||
op_task_->GetOpTaskType()); | op_task_->GetOpTaskType()); | ||||
@@ -75,8 +75,11 @@ std::string BuildTaskUtils::GetTaskInfo(const OpDescPtr &op_desc) { | |||||
// Conv2D IN[DT_FLOAT16 NC1HWC0[256, 128, 7, 7, 16],DT_FLOAT16 FRACTAL_Z[128, 32, 16, 16]] | // Conv2D IN[DT_FLOAT16 NC1HWC0[256, 128, 7, 7, 16],DT_FLOAT16 FRACTAL_Z[128, 32, 16, 16]] | ||||
// OUT[DT_FLOAT16 NC1HWC0[256, 32, 7, 7, 16]] | // OUT[DT_FLOAT16 NC1HWC0[256, 32, 7, 7, 16]] | ||||
ss << op_type << " IN["; | ss << op_type << " IN["; | ||||
for (uint32_t idx = 0; idx < op_desc->GetInputsSize(); idx++) { | |||||
for (uint32_t idx = 0; idx < op_desc->GetAllInputsSize(); idx++) { | |||||
const GeTensorDescPtr &input = op_desc->MutableInputDesc(idx); | const GeTensorDescPtr &input = op_desc->MutableInputDesc(idx); | ||||
if (input == nullptr) { | |||||
continue; | |||||
} | |||||
ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; | ss << TypeUtils::DataTypeToSerialString(input->GetDataType()) << " "; | ||||
ss << TypeUtils::FormatToSerialString(input->GetFormat()); | ss << TypeUtils::FormatToSerialString(input->GetFormat()); | ||||
ss << VectorToString(input->GetShape().GetDims()); | ss << VectorToString(input->GetShape().GetDims()); | ||||
@@ -34,6 +34,11 @@ constexpr int kLaunchRetryTimes = 1000; | |||||
constexpr int kSleepTime = 10; | constexpr int kSleepTime = 10; | ||||
constexpr uint64_t kReleaseFlag = 1; | constexpr uint64_t kReleaseFlag = 1; | ||||
constexpr int kCopyNum = 2; | constexpr int kCopyNum = 2; | ||||
void FreeHbm(void *var) { | |||||
if (var) { | |||||
(void)rtFree(var); | |||||
} | |||||
} | |||||
} // namespace | } // namespace | ||||
Status OpTask::OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream) { | Status OpTask::OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream) { | ||||
@@ -336,49 +341,23 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor | |||||
} | } | ||||
AiCpuTask::~AiCpuTask() { | AiCpuTask::~AiCpuTask() { | ||||
if (args_ != nullptr) { | |||||
(void)rtFree(args_); | |||||
} | |||||
if (io_addr_ != nullptr) { | |||||
(void)rtFree(io_addr_); | |||||
} | |||||
if (dynamic_flag_ && workspace_addr_ != nullptr) { | |||||
(void)rtFree(workspace_addr_); | |||||
} | |||||
if (copy_workspace_buf_ != nullptr) { | |||||
(void)rtFree(copy_workspace_buf_); | |||||
} | |||||
if (copy_ioaddr_dev_ != nullptr) { | |||||
(void)rtFree(copy_ioaddr_dev_); | |||||
} | |||||
if (copy_input_release_flag_dev_ != nullptr) { | |||||
(void)rtFree(copy_input_release_flag_dev_); | |||||
} | |||||
if (copy_input_data_size_dev_ != nullptr) { | |||||
(void)rtFree(copy_input_data_size_dev_); | |||||
} | |||||
if (copy_input_src_dev_ != nullptr) { | |||||
(void)rtFree(copy_input_src_dev_); | |||||
} | |||||
if (copy_input_dst_dev_ != nullptr) { | |||||
(void)rtFree(copy_input_dst_dev_); | |||||
} | |||||
if (copy_task_args_buf_ != nullptr) { | |||||
(void)rtFree(copy_task_args_buf_); | |||||
} | |||||
FreeHbm(args_); | |||||
FreeHbm(io_addr_); | |||||
if (dynamic_flag_) { | |||||
FreeHbm(workspace_addr_); | |||||
} | |||||
FreeHbm(copy_workspace_buf_); | |||||
FreeHbm(copy_ioaddr_dev_); | |||||
FreeHbm(copy_input_release_flag_dev_); | |||||
FreeHbm(copy_input_data_size_dev_); | |||||
FreeHbm(copy_input_src_dev_); | |||||
FreeHbm(copy_input_dst_dev_); | |||||
FreeHbm(copy_task_args_buf_); | |||||
for (auto summary : output_summary_) { | for (auto summary : output_summary_) { | ||||
if (summary != nullptr) { | |||||
(void)rtFree(summary); | |||||
} | |||||
FreeHbm(summary); | |||||
} | |||||
for (auto out_shape : out_shape_hbm_) { | |||||
FreeHbm(out_shape); | |||||
} | } | ||||
} | } | ||||
@@ -405,7 +384,7 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm) { | |||||
Status AiCpuTask::PrepareCopyInputs(vector<DataBuffer> &outputs) { | |||||
std::vector<uint64_t> copy_input_release_flag; | std::vector<uint64_t> copy_input_release_flag; | ||||
std::vector<uint64_t> copy_input_data_size; | std::vector<uint64_t> copy_input_data_size; | ||||
std::vector<uint64_t> copy_input_src; | std::vector<uint64_t> copy_input_src; | ||||
@@ -417,11 +396,15 @@ Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<v | |||||
summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, summary.raw_data_size); | summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, summary.raw_data_size); | ||||
auto output = outputs[i]; | auto output = outputs[i]; | ||||
copy_input_release_flag.emplace_back(kReleaseFlag); | copy_input_release_flag.emplace_back(kReleaseFlag); | ||||
copy_input_data_size.emplace_back(summary.raw_data_size); | |||||
if (summary.raw_data_size > 0) { | |||||
copy_input_data_size.emplace_back(output.length); | |||||
} else { | |||||
copy_input_data_size.emplace_back(summary.raw_data_size); | |||||
} | |||||
copy_input_src.emplace_back(summary.raw_data_ptr); | copy_input_src.emplace_back(summary.raw_data_ptr); | ||||
copy_input_dst.emplace_back(reinterpret_cast<uintptr_t>(output)); | |||||
copy_input_dst.emplace_back(reinterpret_cast<uintptr_t>(output.data)); | |||||
const auto &shape_buffer = out_shape_hbm[i]; | |||||
const auto &shape_buffer = out_shape_hbm_[i]; | |||||
copy_input_release_flag.emplace_back(kReleaseFlag); | copy_input_release_flag.emplace_back(kReleaseFlag); | ||||
copy_input_data_size.emplace_back(summary.shape_data_size); | copy_input_data_size.emplace_back(summary.shape_data_size); | ||||
copy_input_src.emplace_back(summary.shape_data_ptr); | copy_input_src.emplace_back(summary.shape_data_ptr); | ||||
@@ -441,7 +424,7 @@ Status AiCpuTask::PrepareCopyInputs(vector<void *> &outputs, const std::vector<v | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_shape_hbm) { | |||||
Status AiCpuTask::ReadResultSummaryAndPrepareMemory() { | |||||
for (size_t i = 0; i < num_outputs_; ++i) { | for (size_t i = 0; i < num_outputs_; ++i) { | ||||
auto &result_summary = output_summary_host_[i]; | auto &result_summary = output_summary_host_[i]; | ||||
@@ -449,36 +432,39 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_sha | |||||
sizeof(aicpu::FWKAdapter::ResultSummary), RT_MEMCPY_DEVICE_TO_HOST)); | sizeof(aicpu::FWKAdapter::ResultSummary), RT_MEMCPY_DEVICE_TO_HOST)); | ||||
auto shape_data_size = result_summary.shape_data_size; | auto shape_data_size = result_summary.shape_data_size; | ||||
void *shape_buffer = nullptr; | void *shape_buffer = nullptr; | ||||
GE_MAKE_GUARD_RTMEM(shape_buffer); | |||||
GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM)); | |||||
out_shape_hbm.emplace_back(shape_buffer); | |||||
if (shape_data_size > 0) { | |||||
GE_CHK_RT_RET(rtMalloc(&shape_buffer, shape_data_size, RT_MEMORY_HBM)); | |||||
} | |||||
out_shape_hbm_.emplace_back(shape_buffer); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status AiCpuTask::CopyDataToHbm(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm, rtStream_t stream) { | |||||
GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs, out_shape_hbm)); | |||||
Status AiCpuTask::CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream) { | |||||
GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs)); | |||||
GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, stream)); | GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, stream)); | ||||
GE_CHK_RT_RET(rtStreamSynchronize(stream)); | GE_CHK_RT_RET(rtStreamSynchronize(stream)); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, const std::vector<void *> &out_shape_hbm) { | |||||
Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc) { | |||||
for (size_t i = 0; i < num_outputs_; ++i) { | for (size_t i = 0; i < num_outputs_; ++i) { | ||||
const auto &result_summary = output_summary_host_[i]; | const auto &result_summary = output_summary_host_[i]; | ||||
std::vector<int64_t> shape_dims; | std::vector<int64_t> shape_dims; | ||||
const auto &shape_hbm = out_shape_hbm[i]; | |||||
uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); | |||||
std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]()); | |||||
GE_CHECK_NOTNULL(shape_addr); | |||||
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, result_summary.shape_data_size, | |||||
RT_MEMCPY_DEVICE_TO_HOST)); | |||||
for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { | |||||
shape_dims.emplace_back(shape_addr[dim_idx]); | |||||
GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]); | |||||
if (result_summary.shape_data_size > 0) { | |||||
const auto &shape_hbm = out_shape_hbm_[i]; | |||||
uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); | |||||
std::unique_ptr<int64_t[]> shape_addr(new (std::nothrow) int64_t[dim_num]()); | |||||
GE_CHECK_NOTNULL(shape_addr); | |||||
GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, | |||||
result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); | |||||
for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { | |||||
shape_dims.emplace_back(shape_addr[dim_idx]); | |||||
GELOGD("Node [%zu]th output dim[%u]=%ld.", i, dim_idx, shape_addr[dim_idx]); | |||||
} | |||||
} | } | ||||
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]), | GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), output_desc[i]), | ||||
@@ -487,7 +473,7 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, cons | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<void *> &outputs, | |||||
Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<DataBuffer> &outputs, | |||||
rtStream_t stream) { | rtStream_t stream) { | ||||
if (num_outputs_ == 0) { | if (num_outputs_ == 0) { | ||||
GELOGI("Output num is 0, there is no need to update the output and size."); | GELOGI("Output num is 0, there is no need to update the output and size."); | ||||
@@ -496,13 +482,20 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output | |||||
GELOGI("Update shape and data by result summary begin."); | GELOGI("Update shape and data by result summary begin."); | ||||
std::vector<void *> out_shape_hbm; | |||||
GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(out_shape_hbm), | |||||
"Read ResultSummary and update output shape failed."); | |||||
for (auto out_shape : out_shape_hbm_) { | |||||
FreeHbm(out_shape); | |||||
} | |||||
out_shape_hbm_.clear(); | |||||
GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(), "Read ResultSummary and update output shape failed."); | |||||
GE_CHK_STATUS_RET(CopyDataToHbm(outputs, stream), "Copy data to output failed."); | |||||
GE_CHK_STATUS_RET(CopyDataToHbm(outputs, out_shape_hbm, stream), "Copy data to output failed."); | |||||
GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc), "Update shape by hbm buffer failed."); | |||||
GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc, out_shape_hbm), "Update shape by hbm buffer failed."); | |||||
for (auto out_shape : out_shape_hbm_) { | |||||
FreeHbm(out_shape); | |||||
} | |||||
out_shape_hbm_.clear(); | |||||
GELOGI("Update shape and data by result summary end."); | GELOGI("Update shape and data by result summary end."); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -603,10 +596,18 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs, | |||||
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, | |||||
rtStream_t stream) { | |||||
Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, | |||||
const std::vector<DataBuffer> &input_buffers, std::vector<GeTensorDesc> &output_desc, | |||||
std::vector<DataBuffer> &output_buffers, rtStream_t stream) { | |||||
GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc)); | GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc)); | ||||
std::vector<void *> inputs; | |||||
std::vector<void *> outputs; | |||||
for (auto &buffer : input_buffers) { | |||||
inputs.emplace_back(buffer.data); | |||||
} | |||||
for (auto &buffer : output_buffers) { | |||||
outputs.emplace_back(buffer.data); | |||||
} | |||||
GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs)); | GE_CHK_STATUS_RET_NOLOG(SetIO(inputs, outputs)); | ||||
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); | GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); | ||||
GE_CHK_RT_RET(rtStreamSynchronize(stream)); | GE_CHK_RT_RET(rtStreamSynchronize(stream)); | ||||
@@ -614,7 +615,7 @@ Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, cons | |||||
if (unknown_type_ == DEPEND_SHAPE_RANGE) { | if (unknown_type_ == DEPEND_SHAPE_RANGE) { | ||||
GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); | GE_CHK_STATUS_RET_NOLOG(UpdateOutputShape(output_desc)); | ||||
} else if (unknown_type_ == DEPEND_COMPUTE) { | } else if (unknown_type_ == DEPEND_COMPUTE) { | ||||
GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, outputs, stream)); | |||||
GE_CHK_STATUS_RET_NOLOG(UpdateShapeAndDataByResultSummary(output_desc, output_buffers, stream)); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -658,9 +659,9 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs, | |||||
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, | |||||
rtStream_t stream) { | |||||
Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, | |||||
const std::vector<DataBuffer> &input_buffers, std::vector<GeTensorDesc> &output_desc, | |||||
std::vector<DataBuffer> &output_buffers, rtStream_t stream) { | |||||
GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, | GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, | ||||
"AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", unknown_type_); | "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", unknown_type_); | ||||
@@ -669,11 +670,11 @@ Status AiCpuCCTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, co | |||||
size_t arg_index = 0; | size_t arg_index = 0; | ||||
auto *task_io_addr = reinterpret_cast<uintptr_t *>(io_addr_); | auto *task_io_addr = reinterpret_cast<uintptr_t *>(io_addr_); | ||||
GE_CHECK_NOTNULL(task_io_addr); | GE_CHECK_NOTNULL(task_io_addr); | ||||
for (auto &input : inputs) { | |||||
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(input); | |||||
for (auto &input : input_buffers) { | |||||
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(input.data); | |||||
} | } | ||||
for (auto &output : outputs) { | |||||
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(output); | |||||
for (auto &output : output_buffers) { | |||||
task_io_addr[arg_index++] = reinterpret_cast<uintptr_t>(output.data); | |||||
} | } | ||||
GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); | GE_CHK_STATUS_RET_NOLOG(LaunchKernel(stream)); | ||||
@@ -57,8 +57,9 @@ class OpTask { | |||||
void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes); | void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes); | ||||
const OpDescPtr &GetOpdesc() const { return op_desc_; } | const OpDescPtr &GetOpdesc() const { return op_desc_; } | ||||
Status OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream); | Status OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream); | ||||
virtual Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs, | |||||
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) { | |||||
virtual Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers, | |||||
std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers, | |||||
rtStream_t stream) { | |||||
return UNSUPPORTED; | return UNSUPPORTED; | ||||
} | } | ||||
@@ -138,8 +139,9 @@ class AiCpuTask : public AiCpuBaseTask { | |||||
OpTaskType GetOpTaskType() override { return OP_TASK_AICPU; } | OpTaskType GetOpTaskType() override { return OP_TASK_AICPU; } | ||||
const void *GetIOAddr() const override; | const void *GetIOAddr() const override; | ||||
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs, | |||||
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) override; | |||||
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers, | |||||
std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers, | |||||
rtStream_t stream) override; | |||||
Status SetMemCopyTask(const domi::KernelExDef &kernel_def); | Status SetMemCopyTask(const domi::KernelExDef &kernel_def); | ||||
private: | private: | ||||
@@ -147,14 +149,14 @@ class AiCpuTask : public AiCpuBaseTask { | |||||
// for copy task. | // for copy task. | ||||
Status InitForSummaryAndCopy(); | Status InitForSummaryAndCopy(); | ||||
Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<void *> &outputs, | |||||
Status UpdateShapeAndDataByResultSummary(vector<GeTensorDesc> &output_desc, vector<DataBuffer> &outputs, | |||||
rtStream_t stream); | rtStream_t stream); | ||||
Status ReadResultSummaryAndPrepareMemory(std::vector<void *> &out_shape_hbm); | |||||
Status ReadResultSummaryAndPrepareMemory(); | |||||
Status CopyDataToHbm(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm, rtStream_t stream); | |||||
Status PrepareCopyInputs(vector<void *> &outputs, const std::vector<void *> &out_shape_hbm); | |||||
Status CopyDataToHbm(vector<DataBuffer> &outputs, rtStream_t stream); | |||||
Status PrepareCopyInputs(vector<DataBuffer> &outputs); | |||||
Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc, const std::vector<void *> &out_shape_hbm); | |||||
Status UpdateShapeByHbmBuffer(vector<GeTensorDesc> &output_desc); | |||||
friend class AiCpuTaskBuilder; | friend class AiCpuTaskBuilder; | ||||
void *workspace_addr_ = nullptr; | void *workspace_addr_ = nullptr; | ||||
@@ -178,6 +180,8 @@ class AiCpuTask : public AiCpuBaseTask { | |||||
void *copy_input_data_size_dev_; | void *copy_input_data_size_dev_; | ||||
void *copy_input_src_dev_; | void *copy_input_src_dev_; | ||||
void *copy_input_dst_dev_; | void *copy_input_dst_dev_; | ||||
vector<void *> out_shape_hbm_; | |||||
}; | }; | ||||
class AiCpuCCTask : public AiCpuBaseTask { | class AiCpuCCTask : public AiCpuBaseTask { | ||||
@@ -197,8 +201,9 @@ class AiCpuCCTask : public AiCpuBaseTask { | |||||
void SetIoAddr(void *io_addr); | void SetIoAddr(void *io_addr); | ||||
size_t GetArgSize() const; | size_t GetArgSize() const; | ||||
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<void *> &inputs, | |||||
std::vector<GeTensorDesc> &output_desc, std::vector<void *> &outputs, rtStream_t stream) override; | |||||
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers, | |||||
std::vector<GeTensorDesc> &output_desc, std::vector<DataBuffer> &output_buffers, | |||||
rtStream_t stream) override; | |||||
private: | private: | ||||
friend class AiCpuCCTaskBuilder; | friend class AiCpuCCTaskBuilder; | ||||
@@ -25,16 +25,21 @@ | |||||
namespace ge { | namespace ge { | ||||
/** | /** | ||||
*@brief Performs AI pre-processing (AIPP) on images including color space conversion (CSC), | |||||
image normalization (by subtracting the mean value or multiplying a factor), image cropping | |||||
(by specifying the crop start and cropping the image to the size required by the neural network), and much more. \n | |||||
*@brief Performs AI pre-processing (AIPP) on images including color space | |||||
conversion (CSC), | |||||
image normalization (by subtracting the mean value or multiplying a factor), | |||||
image cropping | |||||
(by specifying the crop start and cropping the image to the size required by | |||||
the neural network), and much more. \n | |||||
*@par Inputs: | *@par Inputs: | ||||
*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the data layer. | |||||
*@li images: An NCHW or NHWC tensor of type uint8, specifying the input to the | |||||
data layer. | |||||
*@li params: Dynamic AIPP configuration parameters of type uint8. \n | *@li params: Dynamic AIPP configuration parameters of type uint8. \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*aipp_config_path: A required string, specifying the path of the AIPP configuration file. \n | |||||
*aipp_config_path: A required string, specifying the path of the AIPP | |||||
configuration file. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*features: The AIPP-processed output tensor of type float16 or uint8. | *features: The AIPP-processed output tensor of type float16 or uint8. | ||||
@@ -28,9 +28,10 @@ namespace ge { | |||||
*@par Inputs: | *@par Inputs: | ||||
*Dynamic inputs, including: | *Dynamic inputs, including: | ||||
* @li x: A list of Tensor objects, each with same shape and type. The supported types are: | |||||
* @li x: A list of Tensor objects, each with same shape and type. The supported | |||||
types are: | |||||
* float16, float32, double, int32, uint8, int16, int8, complex64, int64, | * float16, float32, double, int32, uint8, int16, int8, complex64, int64, | ||||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. It's a dynamic input. \n | |||||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same shape and type as the elements of "x". \n | *y: A Tensor. Has the same shape and type as the elements of "x". \n | ||||
@@ -121,7 +122,8 @@ REG_OP(MinimumGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
*One input: | *One input: | ||||
*x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, | |||||
*x:A Tensor. Must be one of the following types: bool, float16, float, int8, | |||||
int32, uint32, uint8, | |||||
int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n | int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -385,7 +387,8 @@ REG_OP(Sign) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: \n | *Two inputs, including: \n | ||||
*@li x1: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64,complex128 | |||||
*@li x1: A Tensor. Must be one of the following types: float16, float32, | |||||
float64, int32, int64, complex64,complex128 | |||||
*@li x2: A Tensor. Has the same type as "x1". \n | *@li x2: A Tensor. Has the same type as "x1". \n | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -484,12 +487,16 @@ REG_OP(Equal) | |||||
*@par Inputs: | *@par Inputs: | ||||
*One input:\n | *One input:\n | ||||
*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n | |||||
*x: A Tensor. Must be one of the following types: float16, float32, double, | |||||
complex64, complex128. \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li base: An optional attribute of type float32, specifying the base gamma. Defaults to "-1.0". | |||||
*@li scale: An optional attribute of type float32, specifying the scale alpha. Defaults to "1.0". | |||||
*@li shift: An optional attribute of type float32, specifying the shift beta. Defaults to "0.0". \n | |||||
*@li base: An optional attribute of type float32, specifying the base gamma. | |||||
Defaults to "-1.0". | |||||
*@li scale: An optional attribute of type float32, specifying the scale alpha. | |||||
Defaults to "1.0". | |||||
*@li shift: An optional attribute of type float32, specifying the shift beta. | |||||
Defaults to "0.0". \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor of the same type as "x". \n | *y: A Tensor of the same type as "x". \n | ||||
@@ -510,7 +517,8 @@ REG_OP(Exp) | |||||
*@par Inputs: | *@par Inputs: | ||||
*One input: | *One input: | ||||
*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. \n | |||||
*x: A Tensor. Must be one of the following types: float16, float32, double, | |||||
complex64, complex128. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor of the same type as "x". \n | *y: A Tensor of the same type as "x". \n | ||||
@@ -527,7 +535,9 @@ REG_OP(Expm1) | |||||
*@brief: Computes the reciprocal of "x". \n | *@brief: Computes the reciprocal of "x". \n | ||||
*@par Inputs:\n | *@par Inputs:\n | ||||
*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. \n | |||||
*x: A Tensor. Must be one of the following types: float16, float32, | |||||
int32, int64, double, | |||||
complex64, complex128. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x". \n | *y: A Tensor. Has the same type as "x". \n | ||||
@@ -749,7 +759,8 @@ REG_OP(Xlogy) | |||||
*@par Inputs: | *@par Inputs: | ||||
*One input: \n | *One input: \n | ||||
*x: A Tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128 | |||||
*x: A Tensor. Must be one of the following types: float16, float32, float64, | |||||
int32, int64, complex64, complex128 | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x". \n | *y: A Tensor. Has the same type as "x". \n | ||||
@@ -790,7 +801,8 @@ REG_OP(Rsqrt) | |||||
* | * | ||||
*@par Inputs: | *@par Inputs: | ||||
* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128. | |||||
* x: A tensor. Must be one of the following types: float16, float32, float64, | |||||
int32, int64, complex64, complex128. | |||||
* | * | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A tensor. Has the same type as "x". | * y: A tensor. Has the same type as "x". | ||||
@@ -811,7 +823,8 @@ REG_OP(Asin) | |||||
* | * | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li y: A tensor of type float16, float32, float64, int32, int64, complex64, complex128. | |||||
*@li y: A tensor of type float16, float32, float64, | |||||
int32, int64, complex64, complex128. | |||||
*@li dy: A tensor of the same type as "y". | *@li dy: A tensor of the same type as "y". | ||||
* | * | ||||
*@attention Constraints: | *@attention Constraints: | ||||
@@ -838,7 +851,8 @@ REG_OP(AsinGrad) | |||||
* | * | ||||
*@par Inputs: | *@par Inputs: | ||||
* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128. | |||||
* x: A tensor. Must be one of the following types: float16, float32, float64, | |||||
int32, int64, complex64, complex128. | |||||
* | * | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A tensor. Has the same type as "x". | * y: A tensor. Has the same type as "x". | ||||
@@ -883,7 +897,8 @@ REG_OP(AcosGrad) | |||||
* | * | ||||
*@par Inputs: | *@par Inputs: | ||||
* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. | |||||
* x: A tensor. Must be one of the following types: float16, float32, float64, | |||||
complex64, complex128. | |||||
* | * | ||||
*@attention Constraints: | *@attention Constraints: | ||||
* x Given an input tensor, the function computes inverse hyperbolic cosine of every element.\n | * x Given an input tensor, the function computes inverse hyperbolic cosine of every element.\n | ||||
@@ -1160,7 +1175,8 @@ REG_OP(FusedMulAdd) | |||||
* | * | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li x1: A tensor. Must be one of the following types: float16, float32, float64, uint8, int8, int16, int32, int64, complex64, complex128. | |||||
*@li x1: A tensor. Must be one of the following types: float16, float32, float64, | |||||
uint8, int8, int16, int32, int64, complex64, complex128. | |||||
*@li x2: A tensor of the same type as "x1". | *@li x2: A tensor of the same type as "x1". | ||||
* | * | ||||
*@attention Constraints: | *@attention Constraints: | ||||
@@ -1189,7 +1205,8 @@ REG_OP(AddV2) | |||||
*@brief Updates "ref" by adding "value" to it. \n | *@brief Updates "ref" by adding "value" to it. \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. | |||||
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, | |||||
int16, int32, int64, uint8, uint16, uint32, uint64. | |||||
*@li value: A Tensor of the same type as "ref". \n | *@li value: A Tensor of the same type as "ref". \n | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -1218,12 +1235,14 @@ REG_OP(AssignAdd) | |||||
*@brief Updates "ref" by assigning "value" to it. \n | *@brief Updates "ref" by assigning "value" to it. \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. | |||||
*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, | |||||
int32, int64, uint8, uint16, uint32, uint64. | |||||
*@li value: A Tensor of the same type as "ref". \n | *@li value: A Tensor of the same type as "ref". \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li validate_shape: An optional bool. Defaults to "true". | *@li validate_shape: An optional bool. Defaults to "true". | ||||
If "true", the operation will validate that the shape of "value" matches the shape of the Tensor being assigned to. | |||||
If "true", the operation will validate that the shape of "value" | |||||
matches the shape of the Tensor being assigned to. | |||||
* If "false", "ref" will take on the shape of "value". | * If "false", "ref" will take on the shape of "value". | ||||
* This attribute is reserved. | * This attribute is reserved. | ||||
*@li use_locking: An optional bool. Defaults to True. | *@li use_locking: An optional bool. Defaults to True. | ||||
@@ -1252,7 +1271,8 @@ REG_OP(Assign) | |||||
* | * | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li var: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64 | |||||
*@li var: A tensor. Must be one of the following types: float32, float64, | |||||
int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, uint32, uint64 | |||||
*@li value: A tensor of the same type as "var". | *@li value: A tensor of the same type as "var". | ||||
* | * | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -1644,7 +1664,9 @@ REG_OP(Atan2) | |||||
* | * | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li x1: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 | |||||
*@li x1: A tensor. Must be one of the following types: float32, float64, int32, | |||||
uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, complex128, | |||||
float16, uint32, uint64 | |||||
*@li x2: A tensor of the same type as "x1". | *@li x2: A tensor of the same type as "x1". | ||||
* | * | ||||
*@par Attributes: | *@par Attributes: | ||||
@@ -1666,16 +1688,18 @@ REG_OP(ApproximateEqual) | |||||
/** | /** | ||||
*@brief Returns the element-wise sum of a list of tensors.\n | *@brief Returns the element-wise sum of a list of tensors.\n | ||||
* AccumulateNV2 performs the same operation as AddN, but does not wait for all of its inputs | |||||
to be ready before beginning to sum.\n This can save memory if inputs are ready at different times, | |||||
since minimum temporary storage is proportional to the output size rather than the inputs size. | |||||
Returns a Tensor of same shape and type as the elements of inputs. \n | |||||
* AccumulateNV2 performs the same operation as AddN, but does not wait for all | |||||
of its inputs to be ready before beginning to sum.\n This can save memory if | |||||
inputs are ready at different times, \n since minimum temporary storage is | |||||
proportional to the output size rather than the inputs size.\n Returns a Tensor | |||||
of same shape and type as the elements of inputs. \n | |||||
* | * | ||||
*@par Inputs: | *@par Inputs: | ||||
*Dynamic inputs, including: | *Dynamic inputs, including: | ||||
* x: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, | |||||
qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. It's a dynamic input. \n | |||||
* x: A tensor. Must be one of the following types: float32, float64, int32, | |||||
uint8, int16, int8, complex64, int64, \n qint8, quint8, qint32, uint16, | |||||
complex128, float16, uint32, uint64. | |||||
* | * | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A tensor. Has the same type as "x". | * y: A tensor. Has the same type as "x". | ||||
@@ -1731,7 +1755,8 @@ REG_OP(FakeQuantWithMinMaxArgs) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: \n | *Two inputs, including: \n | ||||
*@li gradients: A Tensor of type float32. Backpropagated gradients above the FakeQuantWithMinMaxArgs operation. | |||||
*@li gradients: A Tensor of type float32. Backpropagated gradients | |||||
above the FakeQuantWithMinMaxArgs operation. | |||||
*@li x: A Tensor of type float32. Has the same type and format as "gradients".\n | *@li x: A Tensor of type float32. Has the same type and format as "gradients".\n | ||||
* This is the input Tensor of the FakeQuantWithMinMaxArgs operator.\n | * This is the input Tensor of the FakeQuantWithMinMaxArgs operator.\n | ||||
@@ -2210,9 +2235,13 @@ REG_OP(BiasAdd) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: | *Two inputs, including: | ||||
*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64. | |||||
*@li x: A Tensor. Must be one of the following types: float32, float64, int32, | |||||
uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, | |||||
complex128, float16, uint32, uint64. | |||||
*format is ND. | *format is ND. | ||||
*@li dimension: A Tensor. Must be one of the following types: int32, int64. Must be in the range [-rank(input x), rank(input x)]. Describes which dimension of the input Tensor to reduce across. | |||||
*@li dimension: A Tensor. Must be one of the following types: int32, int64. | |||||
Must be in the range [-rank(input x), rank(input x)]. Describes which dimension | |||||
of the input Tensor to reduce across. | |||||
* The format is ND. | * The format is ND. | ||||
*@par Attributes: | *@par Attributes: | ||||
*dtype: The output type, either "int32" or "int64". Defaults to "int64". \n | *dtype: The output type, either "int32" or "int64". Defaults to "int64". \n | ||||
@@ -2286,6 +2315,7 @@ REG_OP(ArgMaxV2) | |||||
.ATTR(dtype, Type, DT_INT64) | .ATTR(dtype, Type, DT_INT64) | ||||
.OP_END_FACTORY_REG(ArgMaxV2) | .OP_END_FACTORY_REG(ArgMaxV2) | ||||
/** | /** | ||||
*@brief Returns the index with the largest value across axes of a tensor. \n | *@brief Returns the index with the largest value across axes of a tensor. \n | ||||
@@ -2298,15 +2328,16 @@ REG_OP(ArgMaxV2) | |||||
*@li dtype: The output type, either "int32" or "int64". Defaults to "int64". \n | *@li dtype: The output type, either "int32" or "int64". Defaults to "int64". \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x". \n | |||||
*y: A multi-dimensional Tensor of type int32, specifying the index with the | |||||
largest value. The dimension is one less than that of "x". \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li x: If there are multiple maximum values, the index of the first maximum value is used. | *@li x: If there are multiple maximum values, the index of the first maximum value is used. | ||||
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". \n | |||||
*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the | |||||
dimension length of "x". \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with TensorFlow operator ArgMax. | * Compatible with TensorFlow operator ArgMax. | ||||
* | |||||
* @par Restrictions: | * @par Restrictions: | ||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
*/ | */ | ||||
@@ -2929,9 +2960,13 @@ REG_OP(FusedMulAddN) | |||||
*@li bias: An ND tensor of type float16 or float32. \n | *@li bias: An ND tensor of type float16 or float32. \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1". | |||||
*@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1". | |||||
*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". \n | |||||
*@li axis: An optional int32 used to compute the shape of bias input from the | |||||
online bottoms. Defaults to "1". | |||||
*@li num_axes: An optional int32 used to compute the shape of bias input from a | |||||
Caffe model trained offline. Defaults to "1". | |||||
*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe | |||||
model trained offline. If "false", bias is input from online bottoms. Defaults | |||||
to "true". \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: An ND tensor of type float16 or float32. \n | *y: An ND tensor of type float16 or float32. \n | ||||
@@ -2939,13 +2974,25 @@ REG_OP(FusedMulAddN) | |||||
*@attention Constraints:\n | *@attention Constraints:\n | ||||
* Assume that the shape length of "x" is "n" and that of "bias" is "m". | * Assume that the shape length of "x" is "n" and that of "bias" is "m". | ||||
*@li "axis" is within the range [-n, n-1]. num_axes >= -1. | *@li "axis" is within the range [-n, n-1]. num_axes >= -1. | ||||
*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n | |||||
* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis). | |||||
*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1. | |||||
*@li If "bias_from_blob = true", "num_axes > 0", and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n | |||||
* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). | |||||
*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n | |||||
* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). | |||||
*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis | |||||
of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < | |||||
n-axis).\n | |||||
* If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must | |||||
have the same size (0 <= i < -axis). | |||||
*@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with | |||||
shape length 1 and dimension size 1. | |||||
*@li If "bias_from_blob = true", "num_axes > 0", and "axis >= 0", "axis + | |||||
num_axes" must be less than or equal to "n" and the ith axis of "bias" and the | |||||
(i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n | |||||
* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and | |||||
the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same | |||||
size (0 <= i < num_axes). | |||||
*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis | |||||
+ m" must be less than or equal to "n" and the ith axis of "bias" and the (i | |||||
+"axis")th axis of "x" must have the same size (0 <= i < m).\n | |||||
* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith | |||||
axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= | |||||
i < m). | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the Caffe operator Bias. | * Compatible with the Caffe operator Bias. | ||||
*/ | */ | ||||
@@ -3023,10 +3070,12 @@ REG_OP(FusedMulAddNL2loss) | |||||
*@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n | *@li x: A Tensor with any format. Must be one of the following types: float16, float32. \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li threshold: A required float32. Defaults to "0.0". "x" is compared with "threshold", outputs "1" for inputs above threshold; "0" otherwise. \n | |||||
*@li threshold: A required float32. Defaults to "0.0". "x" is compared with | |||||
"threshold", outputs "1" for inputs above threshold; "0" otherwise. \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. | |||||
*@li y: A Tensor with any format. Has the same type as the input. Must be one | |||||
of the following types: float16, float32. | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the Caffe operator Threshold. | * Compatible with the Caffe operator Threshold. | ||||
*/ | */ | ||||
@@ -3044,11 +3093,16 @@ REG_OP(FusedMulAddNL2loss) | |||||
*@li x: A tensor. Must be one of the following types: float16, float32. \n | *@li x: A tensor. Must be one of the following types: float16, float32. \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000 | |||||
*@li out_max_val: An optional bool. Whether to output the maximum value. If it is True, the maximum value and index are output, otherwise only the index is output. | |||||
*@li axis: An optional int. Specify the axis to be cut at the input tensor. If | |||||
this parameter is not provided, find the topk for each batch. Defaults to 10000 | |||||
*@li out_max_val: An optional bool. Whether to output the maximum value. If it | |||||
is True, the maximum value and index are output, otherwise only the index is | |||||
output. | |||||
* Defaults to False | * Defaults to False | ||||
*@li topk: An optional int. The number of top-k elements in each axis (the value is greater than or equal to 1); the value must be in the range [1, x.shape(axis)]. | |||||
* Defaults to 1 | |||||
*@li topk: An optional int. The number of top-k elements in each axis (the | |||||
value is greater than or equal to 1); the value must be in the range | |||||
[1, x.shape(axis)]. | |||||
* Defaults to 1 \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li indices: A tensor of type float16, float32, int32. The index of the maximum value of the output. | *@li indices: A tensor of type float16, float32, int32. The index of the maximum value of the output. | ||||
@@ -3168,7 +3222,8 @@ REG_OP(Axpy) | |||||
.OP_END_FACTORY_REG(Axpy) | .OP_END_FACTORY_REG(Axpy) | ||||
/** | /** | ||||
*@brief Creates a criterion that measures the loss given input tensors x1 x2 and a Tensor label y with values 1 or -1. \n | |||||
*@brief Creates a criterion that measures the loss given input tensors x1 x2 | |||||
and a Tensor label y with values 1 or -1. \n | |||||
*@par Inputs: | *@par Inputs: | ||||
*@li x1: A ND Tensor with one of the following types: int8, uint8, int32, float16, float32. | *@li x1: A ND Tensor with one of the following types: int8, uint8, int32, float16, float32. | ||||
@@ -36,7 +36,7 @@ namespace ge { | |||||
* if "cond" is a numerical scalar, non-zero means True and zero means False; | * if "cond" is a numerical scalar, non-zero means True and zero means False; | ||||
* if "cond" is a string scalar, non-empty means True and empty means False; | * if "cond" is a string scalar, non-empty means True and empty means False; | ||||
* if "cond" is not a scalar, non-empty means True and empty means False. | * if "cond" is not a scalar, non-empty means True and empty means False. | ||||
*@li input: The input tensors . It's a dynamic input. \n | |||||
*@li input: The input tensors . \n | |||||
*@par Graphs: | *@par Graphs: | ||||
*@li then_branch: A subgraph takes 'input' and returns a list of tensors, | *@li then_branch: A subgraph takes 'input' and returns a list of tensors, | ||||
@@ -69,7 +69,7 @@ REG_OP(_If) | |||||
* if "cond" is a numerical scalar, non-zero means True and zero means False; | * if "cond" is a numerical scalar, non-zero means True and zero means False; | ||||
* if "cond" is a string scalar, non-empty means True and empty means False; | * if "cond" is a string scalar, non-empty means True and empty means False; | ||||
* if "cond" is not a scalar, non-empty means True and empty means False. | * if "cond" is not a scalar, non-empty means True and empty means False. | ||||
*@li input: The input tensors . It's a dynamic input. \n | |||||
*@li input: The input tensors . \n | |||||
*@par Graphs: | *@par Graphs: | ||||
*@li then_branch: A subgraph takes 'input' and returns a list of tensors, | *@li then_branch: A subgraph takes 'input' and returns a list of tensors, | ||||
@@ -102,7 +102,7 @@ REG_OP(StatelessIf) | |||||
* if "cond" is a numerical scalar, non-zero means True and zero means False; | * if "cond" is a numerical scalar, non-zero means True and zero means False; | ||||
* if "cond" is a string scalar, non-empty means True and empty means False; | * if "cond" is a string scalar, non-empty means True and empty means False; | ||||
* if "cond" is not a scalar, non-empty means True and empty means False. | * if "cond" is not a scalar, non-empty means True and empty means False. | ||||
*@li input: The input tensors . It's a dynamic input. \n | |||||
*@li input: The input tensors . \n | |||||
*@par Graphs: | *@par Graphs: | ||||
*@li then_branch: A subgraph takes 'input' and returns a list of tensors, | *@li then_branch: A subgraph takes 'input' and returns a list of tensors, | ||||
@@ -129,7 +129,7 @@ REG_OP(If) | |||||
*@par Inputs: | *@par Inputs: | ||||
*@li branch_index: An int32 scalar which determines the selected subgraph. | *@li branch_index: An int32 scalar which determines the selected subgraph. | ||||
*@li input: The input tensors, which will be passed to the subgraph . It's a dynamic input. \n | |||||
*@li input: The input tensors, which will be passed to the subgraph . \n | |||||
*@par Graphs: | *@par Graphs: | ||||
*branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors, | *branches: A list of subgraphs, each of which takes 'input' and returns a list of tensors, | ||||
@@ -152,7 +152,7 @@ REG_OP(Case) | |||||
*@brief Cyclically executes the "body" subgraph until the tensor returned by the "cond" subgraph evaluates to False . \n | *@brief Cyclically executes the "body" subgraph until the tensor returned by the "cond" subgraph evaluates to False . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*input: The input tensors . It's a dynamic input. \n | |||||
*input: The input tensors . \n | |||||
*@par Graphs: | *@par Graphs: | ||||
*@li cond: A subgraph takes 'input' and returns a tensor. | *@li cond: A subgraph takes 'input' and returns a tensor. | ||||
@@ -183,7 +183,7 @@ REG_OP(_While) | |||||
*@brief Cyclically executes the "body" subgraph until the tensor returned by the "cond" subgraph evaluates to False . \n | *@brief Cyclically executes the "body" subgraph until the tensor returned by the "cond" subgraph evaluates to False . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*input: The input tensors . It's a dynamic input. \n | |||||
*input: The input tensors . \n | |||||
*@par Graphs: | *@par Graphs: | ||||
*@li cond: A subgraph takes 'input' and returns a tensor. | *@li cond: A subgraph takes 'input' and returns a tensor. | ||||
@@ -215,7 +215,7 @@ REG_OP(While) | |||||
*@brief Cyclically executes the "body" subgraph until the tensor returned by the "cond" subgraph evaluates to False . \n | *@brief Cyclically executes the "body" subgraph until the tensor returned by the "cond" subgraph evaluates to False . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*input: The input tensors . It's a dynamic input. \n | |||||
*input: The input tensors . \n | |||||
*@par Graphs: | *@par Graphs: | ||||
*@li cond: A subgraph takes 'input' and returns a tensor. | *@li cond: A subgraph takes 'input' and returns a tensor. | ||||
@@ -250,7 +250,7 @@ REG_OP(StatelessWhile) | |||||
*@li start: An int32 scalar. The lower bound. | *@li start: An int32 scalar. The lower bound. | ||||
*@li limit: An int32 scalar. The upper bound. | *@li limit: An int32 scalar. The upper bound. | ||||
*@li delta: An int32 scalar. The step size. | *@li delta: An int32 scalar. The step size. | ||||
*@li input: The input tensors, which will be passed to "body" . It's a dynamic input. \n | |||||
*@li input: The input tensors, which will be passed to "body" . \n | |||||
*@par Graphs: | *@par Graphs: | ||||
*body: A subgraph that takes 'input' and returns another list of tensors . \n | *body: A subgraph that takes 'input' and returns another list of tensors . \n | ||||
@@ -274,7 +274,7 @@ REG_OP(For) | |||||
*@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n | *@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*args: The input tensors, which will be passed to "f" . It's a dynamic input. \n | |||||
*args: The input tensors, which will be passed to "f" . \n | |||||
*@par Graphs: | *@par Graphs: | ||||
*f: A subgraph that takes 'args' and returns another list of tensors . \n | *f: A subgraph that takes 'args' and returns another list of tensors . \n | ||||
@@ -303,7 +303,7 @@ REG_OP(PartitionedCall) | |||||
*@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n | *@brief Pass the input tensors to the subgraph "f" and return the output tensors . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*args: The input tensors, which will be passed to "f" . It's a dynamic input. \n | |||||
*args: The input tensors, which will be passed to "f" . \n | |||||
*@par Graphs: | *@par Graphs: | ||||
*f: A subgraph that takes 'args' and returns another list of tensors . \n | *f: A subgraph that takes 'args' and returns another list of tensors . \n | ||||
@@ -160,8 +160,10 @@ REG_OP(CropAndResize) | |||||
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n | *@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches are resized to this size. | |||||
*@li extrapolation_value: An optional float. Defaults to 0. Value used for extrapolation, when applicable. | |||||
*@li crop_size: list int. [crop_height, crop_width]. All cropped image patches | |||||
are resized to this size. | |||||
*@li extrapolation_value: An optional float. Defaults to 0. Value used for | |||||
extrapolation, when applicable. | |||||
*@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n | *@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -172,7 +174,6 @@ REG_OP(CropAndResize) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with tensorflow CropAndResize operator. | *Compatible with tensorflow CropAndResize operator. | ||||
* @par Restrictions: | * @par Restrictions: | ||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use CropAndResize instead. | * Warning: THIS FUNCTION IS DEPRECATED. Please use CropAndResize instead. | ||||
*/ | */ | ||||
@@ -87,39 +87,58 @@ REG_OP(L2NormalizeGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) | * Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) | ||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW | |||||
for 4D or NC1HWC0 for 5D. | |||||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format | |||||
NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the scaling factor. | if input "x" is with format NC1HWC0. Specifies the scaling factor. | ||||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | *@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | ||||
if input "x" is with format NC1HWC0. Specifies the offset. | if input "x" is with format NC1HWC0. Specifies the offset. | ||||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format | |||||
NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the mean used for inference. | |||||
Must be "None" if the | |||||
operation is used for training. | operation is used for training. | ||||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be | |||||
5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" | |||||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format | |||||
NHWC or NCHW. Must be | |||||
5D if input "x" is with format NC1HWC0. Specifies the variance used for | |||||
inference. Must be "None" | |||||
if the operation is used for training . \n | if the operation is used for training . \n | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". | |||||
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". | |||||
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | |||||
*@li epsilon: An optional float32, specifying the small value added to variance | |||||
to avoid dividing by zero. Defaults to "0.0001". | |||||
*@li data_format: An optional string, specifying the format of "x". Defaults to | |||||
"NHWC". | |||||
*@li is_training: An optional bool, specifying if the operation is used for | |||||
training or inference. Defaults to "True" . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) | * Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) | ||||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D | |||||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", | |||||
with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with | |||||
format NHWC or NCHW. Must be 5D | |||||
if input "x" is with format NC1HWC0. Specifies the mean of "x". | if input "x" is with format NC1HWC0. Specifies the mean of "x". | ||||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with | |||||
format NHWC or NCHW. | |||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". | ||||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input | |||||
"x" is with format NHWC or NCHW. | |||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for | |||||
gradient computation. Pass "None" to skip this output. | |||||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input | |||||
"x" is with format NHWC or NCHW. | |||||
Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" | |||||
for gradient computation. Pass "None" to skip this output . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | |||||
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". | |||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n | |||||
*@li If the operation is used for inference and outputs "reserve_space_1" and | |||||
"reserve_space_2" are available, | |||||
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has | |||||
the same value as "variance". | |||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | |||||
root instruction . \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*@li Compatible with the TensorFlow operator fused_batch_norm. | *@li Compatible with the TensorFlow operator fused_batch_norm. | ||||
@@ -166,13 +185,17 @@ is used for training or inference. Defaults to "True" . \n | |||||
*@li y: A 4D Tensor of type float16 or float32, for the normalized "x". | *@li y: A 4D Tensor of type float16 or float32, for the normalized "x". | ||||
*@li batch_mean: A 1D Tensor of type float32, for the mean of "x". | *@li batch_mean: A 1D Tensor of type float32, for the mean of "x". | ||||
*@li batch_variance: A 1D Tensor of type float32, for the variance of "x". | *@li batch_variance: A 1D Tensor of type float32, for the variance of "x". | ||||
*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for gradient computation. | |||||
*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" for gradient computation . \n | |||||
*@li reserve_space_1: A 1D Tensor of type float32, for the mean of "x" for | |||||
gradient computation. | |||||
*@li reserve_space_2: A 1D Tensor of type float32, for the variance of "x" | |||||
for gradient computation . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
*@li If the operation is used for inference, then output "reserve_space_1" | *@li If the operation is used for inference, then output "reserve_space_1" | ||||
has the same value as "mean" and output "reserve_space_2" has the same value as "variance". | |||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n | |||||
has the same value as "mean" and output "reserve_space_2" has the same value as | |||||
"variance". | |||||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | |||||
root instruction . \n | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator fused_batch_norm_v2. | * Compatible with the TensorFlow operator fused_batch_norm_v2. | ||||
@@ -198,23 +221,34 @@ REG_OP(BatchNormExt2) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: | * Five inputs, including: | ||||
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the gradient. | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0. | |||||
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. | |||||
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm. | |||||
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or NC1HWC0. It is an output of BatchNorm . \n | |||||
*@li y_backprop: A 4D or 5D Tensor of type float16 or float32, with format | |||||
NHWC, NCHW, or NC1HWC0, for the gradient. | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC, NCHW, | |||||
or NC1HWC0. | |||||
*@li scale: A 4D or 5D Tensor of type float32, with format NHWC, NCHW, or | |||||
NC1HWC0. | |||||
*@li reserve_space_1: A 4D or 5D Tensor of type float32, with format NHWC, | |||||
NCHW, or NC1HWC0. It is an output of BatchNorm. | |||||
*@li reserve_space_2: A 4D or 5D Tensor of type float32, with format NHWC, | |||||
NCHW, or NC1HWC0. It is an output of BatchNorm . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". | |||||
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number | |||||
added to the variance of "x". | |||||
*@li data_format: An optional string. Defaults to "NHWC". | *@li data_format: An optional string. Defaults to "NHWC". | ||||
*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n | *@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n | ||||
*@par Outputs: | *@par Outputs: | ||||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "x". | |||||
*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "scale". | |||||
*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or NC1HWC0, for the offset of "offset". | |||||
*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output. | |||||
*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or NC1HWC0. Pass "None" to skip this output . \n | |||||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC, NCHW, | |||||
or NC1HWC0, for the offset of "x". | |||||
*@li scale_backprop: A Tensor of type float32, with format NHWC, NCHW, or | |||||
NC1HWC0, for the offset of "scale". | |||||
*@li *offset_backprop: A Tensor of type float32, with format NHWC, NCHW, or | |||||
NC1HWC0, for the offset of "offset". | |||||
*@li *reserve_space_4: A Tensor of type float32, with shape NHWC, NCHW, or | |||||
NC1HWC0. Pass "None" to skip this output. | |||||
*@li *reserve_space_5: A Tensor of type float32, with shape NHWC, NCHW, or | |||||
NC1HWC0. Pass "None" to skip this output . \n | |||||
*@attention Constraints: | *@attention Constraints: | ||||
* The preceding layer of this operator must be operator BatchNorm . \n | * The preceding layer of this operator must be operator BatchNorm . \n | ||||
@@ -244,21 +278,28 @@ REG_OP(BatchNormGrad) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Five inputs, including: | * Five inputs, including: | ||||
*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. | |||||
*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or | |||||
NCHW, for the gradient. | |||||
*@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW. | *@li x: A 4D Tensor of type float16 or float32, with format NHWC or NCHW. | ||||
*@li scale: A 4D Tensor of type float32, with format NHWC or NCHW. | *@li scale: A 4D Tensor of type float32, with format NHWC or NCHW. | ||||
*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2. | |||||
*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It is an output of BatchNormExt2 . \n | |||||
*@li reserve_space_1: A 4D Tensor of type float32, with format NHWC or NCHW. It | |||||
is an output of BatchNormExt2. | |||||
*@li reserve_space_2: A 4D Tensor of type float32, with format NHWC or NCHW. It | |||||
is an output of BatchNormExt2 . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li epsilon: A required float32. A small float number added to the variance of "x". | *@li epsilon: A required float32. A small float number added to the variance of "x". | ||||
*@li data_format: A required string for the format. | *@li data_format: A required string for the format. | ||||
*@li is_training: A required bool for specifying the operation is for training (true) or inference (false) . \n | |||||
*@li is_training: A required bool for specifying the operation is for training | |||||
(true) or inference (false) . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, for the offset of "x". | |||||
*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "scale". | |||||
*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for the offset of "offset". | |||||
*@li x_backprop: A Tensor of type float16 or float32, with format NHWC or NCHW, | |||||
for the offset of "x". | |||||
*@li scale_backprop: A Tensor of type float32, with format NHWC or NCHW, for | |||||
the offset of "scale". | |||||
*@li offset_backprop: A Tensor of type float32, with format NHWC or NCHW, for | |||||
the offset of "offset". | |||||
*@li reserve_space_3: A Tensor of type float32, with format NHWC or NCHW. | *@li reserve_space_3: A Tensor of type float32, with format NHWC or NCHW. | ||||
*@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW . \n | *@li reserve_space_4: A Tensor of type float32, with format NHWC or NCHW . \n | ||||
@@ -290,14 +331,18 @@ REG_OP(BatchNormGradExt2) | |||||
*@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | |||||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW | |||||
for 4D or NC1HWC0 for 5D. | |||||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" | |||||
Specifies the mean used for inference. | |||||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" | |||||
Specifies the variance used for inference. | |||||
*@li momentum: A Tensor representing the scale factor of the mean and the variance. | *@li momentum: A Tensor representing the scale factor of the mean and the variance. | ||||
*@li scale: An optional tensor of type float16 or float32. Not used. | *@li scale: An optional tensor of type float16 or float32. Not used. | ||||
*@li offset: An optional tensor of type float16 or float32. Not used. | *@li offset: An optional tensor of type float16 or float32. Not used. | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | |||||
*@li epsilon: An optional float32, specifying the small value added to variance | |||||
to avoid dividing by zero. Defaults to "0.00001". | |||||
*@li use_global_stats: Indicates inference mode. Can only be "True". | *@li use_global_stats: Indicates inference mode. Can only be "True". | ||||
*@li mode: An optional input. Not used. | *@li mode: An optional input. Not used. | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -315,16 +360,20 @@ REG_OP(BNInference) | |||||
.ATTR(use_global_stats, Bool,true) | .ATTR(use_global_stats, Bool,true) | ||||
.ATTR(mode, Int,1) | .ATTR(mode, Int,1) | ||||
.OP_END_FACTORY_REG(BNInference) | .OP_END_FACTORY_REG(BNInference) | ||||
/** | /** | ||||
*@brief aicpu batch normalization host . \n | *@brief aicpu batch normalization host . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | |||||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" | |||||
Specifies the mean used for inference. | |||||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" | |||||
Specifies the variance used for inference. | |||||
*@li momentum: An optional float, mean and variance's Scale factor | *@li momentum: An optional float, mean and variance's Scale factor | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | |||||
*@li epsilon: An optional float32, specifying the small value added to variance | |||||
to avoid dividing by zero. Defaults to "0.00001". | |||||
*@li use_global_stats: mean inference mode, can only be "True". | *@li use_global_stats: mean inference mode, can only be "True". | ||||
*@li mode: An optional attr, not use | *@li mode: An optional attr, not use | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -348,14 +397,19 @@ REG_OP(BnHost) | |||||
*@brief Performs batch normalization . \n | *@brief Performs batch normalization . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | |||||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW | |||||
for 4D or NC1HWC0 for 5D. | |||||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" | |||||
Specifies the mean used for inference. | |||||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" | |||||
Specifies the variance used for inference. | |||||
*@li scale: An optional tensor of type float16 or float32, no use | *@li scale: An optional tensor of type float16 or float32, no use | ||||
*@li offset: An optional tensor of type float16 or float32, no use | *@li offset: An optional tensor of type float16 or float32, no use | ||||
*@par Attributes: | *@par Attributes: | ||||
*@li momentum: An optional float32 num, represents the mean and the variance's scale factor | |||||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | |||||
*@li momentum: An optional float32 num, represents the mean and the variance's | |||||
scale factor | |||||
*@li epsilon: An optional float32, specifying the small value added to variance | |||||
to avoid dividing by zero. Defaults to "0.00001". | |||||
*@li use_global_stats: mean inference mode, can only be "True". | *@li use_global_stats: mean inference mode, can only be "True". | ||||
*@li mode: An optional attr, not use | *@li mode: An optional attr, not use | ||||
*@par Outputs: | *@par Outputs: | ||||
@@ -310,9 +310,6 @@ REG_OP(DepthwiseConv2DBackpropInputD) | |||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
* @li Compatible with the TensorFlow operator DepthwiseConv2D. | * @li Compatible with the TensorFlow operator DepthwiseConv2D. | ||||
* @li Compatible with the Caffe operator DepthwiseConv2D. | * @li Compatible with the Caffe operator DepthwiseConv2D. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(DepthwiseConv2D) | REG_OP(DepthwiseConv2D) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | ||||
@@ -460,9 +457,9 @@ REG_OP(Conv2DBackpropInputD) | |||||
*@par Attributes: | *@par Attributes: | ||||
* Six attributes: | * Six attributes: | ||||
* @li strides: A tuple or list of 2 integers. The stride of the sliding window | * @li strides: A tuple or list of 2 integers. The stride of the sliding window | ||||
* for H/W dimension. | |||||
* for H/W dimension, defaults to [1,1]. | |||||
* @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] | * @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] | ||||
* padding on the feature map. | |||||
* padding on the feature map, defaults to [0,0,0,0]. | |||||
* @li dilations: A tuple or list of 4 integers. The dilation factor for each | * @li dilations: A tuple or list of 4 integers. The dilation factor for each | ||||
* dimension of input, defaults to [1,1,1,1]. | * dimension of input, defaults to [1,1,1,1]. | ||||
* @li groups: Number of blocked connections from input channels to | * @li groups: Number of blocked connections from input channels to | ||||
@@ -482,8 +479,8 @@ REG_OP(Deconvolution) | |||||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) | .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) | ||||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) | .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) | ||||
.REQUIRED_ATTR(strides, ListInt) | |||||
.REQUIRED_ATTR(pads, ListInt) | |||||
.ATTR(strides, ListInt, {1, 1}) | |||||
.ATTR(pads, ListInt, {0, 0, 0, 0}) | |||||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | .ATTR(dilations, ListInt, {1, 1, 1, 1}) | ||||
.ATTR(groups, Int, 1) | .ATTR(groups, Int, 1) | ||||
.ATTR(data_format, String, "NCHW") | .ATTR(data_format, String, "NCHW") | ||||
@@ -593,7 +590,7 @@ REG_OP(Conv2DBackpropFilterD) | |||||
*@li bias: An optional 1D tensor. Shape is [out_channels]. | *@li bias: An optional 1D tensor. Shape is [out_channels]. | ||||
*@li offset_w: An optional 1D tensor for quantized convolution. Shape is | *@li offset_w: An optional 1D tensor for quantized convolution. Shape is | ||||
* [out_channels]. Reserved. | |||||
* [out_channels]. Not supported. | |||||
*\n | *\n | ||||
*\n | *\n | ||||
* Note that there is a strict data type mapping between the input and output | * Note that there is a strict data type mapping between the input and output | ||||
@@ -622,7 +619,8 @@ REG_OP(Conv2DBackpropFilterD) | |||||
* and right padding. | * and right padding. | ||||
* @li dilations: Optional. A list of 4 integers. Specifying the dilation rate | * @li dilations: Optional. A list of 4 integers. Specifying the dilation rate | ||||
* to use for dilated convolution. Has the same dimension order and value as | * to use for dilated convolution. Has the same dimension order and value as | ||||
* "strides". Defaults to [1, 1, 1, 1]. | |||||
* "strides". Dilation > 1 is not supported for quantized convolution. Defaults | |||||
* to [1, 1, 1, 1]. | |||||
* @li groups: Optional. An integer of type int32, for the number of blocked | * @li groups: Optional. An integer of type int32, for the number of blocked | ||||
* connections from input channels to output channels. Input channels and output | * connections from input channels to output channels. Input channels and output | ||||
* channels must both be divisible by "groups". "x" in_channels must be equal to | * channels must both be divisible by "groups". "x" in_channels must be equal to | ||||
@@ -704,13 +702,62 @@ REG_OP(Conv2D) | |||||
.ATTR(offset_x, Int, 0) | .ATTR(offset_x, Int, 0) | ||||
.OP_END_FACTORY_REG(Conv2D) | .OP_END_FACTORY_REG(Conv2D) | ||||
/** | |||||
*@brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors. | |||||
*@par Inputs: | |||||
* @li x: A 4D tensor of input images. | |||||
* @li filter_compress: A 4D tensor of compressed filters. | |||||
* @li compress_index: A 1D tensor of type int8. | |||||
* @li bias: An optional 1D tensor. | |||||
* @li offset_w: An optional 1D tensor for quantized convolution. Reserved. | |||||
* | |||||
* The input and output tensor attributes are listed as follows: | |||||
* @verbatim | |||||
|Tensor | x | filter_compress | bias | offset_w | y | |||||
-----------|---------|---------|---------|----------|-------- | |||||
|Data Type | float16 | float16 | float16 | _ | float16 | |||||
| |---------|---------|---------|----------|-------- | |||||
| | float32 | float32 | float32 | _ | float32 | |||||
| |---------|---------|---------|----------|-------- | |||||
| | int8 | int8 | int32 | int8 | int32 | |||||
-----------|---------|---------|---------|----------|-------- | |||||
|Format | NCHW | NCHW | ND | ND | NCHW | |||||
| | NHWC | NHWC | | | NHWC | |||||
| | | HWCN | | | | |||||
@endverbatim | |||||
* It should be noted that the data types must correspond to each other, but the | |||||
* format does not need to . \n | |||||
*@par Attributes: | |||||
* @li strides: A list of 4 integers. Specifying the strides of the | |||||
* convolution along the height and width. The dimension order is determined | |||||
* by the data format of "x". By default the N and C dimensions are set to 1. | |||||
* @li pads: A list of 4 integers. Specifying the top, bottom, left and right | |||||
* padding. | |||||
* @li dilations: A list of 4 integers. Specifying the dilation rate to use | |||||
* for dilated convolution. Has the same dimension order and value as "strides". | |||||
* @li groups: Number of blocked connections from input channels to output | |||||
* channels. Input channels and output channels must both be divisible by | |||||
* "groups". Type is int32. | |||||
* @li offset_x: An optional integer for quantized convolution. Type is int32. | |||||
* Defaults to "0". | |||||
* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the | |||||
* data format of the input and output images. Type is string. | |||||
* Defaults to "NHWC". Reserved . \n | |||||
*@par Outputs: | |||||
* @li y: A 4D Tensor of output images . \n | |||||
*@par Restrictions: | |||||
*Warning: THIS FUNCTION IS DEPRECATED. | |||||
*/ | |||||
REG_OP(Conv2DCompress) | REG_OP(Conv2DCompress) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||||
.INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) | |||||
.INPUT(filter_compress, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) | |||||
.INPUT(compress_index, TensorType({DT_INT8})) | .INPUT(compress_index, TensorType({DT_INT8})) | ||||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | ||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||||
.REQUIRED_ATTR(strides, ListInt) | .REQUIRED_ATTR(strides, ListInt) | ||||
.REQUIRED_ATTR(pads, ListInt) | .REQUIRED_ATTR(pads, ListInt) | ||||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | .ATTR(dilations, ListInt, {1, 1, 1, 1}) | ||||
@@ -158,18 +158,25 @@ REG_OP(Iou) | |||||
*@par Inputs: | *@par Inputs: | ||||
* Three inputs, including: | * Three inputs, including: | ||||
*@li ydiff: A 5HD gradient input of type float32. | *@li ydiff: A 5HD gradient input of type float32. | ||||
*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs, | |||||
the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1". | |||||
*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved . \n | |||||
*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" | |||||
indicates the number of ROIs, | |||||
the value "5" indicates the indexes of images where the ROIs are located, "x0", | |||||
"x1", "y0", and "y1". | |||||
*@li rois_n: An optional input, specifying the number of valid ROIs. This | |||||
parameter is reserved . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign. | *@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign. | ||||
*@li pooled_width: A required attribute of type int, specifying the W dimension. | *@li pooled_width: A required attribute of type int, specifying the W dimension. | ||||
*@li pooled_height: A required attribute of type int, specifying the H dimension. | *@li pooled_height: A required attribute of type int, specifying the H dimension. | ||||
*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. | |||||
*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical | |||||
sampling frequency of each output. If this attribute is set to "0", the sampling frequency is | |||||
equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n | |||||
*@li spatial_scale: A required attribute of type float, specifying the scaling | |||||
ratio of "features" to the original image. | |||||
*@li sample_num: An optional attribute of type int, specifying the horizontal | |||||
and vertical | |||||
sampling frequency of each output. If this attribute is set to "0", the | |||||
sampling frequency is | |||||
equal to the rounded up value of "rois", which is a floating point number. | |||||
Defaults to "2" . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". | *xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". | ||||
@@ -876,9 +883,7 @@ REG_OP(YoloV3DetectionOutputV2) | |||||
A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. | A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. | ||||
*@li imginfo: A float16, describing the image information including the required image height and width | *@li imginfo: A float16, describing the image information including the required image height and width | ||||
and the actual image height and width. | and the actual image height and width. | ||||
*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. | |||||
[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] | |||||
is formed for the three Yolo outputs, respectively .It's a dynamic input. \n | |||||
*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n | |||||
*@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n | *@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n | ||||
@@ -896,29 +896,7 @@ REG_OP(InstanceNormV2) | |||||
.ATTR(epsilon, Float, 0.00001) | .ATTR(epsilon, Float, 0.00001) | ||||
.OP_END_FACTORY_REG(InstanceNormV2) | .OP_END_FACTORY_REG(InstanceNormV2) | ||||
/** | |||||
*@brief Performs instance normalization for inference. | |||||
*@par Inputs:\n | |||||
* Five inputs, including: (NC1HWC0 supported) | |||||
*@li x: A Tensor of type float16 or float32. | |||||
*@li gamma: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling gamma. | |||||
*@li beta: A [N, C1, 1, 1, C0] Tensor of type float32, for the scaling beta. | |||||
*@li mean: A [N, C1, 1, 1, C0] Tensor of type float32, for the mean. | |||||
*@li variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. | |||||
*@li variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. | |||||
*@par Outputs:\n | |||||
*y: A Tensor of type float16 or float32 for the normalized "x". | |||||
*batch_mean: A Tensor of type float32 for the result mean. | |||||
*batch_variance: A Tensor of type float32 for the result variance. | |||||
*@attention Constraints: | |||||
*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use INInferV2 instead. | |||||
*/ | |||||
REG_OP(INInferV2D) | REG_OP(INInferV2D) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
.OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT})) | .OPTIONAL_INPUT(gamma, TensorType({DT_FLOAT})) | ||||
@@ -931,6 +909,20 @@ REG_OP(INInferV2D) | |||||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | .OUTPUT(batch_variance, TensorType({DT_FLOAT})) | ||||
.OP_END_FACTORY_REG(INInferV2D) | .OP_END_FACTORY_REG(INInferV2D) | ||||
/** | |||||
*@brief Performs instance normalization for inference of InHost part. | |||||
*@par Inputs:\n | |||||
* One input, including: (NC1HWC0 supported) | |||||
* variance: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance. | |||||
*@par Attributes: | |||||
* epsilon: An optional float32, specifying the small value added to | |||||
variance to avoid dividing by zero. Defaults to "0.00001" . \n | |||||
*@par Outputs:\n | |||||
* variance_sqrt: A [N, C1, 1, 1, C0] Tensor of type float32, for the variance_sqrt. | |||||
*/ | |||||
REG_OP(InHost) | REG_OP(InHost) | ||||
.INPUT(variance, TensorType({DT_FLOAT})) | .INPUT(variance, TensorType({DT_FLOAT})) | ||||
.OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) | .OUTPUT(variance_sqrt, TensorType({DT_FLOAT})) | ||||
@@ -128,9 +128,6 @@ REG_OP(AvgPool) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator AvgPool3D. | * Compatible with the TensorFlow operator AvgPool3D. | ||||
* | |||||
* @par Restrictions: | |||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||||
*/ | */ | ||||
REG_OP(AvgPool3D) | REG_OP(AvgPool3D) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) | ||||
@@ -111,9 +111,6 @@ REG_OP(ApplyAdaMax) | |||||
* | * | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ApplyAdaMax. | *Compatible with the TensorFlow operator ApplyAdaMax. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdaMax instead. | |||||
*/ | */ | ||||
REG_OP(ApplyAdaMaxD) | REG_OP(ApplyAdaMaxD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -352,9 +349,6 @@ REG_OP(ApplyMomentum) | |||||
* accum: A mutable tensor. Has the same type as input "accum". | * accum: A mutable tensor. Has the same type as input "accum". | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ApplyMomentum. | *Compatible with the TensorFlow operator ApplyMomentum. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyMomentum instead. | |||||
*/ | */ | ||||
REG_OP(ApplyMomentumD) | REG_OP(ApplyMomentumD) | ||||
@@ -681,9 +675,6 @@ REG_OP(ApplyPowerSign) | |||||
* | * | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ApplyPowerSign. | *Compatible with the TensorFlow operator ApplyPowerSign. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyPowerSign instead. | |||||
*/ | */ | ||||
REG_OP(ApplyPowerSignD) | REG_OP(ApplyPowerSignD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -804,9 +795,6 @@ REG_OP(ApplyAddSign) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator ApplyAddSign. | * Compatible with the TensorFlow operator ApplyAddSign. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAddSign instead. | |||||
*/ | */ | ||||
REG_OP(ApplyAddSignD) | REG_OP(ApplyAddSignD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -928,9 +916,6 @@ REG_OP(ApplyCenteredRMSProp) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ApplyCenteredRMSProp. | *Compatible with the TensorFlow operator ApplyCenteredRMSProp. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyCenteredRMSProp instead. | |||||
*/ | */ | ||||
REG_OP(ApplyCenteredRMSPropD) | REG_OP(ApplyCenteredRMSPropD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -1049,9 +1034,6 @@ REG_OP(ApplyAdagrad) | |||||
* | * | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ApplyAdagrad. | *Compatible with the TensorFlow operator ApplyAdagrad. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagrad instead. | |||||
*/ | */ | ||||
REG_OP(ApplyAdagradD) | REG_OP(ApplyAdagradD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -1236,9 +1218,6 @@ REG_OP(ApplyAdagradDA) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ApplyAdagradDA. | *Compatible with the TensorFlow operator ApplyAdagradDA. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdagradDA instead. | |||||
*/ | */ | ||||
REG_OP(ApplyAdagradDAD) | REG_OP(ApplyAdagradDAD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -1496,9 +1475,6 @@ REG_OP(ApplyProximalAdagrad) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ApplyProximalAdagrad. | *Compatible with the TensorFlow operator ApplyProximalAdagrad. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyProximalAdagrad instead. | |||||
*/ | */ | ||||
REG_OP(ApplyProximalAdagradD) | REG_OP(ApplyProximalAdagradD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -1592,9 +1568,6 @@ REG_OP(SparseApplyProximalAdagrad) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator SparseApplyProximalAdagrad. | *Compatible with the TensorFlow operator SparseApplyProximalAdagrad. | ||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use SparseApplyProximalAdagrad instead. | |||||
*/ | */ | ||||
REG_OP(SparseApplyProximalAdagradD) | REG_OP(SparseApplyProximalAdagradD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -1681,9 +1654,6 @@ REG_OP(ApplyFtrl) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ApplyFtrl. | *Compatible with the TensorFlow operator ApplyFtrl. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrl instead. | |||||
*/ | */ | ||||
REG_OP(ApplyFtrlD) | REG_OP(ApplyFtrlD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -1775,9 +1745,6 @@ REG_OP(ApplyFtrlV2) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ApplyFtrlV2. | *Compatible with the TensorFlow operator ApplyFtrlV2. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyFtrlV2 instead. | |||||
*/ | */ | ||||
REG_OP(ApplyFtrlV2D) | REG_OP(ApplyFtrlV2D) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -1890,9 +1857,6 @@ REG_OP(ApplyAdam) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator ApplyAdam. | *Compatible with the TensorFlow operator ApplyAdam. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdam instead. | |||||
*/ | */ | ||||
REG_OP(ApplyAdamD) | REG_OP(ApplyAdamD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -1981,9 +1945,6 @@ REG_OP(ApplyAdadelta) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator ApplyAdadelta. | * Compatible with the TensorFlow operator ApplyAdadelta. | ||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ApplyAdadelta instead. | |||||
*/ | */ | ||||
REG_OP(ApplyAdadeltaD) | REG_OP(ApplyAdadeltaD) | ||||
.INPUT(var, TensorType::NumberType()) | .INPUT(var, TensorType::NumberType()) | ||||
@@ -65,9 +65,6 @@ REG_OP(Fill) | |||||
* | * | ||||
*@par Outputs: | *@par Outputs: | ||||
* y: A tensor. Has the same type as "value". | * y: A tensor. Has the same type as "value". | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Fill instead. | |||||
*/ | */ | ||||
REG_OP(FillD) | REG_OP(FillD) | ||||
.INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, | .INPUT(value, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, | ||||
@@ -125,9 +122,6 @@ REG_OP(BroadcastTo) | |||||
* | * | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator BroadcastTo. | *Compatible with the TensorFlow operator BroadcastTo. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use BroadcastTo instead. | |||||
*/ | */ | ||||
REG_OP(BroadcastToD) | REG_OP(BroadcastToD) | ||||
.INPUT(x, TensorType::BasicType()) | .INPUT(x, TensorType::BasicType()) | ||||
@@ -175,9 +169,6 @@ REG_OP(Pad) | |||||
*@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
* Compatible with TensorFlow operator Pad. | * Compatible with TensorFlow operator Pad. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Pad instead. | |||||
*/ | */ | ||||
REG_OP(PadD) | REG_OP(PadD) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) | ||||
@@ -269,9 +260,6 @@ REG_OP(PadV3D) | |||||
*@see Diag() | *@see Diag() | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator Diag. | * Compatible with the TensorFlow operator Diag. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Diag instead. | |||||
*/ | */ | ||||
REG_OP(DiagD) | REG_OP(DiagD) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | ||||
@@ -30,7 +30,7 @@ namespace ge { | |||||
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: | *Two inputs, including: | ||||
*@li rt_nested_splits: A list of at least 1 Tensor objects with the same type | *@li rt_nested_splits: A list of at least 1 Tensor objects with the same type | ||||
in: int32, int64. The row_splits for the RaggedTensor. It's a dynamic input. | |||||
in: int32, int64. The row_splits for the RaggedTensor. | |||||
*@li rt_dense_values: A Tensor. The flat_values for the RaggedTensor | *@li rt_dense_values: A Tensor. The flat_values for the RaggedTensor | ||||
Must be one of the following types: bool, int8, int16, uint16, int32, | Must be one of the following types: bool, int8, int16, uint16, int32, | ||||
int64, double, float, float16 . \n | int64, double, float, float16 . \n | ||||
@@ -66,7 +66,7 @@ REG_OP(RaggedTensorToSparse) | |||||
*@li values:A 1D tensor representing the values of the ragged tensor. | *@li values:A 1D tensor representing the values of the ragged tensor. | ||||
*@li default_value:A `Tensor`. Must have the same type as `values`. | *@li default_value:A `Tensor`. Must have the same type as `values`. | ||||
*@li row_partition_tensors:A list of at least 1 `Tensor` objects with the same | *@li row_partition_tensors:A list of at least 1 `Tensor` objects with the same | ||||
type in: `int64`, `int32` . It's a dynamic input.\n | |||||
type in: `int64`, `int32` .\n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li num_row_partition_tensors:Numbers of row partition tensors. | *@li num_row_partition_tensors:Numbers of row partition tensors. | ||||
@@ -374,9 +374,6 @@ REG_OP(DropOutGenMask) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator lin_space. | * Compatible with the TensorFlow operator lin_space. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use LinSpace instead. | |||||
*/ | */ | ||||
REG_OP(LinSpaceD) | REG_OP(LinSpaceD) | ||||
.INPUT(assist, TensorType({DT_FLOAT})) | .INPUT(assist, TensorType({DT_FLOAT})) | ||||
@@ -353,9 +353,6 @@ REG_OP(ReduceSum) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator Sum. | * Compatible with the TensorFlow operator Sum. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceSum instead. | |||||
*/ | */ | ||||
REG_OP(ReduceSumD) | REG_OP(ReduceSumD) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
@@ -381,9 +378,6 @@ REG_OP(ReduceSumD) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator ReduceAll. | * Compatible with the TensorFlow operator ReduceAll. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAll instead. | |||||
*/ | */ | ||||
REG_OP(ReduceAllD) | REG_OP(ReduceAllD) | ||||
.INPUT(x, TensorType({DT_BOOL})) | .INPUT(x, TensorType({DT_BOOL})) | ||||
@@ -459,9 +453,6 @@ REG_OP(ReduceProd) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator ReduceProd. | * Compatible with the TensorFlow operator ReduceProd. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceProd instead. | |||||
*/ | */ | ||||
REG_OP(ReduceProdD) | REG_OP(ReduceProdD) | ||||
.INPUT(x,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16})) | .INPUT(x,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16})) | ||||
@@ -516,9 +507,6 @@ REG_OP(ReduceMean) | |||||
*@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
* Compatible with the TensorFlow operator ReduceMean. | * Compatible with the TensorFlow operator ReduceMean. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMean instead. | |||||
*/ | */ | ||||
REG_OP(ReduceMeanD) | REG_OP(ReduceMeanD) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
@@ -573,9 +561,6 @@ REG_OP(ReduceMax) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with TensorFlow operator Max. | * Compatible with TensorFlow operator Max. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMax instead. | |||||
*/ | */ | ||||
REG_OP(ReduceMaxD) | REG_OP(ReduceMaxD) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, | .INPUT(x, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, | ||||
@@ -630,9 +615,6 @@ REG_OP(ReduceMin) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator reduce_min. | * Compatible with the TensorFlow operator reduce_min. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceMin instead. | |||||
*/ | */ | ||||
REG_OP(ReduceMinD) | REG_OP(ReduceMinD) | ||||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) | ||||
@@ -699,9 +681,6 @@ REG_OP(ReduceAny) | |||||
* | * | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator reduce_any. | *Compatible with the TensorFlow operator reduce_any. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ReduceAny instead. | |||||
*/ | */ | ||||
REG_OP(ReduceAnyD) | REG_OP(ReduceAnyD) | ||||
.INPUT(x, TensorType({DT_BOOL})) | .INPUT(x, TensorType({DT_BOOL})) | ||||
@@ -787,9 +766,6 @@ REG_OP(EuclideanNorm) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator EuclideanNorm. | * Compatible with the TensorFlow operator EuclideanNorm. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use EuclideanNorm instead. | |||||
*/ | */ | ||||
REG_OP(EuclideanNormD) | REG_OP(EuclideanNormD) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) | .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) | ||||
@@ -92,6 +92,7 @@ REG_OP(DynamicLSTM) | |||||
.OUTPUT(output_h, TensorType({DT_FLOAT32})) | .OUTPUT(output_h, TensorType({DT_FLOAT32})) | ||||
.OP_END_FACTORY_REG(DynamicLSTM) | .OP_END_FACTORY_REG(DynamicLSTM) | ||||
/** | /** | ||||
*@brief: DynamicRNNGrad calculation. | *@brief: DynamicRNNGrad calculation. | ||||
*@par Inputs: | *@par Inputs: | ||||
@@ -126,7 +127,7 @@ REG_OP(DynamicLSTM) | |||||
*@li keep_prob:A float identifying the keep prob in the op. Defaults to 1. | *@li keep_prob:A float identifying the keep prob in the op. Defaults to 1. | ||||
*@li cell_clip:A float identifying the cell clip in the op. Defaults to -1. | *@li cell_clip:A float identifying the cell clip in the op. Defaults to -1. | ||||
*@li num_proj:An integer identifying the num projection in the op. Defaults to 0. | *@li num_proj:An integer identifying the num projection in the op. Defaults to 0. | ||||
*@li time_major:A bool identifying the time major in the op. Defaults to false. | |||||
*@li time_major:A bool identifying the time major in the op. Defaults to true. | |||||
*@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported. | *@li activation:A string identifying the type of activation function in the op. Defaults to "tanh". Only tanh is currently supported. | ||||
*@li forget_bias:A float identifying the forget bias in the op. Defaults to 0. | *@li forget_bias:A float identifying the forget bias in the op. Defaults to 0. | ||||
*@li is_training:A bool identifying is training in the op. Defaults to true. | *@li is_training:A bool identifying is training in the op. Defaults to true. | ||||
@@ -138,6 +139,9 @@ REG_OP(DynamicLSTM) | |||||
*@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dx:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dh_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | *@li dc_prev:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | ||||
*@li dwci:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dwcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*@li dwco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. | |||||
*/ | */ | ||||
REG_OP(DynamicRNNGrad) | REG_OP(DynamicRNNGrad) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | ||||
@@ -28,7 +28,7 @@ namespace ge { | |||||
/** | /** | ||||
*@brief Mark which tensors need to be saved to the ckpt file. | *@brief Mark which tensors need to be saved to the ckpt file. | ||||
*@par Inputs: | *@par Inputs: | ||||
*tensors: A list of input tensor.It's a dynamic input. | |||||
*tensors: A list of input tensor. | |||||
*@par Restrictions: | *@par Restrictions: | ||||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | ||||
*/ | */ | ||||
@@ -35,16 +35,16 @@ namespace ge { | |||||
*rate . \n | *rate . \n | ||||
*@par Inputs: | *@par Inputs: | ||||
*@li sparse_example_indices: a list of vectors which contain example indices.It's a dynamic input. | |||||
*@li sparse_feature_indices: a list of vectors which contain feature indices.It's a dynamic input. | |||||
*@li sparse_feature_values: a list of vectors which contains feature value associated with each feature group.It's a dynamic input. | |||||
*@li dense_features: a list of matrices which contains the dense feature values.It's a dynamic input. | |||||
*@li sparse_example_indices: a list of vectors which contain example indices. | |||||
*@li sparse_feature_indices: a list of vectors which contain feature indices. | |||||
*@li sparse_feature_values: a list of vectors which contains feature value associated with each feature group. | |||||
*@li dense_features: a list of matrices which contains the dense feature values. | |||||
*@li example_weights: a vector which contains the weight associated with each example. | *@li example_weights: a vector which contains the weight associated with each example. | ||||
*@li example_labels: a vector which contains the label/target associated with each example. | *@li example_labels: a vector which contains the label/target associated with each example. | ||||
*@li sparse_indices: a list of vectors where each value is the indices which has | *@li sparse_indices: a list of vectors where each value is the indices which has | ||||
*corresponding weights in sparse_weights. This field may be omitted for the dense approach. It's a dynamic input. | |||||
*corresponding weights in sparse_weights. This field may be omitted for the dense approach. | |||||
*@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group. | *@li sparse_weights: a list of vectors where each value is the weight associated with a sparse feature group. | ||||
*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group.It's a dynamic input. | |||||
*@li dense_weights: a list of vectors where the values are the weights associated with a dense feature group. | |||||
*@li example_state_data: a list of vectors containing the example state data. | *@li example_state_data: a list of vectors containing the example state data. | ||||
*@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses. | *@li loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, squared and hinge losses. | ||||
*@li l1: Symmetric l1 regularization strength. | *@li l1: Symmetric l1 regularization strength. | ||||
@@ -61,7 +61,6 @@ namespace ge { | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with tensorflow SdcaOptimizerV2 operator. | * Compatible with tensorflow SdcaOptimizerV2 operator. | ||||
*/ | */ | ||||
REG_OP(SdcaOptimizerV2) | REG_OP(SdcaOptimizerV2) | ||||
.DYNAMIC_INPUT(sparse_example_indices, TensorType({DT_INT64})) | .DYNAMIC_INPUT(sparse_example_indices, TensorType({DT_INT64})) | ||||
.DYNAMIC_INPUT(sparse_feature_indices, TensorType({DT_INT64})) | .DYNAMIC_INPUT(sparse_feature_indices, TensorType({DT_INT64})) | ||||
@@ -79,9 +79,6 @@ REG_OP(Range) | |||||
*@see Range() | *@see Range() | ||||
*@since V100R001C33 | *@since V100R001C33 | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Range instead. | |||||
*/ | */ | ||||
REG_OP(RangeD) | REG_OP(RangeD) | ||||
.INPUT(x, TensorType({DT_FLOAT,DT_INT32})) | .INPUT(x, TensorType({DT_FLOAT,DT_INT32})) | ||||
@@ -186,7 +183,8 @@ REG_OP(GatherNd) | |||||
* uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16, | * uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16, | ||||
* uint16, complex128, float16, uint32, uint64, complex64, complex128. | * uint16, complex128, float16, uint32, uint64, complex64, complex128. | ||||
* @li indices: A Tensor of type int32 or int64. | * @li indices: A Tensor of type int32 or int64. | ||||
* @li axis: A Tensor of type int32 . \n | |||||
* @li axis: A Tensor of type int32 or int64. | |||||
* Must be in the range [-rank(input_tensor), rank(input_tensor)) . \n | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x" . \n | *y: A Tensor. Has the same type as "x" . \n | ||||
@@ -225,9 +223,6 @@ REG_OP(GatherV2) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator GatherV2. | * Compatible with the TensorFlow operator GatherV2. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use GatherV2 instead. | |||||
*/ | */ | ||||
REG_OP(GatherV2D) | REG_OP(GatherV2D) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT32, DT_INT8, DT_UINT8, | .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT32, DT_INT8, DT_UINT8, | ||||
@@ -330,9 +325,6 @@ REG_OP(StridedSlice) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator StridedSlice. | * Compatible with the TensorFlow operator StridedSlice. | ||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead. | |||||
*/ | */ | ||||
REG_OP(StridedSliceD) | REG_OP(StridedSliceD) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, | .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, | ||||
@@ -388,9 +380,6 @@ REG_OP(StridedSliceD) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator StridedSliceGradD. | * Compatible with the TensorFlow operator StridedSliceGradD. | ||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSliceGrad instead. | |||||
*/ | */ | ||||
REG_OP(StridedSliceGradD) | REG_OP(StridedSliceGradD) | ||||
.INPUT(dy, TensorType::BasicType()) | .INPUT(dy, TensorType::BasicType()) | ||||
@@ -502,9 +491,6 @@ REG_OP(UnsortedSegmentSum) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator UnsortedSegmentSum. | * Compatible with the TensorFlow operator UnsortedSegmentSum. | ||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentSum instead. | |||||
*/ | */ | ||||
REG_OP(UnsortedSegmentSumD) | REG_OP(UnsortedSegmentSumD) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8})) | .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8})) | ||||
@@ -729,9 +715,6 @@ REG_OP(OneHot) | |||||
*@par Third-party framework compatibility: | *@par Third-party framework compatibility: | ||||
* Compatible with the TensorFlow operator OneHot. | * Compatible with the TensorFlow operator OneHot. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use OneHot instead. | |||||
*/ | */ | ||||
REG_OP(OneHotD) | REG_OP(OneHotD) | ||||
.INPUT(x, TensorType({DT_UINT8, DT_INT32})) | .INPUT(x, TensorType({DT_UINT8, DT_INT32})) | ||||
@@ -807,7 +790,7 @@ REG_OP(SliceD) | |||||
* @li assist_seq: A 1D tensor of type float16. | * @li assist_seq: A 1D tensor of type float16. | ||||
* with size of 2N, which "N" is the last dimension. | * with size of 2N, which "N" is the last dimension. | ||||
* The first N numbers is indices, and the next N numbers is deviation of casting | * The first N numbers is indices, and the next N numbers is deviation of casting | ||||
* int32 to float16. \n | |||||
* float16 to int32 . \n | |||||
* @par Attributes: | * @par Attributes: | ||||
* @li k: A required int that is at least 0, specifying the number of top elements | * @li k: A required int that is at least 0, specifying the number of top elements | ||||
@@ -816,7 +799,7 @@ REG_OP(SliceD) | |||||
* If true, the resulting "k" elements will be sorted by the values in descending | * If true, the resulting "k" elements will be sorted by the values in descending | ||||
* order. | * order. | ||||
* @li dim: An optional int. Defaults to -1. For reserved use. | * @li dim: An optional int. Defaults to -1. For reserved use. | ||||
* @li largest: An optional bool. Defaults to true. For reserved use. \n | |||||
* @li largest: An optional bool. Defaults to true. For reserved use. | |||||
* @par Outputs: | * @par Outputs: | ||||
* @li values: A Tensor, specifying the sorted data. Has the same type as "input". | * @li values: A Tensor, specifying the sorted data. Has the same type as "input". | ||||
@@ -1270,9 +1253,6 @@ REG_OP(InplaceUpdate) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator InplaceUpdate. | *Compatible with the TensorFlow operator InplaceUpdate. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceUpdate instead. | |||||
*/ | */ | ||||
REG_OP(InplaceUpdateD) | REG_OP(InplaceUpdateD) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | ||||
@@ -1325,9 +1305,6 @@ REG_OP(InplaceAdd) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator InplaceAdd. | *Compatible with the TensorFlow operator InplaceAdd. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceAdd instead. | |||||
*/ | */ | ||||
REG_OP(InplaceAddD) | REG_OP(InplaceAddD) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | ||||
@@ -1379,9 +1356,6 @@ REG_OP(InplaceSub) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
*Compatible with the TensorFlow operator InplaceSub. | *Compatible with the TensorFlow operator InplaceSub. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use InplaceSub instead. | |||||
*/ | */ | ||||
REG_OP(InplaceSubD) | REG_OP(InplaceSubD) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | ||||
@@ -1433,9 +1407,6 @@ REG_OP(ScatterNonAliasingAdd) | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: A Tensor of type RealNumberType . \n | * y: A Tensor of type RealNumberType . \n | ||||
* @attention Constraints: | |||||
* @li segment_ids must be non-negative tensor. | |||||
* @see UnsortedSegmentSum(), UnsortedSegmentProd(), | * @see UnsortedSegmentSum(), UnsortedSegmentProd(), | ||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
@@ -1463,9 +1434,6 @@ REG_OP(UnsortedSegmentMin) | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: A Tensor.Must have the same type as input "x" . \n | * y: A Tensor.Must have the same type as input "x" . \n | ||||
* @attention Constraints: | |||||
* @li segment_ids must be non-negative tensor. | |||||
* @see UnsortedSegmentProdD(), UnsortedSegmentSumD(), | * @see UnsortedSegmentProdD(), UnsortedSegmentSumD(), | ||||
* | * | ||||
* @par Restrictions: | * @par Restrictions: | ||||
@@ -1491,9 +1459,6 @@ REG_OP(UnsortedSegmentMinD) | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: A Tensor of type RealNumberType . \n | * y: A Tensor of type RealNumberType . \n | ||||
* @attention Constraints: | |||||
* @li segment_ids must be non-negative tensor. | |||||
* @see UnsortedSegmentSum(), UnsortedSegmentProd(), | * @see UnsortedSegmentSum(), UnsortedSegmentProd(), | ||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
@@ -1521,9 +1486,6 @@ REG_OP(UnsortedSegmentMax) | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: A Tensor.Must have the same type as input "x" . \n | * y: A Tensor.Must have the same type as input "x" . \n | ||||
* @attention Constraints: | |||||
* @li segment_ids must be non-negative tensor. | |||||
* @see UnsortedSegmentProdD(), | * @see UnsortedSegmentProdD(), | ||||
* | * | ||||
* @par Restrictions: | * @par Restrictions: | ||||
@@ -1548,9 +1510,6 @@ REG_OP(UnsortedSegmentMaxD) | |||||
* @par Outputs: | * @par Outputs: | ||||
* y: A Tensor of type NumberType . \n | * y: A Tensor of type NumberType . \n | ||||
* @attention Constraints: | |||||
* @li segment_ids must be non-negative tensor. | |||||
* @see UnsortedSegmentSum(), UnsortedSegmentMin(), | * @see UnsortedSegmentSum(), UnsortedSegmentMin(), | ||||
* @par Third-party framework compatibility | * @par Third-party framework compatibility | ||||
@@ -1582,9 +1541,6 @@ REG_OP(UnsortedSegmentProd) | |||||
* @li segment_ids must be non-negative tensor. | * @li segment_ids must be non-negative tensor. | ||||
* @see UnsortedSegmentMinD() | * @see UnsortedSegmentMinD() | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use UnsortedSegmentProd instead. | |||||
*/ | */ | ||||
REG_OP(UnsortedSegmentProdD) | REG_OP(UnsortedSegmentProdD) | ||||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) | .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) | ||||
@@ -1900,9 +1856,6 @@ REG_OP(CumulativeLogsumexp) | |||||
*y: A Tensor. Has the same type as "x". | *y: A Tensor. Has the same type as "x". | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator Cumsum. | * Compatible with the TensorFlow operator Cumsum. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use CumulativeLogsumexp instead. | |||||
*/ | */ | ||||
REG_OP(CumulativeLogsumexpD) | REG_OP(CumulativeLogsumexpD) | ||||
.INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) | .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) | ||||
@@ -75,9 +75,6 @@ REG_OP(Split) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator Split. | * Compatible with the TensorFlow operator Split. | ||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use Split instead. | |||||
*/ | */ | ||||
REG_OP(SplitD) | REG_OP(SplitD) | ||||
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, | .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, | ||||
@@ -144,9 +141,6 @@ Under the caffe framework, the conversion of slice_point through the cut point t | |||||
Under the caffe framework, size_splits or axis is transformed to split_dim. Only one of them can take effect. | Under the caffe framework, size_splits or axis is transformed to split_dim. Only one of them can take effect. | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator SplitV. | * Compatible with the TensorFlow operator SplitV. | ||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use SplitV instead. | |||||
*/ | */ | ||||
REG_OP(SplitVD) | REG_OP(SplitVD) | ||||
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, | .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, | ||||
@@ -164,8 +158,7 @@ REG_OP(SplitVD) | |||||
* Two inputs, including: | * Two inputs, including: | ||||
* @li values: A list of Tensors. Must be one of the following types: int8, int16, int32, | * @li values: A list of Tensors. Must be one of the following types: int8, int16, int32, | ||||
* int64, uint8, uint16, uint32, uint64, float16, float32. | * int64, uint8, uint16, uint32, uint64, float16, float32. | ||||
* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. | |||||
* It's a dynamic input. | |||||
* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. | |||||
* @li shape: A Tensor of the same type as "x". | * @li shape: A Tensor of the same type as "x". | ||||
* The final shape of the result. Should be equal to the shapes of any input | * The final shape of the result. Should be equal to the shapes of any input | ||||
* but with the number of input values in the first dimension . \n | * but with the number of input values in the first dimension . \n | ||||
@@ -314,7 +307,7 @@ REG_OP(Concat) | |||||
*@par Inputs: | *@par Inputs: | ||||
* x: A list of N Tensors. Must be one of the following types: int8, int16, int32, | * x: A list of N Tensors. Must be one of the following types: int8, int16, int32, | ||||
* int64, uint8, uint16, uint32, uint64, float16, float32, bool . It's a dynamic input. \n | |||||
* int64, uint8, uint16, uint32, uint64, float16, float32, bool . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li axis: An optional int. Defaults to 0. | *@li axis: An optional int. Defaults to 0. | ||||
@@ -340,7 +333,7 @@ REG_OP(Pack) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: | *Two inputs, including: | ||||
* @li concat_dim: A Tensor of type int32. | * @li concat_dim: A Tensor of type int32. | ||||
* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n | |||||
* @li x: A list of 1D Tensor objects of type int32 . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*N: A required int . \n | *N: A required int . \n | ||||
@@ -364,7 +357,7 @@ REG_OP(ConcatOffset) | |||||
*@par Inputs: | *@par Inputs: | ||||
*Two inputs, including: | *Two inputs, including: | ||||
* @li concat_dim: A Tensor of type int32. | * @li concat_dim: A Tensor of type int32. | ||||
* @li x: A list of 1D Tensor objects of type int32 . It's a dynamic input. \n | |||||
* @li x: A list of 1D Tensor objects of type int32 . \n | |||||
*@par Attributes: | *@par Attributes: | ||||
*@li Concat_dim: A required int. Must be within the rank of input "x". | *@li Concat_dim: A required int. Must be within the rank of input "x". | ||||
@@ -235,12 +235,8 @@ REG_OP(BatchToSpaceND) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor with format NC1HWC0. Has the same type as input "x". | *y: A Tensor with format NC1HWC0. Has the same type as input "x". | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator BatchToSpaceND. | * Compatible with the TensorFlow operator BatchToSpaceND. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpaceND instead. | |||||
*/ | */ | ||||
REG_OP(BatchToSpaceNDD) | REG_OP(BatchToSpaceNDD) | ||||
.INPUT(x, TensorType::BasicType()) | .INPUT(x, TensorType::BasicType()) | ||||
@@ -287,9 +283,6 @@ REG_OP(SpaceToBatchND) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator SpaceToBatchND. | * Compatible with the TensorFlow operator SpaceToBatchND. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatchND instead. | |||||
*/ | */ | ||||
REG_OP(SpaceToBatchNDD) | REG_OP(SpaceToBatchNDD) | ||||
.INPUT(x, TensorType::BasicType()) | .INPUT(x, TensorType::BasicType()) | ||||
@@ -411,9 +404,6 @@ REG_OP(BatchToSpace) | |||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator BatchToSpace. | * Compatible with the TensorFlow operator BatchToSpace. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use BatchToSpace instead. | |||||
*/ | */ | ||||
REG_OP(BatchToSpaceD) | REG_OP(BatchToSpaceD) | ||||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, | .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, | ||||
@@ -467,9 +457,6 @@ REG_OP(SpaceToBatch) | |||||
*y: A Tensor. Has the same type as input "x". | *y: A Tensor. Has the same type as input "x". | ||||
*@par Third-party framework compatibility | *@par Third-party framework compatibility | ||||
* Compatible with the TensorFlow operator SpaceToBatch. | * Compatible with the TensorFlow operator SpaceToBatch. | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use SpaceToBatch instead. | |||||
*/ | */ | ||||
REG_OP(SpaceToBatchD) | REG_OP(SpaceToBatchD) | ||||
.INPUT(x, TensorType::BasicType()) | .INPUT(x, TensorType::BasicType()) | ||||
@@ -598,9 +585,6 @@ REG_OP(ExtractVolumePatches) | |||||
*@par Outputs: | *@par Outputs: | ||||
*y: A Tensor. Has the same type as "x". | *y: A Tensor. Has the same type as "x". | ||||
* | |||||
* @par Restrictions: | |||||
* Warning: THIS FUNCTION IS DEPRECATED. Please use ConfusionTranspose instead. | |||||
*/ | */ | ||||
REG_OP(ConfusionTransposeD) | REG_OP(ConfusionTransposeD) | ||||
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, | .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, | ||||
@@ -664,11 +648,6 @@ REG_OP(FlattenV2) | |||||
.ATTR(end_axis, Int, -1) | .ATTR(end_axis, Int, -1) | ||||
.OP_END_FACTORY_REG(FlattenV2) | .OP_END_FACTORY_REG(FlattenV2) | ||||
REG_OP(DeConvTrans) | |||||
.INPUT(x, TensorType({DT_INT8})) | |||||
.OUTPUT(y, TensorType({DT_INT8})) | |||||
.OP_END_FACTORY_REG(DeConvTrans) | |||||
/** | /** | ||||
*@brief Compress large weight to small one. Usually inserted before Conv2d. | *@brief Compress large weight to small one. Usually inserted before Conv2d. | ||||
* | * | ||||
@@ -19,7 +19,7 @@ | |||||
#include <stdint.h> | #include <stdint.h> | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -580,7 +580,8 @@ RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *d | |||||
* @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
*/ | */ | ||||
RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); | RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
} | } | ||||
#endif | #endif | ||||
@@ -19,7 +19,7 @@ | |||||
#include "base.h" | #include "base.h" | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -185,7 +185,7 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType); | |||||
*/ | */ | ||||
RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); | RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
} | } | ||||
#endif | #endif | ||||
@@ -19,7 +19,7 @@ | |||||
#include "base.h" | #include "base.h" | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -149,7 +149,7 @@ RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t* groupInfo, uint | |||||
*/ | */ | ||||
RTS_API rtError_t rtGetGroupCount(uint32_t *count); | RTS_API rtError_t rtGetGroupCount(uint32_t *count); | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
} | } | ||||
#endif | #endif | ||||
@@ -19,7 +19,7 @@ | |||||
#include "base.h" | #include "base.h" | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -339,7 +339,7 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 | |||||
* @return RT_ERROR_NONE for ok | * @return RT_ERROR_NONE for ok | ||||
*/ | */ | ||||
RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); | RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
} | } | ||||
#endif | #endif | ||||
@@ -19,7 +19,7 @@ | |||||
#include "base.h" | #include "base.h" | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -56,7 +56,7 @@ RTS_API rtError_t rtUnsetDvfsProfile(); | |||||
*/ | */ | ||||
RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); | RTS_API rtError_t rtGetDvfsProfile(DvfsProfileMode *pmode); | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
} | } | ||||
#endif | #endif | ||||
@@ -19,7 +19,7 @@ | |||||
#include "base.h" | #include "base.h" | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -229,7 +229,7 @@ RTS_API rtError_t rtNotifyGetAddrOffset(rtNotify_t notify, uint64_t *devAddrOffs | |||||
*/ | */ | ||||
RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); | RTS_API rtError_t rtSetIpcNotifyPid(const char *name, int32_t pid[], int num); | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
} | } | ||||
#endif | #endif | ||||
@@ -20,7 +20,7 @@ | |||||
#include "base.h" | #include "base.h" | ||||
#include "stream.h" | #include "stream.h" | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -529,7 +529,7 @@ RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); | |||||
* @return RT_ERROR_INVALID_VALUE for error input | * @return RT_ERROR_INVALID_VALUE for error input | ||||
*/ | */ | ||||
RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum); | RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum); | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
} | } | ||||
#endif | #endif | ||||
@@ -24,7 +24,7 @@ | |||||
#include "config.h" | #include "config.h" | ||||
#include "stream.h" | #include "stream.h" | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -491,7 +491,7 @@ RTS_API rtError_t rtSetIpcMemPid(const char *name, int32_t pid[], int num); | |||||
*/ | */ | ||||
RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream); | RTS_API rtError_t rtRDMADBSend(uint32_t dbIndex, uint64_t dbInfo, rtStream_t stream); | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
} | } | ||||
#endif | #endif | ||||
@@ -19,7 +19,7 @@ | |||||
#include "base.h" | #include "base.h" | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -430,7 +430,7 @@ rtError_t rtDebugRegister(rtModel_t model, uint32_t flag, const void *addr, uint | |||||
*/ | */ | ||||
RTS_API rtError_t rtDebugUnRegister(rtModel_t model); | RTS_API rtError_t rtDebugUnRegister(rtModel_t model); | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
} | } | ||||
#endif | #endif | ||||
@@ -20,7 +20,7 @@ | |||||
#include "base.h" | #include "base.h" | ||||
#include "event.h" | #include "event.h" | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
extern "C" { | extern "C" { | ||||
#endif | #endif | ||||
@@ -188,7 +188,7 @@ RTS_API rtError_t rtStreamActive(rtStream_t active_stream, rtStream_t stream); | |||||
*/ | */ | ||||
RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr, | RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr, | ||||
uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType); | uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType); | ||||
#ifdef __cplusplus | |||||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||||
} | } | ||||
#endif | #endif | ||||
@@ -1,12 +1,18 @@ | |||||
/** | /** | ||||
* @file adx_datadump_server.h | |||||
* | |||||
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. | |||||
* | |||||
* This program is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
*/ | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef ADX_DATADUMP_SERVER_H | #ifndef ADX_DATADUMP_SERVER_H | ||||
#define ADX_DATADUMP_SERVER_H | #define ADX_DATADUMP_SERVER_H | ||||