Merge pull request !1938 from mindspore_ding/code_sync_0705tags/v1.3.0
@@ -161,6 +161,7 @@ Status ExceptionDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> &ex | |||
uint64_t proto_size = dump_data.ByteSizeLong(); | |||
std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]); | |||
GE_CHECK_NOTNULL(proto_msg); | |||
bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size); | |||
if (!ret || proto_size == 0) { | |||
REPORT_INNER_ERROR("E19999", "Serialize proto to string fail"); | |||
@@ -22,6 +22,7 @@ | |||
#include "graph/load/graph_loader.h" | |||
#include "init/gelib.h" | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "model/ge_model.h" | |||
namespace { | |||
const uint32_t kDeviceListIndex = 3; | |||
@@ -42,6 +43,10 @@ const std::map<ProfCommandHandleType, std::string> kProfCommandTypeMap = { | |||
{kProfCommandhandleFinalize, kProfilingFinalize}, | |||
{kProfCommandhandleModelSubscribe, kProfModelSubscribe}, | |||
{kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}}; | |||
const uint64_t kModelId = ge::INVALID_MODEL_ID; | |||
const uint16_t kStepStart = 0; | |||
const uint16_t kStepEnd = 1; | |||
} // namespace | |||
bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector<string> &prof_config_params) { | |||
@@ -216,6 +221,36 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le | |||
return ge::SUCCESS; | |||
} | |||
GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream) { | |||
return ge::SUCCESS; | |||
///
/// @brief Report one half (start or end) of a profiling step for the current device.
///        Calls must alternate: a kStepStart tag first, then a kStepEnd tag, then kStepStart again, and so on.
/// @param [in] index_id: step index forwarded to ProfilingManager::ProfileStepInfo
/// @param [in] tag_id: kStepStart (0) or kStepEnd (1)
/// @param [in] stream: stream forwarded to ProfilingManager::ProfileStepInfo
/// @return ge::SUCCESS on success; ge::FAILED if the device id cannot be queried or tag_id is out of sequence;
///         otherwise the failing status of ProfileStepInfo.
///
ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream) {
  // true: the next accepted tag is kStepStart; false: the next accepted tag is kStepEnd.
  static bool is_first_run = true;

  int32_t device_id = 0;
  rtError_t rt_ret = rtGetDevice(&device_id);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(rt_ret, "[Get][LogicDeviceId]Failed, ret 0x%X", rt_ret);
    REPORT_CALL_ERROR("E19999", "Get logic device id failed, ret 0x%X", rt_ret);
    return ge::FAILED;
  }

  // Reject any tag that does not match the half of the step currently expected.
  const bool expect_start = is_first_run;
  const uint16_t expected_tag = expect_start ? kStepStart : kStepEnd;
  if (tag_id != expected_tag) {
    GELOGE(ge::FAILED, "Param tag_id:%u invalid when is_first_run is %d", tag_id, is_first_run);
    REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"value", "parameter", "reason"}),
                       std::vector<std::string>({std::to_string(tag_id), "tag_id",
                                                 "tag id must be 0 when first run, must be 1 when second run"}));
    return ge::FAILED;
  }

  // The step is always reported against kModelId (ge::INVALID_MODEL_ID).
  GE_CHK_STATUS_RET_NOLOG(
      ge::ProfilingManager::Instance().ProfileStepInfo(index_id, kModelId, tag_id, stream, device_id));

  // Only flip the expectation once the step info has been reported successfully.
  is_first_run = !expect_start;
  return ge::SUCCESS;
}
@@ -13,15 +13,15 @@ | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "host_cpu_engine.h" | |||
#include "graph/common/omg_util.h" | |||
#include "ge_local_engine/engine/host_cpu_engine.h" | |||
#include "graph/utils/op_desc_utils.h" | |||
#include "graph/utils/tensor_adapter.h" | |||
#include "graph/utils/node_utils.h" | |||
#include "graph/utils/type_utils.h" | |||
#include "register/op_kernel_registry.h" | |||
#include "register/host_cpu_context.h" | |||
#include "common/ge/ge_util.h" | |||
#include "common/ge/plugin_manager.h" | |||
#include "graph/utils/type_utils.h" | |||
#include "common/fp16_t.h" | |||
#include "common/math/math_util.h" | |||
@@ -123,10 +123,7 @@ bool HostCpuEngine::CheckSupported(const string &op_type) { | |||
} | |||
Status HostCpuEngine::FindOpKernel(const ge::NodePtr &node, std::unique_ptr<HostCpuOp> &op_kernel) { | |||
std::string op_type; | |||
auto status = GetOriginalType(node, op_type); | |||
GE_CHK_BOOL_EXEC_NOLOG(status == SUCCESS, return status); | |||
const std::string op_type = NodeUtils::GetNodeType(node); | |||
auto kernel = OpKernelRegistry::GetInstance().CreateHostCpuOp(op_type); | |||
if (kernel == nullptr) { | |||
GELOGD("Op of type %s is not supported by host cpu engine", op_type.c_str()); | |||
@@ -85,7 +85,7 @@ bool LabelGotoTask::Distribute() { | |||
return false; | |||
} | |||
rt_ret = rtLabelListCpy((void**)label_list.data(), label_list.size(), label_info_, label_info_size); | |||
rt_ret = rtLabelListCpy(reinterpret_cast<void**>(label_list.data()), label_list.size(), label_info_, label_info_size); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret); | |||
return false; | |||
@@ -707,7 +707,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | |||
if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) { | |||
GE_CHECK_NOTNULL(kernel_buffer.GetData()); | |||
std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize()); | |||
tbe_kernel = std::make_shared<OpKernelBin>(kernel_name, std::move(data)); | |||
tbe_kernel = MakeShared<OpKernelBin>(kernel_name, std::move(data)); | |||
GE_CHECK_NOTNULL(tbe_kernel); | |||
GELOGI("Node [%s][%s] start recovery extra attr %s from %s", node_op_desc->GetName().c_str(), | |||
node_op_desc->GetType().c_str(), ge::OP_EXTATTR_NAME_TBE_KERNEL, ATTR_NAME_TBE_KERNEL_NAME.c_str()); | |||
@@ -793,7 +793,6 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||
GELOGI("Start AutoFindBpOpIndex"); | |||
NodePtr bp_node = nullptr; | |||
uint32_t current_idx = 0; | |||
uint32_t netoutput_idx = 0; | |||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||
OpDescPtr op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
@@ -811,7 +810,6 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | |||
if (bp_node == nullptr) { | |||
bp_node = node; | |||
netoutput_idx = current_idx - 1; | |||
} | |||
} | |||
if (graph->GetNeedIteration()) { | |||
@@ -836,34 +834,30 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||
if (bp_node == nullptr) { | |||
GELOGW("not find bp_node."); | |||
return SUCCESS; | |||
} else if (bp_node->GetName() == NODE_NAME_NET_OUTPUT) { | |||
profiling_point.bp_index = netoutput_idx; | |||
GELOGI("First bp name %s, idx %u", bp_node->GetName().c_str(), netoutput_idx); | |||
} else { | |||
profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); | |||
} | |||
return SUCCESS; | |||
return FindLastBpFromBpNode(graph, bp_node, profiling_point.bp_index); | |||
} | |||
uint32_t TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node) const { | |||
uint32_t last_bp = 0; | |||
Status TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &target_node, | |||
uint32_t &bp_index) const { | |||
bp_index = 0; | |||
auto target_desc = target_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(target_desc); | |||
OpDescPtr bp_op_desc = nullptr; | |||
for (auto &in_anchor : bp_node->GetAllInDataAnchors()) { | |||
auto out_anchor = in_anchor->GetPeerOutAnchor(); | |||
if (out_anchor == nullptr || out_anchor->GetOwnerNode() == nullptr) { | |||
continue; | |||
} | |||
auto out_node_desc = out_anchor->GetOwnerNode()->GetOpDesc(); | |||
GE_CHECK_NOTNULL(out_node_desc); | |||
if (bp_op_desc == nullptr || ((out_node_desc->GetId()) > (bp_op_desc->GetId()))) { | |||
bp_op_desc = out_node_desc; | |||
for (auto &in_node : target_node->GetInAllNodes()) { | |||
GE_CHECK_NOTNULL(in_node); | |||
auto in_node_desc = in_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(in_node_desc); | |||
if ((bp_op_desc == nullptr || (in_node_desc->GetId() > bp_op_desc->GetId())) && | |||
(in_node_desc->GetStreamId() == target_desc->GetStreamId())){ | |||
bp_op_desc = in_node_desc; | |||
} | |||
GELOGI("bp_op_desc is %s, id is %ld", bp_op_desc->GetName().c_str(), bp_op_desc->GetId()); | |||
} | |||
if (bp_op_desc == nullptr) { | |||
return last_bp; | |||
GELOGI("Did not find bp node."); | |||
return SUCCESS; | |||
} | |||
uint32_t current_idx = 0; | |||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||
@@ -871,12 +865,14 @@ uint32_t TaskGenerator::FindLastBpFromBpNode(const ComputeGraphPtr &graph, const | |||
GE_CHECK_NOTNULL(op_desc); | |||
current_idx++; | |||
if (op_desc->GetName() == bp_op_desc->GetName()) { | |||
last_bp = current_idx; | |||
GELOGI("First bp name %s, idx %u", op_desc->GetName().c_str(), last_bp); | |||
bp_index = current_idx; | |||
GELOGI("Find bp name %s, idx %u", op_desc->GetName().c_str(), bp_index); | |||
break; | |||
} | |||
} | |||
return last_bp; | |||
GELOGI("Last bp node[%s], type[%s], index[%u], stream id[%ld]", bp_op_desc->GetName().c_str(), | |||
bp_op_desc->GetType().c_str(), bp_index, bp_op_desc->GetStreamId()); | |||
return SUCCESS; | |||
} | |||
Status TaskGenerator::FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | |||
@@ -116,7 +116,7 @@ class TaskGenerator { | |||
Status AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point) const; | |||
Status AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | |||
vector<uint32_t> &all_reduce_nodes) const; | |||
uint32_t FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node) const; | |||
Status FindLastBpFromBpNode(const ComputeGraphPtr &graph, const NodePtr &bp_node, uint32_t &bp_index) const; | |||
Status FindFpOfEnv(const ComputeGraphPtr &graph, const std::string &fp_point_str, | |||
ProfilingPoint &profiling_point) const; | |||
@@ -1378,7 +1378,9 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_ | |||
Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { | |||
GELOGD("Aicpu kernel launch task in, kernel name %s.", kernel_name.c_str()); | |||
std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | |||
if (cust_aicpu_so_.size() == 0) return SUCCESS; | |||
if (cust_aicpu_so_.empty()) { | |||
return SUCCESS; | |||
} | |||
// get current context | |||
rtContext_t rt_cur_ctx = nullptr; | |||
auto rt_error = rtCtxGetCurrent(&rt_cur_ctx); | |||
@@ -1394,9 +1396,19 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { | |||
return SUCCESS; | |||
} | |||
rtStream_t stream = nullptr; | |||
vector<void *> allocated_mem; | |||
std::function<void()> callback = [&]() { | |||
for (auto mem : allocated_mem) { | |||
GE_CHK_RT(rtFree(mem)); | |||
} | |||
if (stream != nullptr) { | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
} | |||
}; | |||
GE_MAKE_GUARD(release, callback); | |||
rtError_t status; | |||
rtStream_t stream = nullptr; | |||
vector<CustAicpuSoBuf> v_cust_so; | |||
void *args = nullptr; | |||
@@ -1471,13 +1483,6 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { | |||
GELOGE(RT_FAILED, "[Call][RtStreamSynchronize] fail, ret = 0x%X", status); | |||
return RT_ERROR_TO_GE_STATUS(status); | |||
} | |||
std::function<void()> callback = [&]() { | |||
for (auto mem : allocated_mem) { | |||
GE_CHK_RT(rtFree(mem)); | |||
} | |||
GE_CHK_RT(rtStreamDestroy(stream)); | |||
}; | |||
GE_MAKE_GUARD(release, callback); | |||
GELOGI("Cpu kernel launch task success."); | |||
return SUCCESS; | |||
} | |||
@@ -645,6 +645,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
GE_CHECK_NOTNULL(op_desc); | |||
args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||
GE_CHECK_NOTNULL(args_addr); | |||
errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | |||
if (sec_ret != EOK) { | |||
REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%u, ret:0x%X", args_size_, sec_ret); | |||
@@ -1000,6 +1001,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
// copy args to new host memory | |||
args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||
GE_CHECK_NOTNULL(args_addr); | |||
GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | |||
errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | |||
if (sec_ret != EOK) { | |||
@@ -3139,10 +3139,10 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
} | |||
// Avoid repeated prerun for graphs that share the same graph_id in online inference concurrency | |||
if (count > 1 && graph_node->GetBuildFlag()) { | |||
graph_node->Lock(); | |||
GELOGD("Avoid repeatively prerun, graph_id:%u.", args.graph_id); | |||
// In online inference concurrency scenario, graph_node is allowed to be locked for 'count' times | |||
graph_node->SetSemSize(count); | |||
graph_node->Lock(); | |||
graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context, | |||
args.input_tensor, graph_node->GetGeRootModel(), GetThreadLocalContext(), args.callback })); | |||
GELOGI("[PreRunThread] Loop end. Start to run with cached build model."); | |||
@@ -284,9 +284,6 @@ Status DynamicShapePartitioner::InitClusters() { | |||
auto cluster = MakeShared<Cluster>(rank++, type, node, this); | |||
REQUIRE_NOT_NULL(cluster, "[New][Memory] for cluster failed."); | |||
node_2_cluster_[node] = cluster; | |||
if (cluster->IsUnknownShape()) { | |||
ordered_cluster_.push_back(cluster); | |||
} | |||
int64_t group_index = -1; | |||
if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index)) { | |||
@@ -306,7 +303,7 @@ Status DynamicShapePartitioner::InitClusters() { | |||
return SUCCESS; | |||
} | |||
Status DynamicShapePartitioner::TopologicalSortClusters() { | |||
Status DynamicShapePartitioner::TopologicalSortClusters(const OrderedFilter &ordered_filter) { | |||
ordered_cluster_.clear(); | |||
// BFS topological sort clusters for known shape cluster | |||
std::queue<ClusterPtr> ready_clusters; | |||
@@ -331,7 +328,7 @@ Status DynamicShapePartitioner::TopologicalSortClusters() { | |||
auto cluster = ready_clusters.front(); | |||
ready_clusters.pop(); | |||
cluster->UpdateRank(rank++); | |||
if (cluster->IsKnownShape() || cluster->IsInputNode()) { | |||
if (ordered_filter == nullptr || ordered_filter(cluster)) { | |||
ordered_cluster_.push_back(cluster); | |||
} | |||
for (const auto &out_cluster : cluster->Outputs()) { | |||
@@ -378,7 +375,6 @@ void DynamicShapePartitioner::MergeClustersControlFlow() { | |||
continue; | |||
} | |||
bool is_unknown_cluster = cluster->IsUnknownShape(); | |||
for (++rit; rit != control_cluster.rend(); ++rit) { | |||
const auto &cluster_from = *rit; | |||
if (all_merged_clusters.count(cluster_from) > 0) { | |||
@@ -395,11 +391,6 @@ void DynamicShapePartitioner::MergeClustersControlFlow() { | |||
} | |||
} | |||
} | |||
if (!is_unknown_cluster && cluster->IsUnknownShape()) { | |||
GELOGD("Add to ordered cluster: %s", cluster->DebugString().c_str()); | |||
ordered_cluster_.push_back(cluster); | |||
} | |||
} | |||
} | |||
@@ -475,9 +466,19 @@ void DynamicShapePartitioner::MergeClustersInputData() { | |||
} | |||
Status DynamicShapePartitioner::MergeClusters() { | |||
const auto filter_known = [](const ClusterPtr &cluster) { | |||
return cluster->IsKnownShape() || cluster->IsInputNode(); | |||
}; | |||
const auto filter_unknown = [](const ClusterPtr &cluster) { | |||
return cluster->IsUnknownShape(); | |||
}; | |||
MergeClustersControlFlow(); | |||
REQUIRE_SUCCESS(TopologicalSortClusters(filter_unknown), | |||
"[TopologicalSort][Clusters] after merge control flow clusters failed."); | |||
MergeClustersUnknownShape(); | |||
REQUIRE_SUCCESS(TopologicalSortClusters(), "[TopologicalSort][Clusters] after merge unknown shape clusters failed."); | |||
REQUIRE_SUCCESS(TopologicalSortClusters(filter_known), | |||
"[TopologicalSort][Clusters] after merge unknown shape clusters failed."); | |||
MergeClustersKnownShape(); | |||
MergeClustersInputData(); | |||
return SUCCESS; | |||
@@ -111,6 +111,8 @@ class DynamicShapePartitioner { | |||
Status Partition(); | |||
using OrderedFilter = std::function<bool(const std::shared_ptr<Cluster> &cluster)>; | |||
private: | |||
Status PartitionImpl(); | |||
// Collect nodes that satisfy the unknown shape rules: | |||
@@ -138,7 +140,7 @@ class DynamicShapePartitioner { | |||
// Merge clusters step3 | |||
void MergeClustersInputData(); | |||
// Topological sort clusters after merge unknown shape clusters. | |||
Status TopologicalSortClusters(); | |||
Status TopologicalSortClusters(const OrderedFilter &ordered_filter); | |||
// Deduplicate merged clusters | |||
void PruneUniqueClusters(); | |||
// Establish the input-output anchors for each partition of the cluster and record links to other clusters | |||
@@ -16,8 +16,6 @@ | |||
#include "mark_force_unknown_for_cond_pass.h" | |||
#include <queue> | |||
#include "graph/utils/node_utils.h" | |||
#include "graph/common/omg_util.h" | |||
@@ -26,17 +24,7 @@ namespace { | |||
inline bool IsMergeInLoop(const NodePtr &node) { | |||
const static std::set<std::string> kLoopMergeInputs{ ENTER, REFENTER, NEXTITERATION, REFNEXTITERATION }; | |||
std::string node_type; | |||
(void)GetOriginalType(node, node_type); | |||
return kLoopMergeInputs.count(node_type) > 0; | |||
} | |||
inline bool IsSwitchInLoop(const NodePtr &node) { | |||
const static std::set<std::string> kLoopSwitchInputs{ MERGE, REFMERGE, LOOPCOND }; | |||
std::string node_type; | |||
(void)GetOriginalType(node, node_type); | |||
return kLoopSwitchInputs.count(node_type) > 0; | |||
return kLoopMergeInputs.count(NodeUtils::GetNodeType(node)) > 0; | |||
} | |||
} | |||
@@ -44,10 +32,7 @@ Status MarkForceUnknownForCondPass::Run(ComputeGraphPtr graph) { | |||
GELOGD("MarkForceUnknownForCondPass Enter"); | |||
std::map<NodePtr, std::vector<NodePtr>> switch_groups; | |||
for (const auto &node : graph->GetDirectNode()) { | |||
std::string node_type; | |||
GE_CHK_STATUS_RET(GetOriginalType(node, node_type), | |||
"[Get][OriginalType] of node in graph:%s failed.", graph->GetName().c_str()); | |||
if (kMergeOpTypes.count(node_type) == 0) { | |||
if (kMergeOpTypes.count(NodeUtils::GetNodeType(node)) == 0) { | |||
continue; | |||
} | |||
@@ -65,6 +50,51 @@ Status MarkForceUnknownForCondPass::Run(ComputeGraphPtr graph) { | |||
} | |||
/// | |||
/// @brief Deal with Switch node for LoopCond | |||
/// @param [in] Switch node | |||
/// @param [in] dest span | |||
/// @param [out] Search queue | |||
/// @return true: Switch In while loop / false: Not in while Loop. | |||
/// | |||
bool MarkForceUnknownForCondPass::DealAsLoopSwitch(const NodePtr &node, uint32_t dst_span, | |||
std::queue<std::pair<NodePtr, uint32_t>> &search_queue) { | |||
/// LoopCond --->\. | |||
/// \. | |||
/// Enter-----------+ \. | |||
/// +--> Merge --> Switch --> Exit | |||
/// NextIteration---+ | |||
const auto is_loop_op = [](const NodePtr &n) { | |||
return NodeUtils::GetNodeType(n) == LOOPCOND; | |||
}; | |||
const auto is_exit_op = [](const NodePtr &n) { | |||
return kExitOpTypes.count(NodeUtils::GetNodeType(n)) > 0; | |||
}; | |||
const auto src_nodes = node->GetInAllNodes(); | |||
const auto dst_nodes = node->GetOutAllNodes(); | |||
if (std::none_of(src_nodes.begin(), src_nodes.end(), is_loop_op) && | |||
std::none_of(dst_nodes.begin(), dst_nodes.end(), is_exit_op)) { | |||
return false; | |||
} | |||
for (const auto &m : src_nodes) { | |||
if (kMergeOpTypes.count(NodeUtils::GetNodeType(m)) > 0) { | |||
for (const auto &n : m->GetInAllNodes()) { | |||
if (kNextIterationOpTypes.count(NodeUtils::GetNodeType(n)) > 0) { | |||
continue; | |||
} | |||
search_queue.push({n, dst_span}); | |||
GELOGD("Travel in Loop: %s <-- %s <-- %s, span is: %u", node->GetName().c_str(), m->GetName().c_str(), | |||
n->GetName().c_str(), dst_span); | |||
} | |||
} | |||
} | |||
return true; | |||
} | |||
/// | |||
/// @brief Mark force unknown shape for Switch node | |||
/// @param [in] merge node | |||
/// @param [out] switch group | |||
@@ -72,6 +102,7 @@ Status MarkForceUnknownForCondPass::Run(ComputeGraphPtr graph) { | |||
/// | |||
void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std::vector<NodePtr> &switch_group) { | |||
// Switch --> {Switch --> Merge} --> Merge | |||
GELOGD("Search Switch node for Merge: %s", node->GetName().c_str()); | |||
std::unordered_set<NodePtr> nodes_seen; | |||
std::queue<std::pair<NodePtr, uint32_t>> search_queue({{node, 0}}); | |||
while (!search_queue.empty()) { | |||
@@ -79,43 +110,25 @@ void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std: | |||
const auto dst_span = search_queue.front().second; | |||
search_queue.pop(); | |||
// Switch --> Identity --> Constant | |||
for (const auto &in_node : dst_node->GetInControlNodes()) { | |||
if (nodes_seen.count(in_node) > 0) { | |||
GELOGD("Travel node: %s, Skip already seen node: %s", dst_node->GetName().c_str(), in_node->GetName().c_str()); | |||
continue; | |||
} | |||
nodes_seen.insert(in_node); | |||
if (in_node->GetType() == IDENTITY) { | |||
GELOGD("Travel node: %s, In control: %s, span is: %u", dst_node->GetName().c_str(), | |||
in_node->GetName().c_str(), dst_span); | |||
search_queue.push({in_node, dst_span}); | |||
} | |||
} | |||
for (const auto &in_node : dst_node->GetInDataNodes()) { | |||
for (const auto &in_node : dst_node->GetInAllNodes()) { | |||
if (nodes_seen.count(in_node) > 0) { | |||
GELOGD("Travel node: %s, Skip already seen node: %s", dst_node->GetName().c_str(), in_node->GetName().c_str()); | |||
continue; | |||
} | |||
nodes_seen.insert(in_node); | |||
std::string node_type; | |||
(void)GetOriginalType(in_node, node_type); | |||
const std::string node_type = NodeUtils::GetNodeType(in_node); | |||
GELOGD("Travel node: %s, %s node: %s, span is: %u", dst_node->GetName().c_str(), node_type.c_str(), | |||
in_node->GetName().c_str(), dst_span); | |||
if (kSwitchOpTypes.count(node_type) > 0) { // Switch input node. | |||
if (DealAsLoopSwitch(in_node, dst_span, search_queue)) { | |||
continue; | |||
} | |||
if (dst_span > 0) { | |||
search_queue.push({in_node, dst_span - 1}); | |||
} else { | |||
const auto &all_in_nodes = in_node->GetInDataNodes(); | |||
if (std::any_of(all_in_nodes.begin(), all_in_nodes.end(), IsSwitchInLoop)) { | |||
GELOGW("Travel node: %s, %s node: %s, Skip LoopCond switch", dst_node->GetName().c_str(), node_type.c_str(), | |||
in_node->GetName().c_str()); | |||
} else { | |||
switch_group.emplace_back(in_node); | |||
} | |||
switch_group.emplace_back(in_node); | |||
} | |||
} else if (kMergeOpTypes.count(node_type) > 0) { // Merge input node. | |||
search_queue.push({in_node, dst_span + 1}); | |||
@@ -19,6 +19,8 @@ | |||
#include "inc/graph_pass.h" | |||
#include <queue> | |||
namespace ge { | |||
class MarkForceUnknownForCondPass : public GraphPass { | |||
public: | |||
@@ -26,6 +28,15 @@ class MarkForceUnknownForCondPass : public GraphPass { | |||
private: | |||
/// | |||
/// @brief Deal with Switch node for LoopCond | |||
/// @param [in] Switch node | |||
/// @param [in] dest span | |||
/// @param [out] Search queue | |||
/// @return true: Switch In while loop / false: Not in while Loop. | |||
/// | |||
bool DealAsLoopSwitch(const NodePtr &node, uint32_t dst_span, std::queue<std::pair<NodePtr, uint32_t>> &search_queue); | |||
/// | |||
/// @brief Mark force unknown shape for Switch node | |||
/// @param [in] merge node | |||
/// @param [out] switch group | |||
@@ -24,7 +24,9 @@ using std::string; | |||
namespace ge { | |||
namespace { | |||
const int64_t kLoopType = 1; | |||
constexpr int64_t kLoopType = 1; | |||
constexpr uint8_t kMaxTransOp = 3; | |||
constexpr uint8_t kTransOpIoSize = 1; | |||
} | |||
Status NextIterationPass::Run(ComputeGraphPtr graph) { | |||
@@ -287,18 +289,25 @@ void NextIterationPass::HandleSwitchExitNodes(const LoopCondGroup &loop_group, i | |||
std::string node_type; | |||
for (const auto &switch_node : loop_group.switch_nodes) { | |||
SetControlFlowGroup(switch_node, group_index); | |||
for (const auto &node : switch_node->GetOutDataNodes()) { | |||
(void)GetOriginalType(node, node_type); | |||
if (kExitOpTypes.count(node_type) > 0) { | |||
SetControlFlowGroup(node, group_index); | |||
} else { | |||
// For: Switch -> Cast -> Exit | |||
for (const auto &n : node->GetOutDataNodes()) { | |||
(void)GetOriginalType(n, node_type); | |||
if (kExitOpTypes.count(node_type) > 0) { | |||
SetControlFlowGroup(n, group_index); | |||
} | |||
for (auto node : switch_node->GetOutDataNodes()) { | |||
// Switch --> Exit | |||
// Switch --> Cast --> Exit | |||
// Switch --> TransData --> Cast --> Exit | |||
for (uint8_t i = 0; i < kMaxTransOp; ++i) { | |||
if (node->GetInDataNodes().size() != kTransOpIoSize || node->GetAllOutDataAnchorsSize() != kTransOpIoSize) { | |||
break; | |||
} | |||
if (kExitOpTypes.count(NodeUtils::GetNodeType(node)) > 0) { | |||
SetControlFlowGroup(node, group_index); | |||
break; | |||
} | |||
const auto &all_nodes = node->GetOutAllNodes(); | |||
if (all_nodes.size() != kTransOpIoSize) { | |||
break; | |||
} | |||
node = all_nodes.at(0); | |||
} | |||
} | |||
} | |||
@@ -15,7 +15,7 @@ | |||
*/ | |||
#include "graph/passes/parallel_group_pass.h" | |||
#include <queue> | |||
#include "framework/common/debug/ge_log.h" | |||
#include "common/ge/ge_util.h" | |||
#include "framework/common/ge_inner_error_codes.h" | |||
@@ -299,24 +299,19 @@ Status ParallelGroupPass::ReplaceWithSwitchAndMerge(NodePtr pre_node, NodePtr cu | |||
for (const auto &switch_node : cur_itr->second.first) { | |||
int64_t pre_id = pre_node->GetOpDesc()->GetId(); | |||
int64_t switch_id = switch_node->GetOpDesc()->GetId(); | |||
// avoid ring | |||
if (pre_id > switch_id) { | |||
auto merge_node = cur_itr->second.second; | |||
if (AddCtrlEdge(merge_node, pre_node) != SUCCESS) { | |||
GELOGE(FAILED, "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
pre_node->GetName().c_str(), switch_node->GetName().c_str()); | |||
REPORT_CALL_ERROR("E19999", "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
pre_node->GetName().c_str(), switch_node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
} else { | |||
if (AddCtrlEdge(pre_node, switch_node) != SUCCESS) { | |||
GELOGE(FAILED, "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
pre_node->GetName().c_str(), switch_node->GetName().c_str()); | |||
REPORT_CALL_ERROR("E19999", "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
pre_node->GetName().c_str(), switch_node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
NodePtr first_node = pre_node; | |||
NodePtr second_node = switch_node; | |||
if (pre_id > switch_id && IsIndirectConnect(switch_node, pre_node)) { | |||
// avoid ring, merge->pre_node | |||
first_node = cur_itr->second.second; | |||
second_node = pre_node; | |||
} | |||
if (AddCtrlEdge(first_node, second_node) != SUCCESS) { | |||
GELOGE(FAILED, "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
first_node->GetName().c_str(), second_node->GetName().c_str()); | |||
REPORT_CALL_ERROR("E19999", "[AddEdge][Node]Add edge for nodes: %s->%s failed.", | |||
first_node->GetName().c_str(), second_node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
} | |||
} else { | |||
@@ -345,4 +340,29 @@ bool ParallelGroupPass::IsWhileStreamSwitch(OpDescPtr switch_op_desc) { | |||
return (AttrUtils::GetInt(switch_op_desc, ATTR_NAME_STREAM_SWITCH_TYPE, stream_switch_type) && | |||
stream_switch_type == kLoopType); | |||
} | |||
bool ParallelGroupPass::IsIndirectConnect(const NodePtr &node_a, const NodePtr &node_b) { | |||
if (node_a == nullptr || node_b == nullptr) { | |||
GELOGW("node_a or node_b is nullptr."); | |||
return false; | |||
} | |||
int64_t end_id = node_b->GetOpDesc()->GetId(); | |||
std::queue<NodePtr> nodes; | |||
nodes.push(node_a); | |||
while (!nodes.empty()) { | |||
NodePtr tmp_node = nodes.front(); | |||
nodes.pop(); | |||
if (tmp_node == nullptr || tmp_node->GetOpDesc() == nullptr || | |||
tmp_node->GetOpDesc()->GetId() > end_id) { | |||
continue; | |||
} | |||
if (tmp_node == node_b) { | |||
return true; | |||
} | |||
for (const auto &out_node : tmp_node->GetOutAllNodes()) { | |||
nodes.push(out_node); | |||
} | |||
} | |||
return false; | |||
} | |||
} // namespace ge |
@@ -48,6 +48,7 @@ class ParallelGroupPass : public GraphPass { | |||
bool IsBigSmallLoopStreamSwitch(OpDescPtr switch_op_desc); | |||
bool IsWhileStreamSwitch(OpDescPtr switch_op_desc); | |||
bool IsIndirectConnect(const NodePtr &node_a, const NodePtr &node_b); | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_PASSES_PARALLEL_GROUP_PASS_H |
@@ -395,8 +395,9 @@ NodePtr SwitchToStreamSwitchPass::CreateStreamSwitchNode(const ComputeGraphPtr & | |||
peer_cond_anchor->GetOwnerNode()->GetName().c_str(), stream_switch->GetName().c_str()); | |||
int64_t group_index = -1; | |||
(void)AttrUtils::GetInt(switch_node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index); | |||
SetControlFlowGroup(stream_switch, group_index); | |||
if (AttrUtils::GetInt(switch_node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index)) { | |||
SetControlFlowGroup(stream_switch, group_index); | |||
} | |||
return stream_switch; | |||
} | |||
@@ -568,6 +568,7 @@ Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std: | |||
} | |||
std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams()); | |||
GE_CHECK_NOTNULL(aipp_params); | |||
ge::GeAttrValue::NAMED_ATTRS aipp_attr; | |||
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, | |||
"[Get][Attr] %s from op:%s failed", ATTR_NAME_AIPP.c_str(), data_op->GetName().c_str()); | |||
@@ -1206,7 +1206,7 @@ Status MultiBatchGraphCopyer::CheckCopyResult(const std::vector<NodePtr> &start_ | |||
auto dims = NodeUtils::GetOutputDesc(*node, kDataOutIndex).GetShape().GetDims(); | |||
if (!IsAllDimsPositive(dims)) { | |||
REPORT_CALL_ERROR("E19999", "Failed to copy multi batch graph, the node %s still has unknown shape %s", | |||
node->GetName().c_str(), formats::ShapeToString(dims).c_str()); | |||
node->GetName().c_str(), formats::ShapeToString(dims).c_str()); | |||
GELOGE(INTERNAL_ERROR, "[Check][Param] Failed to copy multi batch graph, the node %s still has unknown shape %s", | |||
node->GetName().c_str(), formats::ShapeToString(dims).c_str()); | |||
return INTERNAL_ERROR; | |||
@@ -45,6 +45,7 @@ Status FillKernel::Compute(const ge::OpDescPtr op_desc_ptr, const std::vector<ge | |||
GELOGE(PARAM_INVALID, "Parameter's invalid, Input opDescPtr is nullptr."); | |||
return PARAM_INVALID; | |||
} | |||
GELOGD("FillKernel in, name: %s.", op_desc_ptr->GetName().c_str()); | |||
GE_CHECK_NOTNULL(input.at(kFillDimsInputIndex)); | |||
GE_CHECK_NOTNULL(input.at(kFillDataInputIndex)); | |||
@@ -57,6 +58,13 @@ Status FillKernel::Compute(const ge::OpDescPtr op_desc_ptr, const std::vector<ge | |||
return NOT_CHANGED; | |||
} | |||
auto output_desc = op_desc_ptr->GetOutputDescPtr(0); | |||
GE_CHECK_NOTNULL(output_desc); | |||
if (output_desc->GetShape().IsUnknownShape()) { | |||
GELOGD("Output is unknown shape, [%s] skip FillKernel.", op_desc_ptr->GetName().c_str()); | |||
return NOT_CHANGED; | |||
} | |||
GeTensorPtr output_ptr; | |||
output_ptr = MakeShared<GeTensor>(op_desc_ptr->GetOutputDesc(0)); | |||
if (output_ptr == nullptr) { | |||
@@ -297,13 +297,15 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy | |||
} | |||
} | |||
tensor_desc->SetShape(shape); | |||
args.input_desc[input_index] = tensor_desc; | |||
GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str()); | |||
GELOGD("Update shape[%s] of input[%zu] to [%s]", | |||
shape.ToString().c_str(), input_index, tensor_desc->MutableShape().ToString().c_str()); | |||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size), | |||
"[Invoke][GetTensorMemorySizeInBytes]Failed to calc tensor size," | |||
"index = %zu, shape = [%s], model_id = %u.", | |||
input_index, tensor_desc->GetShape().ToString().c_str(), model_id_); | |||
GELOGD("Input tensor[%zu] size = %zu", input_index, tensor_size); | |||
GELOGD("Input tensor[%zu] size = %ld", input_index, tensor_size); | |||
TensorUtils::SetSize(*tensor_desc, tensor_size); | |||
args.input_desc[input_index] = tensor_desc; | |||
} | |||
GE_CHECK_GE(tensor_size, 0); | |||
@@ -326,17 +326,45 @@ std::shared_ptr<TaskContext> NodeState::GetTaskContext() { | |||
} | |||
void NodeState::SavePersistTensor(int input_idx, const TensorValue &tensor) { | |||
if (node_item_->root_data_.count(input_idx) > 0) { | |||
GELOGD("[%s] Save Root input tensor: %d", GetName().c_str(), input_idx); | |||
root_tensor_values_[input_idx] = tensor; | |||
const auto is_persist_tensor = [](const std::map<const NodeItem *, std::set<int>> &items, int idx) { | |||
const auto is_exist = [&idx](const std::pair<const NodeItem *, std::set<int>> &items) { | |||
return items.second.count(idx) > 0; | |||
}; | |||
return std::any_of(items.begin(), items.end(), is_exist); | |||
}; | |||
if (root_tensor_values_.count(input_idx) > 0) { | |||
return; | |||
} | |||
if (node_item_->enter_data_.count(input_idx) > 0) { | |||
if (is_persist_tensor(node_item_->root_data_, input_idx)) { | |||
GELOGD("[%s] Save Root input tensor: %d", GetName().c_str(), input_idx); | |||
root_tensor_values_[input_idx] = tensor; | |||
} else if (is_persist_tensor(node_item_->enter_data_, input_idx)) { | |||
GELOGD("[%s] Save Enter input tensor: %d", GetName().c_str(), input_idx); | |||
root_tensor_values_[input_idx] = tensor; | |||
} | |||
} | |||
void NodeState::UpdatePersistTensor() { | |||
const auto update_tensor = [&](const std::map<const NodeItem *, std::set<int>> &items) { | |||
for (const auto &item : items) { | |||
for (const auto idx : item.second) { | |||
UpdatePersistTensor(idx); | |||
} | |||
} | |||
}; | |||
if (root_tensor_values_.empty()) { | |||
return; | |||
} | |||
update_tensor(node_item_->root_data_); | |||
if (iteration_count_ > 0) { | |||
update_tensor(node_item_->enter_data_); | |||
} | |||
} | |||
void NodeState::UpdatePersistTensor(int input_idx) { | |||
const auto it = root_tensor_values_.find(input_idx); | |||
if (it == root_tensor_values_.end()) { | |||
@@ -363,16 +391,9 @@ void NodeState::ResetContext(uint64_t iteration) { | |||
data_scheduled_ = static_cast<uint32_t>(node_item_->root_data_.size()); | |||
ctrl_scheduled_ = static_cast<uint32_t>(node_item_->root_ctrl_.size()); | |||
for (auto item : node_item_->root_data_) { | |||
UpdatePersistTensor(item.first); | |||
} | |||
if (iteration > 0) { | |||
data_scheduled_ += static_cast<uint32_t>(node_item_->enter_data_.size()); | |||
ctrl_scheduled_ += static_cast<uint32_t>(node_item_->enter_ctrl_.size()); | |||
for (auto item : node_item_->enter_data_) { | |||
UpdatePersistTensor(item.first); | |||
} | |||
} | |||
iteration_count_ = iteration; | |||
@@ -132,6 +132,7 @@ struct NodeState { | |||
void RunNextIteration(); | |||
void SavePersistTensor(int input_idx, const TensorValue &tensor); | |||
void UpdatePersistTensor(); | |||
Status NodeScheduled(const std::function<void(const NodeItem *)> &ready) const; | |||
@@ -373,6 +373,7 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, | |||
auto executor = node_item.node_executor; | |||
GE_CHECK_NOTNULL(executor); | |||
RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] Start"); | |||
node_state.UpdatePersistTensor(); | |||
GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context), "[Prepare][Task] for [%s] failed.", | |||
node_state.GetName().c_str()); | |||
RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] End"); | |||
@@ -147,6 +147,7 @@ class HybridModel { | |||
GeRootModelPtr ge_root_model_; | |||
std::map<uint32_t, NodeItem *> input_nodes_; | |||
ComputeGraphPtr root_graph_; | |||
ComputeGraphPtr orig_root_graph_; | |||
std::map<std::string, NodePtr> device_variable_nodes_; //lint !e148 | |||
std::map<std::string, NodePtr> host_variable_nodes_; //lint !e148 | |||
std::map<std::string, std::unique_ptr<TensorValue>> variable_tensors_; | |||
@@ -147,6 +147,7 @@ Status HybridModelBuilder::Build() { | |||
GE_CHK_STATUS_RET(ValidateParams(), "[Invoke][ValidateParams] failed, model_name_:[%s]", GetGraphName()); | |||
hybrid_model_.model_name_ = ge_root_model_->GetModelName(); | |||
GELOGI("[%s] Start to build hybrid model.", GetGraphName()); | |||
GE_CHK_STATUS_RET(CopyGraph(), "[Invoke][CopyGraph] failed, model_name_:[%s]", GetGraphName()); | |||
GE_CHK_STATUS_RET(InitRuntimeParams(), "[Invoke][InitRuntimeParams] failed, model_name_:[%s]", GetGraphName()); | |||
GE_CHK_STATUS_RET(RecoverGraphUnknownFlag(), | |||
"[Invoke][RecoverGraphUnknownFlag] failed, model_name_:[%s]", GetGraphName()); | |||
@@ -171,11 +172,12 @@ Status HybridModelBuilder::Build() { | |||
Status HybridModelBuilder::BuildForSingleOp() { | |||
GE_CHK_STATUS_RET(ValidateParams(), "[Invoke][ValidateParams] failed, model_name_:[%s]", GetGraphName()); | |||
hybrid_model_.root_graph_ = ge_root_model_->GetRootGraph(); | |||
hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName(); | |||
GELOGI("[%s] Start to build hybrid model.", GetGraphName()); | |||
auto ret = ge_root_model_->GetSubgraphInstanceNameToModel(); | |||
const GeModelPtr ge_model = ret[ge_root_model_->GetRootGraph()->GetName()]; | |||
GE_CHK_STATUS_RET(IndexTaskDefs(ge_root_model_->GetRootGraph(), ge_model), | |||
const GeModelPtr ge_model = ret[hybrid_model_.root_graph_->GetName()]; | |||
GE_CHK_STATUS_RET(IndexTaskDefs(hybrid_model_.root_graph_, ge_model), | |||
"[Invoke][IndexTaskDefs] failed, model_name_:[%s]", GetGraphName()); | |||
GE_CHK_STATUS_RET(LoadGraph(), "[Invoke][LoadGraph] failed, model_name_:[%s]", GetGraphName()); | |||
GE_CHK_STATUS_RET(InitWeights(), "[Invoke][InitWeights] failed, model_name_:[%s]", GetGraphName()); | |||
@@ -190,6 +192,27 @@ Status HybridModelBuilder::ValidateParams() { | |||
return SUCCESS; | |||
} | |||
// Copies the root graph held by ge_root_model_ into a newly created ComputeGraph that
// carries the same name, and stores the copy in hybrid_model_.root_graph_.
// Returns SUCCESS when the copy succeeds and GRAPH_FAILED when
// GraphUtils::CopyComputeGraph reports an error. The GE_CHECK_NOTNULL guards bail out
// early if the source graph or the freshly allocated graph is null.
Status HybridModelBuilder::CopyGraph() {
  GELOGD("Copy compute graph begin.");
  auto root_graph = ge_root_model_->GetRootGraph();
  // Guard before dereferencing: the new graph's name is read from root_graph below.
  GE_CHECK_NOTNULL(root_graph);

  // Reuse the graph fetched above instead of querying ge_root_model_ a second time.
  const std::string new_graph_name = root_graph->GetName();
  ComputeGraphPtr new_root_graph = MakeShared<ComputeGraph>(new_graph_name);
  GE_CHECK_NOTNULL(new_root_graph);

  int32_t depth = 0;
  std::map<ConstNodePtr, NodePtr> node_old_2_new;
  std::map<ConstOpDescPtr, OpDescPtr> op_desc_old_2_new;
  graphStatus ret = GraphUtils::CopyComputeGraph(root_graph, new_root_graph, node_old_2_new, op_desc_old_2_new, depth);
  if (ret != GRAPH_SUCCESS) {
    GELOGE(GRAPH_FAILED, "Copy compute graph failed.");
    return GRAPH_FAILED;
  }

  hybrid_model_.root_graph_ = new_root_graph;
  GELOGD("Copy compute graph[%s] success.", new_graph_name.c_str());
  return SUCCESS;
}
Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_item) { | |||
auto op_desc = node->GetOpDesc(); | |||
GE_CHK_STATUS_RET(ParseForceInfershapeNodes(node, node_item), | |||
@@ -265,10 +288,6 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n | |||
return SUCCESS; | |||
} | |||
if (node->GetType() == MEMCPYASYNC) { // Convert MemcpyAsync to Identity. | |||
node->GetOpDesc()->SetType(IDENTITY); | |||
} | |||
std::unique_ptr<NodeItem> new_node; | |||
GE_CHK_STATUS_RET(NodeItem::Create(node, new_node), "[Invoke][Create] failed, model_name_:[%s]", GetGraphName()); | |||
GE_CHK_STATUS_RET_NOLOG(NodeExecutorManager::GetInstance().GetExecutor(*node, &new_node->node_executor)); | |||
@@ -814,12 +833,13 @@ Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, | |||
} | |||
Status HybridModelBuilder::LoadGraph() { | |||
auto root_graph = ge_root_model_->GetRootGraph(); | |||
auto root_graph = hybrid_model_.root_graph_; | |||
if (!GetContext().GetHostExecFlag()) { | |||
std::shared_ptr<ComputeGraph> merged_graph; | |||
GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", | |||
root_graph->GetDirectNodesSize(), | |||
root_graph->GetAllNodesSize()); | |||
hybrid_model_.orig_root_graph_ = root_graph; | |||
GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(root_graph, merged_graph), | |||
"[Invoke][UnfoldSubgraphs]Failed to unfold subgraphs, model_name_:%s.", GetGraphName()); | |||
root_graph = std::move(merged_graph); | |||
@@ -877,6 +897,7 @@ Status HybridModelBuilder::LoadGraph() { | |||
} | |||
for (auto &it : hybrid_model_.known_shape_sub_models_) { | |||
auto node_item = MutableNodeItem(it.first); | |||
GE_CHECK_NOTNULL(node_item); | |||
AscendString graph_name; | |||
GE_CHK_GRAPH_STATUS_RET(it.second->GetGraph().GetName(graph_name), "Failed to get subgraph name"); | |||
auto subgraph = hybrid_model_.GetRootGraph()->GetSubgraph(graph_name.GetString()); | |||
@@ -1023,6 +1044,7 @@ Status HybridModelBuilder::InitConstantOps() { | |||
} else { | |||
var_tensor.reset(new(std::nothrow)TensorValue(nullptr, 0)); | |||
} | |||
GE_CHECK_NOTNULL(var_tensor); | |||
} else { | |||
GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); | |||
GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); | |||
@@ -1125,7 +1147,9 @@ Status HybridModelBuilder::InitWeights() { | |||
sub_weight_buffer->GetSize()); | |||
auto subgraph = GraphUtils::GetComputeGraph(subgraph_model.second->GetGraph()); | |||
if (subgraph != ge_root_model_->GetRootGraph()) { | |||
subgraph = ge_root_model_->GetRootGraph()->GetSubgraph(subgraph_model.first); | |||
subgraph = hybrid_model_.root_graph_->GetSubgraph(subgraph_model.first); | |||
} else { | |||
subgraph = hybrid_model_.root_graph_; | |||
} | |||
GE_CHECK_NOTNULL(subgraph); | |||
hybrid_model_.weight_buffer_map_.emplace(subgraph->GetName(), std::move(sub_weight_buffer)); | |||
@@ -1304,7 +1328,7 @@ Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const | |||
} | |||
Status HybridModelBuilder::IndexTaskDefs() { | |||
const auto root_graph = ge_root_model_->GetRootGraph(); | |||
const auto &root_graph = hybrid_model_.root_graph_; | |||
const auto &root_graph_name = root_graph->GetName(); | |||
if (SetOutputNameAttr(*root_graph) != SUCCESS) { | |||
GELOGW("Set output name attr failed."); | |||
@@ -1338,7 +1362,7 @@ Status HybridModelBuilder::IndexTaskDefs() { | |||
Status HybridModelBuilder::IndexSpecialNodes() { | |||
GELOGD("Start to index special nodes"); | |||
const auto &root_graph = ge_root_model_->GetRootGraph(); | |||
const auto &root_graph = hybrid_model_.root_graph_; | |||
for (auto &node : root_graph->GetAllNodes()) { | |||
GE_CHECK_NOTNULL(node); | |||
GE_CHECK_NOTNULL(node->GetOpDesc()); | |||
@@ -1493,7 +1517,7 @@ Status HybridModelBuilder::InitRuntimeParams() { | |||
runtime_param_.session_id = ret ? static_cast<uint64_t>(value) : 0; | |||
ret = ge::AttrUtils::GetInt(first_model, ATTR_MODEL_TASK_GEN_VAR_ADDR, value); | |||
runtime_param_.logic_var_base = ret ? static_cast<uint64_t>(value) : 0; | |||
runtime_param_.graph_id = ge_root_model_->GetRootGraph()->GetGraphID(); | |||
runtime_param_.graph_id = hybrid_model_.root_graph_->GetGraphID(); | |||
value = 0; | |||
for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { | |||
(void) ge::AttrUtils::GetInt(it.second, ATTR_MODEL_VAR_SIZE, value); | |||
@@ -1630,7 +1654,7 @@ Status HybridModelBuilder::TransAllVarData() { | |||
} | |||
Status HybridModelBuilder::CopyVarData() { | |||
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(ge_root_model_->GetRootGraph(), | |||
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(hybrid_model_.root_graph_, | |||
runtime_param_.session_id, | |||
hybrid_model_.device_id_), | |||
"[Invoke][CopyVarData] failed."); | |||
@@ -1713,7 +1737,7 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem | |||
} | |||
Status HybridModelBuilder::RecoverGraphUnknownFlag() { | |||
const auto &root_graph = ge_root_model_->GetRootGraph(); | |||
const auto &root_graph = hybrid_model_.root_graph_; | |||
for (auto &sub_graph : root_graph->GetAllSubgraphs()) { | |||
GE_CHECK_NOTNULL(sub_graph); | |||
for (const auto &node : sub_graph->GetDirectNode()) { | |||
@@ -56,6 +56,7 @@ class HybridModelBuilder { | |||
Status BuildOutputMapping(GraphItem &partitioned_call, const NodeItem &node_item, bool is_root_graph); | |||
Status ValidateParams(); | |||
Status LoadGraph(); | |||
Status CopyGraph(); | |||
Status LoadGeModel(ComputeGraph &graph, const GeModelPtr &ge_model); | |||
static Status InitHcclExecutorOnDemand(const GeModelPtr &ge_model); | |||
Status LoadTask(NodeItem &node_item); | |||
@@ -14,10 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#include "node_item.h" | |||
#include <sstream> | |||
#include "common/debug/log.h" | |||
#include "graph/common/omg_util.h" | |||
#include "hybrid/model/node_item.h" | |||
#include "graph/compute_graph.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "hybrid/executor/worker/shape_inference_engine.h" | |||
@@ -26,6 +24,8 @@ | |||
namespace ge { | |||
namespace hybrid { | |||
namespace { | |||
const uint8_t kMaxTransCount = 3; | |||
const uint32_t kTransOpIoSize = 1; | |||
const char *const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; | |||
const char *const kNodeTypeRetVal = "_RetVal"; | |||
const std::set<std::string> kControlOpTypes{ | |||
@@ -41,6 +41,25 @@ const std::set<std::string> kMergeOpTypes{ | |||
MERGE, REFMERGE, STREAMMERGE | |||
}; | |||
// Reports whether `node` is an Enter-type op or is reached from one through a short chain
// of single-input nodes. At most kMaxTransCount nodes are inspected, starting with `node`
// itself and then walking upstream one data input at a time. Chains covered:
//   Enter
//   Enter -> Cast
//   Enter -> TransData -> Cast
// where the right-most node of each chain is the `node` argument.
bool IsEnterFeedNode(NodePtr node) {
  for (uint8_t hops_left = kMaxTransCount; hops_left > 0; --hops_left) {
    const bool is_enter = kEnterOpTypes.count(NodeUtils::GetNodeType(node)) > 0;
    if (is_enter) {
      GELOGD("Node[%s] is Enter feed node.", node->GetName().c_str());
      return true;
    }

    // Only keep walking upstream through nodes with exactly one data input.
    const auto in_data_nodes = node->GetInDataNodes();
    const bool has_single_input =
        (in_data_nodes.size() == kTransOpIoSize) && (node->GetAllInDataAnchorsSize() == kTransOpIoSize);
    if (!has_single_input) {
      return false;
    }
    node = in_data_nodes.at(0);
  }
  return false;
}
Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgraph) { | |||
uint32_t parent_index = 0; | |||
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
@@ -98,8 +117,7 @@ Status ParseFusedSubgraph(NodeItem &node_item) { | |||
GE_CHECK_NOTNULL(node); | |||
auto op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
std::string node_type; | |||
GE_CHK_STATUS_RET(GetOriginalType(node, node_type)); | |||
const std::string node_type = NodeUtils::GetNodeType(node); | |||
if (node_type == DATA) { | |||
GE_CHK_GRAPH_STATUS_RET(ParseInputMapping(*node, *op_desc, *fused_subgraph)); | |||
} else if (node_type == kNodeTypeRetVal) { | |||
@@ -398,19 +416,21 @@ void NodeItem::SetDataSend(NodeItem *node_item, int anchor_index) { | |||
data_send_.emplace(node_item); | |||
node_item->data_recv_[this] = anchor_index; | |||
if (is_root_node_) { | |||
node_item->root_data_[anchor_index] = this; | |||
auto &data_anchors = node_item->root_data_[this]; | |||
data_anchors.emplace(anchor_index); | |||
} | |||
// If Enter feed Not Merge, take as root Node. | |||
if (IsEnterOp() && (node_item->node_type != STREAMMERGE)) { | |||
node_item->enter_data_[anchor_index] = this; | |||
if (IsEnterFeedNode(node) && (node_item->node_type != STREAMMERGE)) { | |||
auto &data_anchors = node_item->enter_data_[this]; | |||
data_anchors.emplace(anchor_index); | |||
} | |||
GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); | |||
} | |||
void NodeItem::SetCtrlSend(NodeItem *node_item, uint32_t switch_index) { | |||
if (switch_index < switch_groups_.size()) { | |||
std::vector<const NodeItem *> &switch_group = switch_groups_[switch_index]; | |||
switch_group.emplace_back(node_item); | |||
auto &switch_group = switch_groups_[switch_index]; | |||
switch_group.emplace(node_item); | |||
} else { | |||
ctrl_send_.insert(node_item); | |||
} | |||
@@ -420,7 +440,7 @@ void NodeItem::SetCtrlSend(NodeItem *node_item, uint32_t switch_index) { | |||
node_item->root_ctrl_.emplace(this); | |||
} | |||
// If Enter feed control signal, take as root Node. | |||
if (IsEnterOp() && (node_item->node_type != STREAMMERGE && node_item->node_type != STREAMACTIVE)) { | |||
if (IsEnterFeedNode(node) && (node_item->node_type != STREAMMERGE && node_item->node_type != STREAMACTIVE)) { | |||
node_item->enter_ctrl_.emplace(this); | |||
} | |||
GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); | |||
@@ -433,8 +453,8 @@ void NodeItem::SetMergeCtrl(NodeItem *node_item, uint32_t merge_index) { | |||
} | |||
// this is StreamMerge node, node_item is StreamActive node. | |||
std::vector<const NodeItem *> &switch_group = switch_groups_[merge_index]; | |||
switch_group.emplace_back(node_item); | |||
auto &switch_group = switch_groups_[merge_index]; | |||
switch_group.emplace(node_item); | |||
node_item->ctrl_send_.emplace(this); | |||
GELOGI("Node[%s] will control node[%s]", node_item->NodeName().c_str(), NodeName().c_str()); | |||
@@ -148,14 +148,14 @@ struct NodeItem { | |||
int64_t frame_index_ = -1; | |||
int64_t parent_frame_ = -1; | |||
std::set<const NodeItem *> root_ctrl_; // Recv ctrl from root node | |||
std::map<int, const NodeItem *> root_data_; // Recv data from root node | |||
std::map<const NodeItem *, std::set<int>> root_data_; // Recv data from root node | |||
std::set<const NodeItem *> enter_ctrl_; // Recv ctrl from Enter node | |||
std::map<int, const NodeItem *> enter_data_; // Recv data from Enter node | |||
std::map<const NodeItem *, std::set<int>> enter_data_; // Recv data from Enter node | |||
std::set<const NodeItem *> data_send_; // Send data notify to | |||
std::map<const NodeItem *, int> data_recv_; // Recv data notify from | |||
std::set<const NodeItem *> ctrl_send_; // Send ctrl notify to | |||
std::set<const NodeItem *> ctrl_recv_; // Recv ctrl notify from | |||
std::vector<std::vector<const NodeItem *>> switch_groups_; // Send ctrl notify to | |||
std::vector<std::set<const NodeItem *>> switch_groups_; // Send ctrl notify to | |||
std::shared_ptr<NodeTask> kernel_task; | |||
std::unique_ptr<FusedSubgraph> fused_subgraph; | |||
@@ -342,6 +342,7 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||
GE_CHK_RT_RET(rtEventDestroy(evt)); | |||
} | |||
GELOGI("rdma callback success."); | |||
return SUCCESS; | |||
}; | |||
HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); | |||
@@ -17,13 +17,9 @@ | |||
#include "hybrid/node_executor/rts/rts_node_executor.h" | |||
#include "hybrid/node_executor/rts/rts_task_factory.h" | |||
#include "common/debug/log.h" | |||
#include "common/ge/ge_util.h" | |||
#include "common/types.h" | |||
#include "graph/common/omg_util.h" | |||
#include "graph/utils/tensor_utils.h" | |||
#include "hybrid/model/hybrid_model.h" | |||
#include "runtime/rt.h" | |||
namespace ge { | |||
namespace hybrid { | |||
@@ -33,6 +29,7 @@ REGISTER_RTS_TASK_CREATOR(IDENTITY, IdentityNodeTask); | |||
REGISTER_RTS_TASK_CREATOR(IDENTITYN, IdentityNNodeTask); | |||
REGISTER_RTS_TASK_CREATOR(READVARIABLEOP, ReadVariableOpNodeTask); | |||
REGISTER_RTS_TASK_CREATOR(PROFILINGTRAININGTRACE, ProfilingTraceNodeTask); | |||
REGISTER_RTS_TASK_CREATOR(MEMCPYASYNC, IdentityNodeTask); | |||
Status IdentityNodeTask::DoCopyTensor(TaskContext &context, int index) { | |||
auto input_desc = context.MutableInputDesc(index); | |||
@@ -133,8 +130,7 @@ Status ProfilingTraceNodeTask::ExecuteAsync(TaskContext &context, std::function< | |||
Status RtsNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { | |||
GE_CHECK_NOTNULL(node); | |||
GELOGD("[%s] Load for local task.", node->GetName().c_str()); | |||
std::string node_type; | |||
GE_CHK_STATUS_RET(GetOriginalType(node, node_type), "Get original type failed."); | |||
const std::string node_type = NodeUtils::GetNodeType(node); | |||
RtsNodeTaskPtr rts_task = RtsTaskFactory::GetInstance().Create(node_type); | |||
if (rts_task == nullptr) { | |||
GELOGE(UNSUPPORTED, "[%s] Unsupported RTS op type: %s", node->GetName().c_str(), node_type.c_str()); | |||
@@ -43,7 +43,6 @@ namespace hybrid { | |||
REGISTER_RTS_TASK_CREATOR(STREAMACTIVE, StreamActiveNodeTask); | |||
REGISTER_RTS_TASK_CREATOR(STREAMSWITCH, StreamSwitchNodeTask); | |||
REGISTER_RTS_TASK_CREATOR(STREAMMERGE, StreamMergeNodeTask); | |||
REGISTER_RTS_TASK_CREATOR(MEMCPYASYNC, MemcpyAsyncNodeTask); | |||
REGISTER_RTS_TASK_CREATOR(ENTER, PassThroughNodeTask); | |||
REGISTER_RTS_TASK_CREATOR(REFENTER, PassThroughNodeTask); | |||
@@ -168,34 +167,6 @@ Status StreamMergeNodeTask::ExecuteAsync(TaskContext &task_context, std::functio | |||
return SUCCESS; | |||
} | |||
// Copies input 0 of the task into output 0 with rtMemcpyAsync (RT_MEMCPY_DEVICE_TO_DEVICE)
// on the task's stream, then registers done_callback when one is supplied.
// A non-positive copy size only produces a warning; the callback is still registered.
Status MemcpyAsyncNodeTask::ExecuteAsync(TaskContext &task_context, std::function<void()> done_callback) {
  GELOGD("[%s] Start to execute.", task_context.GetNodeName());
  auto input_desc = task_context.MutableInputDesc(0);
  GE_CHECK_NOTNULL(input_desc);

  int64_t copy_size = 0;
  GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorSizeInBytes(*input_desc, copy_size));
  // copy_size would not be negative since GetTensorSizeInBytes returned successfully.
  if (copy_size <= 0) {
    GELOGW("[%s] invalid copy size: %ld", task_context.GetNodeName(), copy_size);
  } else {
    const auto in_v = task_context.MutableInput(0);
    const auto out_v = task_context.MutableOutput(0);
    GE_CHECK_NOTNULL(in_v);
    GE_CHECK_NOTNULL(out_v);
    GELOGD("[%s] input size: %zu, output size: %zu, copy size: %ld", task_context.GetNodeName(),
           in_v->GetSize(), out_v->GetSize(), copy_size);
    GE_CHK_RT_RET(rtMemcpyAsync(out_v->MutableData(), out_v->GetSize(), in_v->GetData(), copy_size,
                                RT_MEMCPY_DEVICE_TO_DEVICE, task_context.GetStream()));
  }

  if (done_callback != nullptr) {
    GE_CHK_STATUS_RET(task_context.RegisterCallback(done_callback));
  }

  GELOGD("[%s] Done executing successfully.", task_context.GetNodeName());
  return SUCCESS;
}
Status PassThroughNodeTask::ExecuteAsync(TaskContext &task_context, std::function<void()> done_callback) { | |||
GELOGD("[%s] Start to execute.", task_context.GetNodeName()); | |||
const auto in_x = task_context.GetInput(0); // x | |||
@@ -60,11 +60,6 @@ class StreamMergeNodeTask : public RtsNodeTask { | |||
Status ExecuteAsync(TaskContext &task_context, std::function<void()> done_callback) override; | |||
}; | |||
class MemcpyAsyncNodeTask : public RtsNodeTask { | |||
public: | |||
Status ExecuteAsync(TaskContext &task_context, std::function<void()> done_callback) override; | |||
}; | |||
class PassThroughNodeTask : public RtsNodeTask { | |||
public: | |||
Status ExecuteAsync(TaskContext &task_context, std::function<void()> done_callback) override; | |||
@@ -458,10 +458,6 @@ Status TaskContext::PropagateOutputs() { | |||
subgraph_context_->all_inputs_[input_offset].SetName( | |||
node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx)); | |||
} | |||
auto dst_node_state = subgraph_context_->GetOrCreateNodeState(dst_node_item); | |||
GE_CHECK_NOTNULL(dst_node_state); | |||
dst_node_state->SavePersistTensor(dst_input_idx, *tensor); | |||
} | |||
} | |||
(void)guard; | |||
@@ -493,6 +489,7 @@ void TaskContext::ReleaseInputsAndOutputs() { | |||
void TaskContext::ReleaseInput(int index) { | |||
auto input_tensor = MutableInput(index); | |||
if (input_tensor != nullptr) { | |||
node_state_->SavePersistTensor(index, *input_tensor); | |||
input_tensor->Destroy(); | |||
GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index); | |||
} | |||
@@ -33,6 +33,10 @@ | |||
#include "register/op_tiling.h" | |||
namespace ge { | |||
namespace { | |||
const int kAddressNum = 2; | |||
} // namespace | |||
class StreamResource; | |||
struct SingleOpModelParam; | |||
class OpTask { | |||
@@ -256,7 +260,7 @@ class MemcpyAsyncTask : public OpTask { | |||
friend class SingleOpModel; | |||
friend class RtsKernelTaskBuilder; | |||
uintptr_t addresses_[2]; | |||
uintptr_t addresses_[kAddressNum]; | |||
size_t dst_max_; | |||
size_t count_; | |||
rtMemcpyKind_t kind_; | |||
@@ -26,9 +26,9 @@ extern "C" { | |||
#endif | |||
// Current version is 1.0.0 | |||
#define ACL_MAJOR_VERSION 1 | |||
#define ACL_MINOR_VERSION 0 | |||
#define ACL_PATCH_VERSION 0 | |||
#define ACL_MAJOR_VERSION 1 | |||
#define ACL_MINOR_VERSION 0 | |||
#define ACL_PATCH_VERSION 0 | |||
/** | |||
* @ingroup AscendCL | |||
@@ -72,11 +72,11 @@ ACL_FUNC_VISIBILITY aclError aclrtGetVersion(int32_t *majorVersion, int32_t *min | |||
* | |||
* @retval null for failed | |||
* @retval OtherValues success | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY const char *aclGetRecentErrMsg(); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_ACL_H_ | |||
#endif // INC_EXTERNAL_ACL_ACL_H_ |
@@ -136,50 +136,49 @@ static const int ACL_ERROR_PROFILING_FAILURE = 500005; | |||
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE | |||
typedef enum { | |||
ACL_DT_UNDEFINED = -1, | |||
ACL_FLOAT = 0, | |||
ACL_FLOAT16 = 1, | |||
ACL_INT8 = 2, | |||
ACL_INT32 = 3, | |||
ACL_UINT8 = 4, | |||
ACL_INT16 = 6, | |||
ACL_UINT16 = 7, | |||
ACL_UINT32 = 8, | |||
ACL_INT64 = 9, | |||
ACL_UINT64 = 10, | |||
ACL_DOUBLE = 11, | |||
ACL_BOOL = 12, | |||
ACL_STRING = 13, | |||
ACL_DT_UNDEFINED = -1, | |||
ACL_FLOAT = 0, | |||
ACL_FLOAT16 = 1, | |||
ACL_INT8 = 2, | |||
ACL_INT32 = 3, | |||
ACL_UINT8 = 4, | |||
ACL_INT16 = 6, | |||
ACL_UINT16 = 7, | |||
ACL_UINT32 = 8, | |||
ACL_INT64 = 9, | |||
ACL_UINT64 = 10, | |||
ACL_DOUBLE = 11, | |||
ACL_BOOL = 12, | |||
ACL_STRING = 13, | |||
} aclDataType; | |||
typedef enum { | |||
ACL_FORMAT_UNDEFINED = -1, | |||
ACL_FORMAT_NCHW = 0, | |||
ACL_FORMAT_NHWC = 1, | |||
ACL_FORMAT_ND = 2, | |||
ACL_FORMAT_NC1HWC0 = 3, | |||
ACL_FORMAT_FRACTAL_Z = 4, | |||
ACL_FORMAT_NC1HWC0_C04 = 12, | |||
ACL_FORMAT_NDHWC = 27, | |||
ACL_FORMAT_FRACTAL_NZ = 29, | |||
ACL_FORMAT_NCDHW = 30, | |||
ACL_FORMAT_NDC1HWC0 = 32, | |||
ACL_FRACTAL_Z_3D = 33 | |||
ACL_FORMAT_UNDEFINED = -1, | |||
ACL_FORMAT_NCHW = 0, | |||
ACL_FORMAT_NHWC = 1, | |||
ACL_FORMAT_ND = 2, | |||
ACL_FORMAT_NC1HWC0 = 3, | |||
ACL_FORMAT_FRACTAL_Z = 4, | |||
ACL_FORMAT_NC1HWC0_C04 = 12, | |||
ACL_FORMAT_NDHWC = 27, | |||
ACL_FORMAT_FRACTAL_NZ = 29, | |||
ACL_FORMAT_NCDHW = 30, | |||
ACL_FORMAT_NDC1HWC0 = 32, | |||
ACL_FRACTAL_Z_3D = 33 | |||
} aclFormat; | |||
typedef enum { | |||
ACL_DEBUG = 0, | |||
ACL_INFO = 1, | |||
ACL_WARNING = 2, | |||
ACL_ERROR = 3, | |||
ACL_DEBUG = 0, | |||
ACL_INFO = 1, | |||
ACL_WARNING = 2, | |||
ACL_ERROR = 3, | |||
} aclLogLevel; | |||
typedef enum { | |||
ACL_MEMTYPE_DEVICE = 0, | |||
ACL_MEMTYPE_HOST = 1, | |||
ACL_MEMTYPE_DEVICE = 0, | |||
ACL_MEMTYPE_HOST = 1, | |||
} aclMemType; | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Converts data of type aclFloat16 to data of type float | |||
@@ -312,9 +311,7 @@ ACL_FUNC_VISIBILITY size_t aclDataTypeSize(aclDataType dataType); | |||
* @retval aclTensorDesc pointer. | |||
* @retval nullptr if param is invalid or run out of memory | |||
*/ | |||
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, | |||
int numDims, | |||
const int64_t *dims, | |||
ACL_FUNC_VISIBILITY aclTensorDesc *aclCreateTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, | |||
aclFormat format); | |||
/** | |||
@@ -336,8 +333,7 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc); | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc* desc, | |||
size_t dimsCount, | |||
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount, | |||
int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); | |||
/** | |||
@@ -434,9 +430,7 @@ ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimV2(const aclTensorDesc *desc, si | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, | |||
size_t index, | |||
size_t dimRangeNum, | |||
ACL_FUNC_VISIBILITY aclError aclGetTensorDescDimRange(const aclTensorDesc *desc, size_t index, size_t dimRangeNum, | |||
int64_t *dimRange); | |||
/** | |||
@@ -473,7 +467,7 @@ ACL_FUNC_VISIBILITY const char *aclGetTensorDescName(aclTensorDesc *desc); | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclTransTensorDescFormat(const aclTensorDesc *srcDesc, aclFormat dstFormat, | |||
aclTensorDesc **dstDesc); | |||
aclTensorDesc **dstDesc); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -561,7 +555,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorOriginShape(aclTensorDesc *desc, int nu | |||
* | |||
* @retval null for failed. | |||
* @retval OtherValues success. | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, size_t index); | |||
/** | |||
@@ -572,7 +566,7 @@ ACL_FUNC_VISIBILITY aclTensorDesc *aclGetTensorDescByIndex(aclTensorDesc *desc, | |||
* | |||
* @retval null for failed | |||
* @retval OtherValues success | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY void *aclGetTensorDescAddress(const aclTensorDesc *desc); | |||
/** | |||
@@ -624,7 +618,7 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemTy | |||
* @param ... [IN] the value of current log | |||
*/ | |||
ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const char *file, uint32_t line, | |||
const char *fmt, ...); | |||
const char *fmt, ...); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -632,14 +626,13 @@ ACL_FUNC_VISIBILITY void aclAppLog(aclLogLevel logLevel, const char *func, const | |||
* | |||
* @retval null for failed | |||
* @retval OtherValues success | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY const char *aclrtGetSocName(); | |||
#define ACL_APP_LOG(level, fmt, ...) \ | |||
aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) | |||
#define ACL_APP_LOG(level, fmt, ...) aclAppLog(level, __FUNCTION__, __FILE__, __LINE__, fmt, ##__VA_ARGS__) | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ | |||
#endif // INC_EXTERNAL_ACL_ACL_BASE_H_ |
@@ -27,19 +27,19 @@ | |||
extern "C" { | |||
#endif | |||
#define ACL_MAX_DIM_CNT 128 | |||
#define ACL_MAX_TENSOR_NAME_LEN 128 | |||
#define ACL_MAX_BATCH_NUM 128 | |||
#define ACL_MAX_HW_NUM 128 | |||
#define ACL_MAX_SHAPE_COUNT 128 | |||
#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF | |||
#define ACL_MDL_LOAD_FROM_FILE 1 | |||
#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 | |||
#define ACL_MDL_LOAD_FROM_MEM 3 | |||
#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 | |||
#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 | |||
#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 | |||
#define ACL_MAX_DIM_CNT 128 | |||
#define ACL_MAX_TENSOR_NAME_LEN 128 | |||
#define ACL_MAX_BATCH_NUM 128 | |||
#define ACL_MAX_HW_NUM 128 | |||
#define ACL_MAX_SHAPE_COUNT 128 | |||
#define ACL_INVALID_NODE_INDEX 0xFFFFFFFF | |||
#define ACL_MDL_LOAD_FROM_FILE 1 | |||
#define ACL_MDL_LOAD_FROM_FILE_WITH_MEM 2 | |||
#define ACL_MDL_LOAD_FROM_MEM 3 | |||
#define ACL_MDL_LOAD_FROM_MEM_WITH_MEM 4 | |||
#define ACL_MDL_LOAD_FROM_FILE_WITH_Q 5 | |||
#define ACL_MDL_LOAD_FROM_MEM_WITH_Q 6 | |||
#define ACL_DYNAMIC_TENSOR_NAME "ascend_mbatch_shape_data" | |||
#define ACL_DYNAMIC_AIPP_NAME "ascend_dynamic_aipp_data" | |||
@@ -52,123 +52,123 @@ typedef struct aclAippExtendInfo aclAippExtendInfo; | |||
typedef struct aclmdlConfigHandle aclmdlConfigHandle; | |||
typedef enum { | |||
ACL_YUV420SP_U8 = 1, | |||
ACL_XRGB8888_U8, | |||
ACL_RGB888_U8, | |||
ACL_YUV400_U8, | |||
ACL_NC1HWC0DI_FP16, | |||
ACL_NC1HWC0DI_S8, | |||
ACL_ARGB8888_U8, | |||
ACL_YUYV_U8, | |||
ACL_YUV422SP_U8, | |||
ACL_AYUV444_U8, | |||
ACL_RAW10, | |||
ACL_RAW12, | |||
ACL_RAW16, | |||
ACL_RAW24, | |||
ACL_AIPP_RESERVED = 0xffff, | |||
ACL_YUV420SP_U8 = 1, | |||
ACL_XRGB8888_U8, | |||
ACL_RGB888_U8, | |||
ACL_YUV400_U8, | |||
ACL_NC1HWC0DI_FP16, | |||
ACL_NC1HWC0DI_S8, | |||
ACL_ARGB8888_U8, | |||
ACL_YUYV_U8, | |||
ACL_YUV422SP_U8, | |||
ACL_AYUV444_U8, | |||
ACL_RAW10, | |||
ACL_RAW12, | |||
ACL_RAW16, | |||
ACL_RAW24, | |||
ACL_AIPP_RESERVED = 0xffff, | |||
} aclAippInputFormat; | |||
typedef enum { | |||
ACL_MDL_PRIORITY_INT32 = 0, | |||
ACL_MDL_LOAD_TYPE_SIZET, | |||
ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ | |||
ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ | |||
ACL_MDL_MEM_SIZET, | |||
ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ | |||
ACL_MDL_WEIGHT_SIZET, | |||
ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */ | |||
ACL_MDL_WORKSPACE_SIZET, | |||
ACL_MDL_INPUTQ_NUM_SIZET, | |||
ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ | |||
ACL_MDL_OUTPUTQ_NUM_SIZET, | |||
ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ | |||
ACL_MDL_PRIORITY_INT32 = 0, | |||
ACL_MDL_LOAD_TYPE_SIZET, | |||
ACL_MDL_PATH_PTR, /**< pointer to model load path with deep copy */ | |||
ACL_MDL_MEM_ADDR_PTR, /**< pointer to model memory with shallow copy */ | |||
ACL_MDL_MEM_SIZET, | |||
ACL_MDL_WEIGHT_ADDR_PTR, /**< pointer to weight memory of model with shallow copy */ | |||
ACL_MDL_WEIGHT_SIZET, | |||
ACL_MDL_WORKSPACE_ADDR_PTR, /**< pointer to worksapce memory of model with shallow copy */ | |||
ACL_MDL_WORKSPACE_SIZET, | |||
ACL_MDL_INPUTQ_NUM_SIZET, | |||
ACL_MDL_INPUTQ_ADDR_PTR, /**< pointer to inputQ with shallow copy */ | |||
ACL_MDL_OUTPUTQ_NUM_SIZET, | |||
ACL_MDL_OUTPUTQ_ADDR_PTR /**< pointer to outputQ with shallow copy */ | |||
} aclmdlConfigAttr; | |||
typedef enum { | |||
ACL_DATA_WITHOUT_AIPP = 0, | |||
ACL_DATA_WITH_STATIC_AIPP, | |||
ACL_DATA_WITH_DYNAMIC_AIPP, | |||
ACL_DYNAMIC_AIPP_NODE | |||
ACL_DATA_WITHOUT_AIPP = 0, | |||
ACL_DATA_WITH_STATIC_AIPP, | |||
ACL_DATA_WITH_DYNAMIC_AIPP, | |||
ACL_DYNAMIC_AIPP_NODE | |||
} aclmdlInputAippType; | |||
typedef struct aclmdlIODims { | |||
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ | |||
size_t dimCount; /**< dim array count */ | |||
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ | |||
char name[ACL_MAX_TENSOR_NAME_LEN]; /**< tensor name */ | |||
size_t dimCount; /**< dim array count */ | |||
int64_t dims[ACL_MAX_DIM_CNT]; /**< dim data array */ | |||
} aclmdlIODims; | |||
typedef struct aclAippDims { | |||
aclmdlIODims srcDims; /**< input dims before model transform */ | |||
size_t srcSize; /**< input size before model transform */ | |||
aclmdlIODims aippOutdims; /**< aipp output dims */ | |||
size_t aippOutSize; /**< aipp output size */ | |||
aclmdlIODims srcDims; /**< input dims before model transform */ | |||
size_t srcSize; /**< input size before model transform */ | |||
aclmdlIODims aippOutdims; /**< aipp output dims */ | |||
size_t aippOutSize; /**< aipp output size */ | |||
} aclAippDims; | |||
typedef struct aclmdlBatch { | |||
size_t batchCount; /**< batch array count */ | |||
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ | |||
size_t batchCount; /**< batch array count */ | |||
uint64_t batch[ACL_MAX_BATCH_NUM]; /**< batch data array */ | |||
} aclmdlBatch; | |||
typedef struct aclmdlHW { | |||
size_t hwCount; /**< height&width array count */ | |||
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ | |||
size_t hwCount; /**< height&width array count */ | |||
uint64_t hw[ACL_MAX_HW_NUM][2]; /**< height&width data array */ | |||
} aclmdlHW; | |||
typedef struct aclAippInfo { | |||
aclAippInputFormat inputFormat; | |||
int32_t srcImageSizeW; | |||
int32_t srcImageSizeH; | |||
int8_t cropSwitch; | |||
int32_t loadStartPosW; | |||
int32_t loadStartPosH; | |||
int32_t cropSizeW; | |||
int32_t cropSizeH; | |||
int8_t resizeSwitch; | |||
int32_t resizeOutputW; | |||
int32_t resizeOutputH; | |||
int8_t paddingSwitch; | |||
int32_t leftPaddingSize; | |||
int32_t rightPaddingSize; | |||
int32_t topPaddingSize; | |||
int32_t bottomPaddingSize; | |||
int8_t cscSwitch; | |||
int8_t rbuvSwapSwitch; | |||
int8_t axSwapSwitch; | |||
int8_t singleLineMode; | |||
int32_t matrixR0C0; | |||
int32_t matrixR0C1; | |||
int32_t matrixR0C2; | |||
int32_t matrixR1C0; | |||
int32_t matrixR1C1; | |||
int32_t matrixR1C2; | |||
int32_t matrixR2C0; | |||
int32_t matrixR2C1; | |||
int32_t matrixR2C2; | |||
int32_t outputBias0; | |||
int32_t outputBias1; | |||
int32_t outputBias2; | |||
int32_t inputBias0; | |||
int32_t inputBias1; | |||
int32_t inputBias2; | |||
int32_t meanChn0; | |||
int32_t meanChn1; | |||
int32_t meanChn2; | |||
int32_t meanChn3; | |||
float minChn0; | |||
float minChn1; | |||
float minChn2; | |||
float minChn3; | |||
float varReciChn0; | |||
float varReciChn1; | |||
float varReciChn2; | |||
float varReciChn3; | |||
aclFormat srcFormat; | |||
aclDataType srcDatatype; | |||
size_t srcDimNum; | |||
size_t shapeCount; | |||
aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; | |||
aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ | |||
aclAippInputFormat inputFormat; | |||
int32_t srcImageSizeW; | |||
int32_t srcImageSizeH; | |||
int8_t cropSwitch; | |||
int32_t loadStartPosW; | |||
int32_t loadStartPosH; | |||
int32_t cropSizeW; | |||
int32_t cropSizeH; | |||
int8_t resizeSwitch; | |||
int32_t resizeOutputW; | |||
int32_t resizeOutputH; | |||
int8_t paddingSwitch; | |||
int32_t leftPaddingSize; | |||
int32_t rightPaddingSize; | |||
int32_t topPaddingSize; | |||
int32_t bottomPaddingSize; | |||
int8_t cscSwitch; | |||
int8_t rbuvSwapSwitch; | |||
int8_t axSwapSwitch; | |||
int8_t singleLineMode; | |||
int32_t matrixR0C0; | |||
int32_t matrixR0C1; | |||
int32_t matrixR0C2; | |||
int32_t matrixR1C0; | |||
int32_t matrixR1C1; | |||
int32_t matrixR1C2; | |||
int32_t matrixR2C0; | |||
int32_t matrixR2C1; | |||
int32_t matrixR2C2; | |||
int32_t outputBias0; | |||
int32_t outputBias1; | |||
int32_t outputBias2; | |||
int32_t inputBias0; | |||
int32_t inputBias1; | |||
int32_t inputBias2; | |||
int32_t meanChn0; | |||
int32_t meanChn1; | |||
int32_t meanChn2; | |||
int32_t meanChn3; | |||
float minChn0; | |||
float minChn1; | |||
float minChn2; | |||
float minChn3; | |||
float varReciChn0; | |||
float varReciChn1; | |||
float varReciChn2; | |||
float varReciChn3; | |||
aclFormat srcFormat; | |||
aclDataType srcDatatype; | |||
size_t srcDimNum; | |||
size_t shapeCount; | |||
aclAippDims outDims[ACL_MAX_SHAPE_COUNT]; | |||
aclAippExtendInfo *aippExtend; /**< reserved parameters, current version needs to be null */ | |||
} aclAippInfo; | |||
/** | |||
@@ -292,8 +292,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlAddDatasetBuffer(aclmdlDataset *dataset, aclD | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, | |||
aclTensorDesc *tensorDesc, | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetDatasetTensorDesc(aclmdlDataset *dataset, aclTensorDesc *tensorDesc, | |||
size_t index); | |||
/** | |||
@@ -355,8 +354,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFile(const char *modelPath, uint32_t | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, | |||
uint32_t *modelId); | |||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelSize, uint32_t *modelId); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -378,9 +376,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMem(const void *model, size_t modelS | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, | |||
uint32_t *modelId, void *workPtr, size_t workSize, | |||
void *weightPtr, size_t weightSize); | |||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, uint32_t *modelId, void *workPtr, | |||
size_t workSize, void *weightPtr, size_t weightSize); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -403,9 +400,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithMem(const char *modelPath, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, | |||
uint32_t *modelId, void *workPtr, size_t workSize, | |||
void *weightPtr, size_t weightSize); | |||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithMem(const void *model, size_t modelSize, uint32_t *modelId, | |||
void *workPtr, size_t workSize, void *weightPtr, | |||
size_t weightSize); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -440,8 +437,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadFromFileWithQ(const char *modelPath, uint | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlLoadFromMemWithQ(const void *model, size_t modelSize, uint32_t *modelId, | |||
const uint32_t *inputQ, size_t inputQNum, | |||
const uint32_t *outputQ, size_t outputQNum); | |||
const uint32_t *inputQ, size_t inputQNum, const uint32_t *outputQ, | |||
size_t outputQNum); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -471,8 +468,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlExecute(uint32_t modelId, const aclmdlDataset | |||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | |||
* aclmdlLoadFromMemWithMem | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, | |||
aclmdlDataset *output, aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclmdlExecuteAsync(uint32_t modelId, const aclmdlDataset *input, aclmdlDataset *output, | |||
aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -647,7 +644,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetCurOutputDims(const aclmdlDesc *modelDesc, | |||
* @param modelDesc [IN] model description | |||
* @param opName [IN] op name | |||
* @param attr [IN] attr name | |||
* | |||
* | |||
* @retval the attr value | |||
*/ | |||
ACL_FUNC_VISIBILITY const char *aclmdlGetOpAttr(aclmdlDesc *modelDesc, const char *opName, const char *attr); | |||
@@ -859,11 +856,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPInputFormat(aclmdlAIPP *aippParmsSet, | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, | |||
int16_t cscMatrixR0C0, int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, | |||
int16_t cscMatrixR1C0, int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, | |||
int16_t cscMatrixR2C0, int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, int8_t csc_switch, int16_t cscMatrixR0C0, | |||
int16_t cscMatrixR0C1, int16_t cscMatrixR0C2, int16_t cscMatrixR1C0, | |||
int16_t cscMatrixR1C1, int16_t cscMatrixR1C2, int16_t cscMatrixR2C0, | |||
int16_t cscMatrixR2C1, int16_t cscMatrixR2C2, | |||
uint8_t cscOutputBiasR0, uint8_t cscOutputBiasR1, | |||
uint8_t cscOutputBiasR2, uint8_t cscInputBiasR0, | |||
uint8_t cscInputBiasR1, uint8_t cscInputBiasR2); | |||
@@ -879,7 +876,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCscParams(aclmdlAIPP *aippParmsSet, in | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t rbuvSwapSwitch); | |||
/** | |||
@@ -893,7 +890,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPRbuvSwapSwitch(aclmdlAIPP *aippParmsSe | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, int8_t axSwapSwitch); | |||
/** | |||
@@ -908,7 +905,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPAxSwapSwitch(aclmdlAIPP *aippParmsSet, | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, int32_t srcImageSizeW, | |||
int32_t srcImageSizeH); | |||
@@ -928,14 +925,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPSrcImageSize(aclmdlAIPP *aippParmsSet, | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, | |||
int8_t scfSwitch, | |||
int32_t scfInputSizeW, | |||
int32_t scfInputSizeH, | |||
int32_t scfOutputSizeW, | |||
int32_t scfOutputSizeH, | |||
uint64_t batchIndex); | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, int8_t scfSwitch, int32_t scfInputSizeW, | |||
int32_t scfInputSizeH, int32_t scfOutputSizeW, | |||
int32_t scfOutputSizeH, uint64_t batchIndex); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -953,13 +946,9 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPScfParams(aclmdlAIPP *aippParmsSet, | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, | |||
int8_t cropSwitch, | |||
int32_t cropStartPosW, | |||
int32_t cropStartPosH, | |||
int32_t cropSizeW, | |||
int32_t cropSizeH, | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, int8_t cropSwitch, int32_t cropStartPosW, | |||
int32_t cropStartPosH, int32_t cropSizeW, int32_t cropSizeH, | |||
uint64_t batchIndex); | |||
/** | |||
@@ -978,7 +967,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPCropParams(aclmdlAIPP *aippParmsSet, | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet, int8_t paddingSwitch, | |||
int32_t paddingSizeTop, int32_t paddingSizeBottom, | |||
int32_t paddingSizeLeft, int32_t paddingSizeRight, | |||
@@ -999,13 +988,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPaddingParams(aclmdlAIPP *aippParmsSet | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, | |||
int16_t dtcPixelMeanChn0, | |||
int16_t dtcPixelMeanChn1, | |||
int16_t dtcPixelMeanChn2, | |||
int16_t dtcPixelMeanChn3, | |||
uint64_t batchIndex); | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, int16_t dtcPixelMeanChn0, | |||
int16_t dtcPixelMeanChn1, int16_t dtcPixelMeanChn2, | |||
int16_t dtcPixelMeanChn3, uint64_t batchIndex); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1022,13 +1008,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMean(aclmdlAIPP *aippParmsSet, | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, | |||
float dtcPixelMinChn0, | |||
float dtcPixelMinChn1, | |||
float dtcPixelMinChn2, | |||
float dtcPixelMinChn3, | |||
uint64_t batchIndex); | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, float dtcPixelMinChn0, | |||
float dtcPixelMinChn1, float dtcPixelMinChn2, | |||
float dtcPixelMinChn3, uint64_t batchIndex); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1045,13 +1028,10 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPDtcPixelMin(aclmdlAIPP *aippParmsSet, | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclmdlCreateAIPP | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, | |||
float dtcPixelVarReciChn0, | |||
float dtcPixelVarReciChn1, | |||
float dtcPixelVarReciChn2, | |||
float dtcPixelVarReciChn3, | |||
uint64_t batchIndex); | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, float dtcPixelVarReciChn0, | |||
float dtcPixelVarReciChn1, float dtcPixelVarReciChn2, | |||
float dtcPixelVarReciChn3, uint64_t batchIndex); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1067,10 +1047,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPPixelVarReci(aclmdlAIPP *aippParmsSet, | |||
* | |||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | |||
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, | |||
aclmdlDataset *dataset, | |||
size_t index, | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, aclmdlDataset *dataset, size_t index, | |||
const aclmdlAIPP *aippParmsSet); | |||
/** | |||
@@ -1087,10 +1065,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetInputAIPP(uint32_t modelId, | |||
* | |||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | |||
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, | |||
aclmdlDataset *dataset, | |||
size_t index, | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, aclmdlDataset *dataset, size_t index, | |||
const aclmdlAIPP *aippParmsSet); | |||
/** | |||
@@ -1108,10 +1084,8 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetAIPPByInputIndex(uint32_t modelId, | |||
* | |||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | |||
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | aclmdlCreateAIPP | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, | |||
size_t index, | |||
aclmdlInputAippType *type, | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, size_t index, aclmdlInputAippType *type, | |||
size_t *dynamicAttachedDataIndex); | |||
/** | |||
@@ -1128,7 +1102,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetAippType(uint32_t modelId, | |||
* | |||
* @see aclmdlLoadFromFile | aclmdlLoadFromMem | aclmdlLoadFromFileWithMem | | |||
* aclmdlLoadFromMemWithMem | aclmdlGetInputIndexByName | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t index, aclAippInfo *aippinfo); | |||
/** | |||
@@ -1147,10 +1121,11 @@ ACL_FUNC_VISIBILITY aclError aclmdlGetFirstAippInfo(uint32_t modelId, size_t ind | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, | |||
uint32_t taskId, char *opName, size_t opNameLen, aclTensorDesc **inputDesc, size_t *numInputs, | |||
aclTensorDesc **outputDesc, size_t *numOutputs); | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_t streamId, uint32_t taskId, | |||
char *opName, size_t opNameLen, aclTensorDesc **inputDesc, | |||
size_t *numInputs, aclTensorDesc **outputDesc, | |||
size_t *numOutputs); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1158,7 +1133,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlCreateAndGetOpDesc(uint32_t deviceId, uint32_ | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); | |||
/** | |||
@@ -1169,7 +1144,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlInitDump(); | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); | |||
/** | |||
@@ -1178,7 +1153,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlSetDump(const char *dumpCfgPath); | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); | |||
/** | |||
@@ -1190,7 +1165,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlFinalizeDump(); | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *handle, uint32_t *modelId); | |||
/** | |||
@@ -1200,7 +1175,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlLoadWithConfig(const aclmdlConfigHandle *hand | |||
* @retval the aclmdlConfigHandle pointer | |||
* | |||
* @see aclmdlDestroyConfigHandle | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY aclmdlConfigHandle *aclmdlCreateConfigHandle(); | |||
/** | |||
@@ -1229,7 +1204,7 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr, | |||
const void *attrValue, size_t valueSize); | |||
const void *attrValue, size_t valueSize); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1247,4 +1222,4 @@ ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelD | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ | |||
#endif // INC_EXTERNAL_ACL_ACL_MODEL_H_ |
@@ -33,9 +33,9 @@ typedef void (*aclDataDeallocator)(void *data, size_t length); | |||
static const int ACL_COMPILE_FLAG_BIN_SELECTOR = 1; | |||
typedef enum aclEngineType { | |||
ACL_ENGINE_SYS, | |||
ACL_ENGINE_AICORE, | |||
ACL_ENGINE_VECTOR, | |||
ACL_ENGINE_SYS, | |||
ACL_ENGINE_AICORE, | |||
ACL_ENGINE_VECTOR, | |||
} aclopEngineType; | |||
/** | |||
@@ -148,7 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrString(aclopAttr *attr, const char *att | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *attrName, int numValues, | |||
const uint8_t *values); | |||
const uint8_t *values); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -163,7 +163,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListBool(aclopAttr *attr, const char *a | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *attrName, int numValues, | |||
const int64_t *values); | |||
const int64_t *values); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -178,7 +178,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListInt(aclopAttr *attr, const char *at | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char *attrName, int numValues, | |||
const float *values); | |||
const float *values); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -193,7 +193,7 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListFloat(aclopAttr *attr, const char * | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char *attrName, int numValues, | |||
const char **values); | |||
const char **values); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -208,11 +208,8 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListString(aclopAttr *attr, const char | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, | |||
const char *attrName, | |||
int numLists, | |||
const int *numValues, | |||
const int64_t *const values[]); | |||
ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, const char *attrName, int numLists, | |||
const int *numValues, const int64_t *const values[]); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -242,15 +239,10 @@ ACL_FUNC_VISIBILITY aclError aclopSetAttrListListInt(aclopAttr *attr, | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_DEPRECATED_MESSAGE("aclopExecute is deprecated, use aclopExecuteV2 instead") | |||
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, | |||
int numInputs, | |||
const aclTensorDesc *const inputDesc[], | |||
const aclDataBuffer *const inputs[], | |||
int numOutputs, | |||
const aclTensorDesc *const outputDesc[], | |||
aclDataBuffer *const outputs[], | |||
const aclopAttr *attr, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], | |||
const aclDataBuffer *const inputs[], int numOutputs, | |||
const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], | |||
const aclopAttr *attr, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -280,15 +272,9 @@ ACL_FUNC_VISIBILITY aclError aclopExecute(const char *opType, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, | |||
int numInputs, | |||
aclTensorDesc *inputDesc[], | |||
aclDataBuffer *inputs[], | |||
int numOutputs, | |||
aclTensorDesc *outputDesc[], | |||
aclDataBuffer *outputs[], | |||
aclopAttr *attr, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[], | |||
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], | |||
aclDataBuffer *outputs[], aclopAttr *attr, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -306,12 +292,9 @@ ACL_FUNC_VISIBILITY aclError aclopExecuteV2(const char *opType, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, | |||
int numInputs, | |||
const aclTensorDesc *const inputDesc[], | |||
int numOutputs, | |||
const aclTensorDesc *const outputDesc[], | |||
const aclopAttr *opAttr, | |||
ACL_FUNC_VISIBILITY aclError aclopCreateHandle(const char *opType, int numInputs, | |||
const aclTensorDesc *const inputDesc[], int numOutputs, | |||
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, | |||
aclopHandle **handle); | |||
/** | |||
@@ -343,12 +326,9 @@ ACL_FUNC_VISIBILITY void aclopDestroyHandle(aclopHandle *handle); | |||
* | |||
* @see aclopCreateHandle | aclCreateDataBuffer | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, | |||
int numInputs, | |||
const aclDataBuffer *const inputs[], | |||
int numOutputs, | |||
aclDataBuffer *const outputs[], | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, int numInputs, | |||
const aclDataBuffer *const inputs[], int numOutputs, | |||
aclDataBuffer *const outputs[], aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -364,11 +344,8 @@ ACL_FUNC_VISIBILITY aclError aclopExecWithHandle(aclopHandle *handle, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, | |||
const aclDataBuffer *srcBuffer, | |||
const aclTensorDesc *dstDesc, | |||
aclDataBuffer *dstBuffer, | |||
uint8_t truncate, | |||
ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, const aclDataBuffer *srcBuffer, | |||
const aclTensorDesc *dstDesc, aclDataBuffer *dstBuffer, uint8_t truncate, | |||
aclrtStream stream); | |||
/** | |||
@@ -383,12 +360,9 @@ ACL_FUNC_VISIBILITY aclError aclopCast(const aclTensorDesc *srcDesc, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, | |||
aclTensorDesc *dstDesc, | |||
uint8_t truncate, | |||
ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, aclTensorDesc *dstDesc, uint8_t truncate, | |||
aclopHandle **handle); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief create kernel | |||
@@ -407,15 +381,10 @@ ACL_FUNC_VISIBILITY aclError aclopCreateHandleForCast(aclTensorDesc *srcDesc, | |||
* | |||
* @see aclopCompile | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, | |||
const char *kernelId, | |||
const char *kernelName, | |||
void *binData, | |||
int binSize, | |||
aclopEngineType enginetype, | |||
ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, const char *kernelId, const char *kernelName, | |||
void *binData, int binSize, aclopEngineType enginetype, | |||
aclDataDeallocator deallocator); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief create kernel | |||
@@ -430,11 +399,8 @@ ACL_FUNC_VISIBILITY aclError aclopCreateKernel(const char *opType, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
typedef aclError (*aclopCompileFunc)(int numInputs, | |||
const aclTensorDesc *const inputDesc[], | |||
int numOutputs, | |||
const aclTensorDesc *const outputDesc[], | |||
const aclopAttr *opAttr, | |||
typedef aclError (*aclopCompileFunc)(int numInputs, const aclTensorDesc *const inputDesc[], int numOutputs, | |||
const aclTensorDesc *const outputDesc[], const aclopAttr *opAttr, | |||
aclopKernelDesc *aclopKernelDesc); | |||
/** | |||
@@ -475,11 +441,8 @@ ACL_FUNC_VISIBILITY aclError aclopUnregisterCompileFunc(const char *opType); | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, | |||
const char *kernelId, | |||
uint32_t blockDim, | |||
const void *args, | |||
uint32_t argSize); | |||
ACL_FUNC_VISIBILITY aclError aclopSetKernelArgs(aclopKernelDesc *kernelDesc, const char *kernelId, uint32_t blockDim, | |||
const void *args, uint32_t argSize); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -510,12 +473,9 @@ ACL_FUNC_VISIBILITY aclError aclopSetKernelWorkspaceSizes(aclopKernelDesc *kerne | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, | |||
int numInputs, | |||
const aclTensorDesc *const inputDesc[], | |||
int numOutputs, | |||
const aclTensorDesc *const outputDesc[], | |||
const aclopAttr *attr); | |||
ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, int numInputs, | |||
const aclTensorDesc *const inputDesc[], int numOutputs, | |||
const aclTensorDesc *const outputDesc[], const aclopAttr *attr); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -533,17 +493,12 @@ ACL_FUNC_VISIBILITY aclError aclopUpdateParams(const char *opType, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, | |||
int numInputs, | |||
aclTensorDesc *inputDesc[], | |||
aclDataBuffer *inputs[], | |||
int numOutputs, | |||
aclTensorDesc *outputDesc[], | |||
ACL_FUNC_VISIBILITY aclError aclopInferShape(const char *opType, int numInputs, aclTensorDesc *inputDesc[], | |||
aclDataBuffer *inputs[], int numOutputs, aclTensorDesc *outputDesc[], | |||
aclopAttr *attr); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_ACL_OP_H_ | |||
#endif // INC_EXTERNAL_ACL_ACL_OP_H_ |
@@ -24,28 +24,22 @@ | |||
extern "C" { | |||
#endif | |||
typedef enum aclCompileType { | |||
ACL_COMPILE_SYS, | |||
ACL_COMPILE_UNREGISTERED | |||
} aclopCompileType; | |||
typedef enum aclCompileType { ACL_COMPILE_SYS, ACL_COMPILE_UNREGISTERED } aclopCompileType; | |||
typedef enum { | |||
ACL_PRECISION_MODE, | |||
ACL_AICORE_NUM, | |||
ACL_AUTO_TUNE_MODE, | |||
ACL_OP_SELECT_IMPL_MODE, | |||
ACL_OPTYPELIST_FOR_IMPLMODE, | |||
ACL_OP_DEBUG_LEVEL, | |||
ACL_DEBUG_DIR, | |||
ACL_OP_COMPILER_CACHE_MODE, | |||
ACL_OP_COMPILER_CACHE_DIR, | |||
ACL_OP_PERFORMANCE_MODE | |||
ACL_PRECISION_MODE, | |||
ACL_AICORE_NUM, | |||
ACL_AUTO_TUNE_MODE, | |||
ACL_OP_SELECT_IMPL_MODE, | |||
ACL_OPTYPELIST_FOR_IMPLMODE, | |||
ACL_OP_DEBUG_LEVEL, | |||
ACL_DEBUG_DIR, | |||
ACL_OP_COMPILER_CACHE_MODE, | |||
ACL_OP_COMPILER_CACHE_DIR, | |||
ACL_OP_PERFORMANCE_MODE | |||
} aclCompileOpt; | |||
typedef enum aclCompileFlag { | |||
ACL_OP_COMPILE_DEFAULT, | |||
ACL_OP_COMPILE_FUZZ | |||
} aclOpCompileFlag; | |||
typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag; | |||
/** | |||
* @ingroup AscendCL | |||
@@ -65,15 +59,10 @@ typedef enum aclCompileFlag { | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, | |||
int numInputs, | |||
const aclTensorDesc *const inputDesc[], | |||
int numOutputs, | |||
const aclTensorDesc *const outputDesc[], | |||
const aclopAttr *attr, | |||
aclopEngineType engineType, | |||
aclopCompileType compileFlag, | |||
const char *opPath); | |||
ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], | |||
int numOutputs, const aclTensorDesc *const outputDesc[], | |||
const aclopAttr *attr, aclopEngineType engineType, | |||
aclopCompileType compileFlag, const char *opPath); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -96,11 +85,10 @@ ACL_FUNC_VISIBILITY aclError aclopCompile(const char *opType, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute(const char *opType, | |||
int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], | |||
const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag, | |||
const char *opPath, aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( | |||
const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, | |||
aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -130,4 +118,4 @@ ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag); | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ | |||
#endif // INC_EXTERNAL_ACL_ACL_OP_COMPILER_H_ |
@@ -23,32 +23,31 @@ | |||
extern "C" { | |||
#endif | |||
#define ACL_PROF_ACL_API 0x0001 | |||
#define ACL_PROF_TASK_TIME 0x0002 | |||
#define ACL_PROF_AICORE_METRICS 0x0004 | |||
#define ACL_PROF_AICPU 0x0008 | |||
#define ACL_PROF_ACL_API 0x0001 | |||
#define ACL_PROF_TASK_TIME 0x0002 | |||
#define ACL_PROF_AICORE_METRICS 0x0004 | |||
#define ACL_PROF_AICPU 0x0008 | |||
/** | |||
* @deprecated please use aclprofGetOpTypeLen and aclprofGetOpNameLen instead | |||
*/ | |||
#define ACL_PROF_MAX_OP_NAME_LEN 257 | |||
#define ACL_PROF_MAX_OP_TYPE_LEN 65 | |||
#define ACL_PROF_MAX_OP_NAME_LEN 257 | |||
#define ACL_PROF_MAX_OP_TYPE_LEN 65 | |||
typedef enum { | |||
ACL_AICORE_ARITHMETIC_UTILIZATION = 0, | |||
ACL_AICORE_PIPE_UTILIZATION = 1, | |||
ACL_AICORE_MEMORY_BANDWIDTH = 2, | |||
ACL_AICORE_L0B_AND_WIDTH = 3, | |||
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, | |||
ACL_AICORE_NONE = 0xFF | |||
ACL_AICORE_ARITHMETIC_UTILIZATION = 0, | |||
ACL_AICORE_PIPE_UTILIZATION = 1, | |||
ACL_AICORE_MEMORY_BANDWIDTH = 2, | |||
ACL_AICORE_L0B_AND_WIDTH = 3, | |||
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4, | |||
ACL_AICORE_NONE = 0xFF | |||
} aclprofAicoreMetrics; | |||
typedef enum { | |||
ACL_STEP_START = 0, // step start | |||
ACL_STEP_END = 1 // step end | |||
ACL_STEP_START = 0, // step start | |||
ACL_STEP_END = 1 // step end | |||
} aclprofStepTag; | |||
typedef struct aclprofConfig aclprofConfig; | |||
typedef struct aclprofStopConfig aclprofStopConfig; | |||
typedef struct aclprofAicoreEvents aclprofAicoreEvents; | |||
@@ -108,7 +107,8 @@ ACL_FUNC_VISIBILITY aclError aclprofStart(const aclprofConfig *profilerConfig); | |||
* @see aclprofDestroyConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, uint32_t deviceNums, | |||
aclprofAicoreMetrics aicoreMetrics, aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); | |||
aclprofAicoreMetrics aicoreMetrics, | |||
aclprofAicoreEvents *aicoreEvents, uint64_t dataTypeConfig); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -148,8 +148,7 @@ ACL_FUNC_VISIBILITY aclError aclprofStop(const aclprofConfig *profilerConfig); | |||
* | |||
* @see aclprofModelUnSubscribe | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, | |||
const aclprofSubscribeConfig *profSubscribeConfig); | |||
ACL_FUNC_VISIBILITY aclError aclprofModelSubscribe(uint32_t modelId, const aclprofSubscribeConfig *profSubscribeConfig); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -177,7 +176,7 @@ ACL_FUNC_VISIBILITY aclError aclprofModelUnSubscribe(uint32_t modelId); | |||
* @see aclprofDestroySubscribeConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclprofSubscribeConfig *aclprofCreateSubscribeConfig(int8_t timeInfoSwitch, | |||
aclprofAicoreMetrics aicoreMetrics, void *fd); | |||
aclprofAicoreMetrics aicoreMetrics, void *fd); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -229,7 +228,7 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNum(const void *opInfo, size_t opInfoLe | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opInfoLen, uint32_t index, | |||
size_t *opTypeLen); | |||
size_t *opTypeLen); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -244,8 +243,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpTypeLen(const void *opInfo, size_t opIn | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, | |||
char *opType, size_t opTypeLen); | |||
ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoLen, uint32_t index, char *opType, | |||
size_t opTypeLen); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -260,7 +259,7 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpType(const void *opInfo, size_t opInfoL | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opInfoLen, uint32_t index, | |||
size_t *opNameLen); | |||
size_t *opNameLen); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -275,8 +274,8 @@ ACL_FUNC_VISIBILITY aclError aclprofGetOpNameLen(const void *opInfo, size_t opIn | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, | |||
char *opName, size_t opNameLen); | |||
ACL_FUNC_VISIBILITY aclError aclprofGetOpName(const void *opInfo, size_t opInfoLen, uint32_t index, char *opName, | |||
size_t opNameLen); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -339,28 +338,28 @@ ACL_FUNC_VISIBILITY size_t aclprofGetModelId(const void *opInfo, size_t opInfoLe | |||
* | |||
* @retval 0 for failed | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclprofGetStepTimestamp(aclprofStepInfo* stepInfo, aclprofStepTag tag, aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclprofGetStepTimestamp(aclprofStepInfo *stepInfo, aclprofStepTag tag, aclrtStream stream); | |||
/** | |||
/** | |||
* @ingroup AscendCL | |||
* @brief create pointer to aclprofStepInfo data | |||
* | |||
* | |||
* @retval aclprofStepInfo pointer | |||
*/ | |||
ACL_FUNC_VISIBILITY aclprofStepInfo* aclprofCreateStepInfo(); | |||
ACL_FUNC_VISIBILITY aclprofStepInfo *aclprofCreateStepInfo(); | |||
/** | |||
/** | |||
* @ingroup AscendCL | |||
* @brief destroy aclprofStepInfo pointer | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
ACL_FUNC_VISIBILITY void aclprofDestroyStepInfo(aclprofStepInfo* stepinfo); | |||
ACL_FUNC_VISIBILITY void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_PROF_H_ | |||
#endif // INC_EXTERNAL_ACL_PROF_H_ |
@@ -28,63 +28,63 @@ extern "C" { | |||
#define ACL_EVENT_TIME_LINE 0x00000008u | |||
typedef enum aclrtRunMode { | |||
ACL_DEVICE, | |||
ACL_HOST, | |||
ACL_DEVICE, | |||
ACL_HOST, | |||
} aclrtRunMode; | |||
typedef enum aclrtTsId { | |||
ACL_TS_ID_AICORE = 0, | |||
ACL_TS_ID_AIVECTOR = 1, | |||
ACL_TS_ID_RESERVED = 2, | |||
ACL_TS_ID_AICORE = 0, | |||
ACL_TS_ID_AIVECTOR = 1, | |||
ACL_TS_ID_RESERVED = 2, | |||
} aclrtTsId; | |||
typedef enum aclrtEventStatus { | |||
ACL_EVENT_STATUS_COMPLETE = 0, | |||
ACL_EVENT_STATUS_NOT_READY = 1, | |||
ACL_EVENT_STATUS_RESERVED = 2, | |||
ACL_EVENT_STATUS_COMPLETE = 0, | |||
ACL_EVENT_STATUS_NOT_READY = 1, | |||
ACL_EVENT_STATUS_RESERVED = 2, | |||
} aclrtEventStatus; | |||
typedef enum aclrtCallbackBlockType { | |||
ACL_CALLBACK_NO_BLOCK, | |||
ACL_CALLBACK_BLOCK, | |||
ACL_CALLBACK_NO_BLOCK, | |||
ACL_CALLBACK_BLOCK, | |||
} aclrtCallbackBlockType; | |||
typedef enum aclrtMemcpyKind { | |||
ACL_MEMCPY_HOST_TO_HOST, | |||
ACL_MEMCPY_HOST_TO_DEVICE, | |||
ACL_MEMCPY_DEVICE_TO_HOST, | |||
ACL_MEMCPY_DEVICE_TO_DEVICE, | |||
ACL_MEMCPY_HOST_TO_HOST, | |||
ACL_MEMCPY_HOST_TO_DEVICE, | |||
ACL_MEMCPY_DEVICE_TO_HOST, | |||
ACL_MEMCPY_DEVICE_TO_DEVICE, | |||
} aclrtMemcpyKind; | |||
typedef enum aclrtMemMallocPolicy { | |||
ACL_MEM_MALLOC_HUGE_FIRST, | |||
ACL_MEM_MALLOC_HUGE_ONLY, | |||
ACL_MEM_MALLOC_NORMAL_ONLY, | |||
ACL_MEM_MALLOC_HUGE_FIRST_P2P, | |||
ACL_MEM_MALLOC_HUGE_ONLY_P2P, | |||
ACL_MEM_MALLOC_NORMAL_ONLY_P2P, | |||
ACL_MEM_MALLOC_HUGE_FIRST, | |||
ACL_MEM_MALLOC_HUGE_ONLY, | |||
ACL_MEM_MALLOC_NORMAL_ONLY, | |||
ACL_MEM_MALLOC_HUGE_FIRST_P2P, | |||
ACL_MEM_MALLOC_HUGE_ONLY_P2P, | |||
ACL_MEM_MALLOC_NORMAL_ONLY_P2P, | |||
} aclrtMemMallocPolicy; | |||
typedef enum aclrtMemAttr { | |||
ACL_DDR_MEM, | |||
ACL_HBM_MEM, | |||
ACL_DDR_MEM_HUGE, | |||
ACL_DDR_MEM_NORMAL, | |||
ACL_HBM_MEM_HUGE, | |||
ACL_HBM_MEM_NORMAL, | |||
ACL_DDR_MEM_P2P_HUGE, | |||
ACL_DDR_MEM_P2P_NORMAL, | |||
ACL_HBM_MEM_P2P_HUGE, | |||
ACL_HBM_MEM_P2P_NORMAL, | |||
ACL_DDR_MEM, | |||
ACL_HBM_MEM, | |||
ACL_DDR_MEM_HUGE, | |||
ACL_DDR_MEM_NORMAL, | |||
ACL_HBM_MEM_HUGE, | |||
ACL_HBM_MEM_NORMAL, | |||
ACL_DDR_MEM_P2P_HUGE, | |||
ACL_DDR_MEM_P2P_NORMAL, | |||
ACL_HBM_MEM_P2P_HUGE, | |||
ACL_HBM_MEM_P2P_NORMAL, | |||
} aclrtMemAttr; | |||
typedef enum aclrtGroupAttr { | |||
ACL_GROUP_AICORE_INT, | |||
ACL_GROUP_AIV_INT, | |||
ACL_GROUP_AIC_INT, | |||
ACL_GROUP_SDMANUM_INT, | |||
ACL_GROUP_ASQNUM_INT, | |||
ACL_GROUP_GROUPID_INT | |||
ACL_GROUP_AICORE_INT, | |||
ACL_GROUP_AIV_INT, | |||
ACL_GROUP_AIC_INT, | |||
ACL_GROUP_SDMANUM_INT, | |||
ACL_GROUP_ASQNUM_INT, | |||
ACL_GROUP_GROUPID_INT | |||
} aclrtGroupAttr; | |||
typedef struct tagRtGroupInfo aclrtGroupInfo; | |||
@@ -487,7 +487,7 @@ ACL_FUNC_VISIBILITY aclError aclrtRecordEvent(aclrtEvent event, aclrtStream stre | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream stream); | |||
/** | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Queries an event's status | |||
* | |||
@@ -549,9 +549,7 @@ ACL_FUNC_VISIBILITY aclError aclrtEventElapsedTime(float *ms, aclrtEvent start, | |||
* | |||
* @see aclrtFree | acldvppMalloc | aclrtMallocCached | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, | |||
size_t size, | |||
aclrtMemMallocPolicy policy); | |||
ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, size_t size, aclrtMemMallocPolicy policy); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -574,9 +572,7 @@ ACL_FUNC_VISIBILITY aclError aclrtMalloc(void **devPtr, | |||
* | |||
* @see aclrtFree | aclrtMalloc | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, | |||
size_t size, | |||
aclrtMemMallocPolicy policy); | |||
ACL_FUNC_VISIBILITY aclError aclrtMallocCached(void **devPtr, size_t size, aclrtMemMallocPolicy policy); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -667,10 +663,7 @@ ACL_FUNC_VISIBILITY aclError aclrtFreeHost(void *hostPtr); | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, | |||
size_t destMax, | |||
const void *src, | |||
size_t count, | |||
ACL_FUNC_VISIBILITY aclError aclrtMemcpy(void *dst, size_t destMax, const void *src, size_t count, | |||
aclrtMemcpyKind kind); | |||
/** | |||
@@ -717,38 +710,31 @@ ACL_FUNC_VISIBILITY aclError aclrtMemset(void *devPtr, size_t maxCount, int32_t | |||
* | |||
* @see aclrtSynchronizeStream | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, | |||
size_t destMax, | |||
const void *src, | |||
size_t count, | |||
aclrtMemcpyKind kind, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclrtMemcpyAsync(void *dst, size_t destMax, const void *src, size_t count, | |||
aclrtMemcpyKind kind, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Asynchronous initialize memory | |||
* and set contents of memory to specified value async | |||
* | |||
* @par Function | |||
* @ingroup AscendCL | |||
* @brief Asynchronous initialize memory | |||
* and set contents of memory to specified value async | |||
* | |||
* @par Function | |||
* The memory to be initialized is on the Host or device side, | |||
* and the system determines whether | |||
* it is host or device according to the address | |||
* | |||
* @param devPtr [IN] destination address pointer | |||
* @param maxCount [IN] Max length of destination address memory | |||
* @param value [IN] set value | |||
* @param count [IN] the number of byte to set | |||
* @param stream [IN] asynchronized task stream | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclrtSynchronizeStream | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, | |||
size_t maxCount, | |||
int32_t value, | |||
size_t count, | |||
* @param devPtr [IN] destination address pointer | |||
* @param maxCount [IN] Max length of destination address memory | |||
* @param value [IN] set value | |||
* @param count [IN] the number of byte to set | |||
* @param stream [IN] asynchronized task stream | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclrtSynchronizeStream | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtMemsetAsync(void *devPtr, size_t maxCount, int32_t value, size_t count, | |||
aclrtStream stream); | |||
/** | |||
@@ -894,11 +880,8 @@ ACL_FUNC_VISIBILITY aclError aclrtGetAllGroupInfo(aclrtGroupInfo *groupInfo); | |||
* | |||
* @see aclrtGetGroupCount | aclrtGetAllGroupInfo | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, | |||
int32_t groupIndex, | |||
aclrtGroupAttr attr, | |||
void *attrValue, | |||
size_t valueLen, | |||
ACL_FUNC_VISIBILITY aclError aclrtGetGroupInfoDetail(const aclrtGroupInfo *groupInfo, int32_t groupIndex, | |||
aclrtGroupAttr attr, void *attrValue, size_t valueLen, | |||
size_t *paramRetSize); | |||
/** | |||
@@ -972,5 +955,4 @@ ACL_FUNC_VISIBILITY aclError aclrtSetOpWaitTimeout(uint32_t timeout); | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_ACL_RT_H_ | |||
#endif // INC_EXTERNAL_ACL_ACL_RT_H_ |
@@ -24,10 +24,10 @@ extern "C" { | |||
#endif | |||
enum acltdtTensorType { | |||
ACL_TENSOR_DATA_UNDEFINED = -1, | |||
ACL_TENSOR_DATA_TENSOR, | |||
ACL_TENSOR_DATA_END_OF_SEQUENCE, | |||
ACL_TENSOR_DATA_ABNORMAL | |||
ACL_TENSOR_DATA_UNDEFINED = -1, | |||
ACL_TENSOR_DATA_TENSOR, | |||
ACL_TENSOR_DATA_END_OF_SEQUENCE, | |||
ACL_TENSOR_DATA_ABNORMAL | |||
}; | |||
typedef struct acltdtDataItem acltdtDataItem; | |||
@@ -64,7 +64,7 @@ ACL_FUNC_VISIBILITY aclDataType acltdtGetDataTypeFromItem(const acltdtDataItem * | |||
* | |||
* @retval null for failed | |||
* @retval OtherValues success | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataItem); | |||
/** | |||
@@ -75,7 +75,7 @@ ACL_FUNC_VISIBILITY void *acltdtGetDataAddrFromItem(const acltdtDataItem *dataIt | |||
* | |||
* @retval 0 for failed | |||
* @retval OtherValues success | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataItem); | |||
/** | |||
@@ -86,7 +86,7 @@ ACL_FUNC_VISIBILITY size_t acltdtGetDataSizeFromItem(const acltdtDataItem *dataI | |||
* | |||
* @retval 0 for failed | |||
* @retval OtherValues success | |||
*/ | |||
*/ | |||
ACL_FUNC_VISIBILITY size_t acltdtGetDimNumFromItem(const acltdtDataItem *dataItem); | |||
/** | |||
@@ -118,12 +118,8 @@ ACL_FUNC_VISIBILITY aclError acltdtGetDimsFromItem(const acltdtDataItem *dataIte | |||
* | |||
* @see acltdtDestroyDataItem | |||
*/ | |||
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, | |||
const int64_t *dims, | |||
size_t dimNum, | |||
aclDataType dataType, | |||
void *data, | |||
size_t size); | |||
ACL_FUNC_VISIBILITY acltdtDataItem *acltdtCreateDataItem(acltdtTensorType tdtType, const int64_t *dims, size_t dimNum, | |||
aclDataType dataType, void *data, size_t size); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -254,8 +250,7 @@ ACL_FUNC_VISIBILITY aclError acltdtDestroyChannel(acltdtChannelHandle *handle); | |||
* | |||
* @see acltdtReceiveTensor | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, | |||
const acltdtDataset *dataset, | |||
ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, const acltdtDataset *dataset, | |||
int32_t timeout); | |||
/** | |||
@@ -271,13 +266,11 @@ ACL_FUNC_VISIBILITY aclError acltdtSendTensor(const acltdtChannelHandle *handle, | |||
* | |||
* @see acltdtSendTensor | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, | |||
acltdtDataset *dataset, | |||
ACL_FUNC_VISIBILITY aclError acltdtReceiveTensor(const acltdtChannelHandle *handle, acltdtDataset *dataset, | |||
int32_t timeout); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif //INC_EXTERNAL_ACL_ACL_TDT_H_ | |||
#endif // INC_EXTERNAL_ACL_ACL_TDT_H_ |
@@ -23,87 +23,87 @@ | |||
extern "C" { | |||
#endif | |||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | |||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | |||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -23,17 +23,9 @@ | |||
extern "C" { | |||
#endif | |||
typedef enum aclTransType { | |||
ACL_TRANS_N, | |||
ACL_TRANS_T, | |||
ACL_TRANS_NZ, | |||
ACL_TRANS_NZ_T | |||
} aclTransType; | |||
typedef enum aclTransType { ACL_TRANS_N, ACL_TRANS_T, ACL_TRANS_NZ, ACL_TRANS_NZ_T } aclTransType; | |||
typedef enum aclComputeType { | |||
ACL_COMPUTE_HIGH_PRECISION, | |||
ACL_COMPUTE_LOW_PRECISION | |||
} aclComputeType; | |||
typedef enum aclComputeType { ACL_COMPUTE_HIGH_PRECISION, ACL_COMPUTE_LOW_PRECISION } aclComputeType; | |||
/** | |||
* @ingroup AscendCL | |||
@@ -61,12 +53,11 @@ typedef enum aclComputeType { | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, | |||
const void *alpha, const void *a, int lda, aclDataType dataTypeA, | |||
const void *x, int incx, aclDataType dataTypeX, | |||
const void *beta, void *y, int incy, aclDataType dataTypeY, | |||
aclComputeType type, aclrtStream stream); | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, const void *alpha, const void *a, int lda, | |||
aclDataType dataTypeA, const void *x, int incx, aclDataType dataTypeX, | |||
const void *beta, void *y, int incy, aclDataType dataTypeY, | |||
aclComputeType type, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -83,15 +74,10 @@ ACL_FUNC_VISIBILITY aclError aclblasGemvEx(aclTransType transA, int m, int n, | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, | |||
int m, | |||
int n, | |||
aclDataType dataTypeA, | |||
aclDataType dataTypeX, | |||
aclDataType dataTypeY, | |||
aclComputeType type, | |||
aclopHandle **handle); | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, int m, int n, aclDataType dataTypeA, | |||
aclDataType dataTypeX, aclDataType dataTypeY, | |||
aclComputeType type, aclopHandle **handle); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -115,18 +101,9 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemvEx(aclTransType transA, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, | |||
int m, | |||
int n, | |||
const aclFloat16 *alpha, | |||
const aclFloat16 *a, | |||
int lda, | |||
const aclFloat16 *x, | |||
int incx, | |||
const aclFloat16 *beta, | |||
aclFloat16 *y, | |||
int incy, | |||
aclComputeType type, | |||
ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, int m, int n, const aclFloat16 *alpha, | |||
const aclFloat16 *a, int lda, const aclFloat16 *x, int incx, | |||
const aclFloat16 *beta, aclFloat16 *y, int incy, aclComputeType type, | |||
aclrtStream stream); | |||
/** | |||
@@ -142,10 +119,7 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemv(aclTransType transA, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, | |||
int m, | |||
int n, | |||
aclComputeType type, | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, int m, int n, aclComputeType type, | |||
aclopHandle **handle); | |||
/** | |||
@@ -171,19 +145,9 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemv(aclTransType transA, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, | |||
int m, | |||
int n, | |||
const int32_t *alpha, | |||
const int8_t *a, | |||
int lda, | |||
const int8_t *x, | |||
int incx, | |||
const int32_t *beta, | |||
int32_t *y, | |||
int incy, | |||
aclComputeType type, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, int m, int n, const int32_t *alpha, const int8_t *a, | |||
int lda, const int8_t *x, int incx, const int32_t *beta, int32_t *y, | |||
int incy, aclComputeType type, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -198,10 +162,7 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemv(aclTransType transA, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, | |||
int m, | |||
int n, | |||
aclComputeType type, | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, int m, int n, aclComputeType type, | |||
aclopHandle **handle); | |||
/** | |||
@@ -233,26 +194,11 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemv(aclTransType transA, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, | |||
aclTransType transB, | |||
aclTransType transC, | |||
int m, | |||
int n, | |||
int k, | |||
const void *alpha, | |||
const void *matrixA, | |||
int lda, | |||
aclDataType dataTypeA, | |||
const void *matrixB, | |||
int ldb, | |||
aclDataType dataTypeB, | |||
const void *beta, | |||
void *matrixC, | |||
int ldc, | |||
aclDataType dataTypeC, | |||
aclComputeType type, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||
int k, const void *alpha, const void *matrixA, int lda, | |||
aclDataType dataTypeA, const void *matrixB, int ldb, aclDataType dataTypeB, | |||
const void *beta, void *matrixC, int ldc, aclDataType dataTypeC, | |||
aclComputeType type, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -274,18 +220,10 @@ ACL_FUNC_VISIBILITY aclError aclblasGemmEx(aclTransType transA, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, | |||
aclTransType transB, | |||
aclTransType transC, | |||
int m, | |||
int n, | |||
int k, | |||
aclDataType dataTypeA, | |||
aclDataType dataTypeB, | |||
aclDataType dataTypeC, | |||
aclComputeType type, | |||
aclopHandle **handle); | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, aclTransType transB, aclTransType transC, | |||
int m, int n, int k, aclDataType dataTypeA, | |||
aclDataType dataTypeB, aclDataType dataTypeC, | |||
aclComputeType type, aclopHandle **handle); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -313,22 +251,10 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForGemmEx(aclTransType transA, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, | |||
aclTransType transB, | |||
aclTransType transC, | |||
int m, | |||
int n, | |||
int k, | |||
const aclFloat16 *alpha, | |||
const aclFloat16 *matrixA, | |||
int lda, | |||
const aclFloat16 *matrixB, | |||
int ldb, | |||
const aclFloat16 *beta, | |||
aclFloat16 *matrixC, | |||
int ldc, | |||
aclComputeType type, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||
int k, const aclFloat16 *alpha, const aclFloat16 *matrixA, int lda, | |||
const aclFloat16 *matrixB, int ldb, const aclFloat16 *beta, | |||
aclFloat16 *matrixC, int ldc, aclComputeType type, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -346,13 +272,8 @@ ACL_FUNC_VISIBILITY aclError aclblasHgemm(aclTransType transA, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, | |||
aclTransType transB, | |||
aclTransType transC, | |||
int m, | |||
int n, | |||
int k, | |||
aclComputeType type, | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, aclTransType transB, aclTransType transC, | |||
int m, int n, int k, aclComputeType type, | |||
aclopHandle **handle); | |||
/** | |||
@@ -381,23 +302,10 @@ ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForHgemm(aclTransType transA, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, | |||
aclTransType transB, | |||
aclTransType transC, | |||
int m, | |||
int n, | |||
int k, | |||
const int32_t *alpha, | |||
const int8_t *matrixA, | |||
int lda, | |||
const int8_t *matrixB, | |||
int ldb, | |||
const int32_t *beta, | |||
int32_t *matrixC, | |||
int ldc, | |||
aclComputeType type, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, int m, int n, | |||
int k, const int32_t *alpha, const int8_t *matrixA, int lda, | |||
const int8_t *matrixB, int ldb, const int32_t *beta, int32_t *matrixC, | |||
int ldc, aclComputeType type, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -415,17 +323,12 @@ ACL_FUNC_VISIBILITY aclError aclblasS8gemm(aclTransType transA, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, | |||
aclTransType transB, | |||
aclTransType transC, | |||
int m, | |||
int n, | |||
int k, | |||
aclComputeType type, | |||
ACL_FUNC_VISIBILITY aclError aclblasCreateHandleForS8gemm(aclTransType transA, aclTransType transB, aclTransType transC, | |||
int m, int n, int k, aclComputeType type, | |||
aclopHandle **handle); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ | |||
#endif // INC_EXTERNAL_ACL_OPS_ACL_CBLAS_H_ |
@@ -53,123 +53,109 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output | |||
// Supported Pixel Format | |||
enum acldvppPixelFormat { | |||
PIXEL_FORMAT_YUV_400 = 0, // 0 | |||
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 | |||
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 | |||
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 | |||
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 | |||
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 | |||
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 | |||
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 | |||
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 | |||
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 | |||
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 | |||
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 | |||
PIXEL_FORMAT_RGB_888 = 12, // 12 | |||
PIXEL_FORMAT_BGR_888 = 13, // 13 | |||
PIXEL_FORMAT_ARGB_8888 = 14, // 14 | |||
PIXEL_FORMAT_ABGR_8888 = 15, // 15 | |||
PIXEL_FORMAT_RGBA_8888 = 16, // 16 | |||
PIXEL_FORMAT_BGRA_8888 = 17, // 17 | |||
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 | |||
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 | |||
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 | |||
PIXEL_FORMAT_YVU_PLANAR_422, | |||
PIXEL_FORMAT_YVU_PLANAR_444, | |||
PIXEL_FORMAT_RGB_444 = 23, | |||
PIXEL_FORMAT_BGR_444, | |||
PIXEL_FORMAT_ARGB_4444, | |||
PIXEL_FORMAT_ABGR_4444, | |||
PIXEL_FORMAT_RGBA_4444, | |||
PIXEL_FORMAT_BGRA_4444, | |||
PIXEL_FORMAT_RGB_555, | |||
PIXEL_FORMAT_BGR_555, | |||
PIXEL_FORMAT_RGB_565, | |||
PIXEL_FORMAT_BGR_565, | |||
PIXEL_FORMAT_ARGB_1555, | |||
PIXEL_FORMAT_ABGR_1555, | |||
PIXEL_FORMAT_RGBA_1555, | |||
PIXEL_FORMAT_BGRA_1555, | |||
PIXEL_FORMAT_ARGB_8565, | |||
PIXEL_FORMAT_ABGR_8565, | |||
PIXEL_FORMAT_RGBA_8565, | |||
PIXEL_FORMAT_BGRA_8565, | |||
PIXEL_FORMAT_RGB_BAYER_8BPP = 50, | |||
PIXEL_FORMAT_RGB_BAYER_10BPP, | |||
PIXEL_FORMAT_RGB_BAYER_12BPP, | |||
PIXEL_FORMAT_RGB_BAYER_14BPP, | |||
PIXEL_FORMAT_RGB_BAYER_16BPP, | |||
PIXEL_FORMAT_BGR_888_PLANAR = 70, | |||
PIXEL_FORMAT_HSV_888_PACKAGE, | |||
PIXEL_FORMAT_HSV_888_PLANAR, | |||
PIXEL_FORMAT_LAB_888_PACKAGE, | |||
PIXEL_FORMAT_LAB_888_PLANAR, | |||
PIXEL_FORMAT_S8C1, | |||
PIXEL_FORMAT_S8C2_PACKAGE, | |||
PIXEL_FORMAT_S8C2_PLANAR, | |||
PIXEL_FORMAT_S16C1, | |||
PIXEL_FORMAT_U8C1, | |||
PIXEL_FORMAT_U16C1, | |||
PIXEL_FORMAT_S32C1, | |||
PIXEL_FORMAT_U32C1, | |||
PIXEL_FORMAT_U64C1, | |||
PIXEL_FORMAT_S64C1, | |||
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, | |||
PIXEL_FORMAT_YVU_SEMIPLANAR_440, | |||
PIXEL_FORMAT_FLOAT32, | |||
PIXEL_FORMAT_BUTT, | |||
PIXEL_FORMAT_UNKNOWN = 10000 | |||
PIXEL_FORMAT_YUV_400 = 0, // 0 | |||
PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 | |||
PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 | |||
PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 | |||
PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 | |||
PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 | |||
PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 | |||
PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 | |||
PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 | |||
PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 | |||
PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 | |||
PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 | |||
PIXEL_FORMAT_RGB_888 = 12, // 12 | |||
PIXEL_FORMAT_BGR_888 = 13, // 13 | |||
PIXEL_FORMAT_ARGB_8888 = 14, // 14 | |||
PIXEL_FORMAT_ABGR_8888 = 15, // 15 | |||
PIXEL_FORMAT_RGBA_8888 = 16, // 16 | |||
PIXEL_FORMAT_BGRA_8888 = 17, // 17 | |||
PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 | |||
PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 | |||
PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 | |||
PIXEL_FORMAT_YVU_PLANAR_422, | |||
PIXEL_FORMAT_YVU_PLANAR_444, | |||
PIXEL_FORMAT_RGB_444 = 23, | |||
PIXEL_FORMAT_BGR_444, | |||
PIXEL_FORMAT_ARGB_4444, | |||
PIXEL_FORMAT_ABGR_4444, | |||
PIXEL_FORMAT_RGBA_4444, | |||
PIXEL_FORMAT_BGRA_4444, | |||
PIXEL_FORMAT_RGB_555, | |||
PIXEL_FORMAT_BGR_555, | |||
PIXEL_FORMAT_RGB_565, | |||
PIXEL_FORMAT_BGR_565, | |||
PIXEL_FORMAT_ARGB_1555, | |||
PIXEL_FORMAT_ABGR_1555, | |||
PIXEL_FORMAT_RGBA_1555, | |||
PIXEL_FORMAT_BGRA_1555, | |||
PIXEL_FORMAT_ARGB_8565, | |||
PIXEL_FORMAT_ABGR_8565, | |||
PIXEL_FORMAT_RGBA_8565, | |||
PIXEL_FORMAT_BGRA_8565, | |||
PIXEL_FORMAT_RGB_BAYER_8BPP = 50, | |||
PIXEL_FORMAT_RGB_BAYER_10BPP, | |||
PIXEL_FORMAT_RGB_BAYER_12BPP, | |||
PIXEL_FORMAT_RGB_BAYER_14BPP, | |||
PIXEL_FORMAT_RGB_BAYER_16BPP, | |||
PIXEL_FORMAT_BGR_888_PLANAR = 70, | |||
PIXEL_FORMAT_HSV_888_PACKAGE, | |||
PIXEL_FORMAT_HSV_888_PLANAR, | |||
PIXEL_FORMAT_LAB_888_PACKAGE, | |||
PIXEL_FORMAT_LAB_888_PLANAR, | |||
PIXEL_FORMAT_S8C1, | |||
PIXEL_FORMAT_S8C2_PACKAGE, | |||
PIXEL_FORMAT_S8C2_PLANAR, | |||
PIXEL_FORMAT_S16C1, | |||
PIXEL_FORMAT_U8C1, | |||
PIXEL_FORMAT_U16C1, | |||
PIXEL_FORMAT_S32C1, | |||
PIXEL_FORMAT_U32C1, | |||
PIXEL_FORMAT_U64C1, | |||
PIXEL_FORMAT_S64C1, | |||
PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, | |||
PIXEL_FORMAT_YVU_SEMIPLANAR_440, | |||
PIXEL_FORMAT_FLOAT32, | |||
PIXEL_FORMAT_BUTT, | |||
PIXEL_FORMAT_UNKNOWN = 10000 | |||
}; | |||
// Stream Format | |||
enum acldvppStreamFormat { | |||
H265_MAIN_LEVEL = 0, | |||
H264_BASELINE_LEVEL, | |||
H264_MAIN_LEVEL, | |||
H264_HIGH_LEVEL | |||
}; | |||
enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; | |||
// Supported Channel Mode | |||
enum acldvppChannelMode { | |||
DVPP_CHNMODE_VPC = 1, | |||
DVPP_CHNMODE_JPEGD = 2, | |||
DVPP_CHNMODE_JPEGE = 4 | |||
}; | |||
enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; | |||
// Supported Border Type | |||
enum acldvppBorderType { | |||
BORDER_CONSTANT = 0, | |||
BORDER_REPLICATE, | |||
BORDER_REFLECT, | |||
BORDER_REFLECT_101 | |||
}; | |||
enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; | |||
// Venc parameter type | |||
enum aclvencChannelDescParamType { | |||
ACL_VENC_THREAD_ID_UINT64 = 0, | |||
ACL_VENC_CALLBACK_PTR, | |||
ACL_VENC_PIXEL_FORMAT_UINT32, | |||
ACL_VENC_ENCODE_TYPE_UINT32, | |||
ACL_VENC_PIC_WIDTH_UINT32, | |||
ACL_VENC_PIC_HEIGHT_UINT32, | |||
ACL_VENC_KEY_FRAME_INTERVAL_UINT32, | |||
ACL_VENC_BUF_ADDR_PTR, | |||
ACL_VENC_BUF_SIZE_UINT32, | |||
ACL_VENC_RC_MODE_UINT32, | |||
ACL_VENC_SRC_RATE_UINT32, | |||
ACL_VENC_MAX_BITRATE_UINT32, | |||
ACL_VENC_MAX_IP_PROP_UINT32 | |||
ACL_VENC_THREAD_ID_UINT64 = 0, | |||
ACL_VENC_CALLBACK_PTR, | |||
ACL_VENC_PIXEL_FORMAT_UINT32, | |||
ACL_VENC_ENCODE_TYPE_UINT32, | |||
ACL_VENC_PIC_WIDTH_UINT32, | |||
ACL_VENC_PIC_HEIGHT_UINT32, | |||
ACL_VENC_KEY_FRAME_INTERVAL_UINT32, | |||
ACL_VENC_BUF_ADDR_PTR, | |||
ACL_VENC_BUF_SIZE_UINT32, | |||
ACL_VENC_RC_MODE_UINT32, | |||
ACL_VENC_SRC_RATE_UINT32, | |||
ACL_VENC_MAX_BITRATE_UINT32, | |||
ACL_VENC_MAX_IP_PROP_UINT32 | |||
}; | |||
// Jpeg picture format | |||
enum acldvppJpegFormat { | |||
ACL_JPEG_CSS_444 = 0, | |||
ACL_JPEG_CSS_422, | |||
ACL_JPEG_CSS_420, | |||
ACL_JPEG_CSS_GRAY, | |||
ACL_JPEG_CSS_440, | |||
ACL_JPEG_CSS_411, | |||
ACL_JPEG_CSS_UNKNOWN = 1000 | |||
ACL_JPEG_CSS_444 = 0, | |||
ACL_JPEG_CSS_422, | |||
ACL_JPEG_CSS_420, | |||
ACL_JPEG_CSS_GRAY, | |||
ACL_JPEG_CSS_440, | |||
ACL_JPEG_CSS_411, | |||
ACL_JPEG_CSS_UNKNOWN = 1000 | |||
}; | |||
/** | |||
@@ -523,9 +509,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD | |||
* @retval null for failed. | |||
* @retval other success | |||
*/ | |||
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, | |||
uint32_t right, | |||
uint32_t top, | |||
ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top, | |||
uint32_t bottom); | |||
/** | |||
@@ -604,10 +588,7 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, | |||
uint32_t left, | |||
uint32_t right, | |||
uint32_t top, | |||
ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top, | |||
uint32_t bottom); | |||
/** | |||
@@ -1096,7 +1077,8 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc | |||
* @retval ACL_SUCCESS for success, other for failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, | |||
aclvencChannelDescParamType paramType, size_t length, const void *param); | |||
aclvencChannelDescParamType paramType, size_t length, | |||
const void *param); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1245,7 +1227,8 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne | |||
* @retval ACL_SUCCESS for success, other for failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, | |||
aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param); | |||
aclvencChannelDescParamType paramType, size_t length, | |||
size_t *paramRetSize, void *param); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1545,10 +1528,7 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, | |||
uint32_t size, | |||
uint32_t *width, | |||
uint32_t *height, | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height, | |||
int32_t *components); | |||
/** | |||
@@ -1565,11 +1545,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, | |||
uint32_t size, | |||
uint32_t *width, | |||
uint32_t *height, | |||
int32_t *components, | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width, | |||
uint32_t *height, int32_t *components, | |||
acldvppJpegFormat *format); | |||
/** | |||
@@ -1584,8 +1561,7 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc, | |||
const acldvppJpegeConfig *config, | |||
uint32_t *size); | |||
const acldvppJpegeConfig *config, uint32_t *size); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1599,10 +1575,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, | |||
uint32_t dataSize, | |||
acldvppPixelFormat outputPixelFormat, | |||
uint32_t *decSize); | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize, | |||
acldvppPixelFormat outputPixelFormat, uint32_t *decSize); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1617,11 +1591,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, | |||
uint32_t dataSize, | |||
uint32_t *width, | |||
uint32_t *height, | |||
int32_t *components); | |||
ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width, | |||
uint32_t *height, int32_t *components); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1635,10 +1606,8 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, | |||
uint32_t dataSize, | |||
acldvppPixelFormat outputPixelFormat, | |||
uint32_t *decSize); | |||
ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize, | |||
acldvppPixelFormat outputPixelFormat, uint32_t *decSize); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1702,10 +1671,8 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDe | |||
* @see acldvppCreateChannel | acldvppCreatePicDesc | |||
* | acldvppCreateResizeConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, | |||
acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, | |||
acldvppResizeConfig *resizeConfig, | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig, | |||
aclrtStream stream); | |||
/** | |||
@@ -1741,10 +1708,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, | |||
acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, | |||
acldvppRoiConfig *cropArea, | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, | |||
aclrtStream stream); | |||
/** | |||
@@ -1781,13 +1746,9 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, | |||
acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, | |||
acldvppRoiConfig *cropArea, | |||
acldvppResizeConfig *resizeConfig, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, | |||
acldvppResizeConfig *resizeConfig, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1811,12 +1772,9 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *chann | |||
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, | |||
acldvppBatchPicDesc *srcBatchPicDescs, | |||
uint32_t *roiNums, | |||
uint32_t size, | |||
acldvppBatchPicDesc *dstBatchPicDescs, | |||
acldvppRoiConfig *cropAreas[], | |||
aclrtStream stream); | |||
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, | |||
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, | |||
acldvppRoiConfig *cropAreas[], aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1841,13 +1799,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe | |||
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateDvppConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc, | |||
acldvppBatchPicDesc *srcBatchPicDescs, | |||
uint32_t *roiNums, | |||
uint32_t size, | |||
acldvppBatchPicDesc *dstBatchPicDescs, | |||
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, | |||
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, | |||
acldvppRoiConfig *cropAreas[], | |||
acldvppResizeConfig *resizeConfig, | |||
aclrtStream stream); | |||
acldvppResizeConfig *resizeConfig, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1870,12 +1825,9 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc * | |||
* | |||
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, | |||
acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, | |||
acldvppRoiConfig *cropArea, | |||
acldvppRoiConfig *pasteArea, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, | |||
acldvppRoiConfig *pasteArea, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1899,13 +1851,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha | |||
* | |||
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, | |||
acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, | |||
acldvppRoiConfig *cropArea, | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, | |||
acldvppRoiConfig *pasteArea, | |||
acldvppResizeConfig *resizeConfig, | |||
aclrtStream stream); | |||
acldvppResizeConfig *resizeConfig, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1930,14 +1879,11 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc * | |||
* | |||
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, | |||
acldvppBatchPicDesc *srcBatchPicDescs, | |||
uint32_t *roiNums, | |||
uint32_t size, | |||
acldvppBatchPicDesc *dstBatchPicDescs, | |||
acldvppRoiConfig *cropAreas[], | |||
acldvppRoiConfig *pasteAreas[], | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, | |||
acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, | |||
uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, | |||
acldvppRoiConfig *cropAreas[], | |||
acldvppRoiConfig *pasteAreas[], aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -1963,16 +1909,10 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc * | |||
* | |||
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(acldvppChannelDesc *channelDesc, | |||
acldvppBatchPicDesc *srcBatchPicDescs, | |||
uint32_t *roiNums, | |||
uint32_t size, | |||
acldvppBatchPicDesc *dstBatchPicDescs, | |||
acldvppRoiConfig *cropAreas[], | |||
acldvppRoiConfig *pasteAreas[], | |||
acldvppResizeConfig *resizeConfig, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync( | |||
acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, | |||
acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[], | |||
acldvppResizeConfig *resizeConfig, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2000,11 +1940,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync(acldvppChannelD | |||
* | |||
* @see acldvppCreateChannel | acldvppCreatePicDesc | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, | |||
const void *data, | |||
uint32_t size, | |||
acldvppPicDesc *outputDesc, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, | |||
acldvppPicDesc *outputDesc, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2022,11 +1959,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelD | |||
* | |||
* @see acldvppCreateChannel | acldvppCreateJpegeConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, | |||
acldvppPicDesc *inputDesc, | |||
const void *data, | |||
uint32_t *size, | |||
acldvppJpegeConfig *config, | |||
ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||
const void *data, uint32_t *size, acldvppJpegeConfig *config, | |||
aclrtStream stream); | |||
/** | |||
@@ -2044,11 +1978,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelD | |||
* | |||
* @see acldvppCreateChannel | acldvppCreatePicDesc | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, | |||
const void *data, | |||
uint32_t size, | |||
acldvppPicDesc *outputDesc, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, | |||
acldvppPicDesc *outputDesc, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2103,11 +2034,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe | |||
* | |||
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, | |||
acldvppStreamDesc *input, | |||
acldvppPicDesc *output, | |||
aclvdecFrameConfig *config, | |||
void *userData); | |||
ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, | |||
acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2126,10 +2054,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, | |||
* | |||
* @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, | |||
acldvppStreamDesc *input, | |||
aclvdecFrameConfig *config, | |||
void *userData); | |||
ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, | |||
aclvdecFrameConfig *config, void *userData); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2150,10 +2076,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel | |||
* | |||
* @see acldvppCreateChannel | acldvppCreatePicDesc | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, | |||
acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2175,11 +2099,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha | |||
* | |||
* @see acldvppCreateChannel | acldvppCreatePicDesc | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, | |||
acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, | |||
void *reserve, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2191,8 +2112,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, | |||
uint32_t mode); | |||
ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2227,8 +2147,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, | |||
uint32_t outMode); | |||
ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2325,9 +2244,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap); | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, | |||
uint32_t dim, | |||
uint8_t **data, | |||
ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data, | |||
uint32_t *len); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2345,10 +2262,8 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, | |||
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, | |||
const acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, | |||
const acldvppLutMap *lutMap, | |||
aclrtStream stream); | |||
const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, | |||
const acldvppLutMap *lutMap, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2369,8 +2284,7 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig(); | |||
* | |||
* @retval ACL_SUCCESS for success, other for failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, | |||
uint32_t index, | |||
ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index, | |||
double value); | |||
/** | |||
@@ -2515,10 +2429,8 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor | |||
* @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, | |||
const acldvppPicDesc *inputDesc, | |||
acldvppPicDesc *outputDesc, | |||
const acldvppBorderConfig *borderConfig, | |||
aclrtStream stream); | |||
const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, | |||
const acldvppBorderConfig *borderConfig, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2535,11 +2447,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc | |||
* | |||
* @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, | |||
acldvppPicDesc *srcPicDesc, | |||
acldvppHist *hist, | |||
void *reserve, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc, | |||
acldvppHist *hist, void *reserve, aclrtStream stream); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2548,7 +2457,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel | |||
* @retval null for failed. | |||
* @retval OtherValues success. | |||
*/ | |||
ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist(); | |||
ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist(); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2605,7 +2514,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, | |||
* | |||
* @see acldvppCreateHist | acldvppVpcCalcHistAsync | |||
*/ | |||
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist); | |||
ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -2624,7 +2533,6 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist); | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief dvpp vpc batch crop, resize config and make border. | |||
@@ -2648,18 +2556,13 @@ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); | |||
* | |||
* @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync(acldvppChannelDesc *channelDesc, | |||
acldvppBatchPicDesc *srcBatchPicDescs, | |||
uint32_t *roiNums, | |||
uint32_t size, | |||
acldvppBatchPicDesc *dstBatchPicDescs, | |||
acldvppRoiConfig *cropAreas[], | |||
acldvppBorderConfig *borderCfgs[], | |||
acldvppResizeConfig *resizeConfig, | |||
aclrtStream stream); | |||
ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync( | |||
acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, | |||
acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[], | |||
acldvppResizeConfig *resizeConfig, aclrtStream stream); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ | |||
#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ |
@@ -32,8 +32,8 @@ typedef struct aclfvSearchResult aclfvSearchResult; | |||
// search operation type | |||
enum aclfvSearchType { | |||
SEARCH_1_N, // 1:N operation type | |||
SEARCH_N_M // N:M operation type | |||
SEARCH_1_N, // 1:N operation type | |||
SEARCH_N_M // N:M operation type | |||
}; | |||
/** | |||
@@ -104,7 +104,8 @@ ACL_FUNC_VISIBILITY aclError aclfvSetNMTopNum(aclfvInitPara *initPara, uint32_t | |||
* @retval OtherValues success. | |||
*/ | |||
ACL_FUNC_VISIBILITY aclfvFeatureInfo *aclfvCreateFeatureInfo(uint32_t id0, uint32_t id1, uint32_t offset, | |||
uint32_t featureLen, uint32_t featureCount, uint8_t *featureData, uint32_t featureDataLen); | |||
uint32_t featureLen, uint32_t featureCount, | |||
uint8_t *featureData, uint32_t featureDataLen); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -233,8 +234,9 @@ ACL_FUNC_VISIBILITY aclError aclfvDestroySearchInput(aclfvSearchInput *searchInp | |||
* @retval null for failed. OtherValues success | |||
*/ | |||
ACL_FUNC_VISIBILITY aclfvSearchResult *aclfvCreateSearchResult(uint32_t queryCnt, uint32_t *resultNum, | |||
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, uint32_t *resultOffset, float *resultDistance, | |||
uint32_t dataLen); | |||
uint32_t resultNumDataLen, uint32_t *id0, uint32_t *id1, | |||
uint32_t *resultOffset, float *resultDistance, | |||
uint32_t dataLen); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -343,4 +345,4 @@ ACL_FUNC_VISIBILITY aclError aclfvSearch(aclfvSearchType type, aclfvSearchInput | |||
} | |||
#endif | |||
#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ | |||
#endif // INC_EXTERNAL_ACL_OPS_ACL_RETR_H_ |
@@ -142,7 +142,7 @@ class GE_FUNC_VISIBILITY Session { | |||
/// | |||
Status BuildGraph(uint32_t graphId, const std::vector<InputTensorInfo> &inputs); | |||
Status BuildGraph(uint32_t graphId, const std::vector<ge::Tensor> &inputs); /*lint !e148*/ | |||
Status BuildGraph(uint32_t graphId, const std::vector<ge::Tensor> &inputs); /*lint !e148*/ | |||
/// | |||
/// @ingroup ge_graph | |||
@@ -27,7 +27,7 @@ | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
#endif // __cplusplus | |||
/** | |||
* @brief Initialize HCCL. | |||
@@ -66,14 +66,15 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root | |||
* @param sendBuf A pointer identifying the input data address of the operator. | |||
* @param recvBuf A pointer identifying the output data address of the operator. | |||
* @param count An integer(u64) identifying the number of the output data. | |||
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32. | |||
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, | |||
* float32. | |||
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||
* @param comm A pointer identifying the communication resource based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, | |||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, | |||
HcclComm comm, aclrtStream stream); | |||
/** | |||
* @brief Broadcast operator. | |||
@@ -84,10 +85,10 @@ HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||
* @param root An integer(u32) identifying the root rank in the operator. | |||
* @param comm A pointer identifying the communication resource based on | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||
aclrtStream stream); | |||
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||
aclrtStream stream); | |||
/** | |||
* @brief ReduceScatter operator. | |||
@@ -99,10 +100,10 @@ aclrtStream stream); | |||
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||
* @param comm A pointer identifying the communication resource based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||
/** | |||
* @brief AllGather operator. | |||
@@ -113,16 +114,16 @@ HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
* @param comm A pointer identifying the communication resource based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, | |||
HcclComm comm, aclrtStream stream); | |||
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||
aclrtStream stream); | |||
/** | |||
* @brief Get the rank size of this comm. | |||
* | |||
* @param comm A pointer identifying the communication resource based on. | |||
* @param rankSize A pointer identifying the rank size. | |||
* @return HcclResult | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize); | |||
@@ -131,7 +132,7 @@ extern HcclResult HcclGetRankSize(HcclComm comm, uint32_t *rankSize); | |||
* | |||
* @param comm A pointer identifying the communication resource based on. | |||
* @param rank A pointer identifying the rank id. | |||
* @return HcclResult | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank); | |||
/** | |||
@@ -139,7 +140,7 @@ extern HcclResult HcclGetRankId(HcclComm comm, uint32_t *rank); | |||
* | |||
* @param comm A pointer identifying the communication resource based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclBarrier(HcclComm comm, aclrtStream stream); | |||
@@ -154,5 +155,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm); | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // HCCL_H_ | |||
#endif // __cplusplus | |||
#endif // HCCL_H_ |
@@ -16,10 +16,10 @@ | |||
/** | |||
* @file hccl_types.h | |||
* @brief HCCL data type definition | |||
* | |||
* @brief HCCL data type definition | |||
* | |||
*/ | |||
#ifndef HCCL_TYPES_H_ | |||
#define HCCL_TYPES_H_ | |||
@@ -27,33 +27,33 @@ | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
#endif // __cplusplus | |||
/** | |||
* @brief HCCL functions return value definition | |||
*/ | |||
typedef enum { | |||
HCCL_SUCCESS = 0, /**< success */ | |||
HCCL_E_PARA = 1, /**< parameter error */ | |||
HCCL_E_PTR = 2, /**< empty pointer */ | |||
HCCL_E_MEMORY = 3, /**< memory error */ | |||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||
HCCL_E_RESERVED /**< reserved */ | |||
HCCL_SUCCESS = 0, /**< success */ | |||
HCCL_E_PARA = 1, /**< parameter error */ | |||
HCCL_E_PTR = 2, /**< empty pointer */ | |||
HCCL_E_MEMORY = 3, /**< memory error */ | |||
HCCL_E_INTERNAL = 4, /**< internal error */ | |||
HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ | |||
HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ | |||
HCCL_E_UNAVAIL = 7, /**< resource unavailable */ | |||
HCCL_E_SYSCALL = 8, /**< call system interface error */ | |||
HCCL_E_TIMEOUT = 9, /**< timeout */ | |||
HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ | |||
HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ | |||
HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ | |||
HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ | |||
HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ | |||
HCCL_E_RUNTIME = 15, /**< call runtime api fail */ | |||
HCCL_E_DRV = 16, /**< call driver api fail */ | |||
HCCL_E_PROFILING = 17, /**< call profiling api fail */ | |||
HCCL_E_CCE = 18, /**< call cce api fail */ | |||
HCCL_E_NETWORK = 19, /**< call network api fail */ | |||
HCCL_E_RESERVED /**< reserved */ | |||
} HcclResult; | |||
/** | |||
@@ -65,37 +65,37 @@ typedef void *HcclComm; | |||
* @brief HCCL Reduction operation | |||
*/ | |||
typedef enum { | |||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||
HCCL_REDUCE_MAX = 2, /**< max */ | |||
HCCL_REDUCE_MIN = 3, /**< min */ | |||
HCCL_REDUCE_RESERVED /**< reserved */ | |||
HCCL_REDUCE_SUM = 0, /**< sum */ | |||
HCCL_REDUCE_PROD = 1, /**< prod */ | |||
HCCL_REDUCE_MAX = 2, /**< max */ | |||
HCCL_REDUCE_MIN = 3, /**< min */ | |||
HCCL_REDUCE_RESERVED /**< reserved */ | |||
} HcclReduceOp; | |||
/** | |||
* @brief HCCL data type | |||
*/ | |||
typedef enum { | |||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||
HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ | |||
HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ | |||
HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ | |||
HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ | |||
HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ | |||
HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ | |||
HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ | |||
HCCL_DATA_TYPE_RESERVED /**< reserved */ | |||
} HcclDataType; | |||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||
const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length | |||
/** | |||
* @brief HCCL root info | |||
*/ | |||
typedef struct HcclRootInfoDef { | |||
char internal[HCCL_ROOT_INFO_BYTES]; | |||
char internal[HCCL_ROOT_INFO_BYTES]; | |||
} HcclRootInfo; | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // HCCL_TYPES_H_ | |||
#endif // __cplusplus | |||
#endif // HCCL_TYPES_H_ |
@@ -23,87 +23,87 @@ | |||
extern "C" { | |||
#endif | |||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||
static const int32_t ACL_RT_SUCCESS = 0; // success | |||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | |||
static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid | |||
static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id | |||
static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null | |||
static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context | |||
static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context | |||
static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid | |||
static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal | |||
static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned | |||
static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed | |||
static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed | |||
static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream | |||
static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set | |||
static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create | |||
static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream | |||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | |||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||
static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error | |||
static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow | |||
static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device | |||
static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail | |||
static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission | |||
static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource | |||
static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource | |||
static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource | |||
static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource | |||
static const int32_t ACL_ERROR_RT_NO_CDQ_RESOURCE = 207011; // no cdq resource | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internal error | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream | |||
static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete | |||
static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence | |||
static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete | |||
static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error | |||
static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error | |||
static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support | |||
static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat | |||
static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed | |||
static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout | |||
static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error | |||
static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout | |||
static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception | |||
static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception | |||
static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout | |||
static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception | |||
static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error | |||
static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error | |||
static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error | |||
static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error | |||
static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal | |||
static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init | |||
static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data | |||
static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error | |||
static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate | |||
static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed | |||
static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed | |||
static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconnect | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -125,13 +125,13 @@ GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized. | |||
GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized."); // 1343229963 | |||
// Init module error code definition | |||
GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 | |||
GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 | |||
GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 | |||
GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 | |||
GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 | |||
GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 | |||
GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 | |||
GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 | |||
GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 | |||
GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 | |||
GE_ERRORNO_INIT(GE_PROF_MODE_CONFLICT, 5, | |||
"Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 | |||
"Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 | |||
// Session module error code definition | |||
GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session."); // 1343238144 | |||
@@ -216,8 +216,8 @@ GE_ERRORNO_ENGINE(GE_ENG_FINALIZE_FAILED, 1, "Engine finalize failed."); | |||
GE_ERRORNO_ENGINE(GE_ENG_MEMTYPE_ERROR, 2, "Memory type HBM is necessary when engine is in device"); // 1343246338 | |||
// Optimize error codes | |||
GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 | |||
GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 | |||
GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 | |||
GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 | |||
// Ops module error code definition | |||
GE_ERRORNO_OPS(GE_OPS_KERNEL_STORE_INIT_FAILED, 0, "Failed to initialize OpsKernelInfoStore."); // 1343250432 | |||
@@ -169,6 +169,6 @@ GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, int | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, uint32_t *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, float *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int idx, double *value, const AttrDefMap &attr); | |||
} | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ |
@@ -43,6 +43,11 @@ GE_FUNC_VISIBILITY ge::Status RegProfCtrlCallback(MsprofCtrlCallback func); | |||
GE_FUNC_VISIBILITY ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func); | |||
GE_FUNC_VISIBILITY ge::Status RegProfReporterCallback(MsprofReporterCallback func); | |||
GE_FUNC_VISIBILITY ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len); | |||
/// | |||
/// @brief Output the profiling data of a single operator in PyTorch; this function does not support multithreading | |||
/// @return Status result | |||
/// | |||
GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream); | |||
#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ |
@@ -42,8 +42,9 @@ class GE_FUNC_VISIBILITY ScopeGuard { | |||
if (on_exit_scope_ != nullptr) { | |||
try { | |||
on_exit_scope_(); | |||
} catch (std::bad_function_call &e) { } | |||
catch (...) { } | |||
} catch (std::bad_function_call &e) { | |||
} catch (...) { | |||
} | |||
} | |||
} | |||
} | |||
@@ -52,7 +52,7 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
return s; | |||
} | |||
// lint -esym(551,*) | |||
static std::string &Rtrim(std::string &s) { /*lint !e618*/ | |||
static std::string &Rtrim(std::string &s) { /*lint !e618*/ | |||
#if __cplusplus >= 201103L | |||
(void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); | |||
#else | |||
@@ -28,4 +28,4 @@ GE_FUNC_VISIBILITY Status ParserInitialize(const std::map<std::string, std::stri | |||
// Finalize parser, release all resources | |||
GE_FUNC_VISIBILITY Status ParserFinalize(); | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ | |||
#endif // INC_FRAMEWORK_OMG_PARSER_PARSER_API_H_ |
@@ -1 +1 @@ | |||
Subproject commit d9a260e2b42236ffaf514bc6397116e370506068 | |||
Subproject commit 0a9ebe1c7f7b27554659f39e387110ac30d4a1e6 |
@@ -1 +1 @@ | |||
Subproject commit c074dfa5960d67f2910122d46d4d264dd6554aad | |||
Subproject commit b79ef8ad19c8ab4335a97b2c3668d2776b62ce0a |
@@ -345,6 +345,10 @@ INT32 mmIsDir(const CHAR *fileName) | |||
/// @brief Test stub of mmGetEnv: copies the value of environment variable `name` into `value`.
/// @param [in]  name   name of the environment variable to look up
/// @param [out] value  caller-provided buffer that receives the NUL-terminated value
/// @param [in]  len    capacity of `value` in bytes, including room for the terminator
/// @return 0 when the value was copied, or when the variable is not set (in which case
///         `value` is left untouched); -1 when `name` is null, or when the variable is set
///         but `value` is null or too small to hold it.
INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len)
{
  if (name == nullptr) {
    return -1;
  }
  const char *env = getenv(name);
  if (env == nullptr) {
    // Unset variable: keep the stub's existing "success, buffer untouched" behaviour.
    return 0;
  }
  const size_t env_len = strlen(env);
  // Refuse to write past the caller's buffer; the terminator needs one extra byte.
  if ((value == nullptr) || (env_len >= static_cast<size_t>(len))) {
    return -1;
  }
  memcpy(value, env, env_len + 1);
  return 0;
}
@@ -16,6 +16,7 @@ | |||
#include "toolchain/prof_engine.h" | |||
#include "toolchain/prof_mgr_core.h" | |||
#include "runtime/base.h" | |||
void * ProfMgrStartUp(const ProfMgrCfg *cfg) | |||
{ | |||
@@ -32,3 +33,10 @@ int Msprof::Engine::RegisterEngine(const std::string& module, const Msprof::Engi | |||
return 0; | |||
} | |||
/// Stub: the reporter callback is accepted and ignored; always returns 0.
rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback) {
  (void)callback;  // intentionally unused by the stub
  const rtError_t stub_result = 0;
  return stub_result;
}
/// Stub: neither the registration name nor the callback is used; always returns 0.
rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback) {
  (void)regName;   // intentionally unused by the stub
  (void)callback;  // intentionally unused by the stub
  const rtError_t stub_result = 0;
  return stub_result;
}
@@ -158,6 +158,7 @@ set(COMMON_SRC_FILES | |||
"${GE_CODE_DIR}/ge/opskernel_manager/ops_kernel_builder_manager.cc" | |||
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" | |||
"${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" | |||
"${GE_CODE_DIR}/ge/common/profiling/ge_profiling.cc" | |||
"${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" | |||
"${GE_CODE_DIR}/ge/graph/manager/memory_api.cc" | |||
"${GE_CODE_DIR}/ge/session/inner_session.cc" | |||
@@ -725,7 +726,6 @@ set(PASS_TEST_FILES | |||
"graph/passes/memcpy_addr_async_unittest.cc" | |||
"graph/passes/hccl_continuous_pass_unittest.cc" | |||
"graph/passes/hccl_memcpy_pass_unittest.cc" | |||
) | |||
set(KERNEL_TEST_FILES | |||
@@ -858,7 +858,6 @@ set(HYBRID_TEST_FILES | |||
"hybrid/executor/hybrid_model_async_executor_unittest.cc" | |||
"hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" | |||
"hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" | |||
) | |||
set(OTHERS_TEST_FILES | |||
@@ -886,6 +885,7 @@ add_library(ge_ut_graph STATIC | |||
target_compile_definitions(ge_ut_graph PRIVATE | |||
google=ascend_private | |||
FMK_SUPPORT_DUMP | |||
) | |||
target_compile_options(ge_ut_graph PRIVATE | |||
@@ -349,7 +349,7 @@ class UtestLogicalStreamAllocator : public testing::Test { | |||
/// B --> C(AllReduce) --- D | |||
/// / | |||
/// stream id: 0 A | |||
/// \ | |||
/// \. | |||
/// E --> F(AllReduce) --- G | |||
/// stream id: 2 2 2 | |||
/// | |||
@@ -599,7 +599,7 @@ TEST_F(UtestLogicalStreamAllocator, test_label_not_reusable2) { | |||
/// case of multi-output, then unuse stream | |||
/// sub1 | |||
/// / | \ | |||
/// / | \. | |||
/// sub2 sub3 sub4 | |||
TEST_F(UtestLogicalStreamAllocator, test_multiOut_new_stream) { | |||
SubGraphInfoPtr data = CreateDataSubgraph(); | |||
@@ -624,7 +624,7 @@ TEST_F(UtestLogicalStreamAllocator, test_multiOut_new_stream) { | |||
/// if parallel id is 1, then use stream | |||
/// sub1 | |||
/// / | | \ | |||
/// / | | \. | |||
/// sub2 sub3 sub4 sub5 | |||
TEST_F(UtestLogicalStreamAllocator, test_parallel_one) { | |||
SubGraphInfoPtr data = CreateDataSubgraph(); | |||
@@ -653,7 +653,7 @@ TEST_F(UtestLogicalStreamAllocator, test_parallel_one) { | |||
/// if the param of engine independent is true, then set independent stream | |||
/// sub1 | |||
/// / | | \ | |||
/// / | | \. | |||
/// sub2 sub3 sub4 sub5 | |||
TEST_F(UtestLogicalStreamAllocator, test_independent) { | |||
SubGraphInfoPtr data = CreateDataSubgraph(); | |||
@@ -692,7 +692,7 @@ TEST_F(UtestLogicalStreamAllocator, test_independent) { | |||
/// set stream based on stream label, and then based on independent | |||
/// sub1 | |||
/// / | | \ | |||
/// / | | \. | |||
/// sub2 sub3 sub4 sub5 | |||
TEST_F(UtestLogicalStreamAllocator, test_independent_switch_label) { | |||
SubGraphInfoPtr data = CreateDataSubgraph(); | |||
@@ -36,7 +36,7 @@ class UtestStreamAllocator : public testing::Test { | |||
/// | |||
/// A | |||
/// / \ | |||
/// / \. | |||
/// B C | |||
/// | | | |||
/// D 400 | |||
@@ -116,7 +116,9 @@ TEST_F(UtestTaskGeneratorTest, FindLastBpFromBpNode) { | |||
TaskGenerator task_generator(nullptr, 0); | |||
auto net_output = graph->FindNode("Node_Output"); | |||
// netoutput has no data input, return default value 0 | |||
EXPECT_EQ(task_generator.FindLastBpFromBpNode(graph, net_output), 0); | |||
uint32_t bp_index = 0; | |||
EXPECT_EQ(task_generator.FindLastBpFromBpNode(graph, net_output, bp_index), 0); | |||
EXPECT_EQ(bp_index, 2); | |||
} | |||
TEST_F(UtestTaskGeneratorTest, UpdateOpIsVarAttr) { | |||
@@ -438,4 +438,22 @@ TEST_F(UtestModelManagerModelManager, test_data_input_tensor) { | |||
auto ret = mm.DataInputTensor(model_id,inputs); | |||
EXPECT_EQ(PARAM_INVALID, ret); // HybridDavinciModel::impl_ is null. | |||
} | |||
// Checks LaunchKernelCustAicpuSo in two situations: with no custom AICPU kernel
// registered, and with one registered kernel. In both cases SUCCESS is expected,
// and the kernel registry is expected to be empty after the second launch.
TEST_F(UtestModelManagerModelManager, test_launch_kernel_cust_aicpu) {
  ModelManager mm;

  // cust_aicpu_so_ is empty.
  EXPECT_EQ(mm.LaunchKernelCustAicpuSo("empty_cust_aicpu"), SUCCESS);

  // Register "deleteCustOp"; the entry is expected to be deleted after the launch.
  const uintptr_t resource_id = 1;  // for rtCtxGetCurrent stub
  std::vector<char> kernel_bin(256);
  auto tbe_kernel = std::make_shared<OpKernelBin>("deleteCustOp", std::move(kernel_bin));
  mm.cust_aicpu_so_[resource_id]["deleteCustOp"] = tbe_kernel;
  EXPECT_FALSE(mm.cust_aicpu_so_.empty());

  EXPECT_EQ(mm.LaunchKernelCustAicpuSo("deleteCustOp"), SUCCESS);
  EXPECT_TRUE(mm.cust_aicpu_so_.empty());
}
} // namespace ge |
@@ -55,7 +55,7 @@ class UtestGraphPassesAssertPass : public Test { | |||
}; | |||
/// D E | |||
/// | \ | \ | |||
/// | \ | \. | |||
/// F C G | |||
/// : | : | |||
/// H A I | |||
@@ -134,8 +134,8 @@ TEST_F(UtestGraphPassesAssertPass, assert_pass_test2) { | |||
EXPECT_EQ(graph->FindNode("D"), nullptr); | |||
} | |||
/// E F | |||
/// | \ | \ | |||
/// E F | |||
/// | \ | \. | |||
/// H C -> D G | |||
/// \ | : | |||
/// A I | |||
@@ -130,7 +130,7 @@ class UTESTGraphPassesBasePass : public testing::Test { | |||
/// reshape1 | |||
/// | | |||
/// add1 | |||
/// / \ | |||
/// / \. | |||
/// | | | |||
/// data1 const1 | |||
ComputeGraphPtr BuildGraph1() { | |||
@@ -148,9 +148,9 @@ ComputeGraphPtr BuildGraph1() { | |||
} | |||
/// sum1 | |||
/// / \ | |||
/// / \ | |||
/// / \ | |||
/// / \. | |||
/// / \. | |||
/// / \. | |||
/// reshape1 addn1 | |||
/// | c | | |||
/// add1 <--- shape1 | |||
@@ -217,7 +217,7 @@ void CheckIterOrder(UtestTestPass *pass, std::vector<std::unordered_set<std::str | |||
/// Op1 | |||
/// | | |||
/// Merge | |||
/// / \ | |||
/// / \. | |||
/// Op2 Op3 | |||
TEST_F(UTESTGraphPassesBasePass, del_isolate_fail) { | |||
auto builder = ut::GraphBuilder("g1"); | |||
@@ -245,7 +245,7 @@ TEST_F(UTESTGraphPassesBasePass, del_isolate_fail) { | |||
/// Op1 | |||
/// | | |||
/// Merge | |||
/// / \ | |||
/// / \. | |||
/// Op2 Op3 | |||
TEST_F(UTESTGraphPassesBasePass, del_isolate_success) { | |||
auto builder = ut::GraphBuilder("g1"); | |||
@@ -459,7 +459,7 @@ TEST_F(UTESTGraphPassesBasePass, while_loop) { | |||
/// data1 const | |||
/// \ / | |||
/// while | |||
/// / \ | |||
/// / \. | |||
/// | | | |||
/// cast1 cast2 | |||
ComputeGraphPtr BuildWhileGraph1() { | |||
@@ -34,11 +34,11 @@ namespace { | |||
/// net_output | |||
/// | | |||
/// merge | |||
/// / \ | |||
/// / \. | |||
/// square add | |||
/// F| T/ T\ | |||
/// F| T/ T\. | |||
/// switch1 switch2 | |||
/// / \ / \ | |||
/// / \ / \. | |||
/// var1 var2 var3 | |||
/// | |||
ComputeGraphPtr BuildGraph1() { | |||
@@ -173,8 +173,8 @@ namespace { | |||
/// shapeNo1 | |||
/// | | |||
/// addnYes1 | |||
/// / \ | |||
/// / \ | |||
/// / \. | |||
/// / \. | |||
/// const1 const2 | |||
ComputeGraphPtr BuildGraph1() { | |||
auto builder = ut::GraphBuilder("test"); | |||
@@ -223,8 +223,8 @@ ComputeGraphPtr BuildGraph2() { | |||
/// shapeNo1 | |||
/// | c | |||
/// addnYes1 <----- dataNo1 | |||
/// / \ | |||
/// / \ | |||
/// / \. | |||
/// / \. | |||
/// const1 const2 | |||
ComputeGraphPtr BuildGraph3() { | |||
auto builder = ut::GraphBuilder("test"); | |||
@@ -249,8 +249,8 @@ ComputeGraphPtr BuildGraph3() { | |||
/// shapeNo1 | |||
/// | c | |||
/// addnYes1 <--------- | |||
/// / \ \ | |||
/// / \ c \ | |||
/// / \ \. | |||
/// / \ c \. | |||
/// const1 const2 <----- dataNo1 | |||
ComputeGraphPtr BuildGraph4() { | |||
auto builder = ut::GraphBuilder("test"); | |||
@@ -276,7 +276,7 @@ ComputeGraphPtr BuildGraph4() { | |||
/// shapeNo1 | |||
/// | c | |||
/// addnYes1 <----- dataNo1 | |||
/// / \ | |||
/// / \. | |||
/// / \ c | |||
/// const1 const2 <----- dataNo2 | |||
ComputeGraphPtr BuildGraph5() { | |||
@@ -306,8 +306,8 @@ ComputeGraphPtr BuildGraph5() { | |||
/// addYes1 <---- const3 | |||
/// | | |||
/// addnYes1 <- | |||
/// / \ \ | |||
/// / \ \ | |||
/// / \ \. | |||
/// / \ \. | |||
/// const1 const2 const4 | |||
ComputeGraphPtr BuildGraph6() { | |||
auto builder = ut::GraphBuilder("test"); | |||
@@ -332,12 +332,12 @@ ComputeGraphPtr BuildGraph6() { | |||
} | |||
/// netoutput1 | |||
/// / \ | |||
/// / \. | |||
/// shapeNo1 ShpaeNo2 | |||
/// \ / | |||
/// huberLoss1 | |||
/// / | \ | |||
/// / | \ | |||
/// / | \. | |||
/// / | \. | |||
/// const1 const2 const3 | |||
ComputeGraphPtr BuildGraph7() { | |||
auto builder = ut::GraphBuilder("test"); | |||
@@ -365,8 +365,8 @@ ComputeGraphPtr BuildGraph7() { | |||
/// shapeNo1 | |||
/// | | |||
/// addnNo1 | |||
/// / \ | |||
/// / \ | |||
/// / \. | |||
/// / \. | |||
/// const1 const2 | |||
ComputeGraphPtr BuildGraph8() { | |||
auto builder = ut::GraphBuilder("test"); | |||
@@ -389,8 +389,8 @@ ComputeGraphPtr BuildGraph8() { | |||
/// shapeNo1 | |||
/// | | |||
/// addnYes1 | |||
/// / \ | |||
/// / \ | |||
/// / \. | |||
/// / \. | |||
/// const1 data1 | |||
ComputeGraphPtr BuildGraph9() { | |||
auto builder = ut::GraphBuilder("test"); | |||
@@ -409,12 +409,12 @@ ComputeGraphPtr BuildGraph9() { | |||
} | |||
/// netoutput1 | |||
/// / \ | |||
/// / \. | |||
/// addDim sqrt1 | |||
/// \ / | |||
/// switch1 | |||
/// / \ | |||
/// / \ | |||
/// / \. | |||
/// / \. | |||
/// const1 const2 | |||
ComputeGraphPtr BuildGraph10() { | |||
auto builder = ut::GraphBuilder("test"); | |||
@@ -63,8 +63,8 @@ namespace { | |||
/// shapeNo1 | |||
/// | | |||
/// addnNo1 | |||
/// / \ | |||
/// / \ | |||
/// / \. | |||
/// / \. | |||
/// const1 const2 | |||
ComputeGraphPtr BuildGraph8() { | |||
auto builder = ut::GraphBuilder("test"); | |||
@@ -87,8 +87,8 @@ ComputeGraphPtr BuildGraph8() { | |||
/// shapeNo1 | |||
/// | | |||
/// addnYes1 | |||
/// / \ | |||
/// / \ | |||
/// / \. | |||
/// / \. | |||
///const1 data1 | |||
ComputeGraphPtr BuildGraph9() { | |||
auto builder = ut::GraphBuilder("test"); | |||
@@ -64,6 +64,7 @@ class UtestGraphPassesFoldingKernelFillKernel : public testing::Test { | |||
op_desc_ptr->AddInputDesc(dims_tensor_desc); | |||
op_desc_ptr->AddInputDesc(value_tensor_desc); | |||
op_desc_ptr->AddOutputDesc(dims_tensor_desc); | |||
std::vector<ge::ConstGeTensorPtr> input = {dim_tensor, value_tensor}; | |||
std::vector<GeTensorPtr> outputs; | |||
@@ -124,6 +125,7 @@ TEST_F(UtestGraphPassesFoldingKernelFillKernel, FillBoolShape2And3) { | |||
op_desc_ptr->AddInputDesc(dims_tensor_desc); | |||
op_desc_ptr->AddInputDesc(value_tensor_desc); | |||
op_desc_ptr->AddOutputDesc(dims_tensor_desc); | |||
std::vector<ge::ConstGeTensorPtr> input = {dim_tensor, value_tensor}; | |||
std::vector<GeTensorPtr> outputs; | |||
@@ -230,6 +232,7 @@ TEST_F(UtestGraphPassesFoldingKernelFillKernel, FillDimsHaveNegativeNumber) { | |||
op_desc_ptr->AddInputDesc(dims_tensor_desc); | |||
op_desc_ptr->AddInputDesc(value_tensor_desc); | |||
op_desc_ptr->AddOutputDesc(dims_tensor_desc); | |||
std::vector<ge::ConstGeTensorPtr> input = {dim_tensor, value_tensor}; | |||
std::vector<GeTensorPtr> outputs; | |||
@@ -284,6 +287,7 @@ TEST_F(UtestGraphPassesFoldingKernelFillKernel, FillDimsTypeNotSupport) { | |||
op_desc_ptr->AddInputDesc(dims_tensor_desc); | |||
op_desc_ptr->AddInputDesc(value_tensor_desc); | |||
op_desc_ptr->AddOutputDesc(dims_tensor_desc); | |||
std::vector<ge::ConstGeTensorPtr> input = {dim_tensor, value_tensor}; | |||
std::vector<GeTensorPtr> outputs; | |||
@@ -310,6 +314,7 @@ TEST_F(UtestGraphPassesFoldingKernelFillKernel, FillDimsOverflow) { | |||
op_desc_ptr->AddInputDesc(dims_tensor_desc); | |||
op_desc_ptr->AddInputDesc(value_tensor_desc); | |||
op_desc_ptr->AddOutputDesc(dims_tensor_desc); | |||
std::vector<ge::ConstGeTensorPtr> input = {dim_tensor, value_tensor}; | |||
std::vector<GeTensorPtr> outputs; | |||
@@ -336,6 +341,7 @@ TEST_F(UtestGraphPassesFoldingKernelFillKernel, FillDimsMulDataTypeOverflow) { | |||
op_desc_ptr->AddInputDesc(dims_tensor_desc); | |||
op_desc_ptr->AddInputDesc(value_tensor_desc); | |||
op_desc_ptr->AddOutputDesc(dims_tensor_desc); | |||
std::vector<ge::ConstGeTensorPtr> input = {dim_tensor, value_tensor}; | |||
std::vector<GeTensorPtr> outputs; | |||
@@ -343,3 +349,33 @@ TEST_F(UtestGraphPassesFoldingKernelFillKernel, FillDimsMulDataTypeOverflow) { | |||
EXPECT_EQ(PARAM_INVALID, status); | |||
} | |||
// Fill kernel given an output desc whose shape is {-1, -1}: Compute is expected
// to return NOT_CHANGED.
TEST_F(UtestGraphPassesFoldingKernelFillKernel, OutputdescUnknown) {
  // "dims" input: INT32 tensor of shape {2} holding the values {2, 3}.
  ge::OpDescPtr op_dims = std::make_shared<ge::OpDesc>();
  std::vector<int64_t> dims_vec{2};
  std::vector<int32_t> dims_value_vec{2, 3};
  GeTensorDesc dims_tensor_desc(GeShape(dims_vec), FORMAT_NCHW, DT_INT32);
  const size_t dims_bytes = dims_value_vec.size() * sizeof(int32_t);
  GeTensorPtr dim_tensor = std::make_shared<GeTensor>(
      dims_tensor_desc, reinterpret_cast<uint8_t *>(dims_value_vec.data()), dims_bytes);
  OpDescUtils::SetWeights(op_dims, dim_tensor);

  // "value" input: BOOL tensor with a default-constructed shape, holding a single 1.
  ge::OpDescPtr op_value = std::make_shared<ge::OpDesc>();
  std::vector<uint8_t> data_vec{1};
  GeTensorDesc value_tensor_desc(GeShape(), FORMAT_NCHW, DT_BOOL);
  const size_t value_bytes = data_vec.size() * sizeof(bool);
  GeTensorPtr value_tensor = std::make_shared<GeTensor>(value_tensor_desc, data_vec.data(), value_bytes);
  OpDescUtils::SetWeights(op_value, value_tensor);

  op_desc_ptr->AddInputDesc(dims_tensor_desc);
  op_desc_ptr->AddInputDesc(value_tensor_desc);

  // Output desc with both dimensions set to -1.
  std::vector<int64_t> out_vec{-1, -1};
  GeTensorDesc out_tensor_desc(GeShape(out_vec), FORMAT_NCHW, DT_INT32);
  op_desc_ptr->AddOutputDesc(out_tensor_desc);

  std::vector<ge::ConstGeTensorPtr> input{dim_tensor, value_tensor};
  std::vector<GeTensorPtr> outputs;
  const Status status = kernel->Compute(op_desc_ptr, input, outputs);

  EXPECT_EQ(NOT_CHANGED, status);
}
@@ -46,7 +46,7 @@ class UtestGraphPassesFoldingKernelSsdPriorboxKernel : public testing::Test { | |||
/// convolution data | |||
/// | / | |||
/// ssdpriorbox | |||
/// \ | |||
/// \. | |||
/// reshape | |||
class NodeBuilder { | |||
public: | |||
@@ -120,7 +120,7 @@ TEST_F(UtestFuseDataNodesWithCommonInputPass, graph_with_subgraph1) { | |||
/// graph with subgraph | |||
/// const | |||
/// / \ | |||
/// / \. | |||
/// cast1 cast1 | |||
/// \ / | |||
/// case | |||
@@ -69,62 +69,100 @@ static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string | |||
return graph.AddNode(op_desc); | |||
} | |||
static void CreateLoopGraph(ComputeGraphPtr &graph, NodePtr &merge) { | |||
static void CreateLoopGraph(ComputeGraphPtr &graph, NodePtr &merge, vector<NodePtr> &loop, vector<NodePtr> &cond) { | |||
/******************************************************************************* | |||
* Exit Identify | |||
* \ / \. | |||
* \ / \. | |||
* Switch Add | |||
* / | | | |||
* / | | | |||
* / | | | |||
* LoopCond | | | |||
* \ | | | |||
* \ | | | |||
* \ | | | |||
* Less | | | |||
* \ | NextIteration | |||
* \ | | | |||
* \ | | | |||
* Merge <---------| | |||
* | | |||
* | | |||
* Enter | |||
* | | |||
* +--------------------- Merge ----------------------+ | |||
* / | | |||
* / | | |||
* / | | |||
* / | | |||
* Exit Identify | | |||
* \ / \. | | |||
* \ / \. | | |||
* Switch Add Add | |||
* / | | | | |||
* / | | | | |||
* / | | | | |||
* LoopCond | | | | |||
* \ | | | | |||
* \ | | | | |||
* \ | | | | |||
* Less | | | | |||
* \ | NextIteration | | |||
* \ | | | | |||
* \ | | | | |||
* Merge <---------| | | |||
* | | | |||
* | | | |||
* Enter | | |||
* \ | | |||
* \ | | |||
* Switch Switch | |||
* | | | |||
* +-----------------Equal----------------------+ | |||
* | | |||
******************************************************************************/ | |||
auto data1 = CreateNode(*graph, "data", DATA, 1, 1); | |||
auto data1 = CreateNode(*graph, "data1", DATA, 1, 1); | |||
auto data2 = CreateNode(*graph, "data2", DATA, 1, 1); | |||
auto equal1 = CreateNode(*graph, "equal1", EQUAL, 2, 1); | |||
auto switch1 = CreateNode(*graph, "switch1", SWITCH, 2, 2); | |||
auto switch2 = CreateNode(*graph, "switch2", SWITCH, 2, 2); | |||
auto enter1 = CreateNode(*graph, "enter", ENTER, 1, 1); | |||
auto merge1 = CreateNode(*graph, "merge", MERGE, 2, 2); | |||
auto less1 = CreateNode(*graph, "less", LESS, 2, 1); | |||
auto merge1 = CreateNode(*graph, "merge1", MERGE, 2, 2); | |||
auto less1 = CreateNode(*graph, "less1", LESS, 2, 1); | |||
auto loop1 = CreateNode(*graph, "loopcond", LOOPCOND, 1, 1); | |||
auto switch1 = CreateNode(*graph, "switch", SWITCH, 2, 2); | |||
auto switch3 = CreateNode(*graph, "switch3", SWITCH, 2, 2); | |||
auto ident1 = CreateNode(*graph, "identity", IDENTITY, 1, 1); | |||
auto add1 = CreateNode(*graph, "add", ADD, 2, 1); | |||
auto add1 = CreateNode(*graph, "add1", ADD, 2, 1); | |||
auto next1 = CreateNode(*graph, "next", NEXTITERATION, 1, 1); | |||
auto exit1 = CreateNode(*graph, "exit", EXIT, 1, 1); | |||
auto value0 = CreateNode(*graph, "const", CONSTANT, 0, 1); | |||
auto value1 = CreateNode(*graph, "const", CONSTANT, 0, 1); | |||
auto value1 = CreateNode(*graph, "const1", CONSTANT, 0, 1); | |||
auto value2 = CreateNode(*graph, "const2", CONSTANT, 0, 1); | |||
auto add2 = CreateNode(*graph, "add2", ADD, 2, 1); | |||
auto merge2 = CreateNode(*graph, "merge2", MERGE, 2, 2); | |||
auto output1 = CreateNode(*graph, "net_output", NETOUTPUT, 1, 1); | |||
GraphUtils::AddEdge(data1->GetOutDataAnchor(0), enter1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(data1->GetOutDataAnchor(0), equal1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(data2->GetOutDataAnchor(0), equal1->GetInDataAnchor(1)); | |||
GraphUtils::AddEdge(data1->GetOutDataAnchor(0), switch1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(data2->GetOutDataAnchor(0), switch2->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(equal1->GetOutDataAnchor(0), switch1->GetInDataAnchor(1)); | |||
GraphUtils::AddEdge(equal1->GetOutDataAnchor(0), switch2->GetInDataAnchor(1)); | |||
cond.emplace_back(switch1); | |||
cond.emplace_back(switch2); | |||
GraphUtils::AddEdge(switch1->GetOutDataAnchor(0), enter1->GetInDataAnchor(0)); // false | |||
GraphUtils::AddEdge(enter1->GetOutDataAnchor(0), merge1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), less1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(value1->GetOutDataAnchor(0), less1->GetInDataAnchor(1)); | |||
GraphUtils::AddEdge(less1->GetOutDataAnchor(0), loop1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(loop1->GetOutDataAnchor(0), switch1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), switch1->GetInDataAnchor(1)); | |||
GraphUtils::AddEdge(loop1->GetOutDataAnchor(0), switch3->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), switch3->GetInDataAnchor(1)); | |||
loop.emplace_back(merge1); | |||
GraphUtils::AddEdge(switch1->GetOutDataAnchor(0), exit1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(switch1->GetOutDataAnchor(1), ident1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(switch3->GetOutDataAnchor(0), exit1->GetInDataAnchor(0)); // false | |||
GraphUtils::AddEdge(switch3->GetOutDataAnchor(1), ident1->GetInDataAnchor(0)); // true | |||
loop.emplace_back(switch3); | |||
GraphUtils::AddEdge(ident1->GetOutDataAnchor(0), add1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(value1->GetOutDataAnchor(0), add1->GetInDataAnchor(1)); | |||
GraphUtils::AddEdge(add1->GetOutDataAnchor(0), next1->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(next1->GetOutDataAnchor(0), merge1->GetInDataAnchor(1)); | |||
GraphUtils::AddEdge(exit1->GetOutDataAnchor(0), output1->GetInDataAnchor(0)); | |||
merge = merge1; | |||
GraphUtils::AddEdge(switch2->GetOutDataAnchor(1), add2->GetInDataAnchor(1)); // true | |||
GraphUtils::AddEdge(value2->GetOutDataAnchor(0), add2->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(exit1->GetOutDataAnchor(0), merge2->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(add2->GetOutDataAnchor(0), merge2->GetInDataAnchor(1)); | |||
GraphUtils::AddEdge(merge2->GetOutDataAnchor(0), output1->GetInDataAnchor(0)); | |||
cond.emplace_back(merge2); | |||
merge = merge2; | |||
} | |||
static void CreateCondGraph(ComputeGraphPtr &graph, NodePtr &merge) { | |||
@@ -197,12 +235,24 @@ static void CreateCondGraph(ComputeGraphPtr &graph, NodePtr &merge) { | |||
TEST_F(UtestMarkForceUnknownForCondPass, skip_while_loop_merge) { | |||
auto graph = std::make_shared<ComputeGraph>("test_graph"); | |||
NodePtr merge; | |||
CreateLoopGraph(graph, merge); | |||
AttrUtils::SetBool(merge->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, true); | |||
vector<NodePtr> loop; | |||
vector<NodePtr> cond; | |||
CreateLoopGraph(graph, merge, loop, cond); | |||
MarkForceUnknownForCondPass mark_force_unknown_pass; | |||
EXPECT_EQ(mark_force_unknown_pass.Run(graph), SUCCESS); // skip LoopCond | |||
EXPECT_EQ(loop.size(), 2); | |||
for (const auto &node : loop) { | |||
EXPECT_FALSE(node->GetOpDesc()->HasAttr(ATTR_NAME_CONTROL_FLOW_GROUP)); | |||
} | |||
EXPECT_EQ(cond.size(), 3); | |||
for (const auto &node : cond) { | |||
int64_t group_index = -1; | |||
EXPECT_TRUE(AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index)); | |||
EXPECT_EQ(group_index, merge->GetOpDesc()->GetId()); | |||
} | |||
} | |||
TEST_F(UtestMarkForceUnknownForCondPass, skip_known_shape_merge) { | |||
@@ -110,8 +110,8 @@ TEST_F(UtestGraphPassesMergePass, multiple_inputs) { | |||
} | |||
/// Merge | |||
/// | \ | |||
/// | \ | |||
/// | \. | |||
/// | \. | |||
/// Op1 Op2 Merge2 | |||
/// \ | | | |||
/// \ | Op3 | |||
@@ -137,10 +137,10 @@ TEST_F(UtestGraphPassesMergePass, empty_input_cut_branch_meet_net_output_with_da | |||
} | |||
/// Merge | |||
/// | \ | |||
/// | \ | |||
/// | \. | |||
/// | \. | |||
/// Op1 Op2 Merge2 | |||
/// \ | | \ | |||
/// \ | | \. | |||
/// \ | Op3 | |||
/// \ | : | |||
/// NetOutput | |||
@@ -165,8 +165,8 @@ TEST_F(UtestGraphPassesMergePass, empty_input_cut_branch_meet_net_output_with_co | |||
TEST_F(UtestGraphPassesMergePass, empty_input_cut_branch) { | |||
/// Merge | |||
/// | \ | |||
/// | \ | |||
/// | \. | |||
/// | \. | |||
/// Op1 Op2 Merge2 | |||
/// \ | | | |||
/// \ | Op3 | |||
@@ -210,7 +210,7 @@ TEST_F(UtestGraphPassesMergePass, empty_input_cut_branch) { | |||
/// Op1 Op2 Merge2 | |||
/// \ | | |||
/// \ Op3 | |||
/// \ | |||
/// \. | |||
/// Merge3 | |||
ret = pass_.Run(merge_node2); | |||
@@ -224,7 +224,7 @@ TEST_F(UtestGraphPassesMergePass, single_non_const_input) { | |||
/// Op1 | |||
/// | | |||
/// Merge | |||
/// / \ | |||
/// / \. | |||
/// Op2 Op3 | |||
auto merge_node = NewNode("Merge", MERGE, 1, 2); | |||
auto node1 = NewNode("Op1", RELU, 1, 1); | |||
@@ -253,7 +253,7 @@ TEST_F(UtestGraphPassesMergePass, single_const_input) { | |||
/// Const | |||
/// | | |||
/// Merge Pass Const | |||
/// / \ ===> / \ | |||
/// / \ ===> / \. | |||
/// Op1 Op2 Op1 Op2 | |||
auto merge_node = NewNode("Merge", MERGE, 1, 2); | |||
auto const_node = NewNode("Const", CONSTANT, 1, 1); | |||
@@ -284,7 +284,7 @@ TEST_F(UtestGraphPassesMergePass, single_const_input_value_index_two_out_nodes) | |||
/// / | ===> / \(control anchor) | |||
/// Op1 | \ Op1 Constant | |||
/// Op2 Op3 | | |||
/// / \ | |||
/// / \. | |||
/// Op2 Op3 | |||
auto merge_node = NewNode("Merge", MERGE, 1, 2); | |||
auto const_node = NewNode("Const", CONSTANT, 1, 1); | |||
@@ -329,7 +329,7 @@ TEST_F(UtestGraphPassesMergePass, single_const_input_value_index_two_out_nodes1) | |||
/// / | ===> / \(control anchor) | |||
/// Op1 | \ Op1 Constant | |||
/// Op2 Op3 | | |||
/// / \ | |||
/// / \. | |||
/// Op2 Op3 | |||
auto merge_node = NewNode("Merge", MERGE, 1, 2); | |||
auto const_node = NewNode("Const", CONSTANT, 1, 1); | |||
@@ -357,7 +357,7 @@ TEST_F(UtestGraphPassesMergePass, const_with_control_input) { | |||
/// C | |||
/// | | |||
/// Merge | |||
/// / \ | |||
/// / \. | |||
/// Op1 Op2 | |||
auto switch_node = NewNode("Switch", SWITCH, 1, 2); | |||
auto identity_node = NewNode("Identity", SWITCH, 1, 1); | |||
@@ -381,7 +381,7 @@ TEST_F(UtestGraphPassesMergePass, const_with_control_input) { | |||
/// . | |||
/// . | |||
/// C | |||
/// / \ | |||
/// / \. | |||
/// Op1 Op2 | |||
auto ret = pass_.Run(merge_node); | |||
EXPECT_EQ(ret, SUCCESS); | |||
@@ -19,7 +19,8 @@ | |||
#include <string> | |||
#define private public | |||
#include "inc/graph/ge_local_context.h" | |||
#include "inc/external/ge/ge_api_types.h" | |||
#include "common/ge_inner_error_codes.h" | |||
#include "inc/pass_manager.h" | |||
#include "utils/graph_utils.h" | |||
@@ -66,11 +67,11 @@ class UtestGraphPassesParallelGgroupPass : public testing::Test { | |||
void BuildDefaultGraph() { | |||
/// input | |||
/// \ | |||
/// \. | |||
/// sqrt pred | |||
/// \ / | |||
/// cast | |||
/// / \ | |||
/// / \. | |||
/// switch_t switch_f | |||
/// | | | |||
/// F T | |||
@@ -118,13 +119,13 @@ class UtestGraphPassesParallelGgroupPass : public testing::Test { | |||
void BuildDefaultGraph1() { | |||
/// input | |||
/// \ | |||
/// \. | |||
/// sqrt pred | |||
/// \ / | |||
/// Switch | |||
/// | | | |||
/// ----F T---- | |||
/// \ | / \ | |||
/// \ | / \. | |||
/// \ Merge1 Merge2 | |||
/// \_________| | |||
input_node_ = NewNode("input", RELU, 0, 1); | |||
@@ -164,14 +165,14 @@ class UtestGraphPassesParallelGgroupPass : public testing::Test { | |||
void BuildDefaultGraph2() { | |||
/// input input1 | |||
/// \ \ | |||
/// \ \. | |||
/// sqrt pred sqrt1 pred1 | |||
/// \ / \ / | |||
/// Switch Switch1 | |||
/// | | _______| | |||
/// | | / | |||
/// ____F T____ | |||
/// \ | / \ | |||
/// \ | / \. | |||
/// \ Merge1 Merge2 | |||
/// \__________| | |||
input_node_ = NewNode("input", RELU, 0, 2); | |||
@@ -225,6 +226,70 @@ class UtestGraphPassesParallelGgroupPass : public testing::Test { | |||
output_true_node_->GetOpDesc()->SetIsInputConst({false}); | |||
} | |||
void BuildDefaultGraph3() { | |||
/// input | |||
/// \ | |||
/// sqrt pred | |||
/// \ / | |||
/// Switch | |||
/// | | | |||
/// F T ------ | |||
/// / \_/_ \ | |||
/// / / \ \ | |||
/// Merge sqrt2 sqrt3 | |||
/// / \ \ | |||
/// sqrt1 \ relu | |||
/// \ \ | |||
/// \ sqrt4 | |||
/// \ / | |||
/// Merge1 | |||
input_node_ = NewNode("input", RELU, 0, 1); | |||
AttrUtils::SetStr(input_node_->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, "1"); | |||
pred_node_ = NewNode("pred", GREATER, 2, 1); | |||
sqrt_node_ = NewNode("sqrt", SQRT, 1, 1); | |||
cast_node_ = NewNode("cast", CAST, 2, 2); | |||
switch_node_t = NewNode("switch_t", STREAMSWITCH, 1, 1); | |||
AttrUtils::SetBool(switch_node_t->GetOpDesc(), ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG, true); | |||
switch_node_f = NewNode("switch_f", STREAMSWITCH, 1, 1); | |||
AttrUtils::SetBool(switch_node_f->GetOpDesc(), ATTR_NAME_SWITCH_TRUE_BRANCH_FLAG, false); | |||
output_false_node_ = NewNode("false_output", RELU, 1, 2); | |||
AttrUtils::SetStr(output_false_node_->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, "1"); | |||
output_true_node_ = NewNode("true_output", RELU, 1, 2); | |||
AttrUtils::SetStr(output_true_node_->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, "1"); | |||
merge_node_ = NewNode("merge", STREAMMERGE, 2, 1); | |||
sqrt_node1_ = NewNode("sqrt1", SQRT, 1, 1); | |||
AttrUtils::SetStr(sqrt_node1_->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, "1"); | |||
sqrt_node2_ = NewNode("sqrt2", SQRT, 1, 1); | |||
AttrUtils::SetStr(sqrt_node2_->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, "1"); | |||
sqrt_node3_ = NewNode("sqrt3", SQRT, 1, 1); | |||
relu_node_ = NewNode("relu", RELU, 1, 1); | |||
sqrt_node4_ = NewNode("sqrt4", SQRT, 1, 1); | |||
AttrUtils::SetStr(sqrt_node4_->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, "1"); | |||
merge_node1_ = NewNode("merge1", STREAMMERGE, 2, 1); | |||
GraphUtils::AddEdge(input_node_->GetOutDataAnchor(0), sqrt_node_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(pred_node_->GetOutDataAnchor(0), cast_node_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(sqrt_node_->GetOutDataAnchor(0), cast_node_->GetInDataAnchor(1)); | |||
GraphUtils::AddEdge(cast_node_->GetOutDataAnchor(0), switch_node_t->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(cast_node_->GetOutDataAnchor(1), switch_node_f->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(switch_node_f->GetOutDataAnchor(0), output_false_node_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(switch_node_t->GetOutDataAnchor(0), output_true_node_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(output_false_node_->GetOutDataAnchor(0), merge_node_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(output_true_node_->GetOutDataAnchor(0), merge_node_->GetInDataAnchor(1)); | |||
GraphUtils::AddEdge(output_false_node_->GetOutDataAnchor(1), sqrt_node2_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(output_true_node_->GetOutDataAnchor(1), sqrt_node3_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(merge_node_->GetOutDataAnchor(0), sqrt_node1_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(sqrt_node3_->GetOutDataAnchor(0), relu_node_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(relu_node_->GetOutDataAnchor(0), sqrt_node4_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(sqrt_node2_->GetOutDataAnchor(0), merge_node1_->GetInDataAnchor(0)); | |||
GraphUtils::AddEdge(sqrt_node4_->GetOutDataAnchor(0), merge_node1_->GetInDataAnchor(1)); | |||
output_false_node_->GetOpDesc()->SetIsInputConst({false}); | |||
output_true_node_->GetOpDesc()->SetIsInputConst({false}); | |||
} | |||
ComputeGraphPtr graph_; | |||
ComputeGraphPtr sub_graph_; | |||
GeTensorDescPtr default_tensor_desc_; | |||
@@ -235,6 +300,9 @@ class UtestGraphPassesParallelGgroupPass : public testing::Test { | |||
NodePtr cast_node1_; | |||
NodePtr sqrt_node_; | |||
NodePtr sqrt_node1_; | |||
NodePtr sqrt_node2_; | |||
NodePtr sqrt_node3_; | |||
NodePtr sqrt_node4_; | |||
NodePtr input_node_; | |||
NodePtr input_node1_; | |||
NodePtr switch_node_t; | |||
@@ -278,6 +346,16 @@ TEST_F(UtestGraphPassesParallelGgroupPass, normal_graph2) { | |||
EXPECT_EQ(true, input_node1_->GetOutControlAnchor()->IsLinkedWith(cast_node1_->GetInControlAnchor())); | |||
} | |||
TEST_F(UtestGraphPassesParallelGgroupPass, normal_graph3) {
  // Set graph run mode "1" in the thread-local context before building the graph.
  std::map<std::string, std::string> graph_options;
  graph_options.emplace(OPTION_GRAPH_RUN_MODE, "1");
  GetThreadLocalContext().SetGraphOption(graph_options);

  BuildDefaultGraph3();

  auto status = pass_.Run(graph_);
  EXPECT_EQ(status, GRAPH_SUCCESS);

  // After the pass, merge1 must have a control edge to sqrt1.
  auto merge1_ctrl_out = merge_node1_->GetOutControlAnchor();
  EXPECT_EQ(true, merge1_ctrl_out->IsLinkedWith(sqrt_node1_->GetInControlAnchor()));
}
TEST_F(UtestGraphPassesParallelGgroupPass, normal_subgraph) { | |||
BuildDefaultGraph1(); | |||
NodePtr input_node1 = NewNode("input1", RELU, 0, 1, true); | |||
@@ -31,9 +31,9 @@ class UtestReshapeRecoveryPass : public testing::Test { | |||
namespace { | |||
/// netoutput1 | |||
/// | \ | |||
///transdata1 \ | |||
/// | \ | |||
/// | \. | |||
///transdata1 \. | |||
/// | \. | |||
/// | transdata2 | |||
/// | / | |||
/// var1 const1 | |||
@@ -35,7 +35,7 @@ namespace { | |||
/// transdata1 | |||
/// | | |||
/// reshape1 | |||
/// | \ | |||
/// | \. | |||
/// var1 const1 | |||
ut::GraphBuilder Graph1Builder() { | |||
ut::GraphBuilder builder = ut::GraphBuilder("g1"); | |||
@@ -55,11 +55,11 @@ ut::GraphBuilder Graph1Builder() { | |||
} | |||
/// netoutput1 | |||
/// | \ | |||
///transdata1 \ | |||
/// | \ | |||
/// | \. | |||
///transdata1 \. | |||
/// | \. | |||
/// reshape1 reshape2 | |||
/// | \ / \ | |||
/// | \ / \. | |||
/// var1 const1 var2 | |||
ut::GraphBuilder Graph2Builder() { | |||
ut::GraphBuilder builder = ut::GraphBuilder("g2"); | |||
@@ -83,9 +83,9 @@ ut::GraphBuilder Graph2Builder() { | |||
} | |||
/// netoutput1 | |||
/// | \ | |||
///transdata1 \ | |||
/// | \ | |||
/// | \. | |||
///transdata1 \. | |||
/// | \. | |||
/// reshape1 transdata2 | |||
/// | \ / | |||
/// var1 const1 | |||
@@ -34,7 +34,7 @@ class UtestResourcePairControlPass : public testing::Test { | |||
namespace { | |||
/// netoutput1 | |||
/// | \ | |||
/// | \. | |||
/// StackPush StackPop | |||
/// | | | |||
/// var1 const1 | |||
@@ -63,9 +63,9 @@ ComputeGraphPtr BuildGraph1() { | |||
/// netoutput1 | |||
/// | | |||
/// merge1 | |||
/// / \ | |||
/// / \. | |||
/// / add1 | |||
/// / F| \ | |||
/// / F| \. | |||
/// addn1 swtich2 var3 | |||
/// \F T/ | | |||
/// switch1 | | |||
@@ -101,9 +101,9 @@ ComputeGraphPtr BuildGraph2() { | |||
/// add1 | |||
/// / \T | |||
/// var3 swtich2 | |||
/// T/ \ | |||
/// switch1 \ | |||
/// / \ \ | |||
/// T/ \. | |||
/// switch1 \. | |||
/// / \ \. | |||
/// var1 var2 var4 | |||
ComputeGraphPtr BuildGraph3() { | |||
auto builder = ut::GraphBuilder("g3"); | |||
@@ -129,7 +129,7 @@ ComputeGraphPtr BuildGraph3() { | |||
/// netoutput1 | |||
/// | | |||
/// merge1 | |||
/// / \ | |||
/// / \. | |||
/// add1 addn1 | |||
/// / \T F/ | |||
/// var3 swtich2 | |||
@@ -402,7 +402,7 @@ TEST_F(UtestGraphPassesTransOpBreadthFusionPass, test_multi_anchor_case) { | |||
} | |||
/// ----> netoutput1 | |||
/// / | \ | |||
/// / | \. | |||
/// transdata1 transdata2 transdata3 | |||
/// \ / | | |||
/// var1-------------- | |||
@@ -432,7 +432,7 @@ static ComputeGraphPtr BuildGraph1() { | |||
} | |||
/// ---------> netoutput1 | |||
/// / | \ | |||
/// / | \. | |||
/// transdata1 transdata2(l1) transdata3(l1) | |||
/// \ / | | |||
/// var1------------------ | |||
@@ -456,19 +456,19 @@ TEST_F(UtestGraphPassesTransOpDepthFusionPass, test_transop_with_multi_out_edge) | |||
/// -->transpose1 -->transpose3-->sinh2 | |||
/// | \ / | |||
/// | -->transpose2 | |||
/// | \ | |||
/// | \. | |||
/// / -->cast3-->cast4-->sinh3 | |||
/// / | |||
/// / -->transpose4-->transpose5-->sinh4 | |||
/// / / | |||
/// Node4D-->Cast1-->Cast2-->Cast5 -->reshape2-->sinh5 | |||
/// \ \ | |||
/// \ \. | |||
/// \ -->sinh6 | |||
/// \ | |||
/// \. | |||
/// \ -->transpose6-->transpose7-->sinh9 | |||
/// \ / | |||
/// -->reshape-->cast6-->cast7-->sinh8 | |||
/// \ | |||
/// \. | |||
/// -->sinh7 | |||
/// after optimized graph | |||
@@ -479,15 +479,15 @@ TEST_F(UtestGraphPassesTransOpDepthFusionPass, test_transop_with_multi_out_edge) | |||
/// / /-->transpose3-->sinh2 | |||
/// -->Cast1 | |||
/// / \-->sinh7 | |||
/// / \ | |||
/// / \. | |||
/// / -->sinh9 | |||
/// Node4D | |||
/// \ -->sinh4 | |||
/// \ / | |||
/// -->Cast5-->sinh5 | |||
/// \ \ | |||
/// \ \. | |||
/// \ -->sinh6 | |||
/// \ | |||
/// \. | |||
/// -->Cast7-->sinh8 | |||
ge::ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||
@@ -180,7 +180,7 @@ ComputeGraphPtr GetGraph7(size_t symmetric_transdata_num, size_t asymmetric_tran | |||
/// TransData TransData ... MatMul ... | |||
/// \ | / / / | |||
/// HcomAllReduce | |||
/// / | \ \ \ | |||
/// / | \ \ \. | |||
/// TransData TransData ... RealDiv ... | |||
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||
NodePtr allreduce = | |||
@@ -340,7 +340,7 @@ TEST(UtestTransopNearbyAllreduceFusionPass, test7_all_reduce_with_multiple_trans | |||
/// TransData TransData ... MatMul ... | |||
/// \ | / / / | |||
/// HcomAllReduce | |||
/// / | \ \ \ | |||
/// / | \ \ \. | |||
/// TransData TransData ... RealDiv ... | |||
size_t symmetric_transdata_num = 20; | |||
size_t asymmetric_transdata_num = 20; | |||
@@ -66,7 +66,7 @@ namespace { | |||
/// transdata2 | |||
/// | | |||
/// assign1 | |||
/// / \ | |||
/// / \. | |||
/// transdata1 | | |||
/// | | | |||
/// var1 const1 | |||
@@ -35,8 +35,8 @@ namespace { | |||
/// shapeNo1 | |||
/// | | |||
/// addnYes1 | |||
/// / \ | |||
/// / \ | |||
/// / \. | |||
/// / \. | |||
/// const1 const2 | |||
ComputeGraphPtr BuildGraph1() { | |||
@@ -57,9 +57,9 @@ ComputeGraphPtr BuildGraph1() { | |||
/// | |||
/// netoutput1 | |||
/// / \ \ | |||
/// add1 assign1 \ | |||
/// / \ / \ \ | |||
/// / \ \. | |||
/// add1 assign1 \. | |||
/// / \ / \ \. | |||
/// var1 var2 const1 var3 | |||
ComputeGraphPtr BuildGraph2() { | |||
@@ -103,4 +103,32 @@ TEST_F(UtestHybridModelAsyncExecutor, Test_execute) { | |||
context.callback_manager->callback_queue_.Push(eof_entry); | |||
ASSERT_EQ(executor.Execute(args), SUCCESS); | |||
} | |||
// Checks that PrepareInputs resolves a dynamic input (-1 batch dim) to the
// concrete shape supplied in InputData and records a tensor size for it.
TEST_F(UtestHybridModelAsyncExecutor, test_PrepareInputs) {
  // Size in bytes of the fake input blob handed to the executor.
  constexpr size_t kInputBufferSize = 3072;

  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
  ge_root_model->SetModelName("test_name");
  // NOTE(review): ge_sub_model is never used below; kept in case its
  // construction matters -- confirm and remove if not.
  GeModelPtr ge_sub_model = make_shared<GeModel>();
  HybridModel hybrid_model(ge_root_model);
  HybridModelAsyncExecutor executor(&hybrid_model);

  // Input 0 is dynamic in its first dimension, with range [1, 256].
  GeTensorDescPtr tensor_desc = make_shared<GeTensorDesc>(GeShape({-1, 16, 16, 3}));
  tensor_desc->SetShapeRange({{1, 256}, {16, 16}, {16, 16}, {3, 3}});
  executor.input_tensor_desc_.insert({0, tensor_desc});
  executor.device_id_ = 0;
  executor.input_sizes_.insert({0, -1});
  executor.is_input_dynamic_.push_back(true);

  // nothrow new returns nullptr on failure; stop here instead of passing a
  // null blob into PrepareInputs.
  unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[kInputBufferSize]);
  ASSERT_TRUE(data_buf != nullptr);

  InputData input_data;
  input_data.blobs.push_back(DataBuffer(data_buf.get(), kInputBufferSize, false));
  input_data.shapes.push_back({1, 16, 16, 3});

  HybridModelExecutor::ExecuteArgs args;
  auto ret = executor.PrepareInputs(input_data, args);
  ASSERT_EQ(ret, SUCCESS);

  // Guard the index below so a regression fails the test instead of reading
  // out of bounds.
  ASSERT_FALSE(args.input_desc.empty());
  ASSERT_EQ(args.input_desc[0]->GetShape().ToString(), GeShape({1, 16, 16, 3}).ToString());

  int64_t tensor_size = 0;
  TensorUtils::GetSize(*(args.input_desc[0]), tensor_size);
  // NOTE(review): 3104 is presumably the 3072-byte payload plus alignment
  // padding -- confirm against the size calculation used by PrepareInputs.
  ASSERT_EQ(tensor_size, 3104);
}
} // namespace ge |
@@ -249,6 +249,9 @@ TEST_F(UtestSubgraphExecutor, cond_graph_schedule_tasks) { | |||
graph_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager()); | |||
ASSERT_EQ(graph_context.callback_manager->Init(), SUCCESS); | |||
auto root_graph = hybrid_model.root_graph_; | |||
switch_t = root_graph->FindNode("switch_t"); | |||
switch_f = root_graph->FindNode("switch_f"); | |||
const auto node_it_t = hybrid_model.node_items_.find(switch_t); | |||
const auto node_it_f = hybrid_model.node_items_.find(switch_f); | |||
ASSERT_NE(hybrid_model.node_items_.end(), node_it_t); | |||
@@ -214,11 +214,17 @@ TEST_F(UtestHybridModelBuilder, normal_hybrid_model_build) { | |||
ASSERT_EQ(it->second->frame_index_, index); | |||
ASSERT_EQ(it->second->parent_frame_, -1); | |||
}; | |||
TestFrameGroup(enter1, control_group_index); | |||
TestFrameGroup(active1, control_group_index); | |||
TestFrameGroup(active2, control_group_index); | |||
TestFrameGroup(active3, control_group_index); | |||
TestFrameGroup(output1, -1); | |||
auto root_graph = hybrid_model.root_graph_; | |||
auto enter1_node = root_graph->FindNode("enter"); | |||
auto active1_node = root_graph->FindNode("active1"); | |||
auto active2_node = root_graph->FindNode("active2"); | |||
auto active3_node = root_graph->FindNode("active3"); | |||
auto output1_node = root_graph->FindNode("net_output"); | |||
TestFrameGroup(enter1_node, control_group_index); | |||
TestFrameGroup(active1_node, control_group_index); | |||
TestFrameGroup(active2_node, control_group_index); | |||
TestFrameGroup(active3_node, control_group_index); | |||
TestFrameGroup(output1_node, -1); | |||
engine_mapping.clear(); | |||
task_executor.clear(); | |||
@@ -373,4 +379,14 @@ TEST_F(UtestHybridModelBuilder, TestInitHcclExecutorOnDemand) { | |||
NodeExecutorManager::GetInstance().builders_.erase(NodeExecutorManager::ExecutorType::HCCL); | |||
ASSERT_EQ(HybridModelBuilder::InitHcclExecutorOnDemand(ge_model), SUCCESS); | |||
} | |||
// CopyGraph on a builder wrapping an empty root graph is expected to succeed.
TEST_F(UtestHybridModelBuilder, copy_graph_success) {
  ComputeGraphPtr empty_graph = std::make_shared<ComputeGraph>("test");
  GeRootModelPtr root_model = make_shared<GeRootModel>(empty_graph);

  HybridModel model(root_model);
  HybridModelBuilder builder(model);

  Status copy_status = builder.CopyGraph();
  EXPECT_EQ(copy_status, SUCCESS);
}
} // namespace ge |
@@ -25,6 +25,7 @@ | |||
#define private public | |||
#include "common/profiling/profiling_manager.h" | |||
#include "graph/ge_local_context.h" | |||
#include "inc/framework/common/profiling/ge_profiling.h" | |||
#undef protected | |||
#undef private | |||
@@ -115,4 +116,20 @@ TEST_F(UtestGeProfilinganager, get_fp_bp_point_empty) { | |||
ProfilingManager::Instance().GetFpBpPoint(fp_point, bp_point); | |||
EXPECT_EQ(fp_point, ""); | |||
EXPECT_EQ(bp_point, ""); | |||
} | |||
} | |||
// Reports a step start (tag 0) followed by a step end (tag 1) for the same
// index; both calls are expected to succeed.
TEST_F(UtestGeProfilinganager, set_step_info_success) {
  const uint64_t step_index = 0;
  const uint16_t start_tag = 0;
  const uint16_t end_tag = 1;
  // Dummy non-null stream handle; it is only passed through to ProfSetStepInfo.
  auto fake_stream = (rtStream_t)0x1;

  Status status = ProfSetStepInfo(step_index, start_tag, fake_stream);
  EXPECT_EQ(status, ge::SUCCESS);

  status = ProfSetStepInfo(step_index, end_tag, fake_stream);
  EXPECT_EQ(status, ge::SUCCESS);
}
// Reports only a step end (tag 1) and expects ge::FAILED.
// NOTE(review): the reason for the failure is not visible from this test;
// presumably it depends on state inside ProfSetStepInfo or the runtime stub --
// confirm, since that could make the result depend on test order.
TEST_F(UtestGeProfilinganager, set_step_info_failed) {
  const uint64_t step_index = 0;
  const uint16_t end_tag = 1;
  // Dummy non-null stream handle; it is only passed through to ProfSetStepInfo.
  auto fake_stream = (rtStream_t)0x1;

  Status status = ProfSetStepInfo(step_index, end_tag, fake_stream);
  EXPECT_EQ(status, ge::FAILED);
}