From: @HW_KK Reviewed-by: @liujunzhu,@youui Signed-off-by: @youuir1.0
@@ -0,0 +1,134 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/** | |||
* @file hccl.h | |||
* @brief HCCL API | |||
*/ | |||
#ifndef HCCL_H_ | |||
#define HCCL_H_ | |||
#include <hccl/hccl_types.h> | |||
#include <acl/acl.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
/** | |||
* @brief Initialize HCCL. | |||
* | |||
* @param clusterInfo A string identifying the cluster info file path, include file name. | |||
* @param rank A integer identifying the identify for the rank. | |||
* @param comm A pointer identifying the initialized communication resource. | |||
* @return HcclResult | |||
* @see HcclCommDestroy() | |||
*/ | |||
extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); | |||
/** | |||
* @brief Get hccl root info. | |||
* | |||
* @param rootInfo A pointer identifying the hccl root info. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); | |||
/** | |||
* @brief Initialize HCCL with root info. | |||
* | |||
* @param nRanks A integer identifying the rank size of the cluster. | |||
* @param rootInfo A struct identifying the hccl root info. | |||
* @param rank A integer identifying the identify for the rank. | |||
* @param comm A pointer identifying the initialized communication resource. | |||
* @return HcclResult | |||
* @see HcclCommDestroy() | |||
*/ | |||
extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); | |||
/** | |||
* @brief AllReduce operator. | |||
* | |||
* @param sendBuf A pointer identifying the input data address of the operator. | |||
* @param recvBuf A pointer identifying the output data address of the operator. | |||
* @param count An integer(u64) identifying the number of the output data. | |||
* @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, | |||
* float32. | |||
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||
* @param comm A pointer identifying the communication resource based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, | |||
HcclComm comm, aclrtStream stream); | |||
/** | |||
* @brief Broadcast operator. | |||
* | |||
* @param buf A pointer identifying the data address of the operator. | |||
* @param count An integer(u64) identifying the number of the data. | |||
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
* @param root An integer(u32) identifying the the root rank in the operator. | |||
* @param comm A pointer identifying the communication resource based on | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, | |||
aclrtStream stream); | |||
/** | |||
* @brief ReduceScatter operator. | |||
* | |||
* @param sendBuf A pointer identifying the input data address of the operator. | |||
* @param recvBuf A pointer identifying the output data address of the operator. | |||
* @param recvCount An integer(u64) identifying the number of the output data. | |||
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
* @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. | |||
* @param comm A pointer identifying the communication resource based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, | |||
HcclReduceOp op, HcclComm comm, aclrtStream stream); | |||
/** | |||
* @brief AllGather operator. | |||
* | |||
* @param sendBuf A pointer identifying the input data address of the operator. | |||
* @param recvBuf A pointer identifying the output data address of the operator. | |||
* @param sendCount An integer(u64) identifying the number of the input data. | |||
* @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. | |||
* @param comm A pointer identifying the communication resource based on. | |||
* @param stream A pointer identifying the stream information. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, | |||
aclrtStream stream); | |||
/** | |||
* @brief Destroy HCCL comm | |||
* | |||
* @param comm A pointer identifying the communication resource targetting | |||
* @return HcclResult | |||
* @see HcclCommInitClusterInfo() | |||
*/ | |||
extern HcclResult HcclCommDestroy(HcclComm comm); | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // HCCL_H_ |
@@ -0,0 +1,101 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/** | |||
* @file hccl_types.h | |||
* @brief HCCL data type definition | |||
* | |||
*/ | |||
#ifndef HCCL_TYPES_H_ | |||
#define HCCL_TYPES_H_ | |||
#include <stdint.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
/**
 * @brief Return values of the HCCL functions.
 *
 * HCCL_SUCCESS is 0; every error code has an explicit value except the
 * trailing HCCL_E_RESERVED, which takes the next value after the last
 * explicit entry.
 */
typedef enum {
    HCCL_SUCCESS             = 0,  /**< success */
    HCCL_E_PARA              = 1,  /**< parameter error */
    HCCL_E_PTR               = 2,  /**< empty pointer */
    HCCL_E_MEMORY            = 3,  /**< memory error */
    HCCL_E_INTERNAL          = 4,  /**< internal error */
    HCCL_E_NOT_SUPPORT       = 5,  /**< feature not supported */
    HCCL_E_NOT_FOUND         = 6,  /**< specific resource not found */
    HCCL_E_UNAVAIL           = 7,  /**< resource unavailable */
    HCCL_E_SYSCALL           = 8,  /**< error calling a system interface */
    HCCL_E_TIMEOUT           = 9,  /**< timeout */
    HCCL_E_OPEN_FILE_FAILURE = 10, /**< failed to open file */
    HCCL_E_TCP_CONNECT       = 11, /**< tcp connect failed */
    HCCL_E_ROCE_CONNECT      = 12, /**< roce connect failed */
    HCCL_E_TCP_TRANSFER      = 13, /**< tcp transfer failed */
    HCCL_E_ROCE_TRANSFER     = 14, /**< roce transfer failed */
    HCCL_E_RUNTIME           = 15, /**< runtime api call failed */
    HCCL_E_DRV               = 16, /**< driver api call failed */
    HCCL_E_PROFILING         = 17, /**< profiling api call failed */
    HCCL_E_CCE               = 18, /**< cce api call failed */
    HCCL_E_NETWORK           = 19, /**< network api call failed */
    HCCL_E_RESERVED                /**< reserved */
} HcclResult;
/**
 * @brief Opaque handle to an HCCL communicator (an untyped pointer).
 */
typedef void *HcclComm;
/**
 * @brief HCCL reduction operation.
 */
typedef enum {
    HCCL_REDUCE_SUM  = 0, /**< sum */
    HCCL_REDUCE_PROD = 1, /**< prod */
    HCCL_REDUCE_MAX  = 2, /**< max */
    HCCL_REDUCE_MIN  = 3, /**< min */
    HCCL_REDUCE_RESERVED  /**< reserved */
} HcclReduceOp;
/**
 * @brief Element data types accepted by HCCL.
 */
typedef enum {
    HCCL_DATA_TYPE_INT8   = 0, /**< int8 */
    HCCL_DATA_TYPE_INT16  = 1, /**< int16 */
    HCCL_DATA_TYPE_INT32  = 2, /**< int32 */
    HCCL_DATA_TYPE_FP16   = 3, /**< fp16 */
    HCCL_DATA_TYPE_FP32   = 4, /**< fp32 */
    HCCL_DATA_TYPE_INT64  = 5, /**< int64 */
    HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
    HCCL_DATA_TYPE_RESERVED    /**< reserved */
} HcclDataType;
/**
 * @brief Length in bytes of the HCCL root info buffer.
 *
 * This is a macro rather than a `const uint32_t` object because the header
 * is consumed from C as well as C++: in C a const-qualified object is not an
 * integer constant expression, so it cannot size a struct member array, and
 * a file-scope definition in a header would be emitted by every translation
 * unit that includes it. The U suffix keeps the value unsigned.
 */
#define HCCL_ROOT_INFO_BYTES 4108U // 4108: root info length

/**
 * @brief HCCL root info
 *
 * Fixed-size byte buffer of HCCL_ROOT_INFO_BYTES bytes.
 */
typedef struct HcclRootInfoDef {
    char internal[HCCL_ROOT_INFO_BYTES];
} HcclRootInfo;
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // HCCL_TYPES_H_ |
@@ -790,22 +790,24 @@ Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) { | |||
} | |||
Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { | |||
static int index = 0; | |||
std::vector<int64_t> input_shape_dim(1, max_dynamic_aipp_size); | |||
GeShape input_shape(input_shape_dim); | |||
// construct input tensor | |||
GeTensorDesc input_tensor(input_shape, FORMAT_ND, DT_UINT8); | |||
TensorUtils::SetReuseInput(input_tensor, false); | |||
TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); | |||
GE_CHECK_NOTNULL(aipp_node); | |||
const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); | |||
string node_name; | |||
if (index == 0) { | |||
// First aippdata name should be definite. | |||
if (graph->FindFirstNodeMatchType(AIPPDATA) == nullptr) { | |||
GELOGI("Current graph has no aippdata node, so the name of it must be definite."); | |||
node_name = kDynamicAippData; | |||
} else { | |||
node_name = string(kDynamicAippData) + "_" + to_string(index); | |||
node_name = string(kDynamicAippData) + "_" + aipp_node->GetName(); | |||
} | |||
++index; | |||
GELOGI("Current add aippdata node name is %s", node_name.c_str()); | |||
// new add aipp_data ops for dynamic aipp param input | |||
OpDescPtr op_desc_ptr_data = MakeShared<OpDesc>(node_name, AIPPDATA); | |||
GE_CHECK_NOTNULL(op_desc_ptr_data); | |||
@@ -1 +0,0 @@ | |||
../../proto/task.proto |
@@ -0,0 +1,170 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
syntax = "proto3"; | |||
package domi; | |||
// Model-level task description: a version string, free-form attributes, the
// list of tasks, and model-wide counts, sizes and addresses.
// NOTE(review): units of the *_size fields are not stated in this file
// (presumably bytes) — confirm with the producer.
message ModelTaskDef {
  string version = 1;

  // Extended field.
  map<string, string> attr = 9;

  // Task entries of this model.
  repeated TaskDef task = 10;

  uint64 memory_size = 11;
  uint32 stream_num = 12;
  uint32 event_num = 13;
  uint64 weight_size = 14;

  // Input/output opdef in bytes.
  repeated bytes op = 15;

  // Base addr.
  uint64 base_addr = 16;

  // Weight addr.
  uint64 weight_addr = 17;

  uint32 batch_num = 18;
}
// One task entry of a ModelTaskDef: common identifiers followed by one
// sub-message per kind of task.
// NOTE(review): the sub-messages are independent fields, not a oneof;
// presumably `type` tells which of them is meaningful — confirm with the
// consumer.
message TaskDef {
  uint32 id = 1;
  uint32 type = 2;

  uint32 stream_id = 10;
  uint32 event_id = 11;

  KernelDef kernel = 20;
  KernelExDef kernel_ex = 21;
  KernelHcclDef kernel_hccl = 25;
  EventExDef event_ex = 26;
  LogTimeStampDef log_timestamp = 28;

  uint32 label_id = 30;

  MemcpyAsyncDef memcpy_async = 31;
  StreamSwitchDef stream_switch = 32;
  StreamActiveDef stream_active = 33;

  bytes private_def = 34;

  // Adjustments to other fields in the future.
  uint64 ops_kernel_store_ptr = 35;

  StreamSwitchNDef stream_switch_n = 36;
  LabelSetDef label_set = 37;
  LabelGotoExDef label_goto_ex = 38;
  LabelSwitchByIndexDef label_switch_by_index = 39;
}
// Payload of TaskDef.kernel: a KernelContext plus argument blobs and naming
// strings.
// NOTE(review): the *_size fields presumably give the byte length of the
// matching bytes field (args, kernel_ext_info) — confirm.
message KernelDef {
  KernelContext context = 1;

  string stub_func = 10;
  uint32 block_dim = 11;

  uint32 args_size = 12;
  bytes args = 13;

  bytes sm_desc = 14;
  bytes flowtable = 15;

  string so_name = 16;
  string kernel_name = 17;

  bytes kernel_ext_info = 18;
  uint32 kernel_ext_info_size = 19;
}
// Context carried by KernelDef.context.
message KernelContext {
  uint32 kernel_type = 1;

  // OP type in CCE.
  uint32 op_id = 2;

  uint32 kernel_func_id = 3;

  // TE/Custom operator.
  uint32 op_index = 4;

  // Identify whether args is a flowtable structure.
  bool is_flowtable = 5;

  // Args offset information.
  bytes args_offset = 6;

  // Args count.
  uint32 args_count = 7;

  repeated uint32 origin_op_index = 8;
}
// Payload of TaskDef.kernel_ex.
// NOTE(review): the *_size fields presumably give the byte length of the
// matching bytes field (args, task_info, kernel_ext_info) — confirm.
message KernelExDef {
  uint32 flags = 1;
  uint32 op_index = 4;

  uint32 args_size = 12;
  bytes args = 13;

  // Serialized nodeDef, funcDef, inputoutput.
  bytes task_info = 14;
  uint32 task_info_size = 15;

  bytes kernel_ext_info = 16;
  uint32 kernel_ext_info_size = 17;
}
// Payload of TaskDef.kernel_hccl. Field numbering starts at 8.
message KernelHcclDef {
  uint32 op_index = 8;
  string hccl_type = 9;
}
// Payload of TaskDef.event_ex.
message EventExDef {
  uint32 op_index = 1;
  uint32 event_type = 2;
}
// Payload of TaskDef.log_timestamp.
// NOTE(review): `flat` may be a misspelling of `flag`; it is kept as is
// because renaming a field changes the generated accessors and JSON name.
message LogTimeStampDef {
  uint64 logid = 1;
  bool notify = 2;
  uint32 flat = 3;
}
// Payload of TaskDef.memcpy_async.
// NOTE(review): dst/src look like addresses, and dst_max/count like sizes
// (presumably bytes) — confirm with the consumer.
message MemcpyAsyncDef {
  uint64 dst = 1;
  uint64 dst_max = 2;
  uint64 src = 3;
  uint64 count = 4;
  uint32 kind = 5;
  uint32 op_index = 6;
}
// Payload of TaskDef.stream_switch.
message StreamSwitchDef {
  uint32 op_index = 1;
  uint32 true_stream_id = 2;

  // Signed 64-bit value; value_ptr is a separate unsigned 64-bit field.
  int64 value = 3;
  uint64 value_ptr = 4;

  uint32 data_type = 5;
}
// Payload of TaskDef.stream_active.
message StreamActiveDef {
  uint32 op_index = 1;
  uint32 active_stream_id = 2;
}
// Payload of TaskDef.stream_switch_n.
// NOTE(review): target_value and true_stream_id look like parallel lists,
// presumably with `size` entries each — confirm with the producer.
message StreamSwitchNDef {
  uint32 op_index = 1;
  uint32 size = 2;

  repeated int64 target_value = 3;
  repeated uint32 true_stream_id = 4;

  uint32 element_size = 5;
  uint32 data_type = 6;
}
// Payload of TaskDef.label_set. Has the same three fields as LabelGotoExDef.
message LabelSetDef {
  uint32 op_index = 1;
  uint32 label_id = 2;
  uint32 model_id = 3;
}
// Payload of TaskDef.label_goto_ex. Has the same three fields as LabelSetDef.
message LabelGotoExDef {
  uint32 op_index = 1;
  uint32 label_id = 2;
  uint32 model_id = 3;
}
// Payload of TaskDef.label_switch_by_index.
message LabelSwitchByIndexDef {
  uint32 op_index = 1;
  uint32 label_max = 2;
}
@@ -889,14 +889,29 @@ REG_OP(ReadVariableOp) | |||
.ATTR(dtype, Int, DT_INT32) | |||
.OP_END_FACTORY_REG(ReadVariableOp) | |||
/** | |||
*@brief Mark outputs of one sub graph which partitioned by engine type. | |||
*@par Inputs: | |||
*x: A tensor. \n | |||
*@par Outputs: | |||
*y: A tensor. \n | |||
*@par Attributes: | |||
*@li peerIndex: The index of the corresponding 'placeholder' node it's connected to. | |||
*@li parentOpType: Op type of original node. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(End) | |||
.INPUT(x, TensorType::ALL()) | |||
.OUTPUT(y, TensorType::ALL()) | |||
.ATTR(peerIndex, Int, 0) // the index of the corresponding 'placeholder' node it's connected to | |||
.ATTR(parentOpType, String, "") // op type of original node | |||
.ATTR(peerIndex, Int, 0) | |||
.ATTR(parentOpType, String, "") | |||
.OP_END_FACTORY_REG(End) | |||
/** | |||
*@brief Operations for writing summary data, for use in analysis and visualization. | |||