Author | SHA1 | Message | Date |
---|---|---|---|
|
5232669e12 |
!81 fix securec download links due to mistakes made by openeuler community
Merge pull request !81 from yanghaoran/r0.5 |
4 years ago |
|
9344279b34 | fix securec download links due to mistakes made by openeuler community | 4 years ago |
|
00cec7c019 |
!65 Synchronize latest Ascend software suite 17 Sep 2020 on branch r0.5
Merge pull request !65 from yanghaoran/r0.5 |
4 years ago |
|
ddc6c58a3b | Synchronize latest Ascend software suite on r0.5 17 Sep 2020 | 4 years ago |
|
b828a21393 |
update tests/depends/cce/src/op_kernel_registry.cc.
add copyright |
5 years ago |
|
389dbe82be | update RELEASE.md. | 5 years ago |
@@ -1,3 +1,22 @@ | |||
# Release 0.5.0-beta | |||
## Major Features and Improvements | |||
- Optimize Allreduce trailing parallelism: rebuild the calculation-graph dependencies and adjust the calculation order so that calculation and gradient-aggregation communication overlap as much as possible, which is especially beneficial in large-data-volume gradient aggregation and low-bandwidth/large-cluster scenarios. | |||
- Advance constant folding, variable fusion, conversion operator related optimization pass to the end of the graph preparation. | |||
- Modify memory allocation algorithm, optimize GE memory allocation, and reduce memory usage in training multi-PCS scenarios. | |||
- Support IR composition, model compilation, inference execution in the same process. | |||
## Bugfixes | |||
- Fix the bug that the graph attribute "output_name_idx_" is not serialized to the GEIR model file, resulting in the failure of Fast-RCNN network offline inference model generation. | |||
- Introduce timestamp in the dump data storage directory, to ensure that the dump file generated is in a different directory each time it is executed. | |||
- Reinforce the ParserJsonFile interface to fix the program coredump bug caused by the injection of abnormal json files. | |||
- Fix the bug of stream binding failure scenarios and stream resource leakage. | |||
## Thanks to our Contributors | |||
Thanks goes to these wonderful people: | |||
wangcong,weiyang,yanghaorang,xutianchun,shibeiji | |||
Contributions of any kind are welcome! | |||
# Release 0.3.0-alpha | |||
## Major Features and Improvements | |||
@@ -1,7 +1,7 @@ | |||
graphengine_add_pkg(securec | |||
VER 1.1.10 | |||
URL https://gitee.com/openeuler/bounds_checking_function/repository/archive/v1.1.10.tar.gz | |||
MD5 0782dd2351fde6920d31a599b23d8c91 | |||
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz | |||
MD5 193f0ca5246c1dd84920db34d2d8249f | |||
LIBS c_sec | |||
PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001 | |||
CMAKE_OPTION " " | |||
@@ -204,9 +204,6 @@ const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; | |||
// Save original model file name | |||
const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | |||
// FE enable quant optimize | |||
const std::string QUANT_OPTIMIZE = "ge.quantOptimize"; | |||
const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; | |||
const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; | |||
const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; | |||
@@ -274,7 +271,6 @@ static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; | |||
static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); | |||
static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); | |||
static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); | |||
static const char *const QUANT_OPTIMIZE = ge::QUANT_OPTIMIZE.c_str(); | |||
static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); | |||
static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); | |||
static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); | |||
@@ -304,7 +300,6 @@ const std::set<std::string> global_options = {CORE_TYPE, | |||
AICORE_NUM, | |||
FUSION_SWITCH_FILE, | |||
ENABLE_SMALL_CHANNEL, | |||
QUANT_OPTIMIZE, | |||
OP_SELECT_IMPL_MODE, | |||
OPTYPELIST_FOR_IMPLMODE}; | |||
} // namespace ir_option | |||
@@ -43,6 +43,7 @@ | |||
#define DYNAMIC_INPUT_TD_NUM(name) ("__dynamic_input_" + name + "_cnt") | |||
namespace ge { | |||
class Operator; | |||
class OperatorImpl; | |||
class NamedAttrs; | |||
class Graph; | |||
@@ -50,6 +51,7 @@ class AttrValue; | |||
using SubgraphBuilder = std::function<Graph()>; | |||
using OperatorImplPtr = std::shared_ptr<OperatorImpl>; | |||
using OperatorPtr = std::shared_ptr<Operator>; | |||
class OpIO; | |||
using OutHandler = std::shared_ptr<OpIO>; | |||
@@ -67,6 +67,7 @@ using google::protobuf::Message; | |||
class OpRegistrationDataImpl; | |||
using ParseParamFunc = std::function<domi::Status(const google::protobuf::Message *, ge::Operator &)>; | |||
using ParseParamByOpFunc = std::function<domi::Status(const ge::Operator &, ge::Operator &)>; | |||
using FusionParseParamFunc = | |||
std::function<domi::Status(const std::vector<const google::protobuf::Message *>, ge::Operator &)>; | |||
using ParseSubgraphFunc = std::function<Status(const std::string &subgraph_name, const ge::Graph &graph)>; | |||
@@ -85,6 +86,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { | |||
OpRegistrationData &ParseParamsFn(const ParseParamFunc &parseParamFn); | |||
OpRegistrationData &ParseParamsByOperatorFn(const ParseParamByOpFunc &parse_param_by_op_fn); | |||
OpRegistrationData &FusionParseParamsFn(const FusionParseParamFunc &fusionParseParamFn); | |||
OpRegistrationData &ParseSubgraphPostFn(const ParseSubgraphFunc &subgraph_post_fn); | |||
@@ -100,6 +103,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData { | |||
std::set<std::string> GetOriginOpTypeSet() const; | |||
domi::FrameworkType GetFrameworkType() const; | |||
ParseParamFunc GetParseParamFn() const; | |||
ParseParamByOpFunc GetParseParamByOperatorFn() const; | |||
FusionParseParamFunc GetFusionParseParamFn() const; | |||
ParseSubgraphFunc GetParseSubgraphPostFn() const; | |||
@@ -183,6 +183,7 @@ struct ModelData { | |||
uint32_t model_len = 0; // Model binary data length | |||
int32_t priority = 0; // Model priority | |||
std::string key; // Key path for encrypt model, Empty for unencrypt | |||
std::string om_name; // om file name, used for data dump | |||
}; | |||
// The definition of Model information | |||
@@ -46,6 +46,8 @@ class ModelHelper { | |||
static Status TransModelToGeModel(const ModelPtr& model, GeModelPtr& ge_model); | |||
static Status TransGeModelToModel(const GeModelPtr& geModelPtr, ModelPtr& modelPtr); | |||
Status GetBaseNameFromFileName(const std::string& file_name, std::string& base_name); | |||
Status GetModelNameFromMergedGraphName(const std::string& graph_name, std::string& model_name); | |||
private: | |||
bool is_assign_model_ = false; | |||
@@ -62,7 +62,7 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||
// Get input and output descriptor | |||
ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | |||
std::vector<ge::TensorDesc> &output_desc); | |||
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false); | |||
/// | |||
/// @ingroup ge | |||
@@ -98,8 +98,10 @@ Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); | |||
Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); | |||
Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||
std::vector<std::string> &output_nodes_name); | |||
Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info); | |||
void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||
std::vector<std::string> &output_nodes_name); | |||
void UpdateOmgCtxWithParserCtx(); | |||
@@ -94,6 +94,8 @@ struct OmgContext { | |||
std::vector<std::pair<std::string, int32_t>> user_out_nodes; | |||
// net out nodes (where user_out_nodes or leaf nodes) | |||
std::vector<std::string> net_out_nodes; | |||
// net out nodes top names(only caffe has top) | |||
std::vector<std::string> out_top_names; | |||
// path for the aicpu custom operator so_file | |||
std::vector<std::string> aicpu_op_run_paths; | |||
// ddk version | |||
@@ -139,6 +139,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string NEW_AIPP | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_INPUTS; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; | |||
@@ -181,6 +183,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE; | |||
// to be deleted | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED; | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION; | |||
@@ -31,6 +31,7 @@ class ShapeRefiner { | |||
static graphStatus InferShapeAndType(const NodePtr &node, bool before_subgraph); | |||
static graphStatus InferShapeAndType(const NodePtr &node); | |||
static graphStatus InferShapeAndType(const ConstNodePtr &node, Operator &op); | |||
static void ClearContextMap(); | |||
private: | |||
static void PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase); | |||
@@ -121,6 +121,8 @@ const std::string NEW_AIPP_CONV_OP = "new_conv_op_for_aipp"; | |||
const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs"; | |||
const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs"; | |||
const std::string ATTR_NAME_INPUT_DIMS = "input_dims"; | |||
const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; | |||
const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; | |||
@@ -154,6 +156,7 @@ const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id"; | |||
const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start"; | |||
const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; | |||
const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims"; | |||
const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size"; | |||
// To be deleted | |||
const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; | |||
@@ -1,5 +1,5 @@ | |||
LOCAL_PATH := $(call my-dir) | |||
include $(LOCAL_PATH)/stub/Makefile | |||
COMMON_LOCAL_SRC_FILES := \ | |||
./proto/om.proto \ | |||
./proto/ge_ir.proto \ | |||
@@ -85,6 +85,29 @@ LOCAL_PROPRIETARY_MODULE := true | |||
include $(BUILD_HOST_SHARED_LIBRARY) | |||
#compiler for host | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := stub/libgraph | |||
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 | |||
LOCAL_CPPFLAGS += -fexceptions | |||
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||
LOCAL_SRC_FILES := \ | |||
../../out/atc/lib64/stub/graph.cc \ | |||
../../out/atc/lib64/stub/operator.cc \ | |||
../../out/atc/lib64/stub/tensor.cc \ | |||
../../out/atc/lib64/stub/operator_factory.cc \ | |||
LOCAL_SHARED_LIBRARIES := | |||
LOCAL_LDFLAGS := -lrt -ldl | |||
LOCAL_MULTILIB := 64 | |||
LOCAL_PROPRIETARY_MODULE := true | |||
include $(BUILD_HOST_SHARED_LIBRARY) | |||
#compiler for device | |||
include $(CLEAR_VARS) | |||
@@ -111,6 +134,32 @@ LOCAL_PROPRIETARY_MODULE := true | |||
include $(BUILD_SHARED_LIBRARY) | |||
#compiler for device | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := stub/libgraph | |||
LOCAL_CFLAGS += -O2 | |||
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||
LOCAL_SRC_FILES := \ | |||
../../out/atc/lib64/stub/graph.cc \ | |||
../../out/atc/lib64/stub/operator.cc \ | |||
../../out/atc/lib64/stub/tensor.cc \ | |||
../../out/atc/lib64/stub/operator_factory.cc \ | |||
LOCAL_SHARED_LIBRARIES := | |||
LOCAL_LDFLAGS := -lrt -ldl | |||
ifeq ($(device_os),android) | |||
LOCAL_LDFLAGS := -ldl | |||
endif | |||
LOCAL_MULTILIB := 64 | |||
LOCAL_PROPRIETARY_MODULE := true | |||
include $(BUILD_SHARED_LIBRARY) | |||
# compile for ut/st | |||
include $(CLEAR_VARS) | |||
@@ -759,6 +759,7 @@ graphStatus Node::Verify() const { | |||
GELOGW("Verify UpdateOutputName failed"); | |||
} | |||
} | |||
node_op.BreakConnect(); | |||
} | |||
if (op_->CommonVerify() == GRAPH_SUCCESS) { | |||
@@ -818,7 +818,9 @@ graphStatus OpDesc::InferShapeAndType() { | |||
} | |||
} | |||
Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); | |||
return (graphStatus)infer_func_(op_proxy); | |||
graphStatus ret = (graphStatus)infer_func_(op_proxy); | |||
op_proxy.BreakConnect(); | |||
return ret; | |||
} | |||
graphStatus OpDesc::DefaultInferFormat() { | |||
@@ -863,12 +865,14 @@ graphStatus OpDesc::DefaultInferFormat() { | |||
} | |||
graphStatus OpDesc::OpVerify() { | |||
Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); | |||
if (verifier_func_ == nullptr) { | |||
verifier_func_ = OperatorFactoryImpl::GetVerifyFunc(GetType()); | |||
} | |||
if (verifier_func_ != nullptr) { | |||
return (graphStatus)verifier_func_(op_proxy); | |||
Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); | |||
graphStatus ret = (graphStatus)verifier_func_(op_proxy); | |||
op_proxy.BreakConnect(); | |||
return ret; | |||
} | |||
return GRAPH_SUCCESS; | |||
} | |||
@@ -21,7 +21,7 @@ | |||
#include <mutex> | |||
#include <queue> | |||
#include <set> | |||
#include "array_ops.h" | |||
#include "./array_ops.h" | |||
#include "debug/ge_log.h" | |||
#include "debug/ge_op_types.h" | |||
#include "debug/ge_util.h" | |||
@@ -931,7 +931,7 @@ OperatorImplPtr Operator::GetOperatorImplPtr() const { return operator_impl_; } | |||
void Operator::BreakConnect() const { | |||
if (operator_impl_ == nullptr) { | |||
GELOGE(GRAPH_FAILED, "operator impl is nullptr."); | |||
GELOGW("operator impl is nullptr."); | |||
return; | |||
} | |||
operator_impl_->ClearInputLinks(); | |||
@@ -1318,6 +1318,8 @@ class GraphBuilderImpl { | |||
string type = src_op_impl->op_desc_->GetType(); | |||
auto node_op = ge::OperatorFactory::CreateOperator("node_op", type); | |||
auto tensor_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op); | |||
node_op.BreakConnect(); | |||
GE_CHK_BOOL_EXEC(tensor_desc != nullptr, continue, "tensor_desc is null."); | |||
if ((tensor_desc->GetInputsSize() == 0 && tensor_desc->GetOutputsSize() > 0) || type == DATA || | |||
type == VARIABLE || type == INITDATA || type == GETNEXT) { | |||
@@ -1542,6 +1544,7 @@ void GraphUtils::BreakConnect(const std::map<OperatorImplPtr, NodePtr> &all_node | |||
} | |||
op_impl->ClearOutputLinks(); | |||
op_impl->ClearInputLinks(); | |||
OperatorKeeper::GetInstance().CheckOutOperator(op_impl); | |||
} | |||
} | |||
} // namespace ge |
@@ -235,6 +235,7 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator & | |||
GELOGD("get op from OperatorFactory success. opType: %s", op_type.c_str()); | |||
auto temp_op_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op); | |||
node_op.BreakConnect(); | |||
if (temp_op_desc == nullptr) { | |||
GELOGE(GRAPH_FAILED, "temp op desc is null"); | |||
return GRAPH_FAILED; | |||
@@ -328,6 +329,9 @@ InferenceContextPtr CreateInferenceContext(const std::unordered_map<NodePtr, Inf | |||
namespace { | |||
std::unordered_map<NodePtr, InferenceContextPtr> context_map; | |||
} | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void ShapeRefiner::ClearContextMap() { context_map.clear(); } | |||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferShapeAndType(const NodePtr &node) { | |||
return InferShapeAndType(node, true); | |||
} | |||
@@ -0,0 +1,6 @@ | |||
inc_path := $(shell pwd)/inc/external/ | |||
out_path := $(shell pwd)/out/atc/lib64/stub/ | |||
stub_path := $(shell pwd)/common/graph/stub/ | |||
mkdir_stub := $(shell mkdir -p $(out_path)) | |||
graph_local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path)) |
@@ -0,0 +1,573 @@ | |||
import os | |||
import re | |||
import sys | |||
import logging | |||
logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s', | |||
level=logging.INFO) | |||
""" | |||
this attr is used for symbol table visible | |||
""" | |||
GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' | |||
""" | |||
generate stub func body by return type | |||
""" | |||
RETURN_STATEMENTS = { | |||
'graphStatus': ' return GRAPH_SUCCESS;', | |||
'Status': ' return SUCCESS;', | |||
'Graph': ' return Graph();', | |||
'Graph&': ' return *this;', | |||
'Format': ' return Format();', | |||
'Format&': ' return *this;', | |||
'Shape': ' return Shape();', | |||
'Shape&': ' return *this;', | |||
'TensorDesc': ' return TensorDesc();', | |||
'TensorDesc&': ' return *this;', | |||
'Tensor': ' return Tensor();', | |||
'Tensor&': ' return *this;', | |||
'Operator': ' return Operator();', | |||
'Operator&': ' return *this;', | |||
'Ptr': ' return nullptr;', | |||
'std::string': ' return "";', | |||
'std::string&': ' return "";', | |||
'string': ' return "";', | |||
'int': ' return 0;', | |||
'DataType': ' return DT_FLOAT;', | |||
'InferenceContextPtr': ' return nullptr;', | |||
'SubgraphBuilder': ' return nullptr;', | |||
'OperatorImplPtr': ' return nullptr;', | |||
'OutHandler': ' return nullptr;', | |||
'std::vector<std::string>': ' return {};', | |||
'std::vector<int64_t>': ' return {};', | |||
'std::map': ' return {};', | |||
'uint32_t': ' return 0;', | |||
'int64_t': ' return 0;', | |||
'uint64_t': ' return 0;', | |||
'size_t': ' return 0;', | |||
'float': ' return 0.0f;', | |||
'bool': ' return false;', | |||
} | |||
""" | |||
max code len per line in hua_wei software programming specifications | |||
""" | |||
max_code_len_per_line = 100 | |||
""" | |||
white_list_for_debug, include_dir_key_words is to | |||
determines which header files to generate cc files from | |||
when DEBUG on | |||
""" | |||
white_list_for_debug = ["operator.h", "tensor.h", | |||
"graph.h", "operator_factory.h", | |||
"ge_ir_build.h"] | |||
include_dir_key_words = ["ge", "graph"] | |||
DEBUG = True | |||
def need_generate_func(func_line): | |||
""" | |||
:param func_line: | |||
:return: | |||
""" | |||
if func_line.strip().endswith("default") or func_line.strip().endswith("delete") \ | |||
or func_line.strip().startswith("typedef") or func_line.strip().startswith("using"): | |||
return False | |||
return True | |||
def file_endswith_white_list_suffix(file): | |||
""" | |||
:param file: | |||
:return: | |||
""" | |||
if DEBUG: | |||
for suffix in white_list_for_debug: | |||
if file.endswith(suffix): | |||
return True | |||
return False | |||
else: | |||
return True | |||
""" | |||
belows are patterns used for analyse .h file | |||
""" | |||
# pattern function | |||
pattern_func = re.compile(r"""(^[\s]*) #leading with space,we will find and delete after | |||
([a-zA-Z~_] # void int likely | |||
.* | |||
[)] #we find ) | |||
(?!.*{) # we do not want the case int abc() const { return 1;} | |||
.*) | |||
(;.*) #we want to find ; and after for we will replace these later | |||
\n$ | |||
""", re.VERBOSE | re.MULTILINE | re.DOTALL) | |||
# pattern comment | |||
pattern_comment = re.compile(r'^\s*//') | |||
pattern_comment_2_start = re.compile(r'^\s*/[*]') | |||
pattern_comment_2_end = re.compile(r'[*]/\s*$') | |||
# pattern define | |||
pattern_define = re.compile(r'^\s*#define') | |||
pattern_define_return = re.compile(r'\\\s*$') | |||
# blank line | |||
pattern_blank_line = re.compile(r'^\s*$') | |||
# virtual,explicit,friend,static | |||
pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)') | |||
# lead space | |||
pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]') | |||
# functions will have patterns such as func ( or func( | |||
# but operator is an exception; the class name is preceded by an operator, and the above mode does not exist | |||
# format like :"operator = ()" | |||
pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]') | |||
# template | |||
pattern_template = re.compile(r'^\s*template') | |||
pattern_template_end = re.compile(r'>\s*$') | |||
# namespace | |||
pattern_namespace = re.compile(r'namespace.*{') | |||
# class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with | |||
pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+<?)(?!.*;)' % GE_ATTR) | |||
# {} | |||
pattern_start = re.compile('{') | |||
pattern_end = re.compile('}') | |||
line_index = 0 | |||
class H2CC(object): | |||
def __init__(self, input_file, output_file, shared_includes_content): | |||
""" | |||
:param input_file: | |||
:param output_file: | |||
:param shared_includes_content: | |||
""" | |||
self.input_file = input_file | |||
self.output_file = output_file | |||
self.shared_includes_content = shared_includes_content | |||
self.line_index = 0 | |||
self.input_fd = open(self.input_file, 'r') | |||
self.input_content = self.input_fd.readlines() | |||
self.output_fd = open(self.output_file, 'w') | |||
# The state may be normal_now(in the middle of {}),class_now,namespace_now | |||
self.stack = [] | |||
self.stack_class = [] | |||
self.stack_template = [] | |||
# record funcs generated by h2cc func | |||
self.func_list_exist = [] | |||
def __del__(self): | |||
self.input_fd.close() | |||
self.output_fd.close() | |||
del self.stack | |||
del self.stack_class | |||
del self.stack_template | |||
del self.func_list_exist | |||
def just_skip(self): | |||
# skip blank line or comment | |||
if pattern_blank_line.search(self.input_content[self.line_index]) or pattern_comment.search( | |||
self.input_content[self.line_index]): # /n or comment using // | |||
self.line_index += 1 | |||
if pattern_comment_2_start.search(self.input_content[self.line_index]): # comment using /* | |||
while not pattern_comment_2_end.search(self.input_content[self.line_index]): # */ | |||
self.line_index += 1 | |||
self.line_index += 1 | |||
# skip define | |||
if pattern_define.search(self.input_content[self.line_index]): | |||
while pattern_blank_line.search(self.input_content[self.line_index]) or pattern_define_return.search( | |||
self.input_content[self.line_index]): | |||
self.line_index += 1 | |||
self.line_index += 1 | |||
def write_inc_content(self): | |||
for shared_include_content in self.shared_includes_content: | |||
self.output_fd.write(shared_include_content) | |||
def h2cc(self): | |||
""" | |||
:return: | |||
""" | |||
logging.info("start generate cc_file[%s] from h_file[%s]", self.output_file, self.input_file) | |||
global pattern_comment | |||
global pattern_comment_2_start | |||
global pattern_comment_2_end | |||
global pattern_blank_line | |||
global pattern_func | |||
global pattern_keyword | |||
global pattern_leading_space | |||
global pattern_func_name | |||
global pattern_template | |||
global pattern_template_end | |||
global pattern_namespace | |||
global pattern_class | |||
global pattern_start | |||
global pattern_end | |||
global line_index | |||
# write inc content | |||
self.write_inc_content() | |||
# core processing cycle, process the input .h file by line | |||
while self.line_index < len(self.input_content): | |||
# handle comment and blank line | |||
self.just_skip() | |||
# match namespace | |||
self.handle_namespace() | |||
# match template | |||
template_string = self.handle_template() | |||
# match class | |||
line = self.input_content[self.line_index] | |||
match_class = pattern_class.search(line) | |||
match_start = pattern_start.search(line) | |||
handle_class_result = self.handle_class(template_string, line, match_start, match_class) | |||
if handle_class_result == "continue": | |||
continue | |||
# match "}" | |||
handle_stack_result = self.handle_stack(match_start) | |||
if handle_stack_result == "continue": | |||
continue | |||
# handle func | |||
handle_func1_result, line, start_i = self.handle_func1(line) | |||
if handle_func1_result == "continue": | |||
continue | |||
# here means func is found | |||
# delete key word | |||
line = pattern_keyword.sub('', line) | |||
logging.info("line[%s]", line) | |||
# Class member function | |||
# if friend we will not add class name | |||
friend_match = re.search('friend ', line) | |||
if len(self.stack_class) > 0 and not friend_match: | |||
line, func_name = self.handle_class_member_func(line, template_string) | |||
# Normal functions | |||
else: | |||
line, func_name = self.handle_normal_func(line, template_string) | |||
need_generate = need_generate_func(line) | |||
# func body | |||
line += self.implement_function(line) | |||
# comment | |||
line = self.gen_comment(start_i) + line | |||
# write to out file | |||
self.write_func_content(line, func_name, need_generate) | |||
# next loop | |||
self.line_index += 1 | |||
logging.info('Added %s functions', len(self.func_list_exist)) | |||
logging.info('Successfully converted,please see ' + self.output_file) | |||
def handle_func1(self, line): | |||
""" | |||
:param line: | |||
:return: | |||
""" | |||
find1 = re.search('[(]', line) | |||
if not find1: | |||
self.line_index += 1 | |||
return "continue", line, None | |||
find2 = re.search('[)]', line) | |||
start_i = self.line_index | |||
space_match = pattern_leading_space.search(line) | |||
# deal with | |||
# int abc(int a, | |||
# int b) | |||
if find1 and (not find2): | |||
self.line_index += 1 | |||
line2 = self.input_content[self.line_index] | |||
if space_match: | |||
line2 = re.sub('^' + space_match.group(1), '', line2) | |||
line += line2 | |||
while self.line_index < len(self.input_content) and (not re.search('[)]', line2)): | |||
self.line_index += 1 | |||
line2 = self.input_content[self.line_index] | |||
line2 = re.sub('^' + space_match.group(1), '', line2) | |||
line += line2 | |||
match_start = pattern_start.search(self.input_content[self.line_index]) | |||
match_end = pattern_end.search(self.input_content[self.line_index]) | |||
if match_start: # like ) { or ) {} int the last line | |||
if not match_end: | |||
self.stack.append('normal_now') | |||
ii = start_i | |||
while ii <= self.line_index: | |||
ii += 1 | |||
self.line_index += 1 | |||
return "continue", line, start_i | |||
logging.info("line[%s]", line) | |||
# ' int abc();'->'int abc()' | |||
(line, match) = pattern_func.subn(r'\2\n', line) | |||
logging.info("line[%s]", line) | |||
# deal with case: | |||
# 'int \n abc(int a, int b)' | |||
if re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]): | |||
line = self.input_content[start_i - 1] + line | |||
line = line.lstrip() | |||
if not match: | |||
self.line_index += 1 | |||
return "continue", line, start_i | |||
return "pass", line, start_i | |||
def handle_stack(self, match_start): | |||
""" | |||
:param match_start: | |||
:return: | |||
""" | |||
line = self.input_content[self.line_index] | |||
match_end = pattern_end.search(line) | |||
if match_start: | |||
self.stack.append('normal_now') | |||
if match_end: | |||
top_status = self.stack.pop() | |||
if top_status == 'namespace_now': | |||
self.output_fd.write(line + '\n') | |||
elif top_status == 'class_now': | |||
self.stack_class.pop() | |||
self.stack_template.pop() | |||
if match_start or match_end: | |||
self.line_index += 1 | |||
return "continue" | |||
if len(self.stack) > 0 and self.stack[-1] == 'normal_now': | |||
self.line_index += 1 | |||
return "continue" | |||
return "pass" | |||
def handle_class(self, template_string, line, match_start, match_class): | |||
""" | |||
:param template_string: | |||
:param line: | |||
:param match_start: | |||
:param match_class: | |||
:return: | |||
""" | |||
if match_class: # we face a class | |||
self.stack_template.append(template_string) | |||
self.stack.append('class_now') | |||
class_name = match_class.group(3) | |||
# class template specializations: class A<u,Node<u> > | |||
if '<' in class_name: | |||
k = line.index('<') | |||
fit = 1 | |||
for ii in range(k + 1, len(line)): | |||
if line[ii] == '<': | |||
fit += 1 | |||
if line[ii] == '>': | |||
fit -= 1 | |||
if fit == 0: | |||
break | |||
class_name += line[k + 1:ii + 1] | |||
logging.info('class_name[%s]', class_name) | |||
self.stack_class.append(class_name) | |||
while not match_start: | |||
self.line_index += 1 | |||
line = self.input_content[self.line_index] | |||
match_start = pattern_start.search(line) | |||
self.line_index += 1 | |||
return "continue" | |||
return "pass" | |||
def handle_template(self): | |||
line = self.input_content[self.line_index] | |||
match_template = pattern_template.search(line) | |||
template_string = '' | |||
if match_template: | |||
match_template_end = pattern_template_end.search(line) | |||
template_string = line | |||
while not match_template_end: | |||
self.line_index += 1 | |||
line = self.input_content[self.line_index] | |||
template_string += line | |||
match_template_end = pattern_template_end.search(line) | |||
self.line_index += 1 | |||
return template_string | |||
def handle_namespace(self): | |||
line = self.input_content[self.line_index] | |||
match_namespace = pattern_namespace.search(line) | |||
if match_namespace: # we face namespace | |||
self.output_fd.write(line + '\n') | |||
self.stack.append('namespace_now') | |||
self.line_index += 1 | |||
def handle_normal_func(self, line, template_string): | |||
template_line = '' | |||
self.stack_template.append(template_string) | |||
if self.stack_template[-1] != '': | |||
template_line = re.sub(r'\s*template', 'template', self.stack_template[-1]) | |||
# change '< class T = a, class U = A(3)>' to '<class T, class U>' | |||
template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line) | |||
template_line = re.sub(r'\s*=.*,', ',', template_line) | |||
template_line = re.sub(r'\s*=.*', '', template_line) | |||
line = re.sub(r'\s*=.*,', ',', line) | |||
line = re.sub(r'\s*=.*\)', ')', line) | |||
line = template_line + line | |||
self.stack_template.pop() | |||
func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group() | |||
logging.info("line[%s]", line) | |||
logging.info("func_name[%s]", func_name) | |||
return line, func_name | |||
def handle_class_member_func(self, line, template_string):
    """Turn a class member declaration into an out-of-class definition line.

    Builds ``ReturnType Class<T>::func(...)`` from the declaration ``line``,
    prefixing the enclosing class's template header (top of
    ``self.stack_template``) and the function's own template header
    (``template_string``) when present. Default argument values and
    pure-virtual ``= 0`` markers are stripped.

    :param line: declaration line(s) of the member function.
    :param template_string: template header of the function itself, or ''.
    :return: tuple ``(definition_line, func_name)`` where ``func_name`` is
        everything up to and including the closing ')'.
    """
    template_line = ''
    x = ''
    if template_string != '':
        template_string = re.sub(r'\s*template', 'template', template_string)
        template_string = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_string)
        template_string = re.sub(r'\s*=.*,', ',', template_string)
        template_string = re.sub(r'\s*=.*', '', template_string)
    # Bug fix: the original referenced the bare name `stack_template`, which
    # is undefined in this scope (NameError); the attribute lives on self.
    if self.stack_template[-1] != '':
        if not re.search(r'<\s*>', self.stack_template[-1]):
            template_line = re.sub(r'^\s*template', 'template', self.stack_template[-1])
            if not re.search(r'<.*>', self.stack_class[-1]):
                # for x we get like template<class T, typename U> -> <T,U>
                x = re.sub(r'template\s*<', '<', template_line)  # remove template -> <class T, typename U>
                x = re.sub(r'\n', '', x)
                x = re.sub(r'\s*=.*,', ',', x)
                x = re.sub(r'\s*=.*\>', '>', x)
                x = x.rstrip()  # remove \n
                x = re.sub(r'(class|typename)\s+|(<class>|<typename>\s*class)', '',
                           x)  # remove class,typename -> <T, U>
                x = re.sub(r'<\s+', '<', x)
                x = re.sub(r'\s+>', '>', x)
                x = re.sub(r'\s+,', ',', x)
                x = re.sub(r',\s+', ', ', x)
    line = re.sub(r'\s*=\s+0', '', line)
    line = re.sub(r'\s*=\s+.*,', ',', line)
    line = re.sub(r'\s*=\s+.*\)', ')', line)
    logging.info("x[%s]\nline[%s]", x, line)
    # if the function is long, void ABC::foo()
    # breaks into two lines void ABC::\n foo()
    temp_line = pattern_func_name.sub(self.stack_class[-1] + x + '::' + r'\1(', line, count=1)
    if len(temp_line) > max_code_len_per_line:
        line = pattern_func_name.sub(self.stack_class[-1] + x + '::\n' + r'\1(', line, count=1)
    else:
        line = temp_line
    logging.info("line[%s]", line)
    # add template as the above if there is one
    template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line)
    template_line = re.sub(r'\s*=.*,', ',', template_line)
    template_line = re.sub(r'\s*=.*', '', template_line)
    line = template_line + template_string + line
    func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
    logging.info("line[%s]", line)
    logging.info("func_name[%s]", func_name)
    return line, func_name
def write_func_content(self, content, func_name, need_generate):
    """Append a generated stub body to the output file, once per name.

    A function is emitted only when ``need_generate`` is true and the same
    ``func_name`` has not been written before; emitted names are remembered
    in ``self.func_list_exist`` to keep the output free of duplicates.

    :param content: full text of the stub implementation to write.
    :param func_name: de-duplication key for the function.
    :param need_generate: caller's flag allowing the emission at all.
    """
    if func_name in self.func_list_exist or not need_generate:
        return
    self.output_fd.write(content)
    self.func_list_exist.append(func_name)
    logging.info('add func:[%s]', func_name)
def gen_comment(self, start_i):
    """Collect the comment block sitting directly above a declaration.

    Function comments are on top of function declarations; this walks
    upward from ``start_i`` and returns them so they can be copied over.
    Handles both ``/* ... */`` block comments (gathered until the opening
    marker) and consecutive ``//`` line comments (gathered bottom-up until
    a non-comment line is hit).

    :param start_i: index of the line where the declaration starts.
    :return: the collected comment text ('' when there is none).
    """
    collected = ''
    k = start_i - 1  # one line before this func start
    # A template<...> header may sit between the comment and the function.
    if pattern_template.search(self.input_content[k]):
        k -= 1
    if pattern_comment_2_end.search(self.input_content[k]):
        # Block comment: prepend lines until the opening /* is found.
        collected = self.input_content[k].lstrip()
        while not pattern_comment_2_start.search(self.input_content[k]):
            k -= 1
            collected = self.input_content[k].lstrip() + collected
    else:
        # Run of // comments: gather consecutive ones walking upward.
        for j in range(k, 0, -1):
            candidate = self.input_content[j]
            if not pattern_comment.search(candidate):
                break
            collected = re.sub(r'\s*//', '//', candidate) + collected
    return collected
@staticmethod
def implement_function(func):
    """Build a canned stub body for a function declaration.

    Inspects the declaration's leading tokens to classify the return type
    (skipping a leading ``const``, collapsing std containers, detecting
    pointer and reference returns) and emits ``{ <return stmt> }`` using the
    matching entry of the module-level ``RETURN_STATEMENTS`` table. Unknown
    return types produce an empty body and a warning.

    :param func: the function declaration text.
    :return: the stub body, braces included.
    """
    tokens = func.split()
    idx = 0
    return_type = tokens[idx]
    if return_type == "const":
        idx += 1
        return_type = tokens[idx]
    if return_type.startswith(('std::map', 'std::set', 'std::vector')):
        # All these containers share one canned return statement.
        return_type = "std::map"
    has_next = len(tokens) > idx + 1
    if return_type.endswith('*') or (has_next and tokens[idx + 1].startswith('*')):
        return_type = "Ptr"
    if has_next and tokens[idx + 1].startswith('&'):
        return_type += "&"
    parts = ['{\n']
    if return_type in RETURN_STATEMENTS:
        parts.append(RETURN_STATEMENTS[return_type])
    else:
        logging.warning("Unhandled return type[%s]", return_type)
    parts.append('\n')
    parts.append('}\n')
    parts.append('\n')
    return ''.join(parts)
def collect_header_files(path):
    """Recursively gather every C/C++ header under ``path``.

    Files whose name contains ``git`` are ignored, as is anything that does
    not end in ``.h``. Paths are normalized to forward slashes and the
    matching ``#include "..."`` lines are made relative to the segment after
    the last '/' in ``path``.

    :param path: root directory to scan (callers pass it with a trailing '/').
    :return: tuple ``(header_files, shared_includes_content)``.
    """
    header_files = []
    shared_includes_content = []
    for root, _dirs, files in os.walk(path):
        for name in sorted(files):
            # Skip VCS leftovers and anything that is not a header.
            if 'git' in name or not name.endswith('.h'):
                continue
            normalized = os.path.join(root, name).replace('\\', '/')
            header_files.append(normalized)
            relative = normalized[path.rindex('/') + 1:]
            shared_includes_content.append('#include "{}"\n'.format(relative))
    return header_files, shared_includes_content
def generate_stub_file(inc_dir, out_cc_dir):
    """Produce one stub .cc file per whitelisted header under ``inc_dir``.

    :param inc_dir: directory scanned for headers.
    :param out_cc_dir: directory receiving the generated .cc files
        (expected to end with '/').
    :return: None
    """
    target_header_files, shared_includes_content = collect_header_files(inc_dir)
    for header_file in target_header_files:
        if not file_endswith_white_list_suffix(header_file):
            continue
        # path/to/foo.h -> foo.cc placed directly in the output directory.
        cc_file = re.sub('.h*$', '.cc', header_file)
        out_path = out_cc_dir + cc_file[cc_file.rindex('/') + 1:]
        H2CC(header_file, out_path, shared_includes_content).h2cc()
def gen_code(inc_dir, out_cc_dir):
    """Generate stub sources for every configured include sub-directory.

    :param inc_dir: base include directory (a '/' is appended if missing).
    :param out_cc_dir: output directory (a '/' is appended if missing).
    :return: None
    """
    inc_dir = inc_dir if inc_dir.endswith('/') else inc_dir + '/'
    out_cc_dir = out_cc_dir if out_cc_dir.endswith('/') else out_cc_dir + '/'
    for key_word in include_dir_key_words:
        generate_stub_file(inc_dir + key_word, out_cc_dir)
if __name__ == '__main__':
    # CLI entry point: python <script> <include_dir> <output_cc_dir>
    if len(sys.argv) < 3:
        # Fail with a usage hint instead of an IndexError traceback.
        sys.exit('usage: {} <include_dir> <output_cc_dir>'.format(sys.argv[0]))
    inc_dir = sys.argv[1]
    out_cc_dir = sys.argv[2]
    gen_code(inc_dir, out_cc_dir)
@@ -187,12 +187,9 @@ void TBEPluginManager::LoadCustomOpLib() { | |||
std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | |||
GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | |||
for (OpRegistrationData reg_data : registration_datas) { | |||
bool ret = CheckRegisterStatus(reg_data); | |||
if (ret) { | |||
GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(), | |||
static_cast<uint32_t>(reg_data.GetImplyType())); | |||
domi::OpRegistry::Instance()->Register(reg_data); | |||
} | |||
GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(), | |||
static_cast<uint32_t>(reg_data.GetImplyType())); | |||
domi::OpRegistry::Instance()->Register(reg_data); | |||
} | |||
} | |||
@@ -230,31 +227,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPlug | |||
} | |||
} | |||
bool TBEPluginManager::CheckRegisterStatus(const OpRegistrationData ®_data) { | |||
bool ret = true; | |||
static char *parser_priority = std::getenv("PARSER_PRIORITY"); | |||
static bool keep_cce = parser_priority != nullptr && string(parser_priority) == "cce"; | |||
auto ori_optype_set = reg_data.GetOriginOpTypeSet(); | |||
for (const auto &op_type : ori_optype_set) { | |||
domi::ImplyType imply_type = domi::OpRegistry::Instance()->GetImplyTypeByOriOpType(op_type); | |||
GELOGD("Enter into reg_data loop. op_type = %s , om_optype_ = %s", op_type.c_str(), reg_data.GetOmOptype().c_str()); | |||
if (imply_type != domi::ImplyType::BUILDIN) { | |||
if ((keep_cce && reg_data.GetImplyType() != domi::ImplyType::CCE) || | |||
(!keep_cce && reg_data.GetImplyType() != domi::ImplyType::TVM)) { | |||
GELOGD("op_type[%s] does not need to be changed, om_optype:%s.", op_type.c_str(), | |||
reg_data.GetOmOptype().c_str()); | |||
ret = false; | |||
} else { | |||
GELOGI("op_type[%s] will be changed to om_optype:%s.", op_type.c_str(), reg_data.GetOmOptype().c_str()); | |||
} | |||
} else { | |||
GELOGD("First register in ge initialize, original type: %s, om_optype: %s, imply type: %d.", op_type.c_str(), | |||
reg_data.GetOmOptype().c_str(), static_cast<int>(reg_data.GetImplyType())); | |||
} | |||
} | |||
return ret; | |||
} | |||
Status TBEPluginManager::CheckCustomAiCpuOpLib() { | |||
std::vector<std::string> vec_op_type; | |||
@@ -63,7 +63,6 @@ class TBEPluginManager { | |||
static void GetCustomOpPath(std::string &customop_path); | |||
void LoadCustomOpLib(); | |||
static Status CheckCustomAiCpuOpLib(); | |||
static bool CheckRegisterStatus(const OpRegistrationData ®_data); | |||
SoHandlesVec handles_vec_; | |||
static std::map<string, string> options_; | |||
@@ -184,7 +184,8 @@ ModelHelper::SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::strin | |||
// Model | |||
ModelPtr model_ptr = ge::MakeShared<ge::Model>(); | |||
GE_CHECK_NOTNULL_EXEC(model_ptr, return MEMALLOC_FAILED); | |||
model_ptr->SetName(compute_graph->GetName()); | |||
std::string original_model_name = compute_graph->GetName() + "_original"; | |||
model_ptr->SetName(original_model_name); | |||
model_ptr->SetGraph(graph); | |||
model_ptr->SetVersion(static_cast<uint32_t>(OM_PROTO_VERSION)); | |||
string framework_version; | |||
@@ -504,4 +505,36 @@ Status ModelHelper::ReleaseLocalModelData() noexcept { | |||
} | |||
return result; | |||
} | |||
// Derive a base name from a file path by stripping the directory prefix and
// the trailing three characters of the name.
// NOTE(review): this assumes the name ends with a 3-char suffix such as
// ".om"; a shorter file name would make end_position wrap around — confirm
// that callers always pass "*.om" paths.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseNameFromFileName(const string &file_name,
                                                                                             string &base_name) {
  GELOGD("Get base_name from file, file_name:%s", file_name.c_str());
  GE_CHK_BOOL_EXEC_WARN(!file_name.empty(), return FAILED, "File path may not valid, check params --output");
  const size_t kSuffixLen = 3;  // using output as base_name (ignore ".om")
  size_t start_position = 0;
  const size_t slash_pos = file_name.find_last_of('/');
  if (slash_pos != string::npos) {
    start_position = slash_pos + 1;
  }
  const size_t end_position = file_name.length() - kSuffixLen;
  base_name = file_name.substr(start_position, end_position - start_position);
  GE_CHK_BOOL_EXEC_WARN(!base_name.empty(), return FAILED, "Get base_name failed, check params --output");
  return SUCCESS;
}
// Recover the model name from a merged graph name by dropping the trailing
// "_x" suffix (x is the index appended during graph merging).
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
ModelHelper::GetModelNameFromMergedGraphName(const string &graph_name, string &model_name) {
  GELOGD("Get model_name from graph_name, graph_name:%s", graph_name.c_str());
  // this can only be used after merged graph(graph name will be append with "_x", x is index);
  GE_CHK_BOOL_EXEC_WARN(!graph_name.empty(), return FAILED, "File path may not valid, check params --output");
  size_t end_position = graph_name.length();
  // using graph as model_name (ignore "_x", x is the index of graph)
  const size_t underscore_pos = graph_name.find_last_of('_');
  if (underscore_pos != string::npos) {
    end_position = underscore_pos;
  }
  model_name = graph_name.substr(0, end_position);
  GE_CHK_BOOL_EXEC_WARN(!model_name.empty(), return FAILED, "Get model_name failed, check params --output");
  return SUCCESS;
}
} // namespace ge |
@@ -15,7 +15,7 @@ | |||
*/ | |||
#include "common/model_parser/base.h" | |||
#include "common/helper/model_helper.h" | |||
#include <securec.h> | |||
#include <sys/sysinfo.h> | |||
#include <fstream> | |||
@@ -61,7 +61,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro | |||
// read data as a block: | |||
(void)fs.read(data, len); | |||
ModelHelper model_helper; | |||
model_helper.GetBaseNameFromFileName(model_path, model_data.om_name); | |||
// Set the model data parameter | |||
model_data.model_data = data; | |||
model_data.model_len = len; | |||
@@ -16,15 +16,12 @@ | |||
#include "common/profiling/profiling_manager.h" | |||
#include <nlohmann/json.hpp> | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/debug/log.h" | |||
#include "framework/common/string_util.h" | |||
#include "graph/ge_context.h" | |||
#include "runtime/base.h" | |||
using Json = nlohmann::json; | |||
namespace { | |||
const char *const kJobID = "jobID"; | |||
const char *const kDeviceID = "deviceID"; | |||
@@ -35,6 +32,7 @@ const char *const kEvents = "events"; | |||
const char *const kAiCoreEvents = "ai_core_events"; | |||
const char *const kName = "name"; | |||
const char *const kTraceID = "traceId"; | |||
const char *const kProfDir = "resultPath"; | |||
const size_t kReportMaxLen = 2048; | |||
} // namespace | |||
@@ -100,6 +98,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
Json start_prof_conf = Json::parse(config); | |||
Json &prof_conf = start_prof_conf[kStartCfg][0]; | |||
job_id_ = prof_conf[kJobID]; | |||
auto iter = prof_conf.find(kProfDir); | |||
if (iter != prof_conf.end()) { | |||
prof_dir_ = prof_conf[kProfDir]; | |||
} | |||
Json &device_id = prof_conf[kDeviceID]; | |||
if (device_id.size() != 0) { | |||
vector<int32_t>().swap(device_id_); | |||
@@ -126,23 +128,36 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
} | |||
} | |||
GELOGI("Profiling json config from acl:%s", config.c_str()); | |||
Json &features = prof_conf[kFeatures]; | |||
if (ParseFeaturesFromAclCfg(features) != SUCCESS) { | |||
GELOGE(FAILED, "Parse feature from acl cfg failed."); | |||
return FAILED; | |||
} | |||
is_profiling_ = true; | |||
} catch (...) { | |||
GELOGE(FAILED, "Json conf is not invalid !"); | |||
return ge::PARAM_INVALID; | |||
} | |||
#endif | |||
return ge::SUCCESS; | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg( | |||
const Json &features) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
try { | |||
for (size_t i = 0; i < features.size(); ++i) { | |||
Json &feature = features[i]; | |||
const Json &feature = features[i]; | |||
if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) { | |||
continue; | |||
} | |||
const std::string &name = feature[kName]; | |||
if (name == "op_trace") { | |||
GELOGI("Op trace config from acl"); | |||
Json &conf = feature[kConf]; | |||
Json &events = conf[0][kEvents]; | |||
const Json &conf = feature[kConf]; | |||
const Json &events = conf[0][kEvents]; | |||
const std::string &ai_core_events = events[0][kAiCoreEvents]; | |||
GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str()); | |||
is_op_trace_ = true; | |||
// op trace get conf | |||
ProfMgrConf prof_mgr_conf; | |||
int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf); | |||
if (result != 0) { | |||
@@ -154,10 +169,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_); | |||
} else if (name == "task_trace") { | |||
is_op_trace_ = false; | |||
if (feature.find(kConf) != feature.end()) { | |||
const Json &conf = feature[kConf]; | |||
std::stringstream task_trace_conf; | |||
task_trace_conf << conf; | |||
task_trace_conf_ = task_trace_conf.str(); | |||
} | |||
GELOGI("Task trace config from acl"); | |||
} else if (name == "system_trace") { | |||
is_op_trace_ = false; | |||
Json &conf = feature[kConf]; | |||
const Json &conf = feature[kConf]; | |||
std::stringstream system_trace_conf; | |||
system_trace_conf << conf; | |||
system_trace_conf_ = system_trace_conf.str(); | |||
@@ -165,10 +186,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In | |||
} | |||
profiling_opts_.push_back(name); | |||
} | |||
is_profiling_ = true; | |||
} catch (...) { | |||
GELOGE(FAILED, "Json conf is not invalid !"); | |||
GELOGE(ge::PARAM_INVALID, "Json conf feature is not invalid !"); | |||
return ge::PARAM_INVALID; | |||
} | |||
#endif | |||
@@ -235,6 +254,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||
p_device[kDeviceID] = std::to_string(device_id); | |||
p_device[kJobID] = job_id_; | |||
p_device[kTraceID] = std::to_string(GetContext().TraceId()); | |||
if (!prof_dir_.empty()) { | |||
p_device[kProfDir] = prof_dir_; | |||
GELOGI("Prof dir: %s.", prof_dir_.c_str()); | |||
} | |||
Json features; | |||
if (is_op_trace_) { | |||
@@ -258,6 +281,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||
Json f; | |||
if (profiling_opts_[i] == "system_trace") { | |||
f[kConf] = nlohmann::json::parse(system_trace_conf_); | |||
} else if (profiling_opts_[i] == "task_trace") { | |||
if (!task_trace_conf_.empty()) { | |||
f[kConf] = nlohmann::json::parse(task_trace_conf_); | |||
} | |||
} | |||
f[kName] = profiling_opts_[i]; | |||
features[i] = f; | |||
@@ -292,6 +319,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St | |||
GELOGW("ProfMgrStartUp failed."); | |||
return FAILED; | |||
} | |||
GELOGD("StartProfiling, prof_handle: %p", prof_handle); | |||
prof_handle_vec_.push_back(prof_handle); | |||
} | |||
#endif | |||
@@ -314,8 +342,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf | |||
for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { | |||
int result = ProfMgrStop(prof_handle_vec_[i]); | |||
if (result != 0) { | |||
GELOGW("ProfMgr stop return fail:%d.", result); | |||
return; | |||
GELOGW("ProfMgr stop return fail:%d, handle:%p", result, prof_handle_vec_[i]); | |||
} | |||
} | |||
vector<void *>().swap(prof_handle_vec_); | |||
@@ -17,6 +17,7 @@ | |||
#ifndef GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | |||
#define GE_COMMON_PROFILING_PROFILING_MANAGER_H_ | |||
#include <nlohmann/json.hpp> | |||
#include <map> | |||
#include <string> | |||
#include <vector> | |||
@@ -30,6 +31,7 @@ | |||
using std::map; | |||
using std::string; | |||
using std::vector; | |||
using Json = nlohmann::json; | |||
namespace ge { | |||
const std::string GE_PROFILING_MODULE = "Framework"; | |||
@@ -84,11 +86,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
void PluginUnInit(const std::string &module) const; | |||
private: | |||
ge::Status ParseFeaturesFromAclCfg(const Json &feature); | |||
bool is_profiling_ = false; | |||
bool is_op_trace_ = false; | |||
bool is_load_ = false; | |||
int32_t op_trace_iter_num_ = 0; | |||
string job_id_; | |||
string prof_dir_; | |||
vector<int32_t> device_id_; | |||
vector<string> op_trace_conf_; | |||
vector<string> profiling_opts_; | |||
@@ -96,6 +100,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||
string recv_profiling_config_; | |||
string send_profiling_config_; | |||
string system_trace_conf_; | |||
string task_trace_conf_; | |||
const ProfilingEngineImpl engine_; | |||
}; | |||
} // namespace ge | |||
@@ -208,6 +208,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> Propertie | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayerNeedDump(const std::string &model, | |||
const std::string &om_name, | |||
const std::string &op_name) { | |||
std::lock_guard<std::mutex> lock(dump_mutex_); | |||
// if dump all | |||
@@ -216,9 +217,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayer | |||
} | |||
// if this model need dump | |||
auto model_iter = model_dump_properties_map_.find(model); | |||
if (model_iter != model_dump_properties_map_.end()) { | |||
auto om_name_iter = model_dump_properties_map_.find(om_name); | |||
auto model_name_iter = model_dump_properties_map_.find(model); | |||
if (om_name_iter != model_dump_properties_map_.end() || model_name_iter != model_dump_properties_map_.end()) { | |||
// if no dump layer info, dump all layer in this model | |||
auto model_iter = om_name_iter != model_dump_properties_map_.end() ? om_name_iter : model_name_iter; | |||
if (model_iter->second.empty()) { | |||
return true; | |||
} | |||
@@ -84,7 +84,7 @@ class PropertiesManager { | |||
void AddDumpPropertyValue(const std::string &model, const std::set<std::string> &layers); | |||
std::set<std::string> GetAllDumpModel(); | |||
std::set<std::string> GetDumpPropertyValue(const std::string &model); | |||
bool IsLayerNeedDump(const std::string &model, const std::string &op_name); | |||
bool IsLayerNeedDump(const std::string &model, const std::string &om_name, const std::string &op_name); | |||
void DeleteDumpPropertyValue(const std::string &model); | |||
void ClearDumpPropertyValue(); | |||
bool QueryModelDumpStatus(const std::string &model); | |||
@@ -452,7 +452,7 @@ Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData | |||
// Get input and output descriptor | |||
Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | |||
std::vector<ge::TensorDesc> &output_desc) { | |||
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | |||
GELOGI("get model desc info begin."); | |||
if (!isInit_) { | |||
GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
@@ -464,8 +464,8 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||
std::vector<uint32_t> input_formats; | |||
std::vector<uint32_t> output_formats; | |||
Status ret = | |||
GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, output_formats); | |||
Status ret = GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, | |||
output_formats, new_model_desc); | |||
if (ret != domi::SUCCESS) { | |||
GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); | |||
return TransferDomiErrorCode(ret); | |||
@@ -641,7 +641,6 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da | |||
model_data.model_data = nullptr; | |||
} | |||
} | |||
return ret; | |||
} | |||
@@ -1,5 +1,5 @@ | |||
LOCAL_PATH := $(call my-dir) | |||
include $(LOCAL_PATH)/stub/Makefile | |||
COMMON_LOCAL_SRC_FILES := \ | |||
proto/fusion_model.proto \ | |||
proto/optimizer_priority.proto \ | |||
@@ -355,6 +355,28 @@ LOCAL_LDFLAGS := -lrt -ldl | |||
include $(BUILD_HOST_SHARED_LIBRARY) | |||
#compiler for host infer | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := stub/libge_compiler | |||
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 | |||
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP | |||
ifeq ($(DEBUG), 1) | |||
LOCAL_CFLAGS += -g -O0 | |||
endif | |||
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) | |||
LOCAL_SRC_FILES := ../../out/atc/lib64/stub/ge_ir_build.cc | |||
LOCAL_SHARED_LIBRARIES := | |||
LOCAL_LDFLAGS := -lrt -ldl | |||
include $(BUILD_HOST_SHARED_LIBRARY) | |||
#compiler for device | |||
include $(CLEAR_VARS) | |||
@@ -131,6 +131,7 @@ Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_ke | |||
GELOGE(FAILED, "Failed to compute host cpu op. node = %s, ret = %u", op_desc->GetName().c_str(), ret); | |||
return FAILED; | |||
} | |||
op.BreakConnect(); | |||
return SUCCESS; | |||
} | |||
@@ -20,6 +20,7 @@ | |||
#include "common/helper/model_helper.h" | |||
#include "common/helper/om_file_helper.h" | |||
#include "common/util.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "ge/ge_api.h" | |||
#include "graph/ge_context.h" | |||
@@ -125,17 +126,7 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen | |||
if (data_op == nullptr) { | |||
return FAILED; | |||
} | |||
auto op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); | |||
auto input_desc = op_desc->MutableInputDesc(index); | |||
GE_CHECK_NOTNULL_EXEC(input_desc, return PARAM_INVALID); | |||
ge::Format old_format = input_desc->GetFormat(); | |||
if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) { | |||
input_desc->SetFormat(FORMAT_ND); | |||
input_desc->SetOriginFormat(FORMAT_ND); | |||
(void)AttrUtils::SetStr(data_op, "_single_input_format", TypeUtils::FormatToSerialString(old_format)); | |||
(void)AttrUtils::SetBool(data_op, "_is_single_op", true); | |||
} | |||
(void)AttrUtils::SetBool(data_op, "_is_single_op", true); | |||
GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); | |||
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); | |||
@@ -157,17 +148,7 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons | |||
if (op_desc == nullptr) { | |||
return FAILED; | |||
} | |||
auto single_op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL_EXEC(single_op_desc, return PARAM_INVALID); | |||
auto output_desc = single_op_desc->MutableOutputDesc(0); | |||
GE_CHECK_NOTNULL_EXEC(output_desc, return PARAM_INVALID); | |||
ge::Format old_format = output_desc->GetFormat(); | |||
if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) { | |||
output_desc->SetFormat(FORMAT_ND); | |||
output_desc->SetOriginFormat(FORMAT_ND); | |||
(void)AttrUtils::SetStr(op_desc, "_single_output_format", TypeUtils::FormatToSerialString(old_format)); | |||
(void)AttrUtils::SetBool(op_desc, "_is_single_op", true); | |||
} | |||
(void)AttrUtils::SetBool(op_desc, "_is_single_op", true); | |||
int32_t count = 0; | |||
for (const auto &out_desc : outputs) { | |||
GeTensorDesc tensor = out_desc.GetTensorDesc(); | |||
@@ -212,19 +193,6 @@ static void GetOpsProtoPath(string &opsproto_path) { | |||
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); | |||
} | |||
static string GetModelNameFromFileName(const string &file_name_prefix) { | |||
int start_position = 0; | |||
// using output as model_name (ignore ".om") | |||
int filename_suffixes = 3; | |||
if (file_name_prefix.find_last_of('/') != string::npos) { | |||
start_position += 1; | |||
} | |||
int end_position = file_name_prefix.length() - filename_suffixes; | |||
string model_name = file_name_prefix.substr(start_position, end_position - start_position); | |||
GELOGI("Get model_name from file, model_name:%s", model_name.c_str()); | |||
return model_name; | |||
} | |||
class GeGenerator::Impl { | |||
public: | |||
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models); | |||
@@ -332,8 +300,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||
GraphId graph_id; | |||
GeRootModelPtr ge_root_model = nullptr; | |||
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); | |||
const string model_name = GetModelNameFromFileName(file_name_prefix); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(model_name.empty(), return PARAM_INVALID, "om name is not valid!"); | |||
impl_->is_offline_ = is_offline; | |||
Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model); | |||
if (ret != SUCCESS) { | |||
@@ -345,9 +311,15 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||
} | |||
GE_CHECK_NOTNULL(ge_root_model); | |||
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | |||
ModelHelper model_helper; | |||
string model_name = ""; | |||
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name); | |||
if (name_ret != SUCCESS) { | |||
GELOGE(FAILED, "Get model_name failed. Param --output is invalid"); | |||
return PARAM_INVALID; | |||
} | |||
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model can not be null"); | |||
ge_model->SetName(model_name); | |||
ret = impl_->SaveModel(file_name_prefix, ge_model, model); | |||
@@ -38,6 +38,7 @@ | |||
namespace { | |||
const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag"; | |||
const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op"; | |||
const char *const kOpNoReuseMem = "no_reuse_mem_flag"; | |||
const char *const kDisableReuseMemory = "ge.exec.disableReuseMemory"; | |||
const char *const OP_NO_REUSE_MEM = "OP_NO_REUSE_MEM"; | |||
const int kReuseMaxCount = 10; | |||
@@ -624,8 +625,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||
(void)ge::GetContext().GetOption(kDisableReuseMemory, ge_disable_reuse_mem_env); | |||
if (ge_disable_reuse_mem_env != "1") { | |||
bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); | |||
is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && reuse_mem_flag && is_op_reuse_mem && | |||
(IsPreReuse(n, out_index)); | |||
is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && | |||
reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); | |||
auto stream_id = node_op_desc->GetStreamId(); | |||
auto map_iter = reusable_streams_map_.find(stream_id); | |||
if (is_reuse_memory && map_iter != reusable_streams_map_.end()) { | |||
@@ -1182,6 +1183,9 @@ void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks, | |||
GELOGI("Block continuous input index:%d", memory_block->input_index_); | |||
count++; | |||
if (count == 1) { | |||
memory_block->first_continuous_block_ = true; | |||
} | |||
if (count == continuous_blocks.size()) { | |||
memory_block->last_continuous_block_ = true; | |||
} | |||
@@ -1242,6 +1246,10 @@ void BlockMemAssigner::ResizeMemoryBlocks() { | |||
if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) { | |||
continue; | |||
} | |||
if (memory_block->first_continuous_block_) { | |||
mem_offset_ += MEM_ALIGN_SIZE; | |||
} | |||
memory_block->Resize(); | |||
memory_block->SetHeadOffset(mem_offset_); | |||
mem_offset_ += memory_block->Size(); | |||
@@ -64,6 +64,7 @@ class MemoryBlock { | |||
reuse_mem_(reuse_mem), | |||
input_index_(0), | |||
continuous_block_(false), | |||
first_continuous_block_(false), | |||
last_continuous_block_(false), | |||
is_zero_copy_(false), | |||
block_size_(block_size), | |||
@@ -129,6 +130,7 @@ class MemoryBlock { | |||
bool reuse_mem_; | |||
uint32_t input_index_; | |||
bool continuous_block_; | |||
bool first_continuous_block_; | |||
bool last_continuous_block_; | |||
bool is_zero_copy_; | |||
std::map<int64_t, size_t> depend_stream_life_; | |||
@@ -446,6 +446,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node | |||
return ge::FAILED; | |||
} | |||
memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; | |||
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { | |||
output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_; | |||
size_t pre_mem_offset = memory_offset_[0].mem_offset_; | |||
@@ -450,11 +450,13 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<Inp | |||
Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | |||
vector<InputOutputDescInfo> &output_desc, | |||
std::vector<uint32_t> &input_formats, std::vector<uint32_t> &out_formats) { | |||
std::vector<uint32_t> &input_formats, std::vector<uint32_t> &out_formats, | |||
bool new_model_desc) { | |||
try { | |||
auto model_manager = ge::ModelManager::GetInstance(); | |||
GE_CHECK_NOTNULL(model_manager); | |||
Status ret = model_manager->GetInputOutputDescInfo(model_id, input_desc, output_desc, input_formats, out_formats); | |||
Status ret = model_manager->GetInputOutputDescInfo(model_id, input_desc, output_desc, input_formats, out_formats, | |||
new_model_desc); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "GetInputOutputDescInfo failed."); | |||
CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||
@@ -71,7 +71,7 @@ class GraphExecutor { | |||
static Status GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | |||
vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats, | |||
std::vector<uint32_t> &output_formats); | |||
std::vector<uint32_t> &output_formats, bool new_model_desc = false); | |||
static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||
@@ -21,6 +21,7 @@ | |||
#include <utility> | |||
#include <vector> | |||
#include "common/debug/log.h" | |||
#include "common/properties_manager.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/util.h" | |||
@@ -28,6 +29,7 @@ | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/load/new_model_manager/model_utils.h" | |||
#include "graph/utils/attr_utils.h" | |||
#include "graph/utils/tensor_utils.h" | |||
#include "proto/ge_ir.pb.h" | |||
#include "proto/op_mapping_info.pb.h" | |||
#include "runtime/mem.h" | |||
@@ -106,6 +108,7 @@ void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_ | |||
} | |||
void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) { | |||
GELOGI("Start to save data %s message", node->GetName().c_str()); | |||
if (node != nullptr) { | |||
auto input_op_desc = node->GetOpDesc(); | |||
if (input_op_desc == nullptr) { | |||
@@ -126,6 +129,7 @@ void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) { | |||
{op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); | |||
} | |||
} | |||
GELOGI("Save data message successfully"); | |||
} | |||
} | |||
@@ -159,30 +163,39 @@ void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::s | |||
return; | |||
} | |||
GELOGI("Save input dump task %s, id: %u.", data_op->GetName().c_str(), task_id); | |||
int64_t data_size = 0; | |||
if (AttrUtils::GetInt(input_tensor, ATTR_NAME_INPUT_ORIGIN_SIZE, data_size)) { | |||
GELOGI("Get aipp data size according to attr is %ld", data_size); | |||
} else if (TensorUtils::GetTensorSizeInBytes(*input_tensor, data_size) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Get input size filed"); | |||
return; | |||
} | |||
GELOGI("Save input dump task %s, id: %u,stream id :%u,data size :%ld", data_op->GetName().c_str(), task_id, | |||
stream_id, data_size); | |||
op_list_.push_back({task_id, stream_id, data_op, args, false, inner_input_mapping.input_anchor_index, | |||
inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims()}); | |||
inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims(), data_size}); | |||
} | |||
} | |||
static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond, | |||
aicpu::dump::OpMappingInfo &op_mapping_info) { | |||
if (step_id != 0) { | |||
GELOGI("step_id exist."); | |||
GELOGI("step_id exists."); | |||
op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id)); | |||
} else { | |||
GELOGI("step_id is null."); | |||
} | |||
if (loop_per_iter != 0) { | |||
GELOGI("loop_per_iter exist."); | |||
GELOGI("loop_per_iter exists."); | |||
op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter)); | |||
} else { | |||
GELOGI("loop_per_iter is null."); | |||
} | |||
if (loop_cond != 0) { | |||
GELOGI("loop_cond exist."); | |||
GELOGI("loop_cond exists."); | |||
op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond)); | |||
} else { | |||
GELOGI("loop_cond is null."); | |||
@@ -211,10 +224,19 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: | |||
output.mutable_shape()->add_dim(dim); | |||
} | |||
int64_t output_size = 0; | |||
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Get output size filed"); | |||
return PARAM_INVALID; | |||
} | |||
GELOGI("Get output size in dump is %ld", output_size); | |||
std::string origin_name; | |||
int32_t origin_output_index = -1; | |||
(void)AttrUtils::GetStr(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); | |||
(void)AttrUtils::GetInt(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); | |||
GE_IF_BOOL_EXEC(output_size <= 0, GELOGE(PARAM_INVALID, "Output size %ld is less than zero", output_size); | |||
return PARAM_INVALID) | |||
output.set_size(output_size); | |||
output.set_original_name(origin_name); | |||
output.set_original_output_index(origin_output_index); | |||
output.set_original_output_format(static_cast<int32_t>(output_descs.at(i).GetOriginFormat())); | |||
@@ -247,6 +269,10 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump: | |||
int32_t origin_output_index = -1; | |||
(void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name); | |||
(void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index); | |||
GE_IF_BOOL_EXEC(inner_dump_info.data_size <= 0, | |||
GELOGE(PARAM_INVALID, "The size of data %ld is less than zero", inner_dump_info.data_size); | |||
return PARAM_INVALID) | |||
output.set_size(inner_dump_info.data_size); | |||
output.set_original_name(origin_name); | |||
output.set_original_output_index(origin_output_index); | |||
output.set_original_output_format(static_cast<int32_t>(output_tensor->GetOriginFormat())); | |||
@@ -283,6 +309,17 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump:: | |||
input.mutable_shape()->add_dim(dim); | |||
} | |||
int64_t input_size = 0; | |||
if (AttrUtils::GetInt(&input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { | |||
GELOGI("Get aipp input size according to attr is %ld", input_size); | |||
} else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Get input size filed"); | |||
return PARAM_INVALID; | |||
} | |||
GELOGI("Get input size in dump is %ld", input_size); | |||
GE_IF_BOOL_EXEC(input_size <= 0, GELOGE(PARAM_INVALID, "Input size %ld is less than zero", input_size); | |||
return PARAM_INVALID;) | |||
input.set_size(input_size); | |||
input.set_address(static_cast<uint64_t>(inner_dump_info.args + sizeof(void *) * i)); | |||
task.mutable_input()->Add(std::move(input)); | |||
} | |||
@@ -323,7 +360,7 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in | |||
} | |||
load_flag_ = true; | |||
GELOGI("LoadDumpInfo success, proto size: %zu.", proto_size); | |||
GELOGI("LoadDumpInfo success, proto size is: %zu.", proto_size); | |||
return SUCCESS; | |||
} | |||
@@ -360,11 +397,12 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_ | |||
return RT_FAILED; | |||
} | |||
load_flag_ = false; | |||
GELOGI("UnloadDumpInfo success, proto size: %zu.", proto_size); | |||
GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size); | |||
return SUCCESS; | |||
} | |||
Status DataDumper::LoadDumpInfo() { | |||
PrintCheckLog(); | |||
std::string dump_list_key; | |||
PrintCheckLog(dump_list_key); | |||
if (op_list_.empty()) { | |||
return SUCCESS; | |||
@@ -374,12 +412,13 @@ Status DataDumper::LoadDumpInfo() { | |||
auto dump_path = PropertiesManager::Instance().GetDumpOutputPath(); | |||
op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/"); | |||
op_mapping_info.set_model_name(model_name_); | |||
op_mapping_info.set_model_name(dump_list_key); | |||
op_mapping_info.set_model_id(model_id_); | |||
op_mapping_info.set_flag(kAicpuLoadFlag); | |||
op_mapping_info.set_dump_step(PropertiesManager::Instance().GetDumpStep()); | |||
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | |||
GELOGD("Dump step in load dump info is %s", PropertiesManager::Instance().GetDumpStep().c_str()); | |||
GELOGI("Dump step is %s and dump path is %s in load dump info", PropertiesManager::Instance().GetDumpStep().c_str(), | |||
dump_path.c_str()); | |||
for (const auto &op_iter : op_list_) { | |||
aicpu::dump::Task task; | |||
@@ -441,7 +480,7 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, | |||
if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput || | |||
PropertiesManager::Instance().GetDumpMode() == kDumpInput || | |||
PropertiesManager::Instance().GetDumpMode() == kDumpAll) { | |||
GELOGI("add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||
GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); | |||
aicpu::dump::Task task; | |||
task.set_end_graph(true); | |||
task.set_task_id(end_graph_task_id_); | |||
@@ -477,7 +516,7 @@ Status DataDumper::UnloadDumpInfo() { | |||
return SUCCESS; | |||
} | |||
void DataDumper::PrintCheckLog() { | |||
void DataDumper::PrintCheckLog(string &dump_list_key) { | |||
std::set<std::string> model_list = PropertiesManager::Instance().GetAllDumpModel(); | |||
if (model_list.empty()) { | |||
GELOGI("No model need dump."); | |||
@@ -485,19 +524,21 @@ void DataDumper::PrintCheckLog() { | |||
} | |||
GELOGI("%zu op need dump in %s.", op_list_.size(), model_name_.c_str()); | |||
if (model_list.find(ge::DUMP_ALL_MODEL) == model_list.end()) { | |||
if (model_list.find(model_name_) == model_list.end()) { | |||
bool not_find_by_omname = model_list.find(om_name_) == model_list.end(); | |||
bool not_find_by_modelname = model_list.find(model_name_) == model_list.end(); | |||
if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) { | |||
if (not_find_by_omname && not_find_by_modelname) { | |||
std::string model_list_str; | |||
for (auto &model : model_list) { | |||
model_list_str += "[" + model + "]."; | |||
} | |||
GELOGW("Model %s not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str()); | |||
GELOGW("Model %s will not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str()); | |||
return; | |||
} | |||
} | |||
std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(model_name_); | |||
dump_list_key = not_find_by_omname ? model_name_ : om_name_; | |||
std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(dump_list_key); | |||
std::set<std::string> dump_op_list; | |||
for (auto &inner_dump_info : op_list_) { | |||
// oplist value OpDescPtr is not nullptr | |||
@@ -506,7 +547,7 @@ void DataDumper::PrintCheckLog() { | |||
for (auto &dump_op : config_dump_op_list) { | |||
if (dump_op_list.find(dump_op) == dump_op_list.end()) { | |||
GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), model_name_.c_str()); | |||
GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), dump_list_key.c_str()); | |||
} | |||
} | |||
} | |||
@@ -64,6 +64,8 @@ class DataDumper { | |||
void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args); | |||
void SaveEndGraphId(uint32_t task_id, uint32_t stream_id); | |||
void SetOmName(const std::string &om_name) { om_name_ = om_name; } | |||
Status LoadDumpInfo(); | |||
Status UnloadDumpInfo(); | |||
@@ -71,9 +73,13 @@ class DataDumper { | |||
private: | |||
void ReleaseDevMem(void **ptr) noexcept; | |||
void PrintCheckLog(); | |||
void PrintCheckLog(string &dump_list_key); | |||
std::string model_name_; | |||
// for inference data dump | |||
std::string om_name_; | |||
uint32_t model_id_; | |||
RuntimeParam runtime_param_; | |||
void *dev_mem_load_; | |||
@@ -107,6 +113,7 @@ struct DataDumper::InnerDumpInfo { | |||
int input_anchor_index; | |||
int output_anchor_index; | |||
std::vector<int64_t> dims; | |||
int64_t data_size; | |||
}; | |||
struct DataDumper::InnerInputMapping { | |||
@@ -78,7 +78,7 @@ namespace { | |||
const uint32_t kDataIndex = 0; | |||
const uint32_t kOutputNum = 1; | |||
const uint32_t kTrueBranchStreamNum = 1; | |||
const uint32_t kThreadNum = 16; | |||
const uint32_t kThreadNum = 1; | |||
const uint32_t kAddrLen = sizeof(void *); | |||
const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel"; | |||
const int kDecimal = 10; | |||
@@ -94,42 +94,9 @@ inline bool IsCallDumpInputOp(const OpDescPtr &op_desc) { | |||
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NO_TASK_AND_DUMP_NEEDED, skip_task_generate); | |||
return skip_task_generate; | |||
} | |||
void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) { | |||
uint32_t n, c, h, w; | |||
n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N; | |||
c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C; | |||
h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H; | |||
w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W; | |||
if (!op_desc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||
if (op_desc->GetInputDescPtr(0)->GetShape().GetDimNum() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||
input.shape_info.num = op_desc->GetInputDescPtr(0)->GetShape().GetDim(n); | |||
input.shape_info.height = op_desc->GetInputDescPtr(0)->GetShape().GetDim(h); | |||
input.shape_info.width = op_desc->GetInputDescPtr(0)->GetShape().GetDim(w); | |||
input.shape_info.channel = op_desc->GetInputDescPtr(0)->GetShape().GetDim(c); | |||
} | |||
for (size_t k = 0; k < op_desc->GetInputDescPtr(0)->GetShape().GetDimNum(); k++) { | |||
input.shape_info.dims.push_back(op_desc->GetInputDescPtr(0)->GetShape().GetDim(k)); | |||
} | |||
} else { | |||
vector<int64_t> origin_input_dims; | |||
(void)AttrUtils::GetListInt(op_desc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||
if (origin_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||
input.shape_info.num = origin_input_dims[n]; | |||
input.shape_info.height = origin_input_dims[h]; | |||
input.shape_info.width = origin_input_dims[w]; | |||
input.shape_info.channel = origin_input_dims[c]; | |||
} | |||
for (size_t k = 0; k < origin_input_dims.size(); ++k) { | |||
input.shape_info.dims.push_back(origin_input_dims[k]); | |||
} | |||
} | |||
} | |||
} // namespace | |||
std::mutex DavinciModel::tvm_bin_mutex_; | |||
std::set<std::string> DavinciModel::tvm_bin_kernel_; | |||
DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener) | |||
: weights_mem_base_(nullptr), | |||
@@ -536,7 +503,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
compute_graph_ = GraphUtils::GetComputeGraph(graph); | |||
GE_CHK_BOOL_RET_STATUS(compute_graph_ != nullptr, INTERNAL_ERROR, "Get compute graph is nullptr."); | |||
runtime_param_.graph_id = GetGraphID(compute_graph_->GetName()); | |||
runtime_param_.graph_id = compute_graph_->GetGraphID(); | |||
GE_TIMESTAMP_START(TransAllVarData); | |||
GE_CHK_STATUS_RET(TransAllVarData(compute_graph_, runtime_param_.graph_id), "TransAllVarData failed."); | |||
@@ -1447,6 +1414,55 @@ Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInf | |||
return SUCCESS; | |||
} | |||
void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) { | |||
uint32_t n, c, h, w; | |||
n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N; | |||
c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C; | |||
h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H; | |||
w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W; | |||
if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) { | |||
// When static aipp is set, need to get the model input dims which processed by aipp | |||
vector<int64_t> model_input_dims; | |||
(void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims); | |||
if (model_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||
input.shape_info.num = model_input_dims[n]; | |||
input.shape_info.height = model_input_dims[h]; | |||
input.shape_info.width = model_input_dims[w]; | |||
input.shape_info.channel = model_input_dims[c]; | |||
} | |||
for (size_t k = 0; k < model_input_dims.size(); ++k) { | |||
input.shape_info.dims.push_back(model_input_dims[k]); | |||
} | |||
is_new_model_desc_ = false; | |||
return; | |||
} | |||
if (!op_desc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||
if (op_desc->GetInputDescPtr(0)->GetShape().GetDimNum() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||
input.shape_info.num = op_desc->GetInputDescPtr(0)->GetShape().GetDim(n); | |||
input.shape_info.height = op_desc->GetInputDescPtr(0)->GetShape().GetDim(h); | |||
input.shape_info.width = op_desc->GetInputDescPtr(0)->GetShape().GetDim(w); | |||
input.shape_info.channel = op_desc->GetInputDescPtr(0)->GetShape().GetDim(c); | |||
} | |||
for (size_t k = 0; k < op_desc->GetInputDescPtr(0)->GetShape().GetDimNum(); k++) { | |||
input.shape_info.dims.push_back(op_desc->GetInputDescPtr(0)->GetShape().GetDim(k)); | |||
} | |||
} else { | |||
vector<int64_t> origin_input_dims; | |||
(void)AttrUtils::GetListInt(op_desc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||
if (origin_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) { | |||
input.shape_info.num = origin_input_dims[n]; | |||
input.shape_info.height = origin_input_dims[h]; | |||
input.shape_info.width = origin_input_dims[w]; | |||
input.shape_info.channel = origin_input_dims[c]; | |||
} | |||
for (size_t k = 0; k < origin_input_dims.size(); ++k) { | |||
input.shape_info.dims.push_back(origin_input_dims[k]); | |||
} | |||
} | |||
} | |||
Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) { | |||
for (size_t index = 0; index < data_op_list_.size(); ++index) { | |||
InputOutputDescInfo input; | |||
@@ -1455,6 +1471,7 @@ Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, s | |||
Format format = data_op_list_[index]->GetInputDescPtr(0)->GetFormat(); | |||
CreateInputDimsInfo(data_op_list_[index], format, input); | |||
input.data_type = data_op_list_[index]->GetInputDescPtr(0)->GetDataType(); | |||
input.name = data_op_list_[index]->GetName(); | |||
int64_t input_size = 0; | |||
@@ -1535,7 +1552,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, | |||
"construct output_name failed."); | |||
// forward compatbility, if old om has no out_node_name, need to return output follow origin way | |||
if (out_size == out_node_name.size()) { | |||
output_name = out_node_name[index] + ":" + std::to_string(src_index[index]); | |||
// neweast plan, the index will add to name during generate model. | |||
bool contains_colon = out_node_name[index].find(":") != std::string::npos; | |||
output_name = | |||
contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]); | |||
} else { | |||
output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + | |||
std::to_string(src_index[index]); | |||
@@ -1966,6 +1986,10 @@ Status DavinciModel::CopyOutputDataToUser(OpDescPtr &op_desc, std::vector<DataBu | |||
"Model output data size(%u) does not match required size(%u).", v_output_size[i], | |||
data_buf.length); | |||
if (copy_only_addrs_.count(v_output_data_addr[i]) == 0) { | |||
GELOGI("[ZCPY] This addr[%p] has already feed by zero copy.", v_output_data_addr[i]); | |||
continue; // Skip: Feed by zero copy. | |||
} | |||
GELOGI( | |||
"CopyOutputDataToUser memcpy graph_%u type[F] name[%s] output[%lu] dst[%p] src[%p] mem_size[%u] datasize[%u]", | |||
runtime_param_.graph_id, op_desc->GetName().c_str(), i, data_buf.data, v_output_data_addr[i], data_buf.length, | |||
@@ -2510,51 +2534,19 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||
} | |||
Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { | |||
GELOGI("InitTaskInfo in,task size %zu", model_task_def.task().size()); | |||
GELOGI("InitTaskInfo in,task size %d", model_task_def.task().size()); | |||
task_list_.resize(model_task_def.task_size()); | |||
std::vector<std::future<Status>> futures(model_task_def.task_size()); | |||
ThreadPool executor(kThreadNum); | |||
rtContext_t ctx = nullptr; | |||
rtError_t rt_ret = rtCtxGetCurrent(&ctx); | |||
if (rt_ret != RT_ERROR_NONE || ctx == nullptr) { | |||
GELOGE(RT_FAILED, "Failed to get current context from rt, error-code 0x%X.", rt_ret); | |||
return RT_FAILED; | |||
} | |||
for (int32_t i = 0; i < model_task_def.task_size(); ++i) { | |||
std::future<Status> f = executor.commit( | |||
[](const domi::TaskDef &task, DavinciModel *model, rtContext_t ctx, int32_t idx) -> Status { | |||
rtError_t rt_ret = rtCtxSetCurrent(ctx); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Failed to set context from rt, error-code 0x%X.", rt_ret); | |||
return RT_FAILED; | |||
} | |||
Status ret = FAILED; | |||
// dynamic shape will create task_list_ before | |||
if (model->task_list_[idx] == nullptr) { | |||
model->task_list_[idx] = TaskInfoFactory::Instance().Create(static_cast<rtModelTaskType_t>(task.type())); | |||
GE_CHECK_NOTNULL(model->task_list_[idx]); | |||
} | |||
ret = model->task_list_[idx]->Init(task, model); | |||
return ret; | |||
}, | |||
model_task_def.task(i), this, ctx, i); | |||
if (!f.valid()) { | |||
GELOGE(FAILED, "Future is invalid"); | |||
return FAILED; | |||
} | |||
futures[i] = std::move(f); | |||
} | |||
Status ret; | |||
for (size_t i = 0; i < futures.size(); ++i) { | |||
ret = futures[i].get(); | |||
for (int i = 0; i < model_task_def.task_size(); ++i) { | |||
// dynamic shape will create task_list_ before | |||
const domi::TaskDef &task = model_task_def.task(i); | |||
task_list_[i] = TaskInfoFactory::Instance().Create(static_cast<rtModelTaskType_t>(task.type())); | |||
GE_CHECK_NOTNULL(task_list_[i]); | |||
Status ret = task_list_[i]->Init(task, this); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Task index %zu init failed.", i); | |||
GELOGE(ret, "Task index %d init failed.", i); | |||
return ret; | |||
} | |||
} | |||
GELOGI("InitTaskInfo out"); | |||
return SUCCESS; | |||
} | |||
@@ -2623,7 +2615,7 @@ Status DavinciModel::DistributeTask() { | |||
return PARAM_INVALID; | |||
} | |||
if (PropertiesManager::Instance().IsLayerNeedDump(name_, op->GetName())) { | |||
if (PropertiesManager::Instance().IsLayerNeedDump(name_, om_name_, op->GetName())) { | |||
SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); | |||
} | |||
} | |||
@@ -2661,8 +2653,9 @@ Status DavinciModel::DistributeTask() { | |||
void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { | |||
auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel(); | |||
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | |||
all_dump_model.find(name_) != all_dump_model.end()) { | |||
bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); | |||
bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); | |||
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) { | |||
GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id); | |||
data_dumper_.SaveEndGraphId(task_id, stream_id); | |||
} | |||
@@ -2696,7 +2689,7 @@ void DavinciModel::SetOutputOutsideAddr(const std::vector<void *> &outside_addrs | |||
if (output_outside_addrs_.find(addr) != output_outside_addrs_.end()) { | |||
continue; | |||
} | |||
DisableZeroCopy(addr); // Data to NetOutput directly. | |||
(void)output_outside_addrs_.emplace(std::pair<const void *, std::vector<void *>>(addr, {})); | |||
GELOGI("SetOutputOutsideAddr success."); | |||
} | |||
@@ -2902,11 +2895,15 @@ Status DavinciModel::UpdateIoTaskArgs(const map<uint32_t, pair<int64_t, void *>> | |||
} | |||
// For input data, just copy for rts task. | |||
if (is_input && copy_only_addrs_.count(addr) > 0) { | |||
if (rtMemcpy(addr, size, buffer.data, buffer.length, RT_MEMCPY_DEVICE_TO_DEVICE) != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "Non-zero copy data node copy failed"); | |||
return FAILED; | |||
if (copy_only_addrs_.count(addr) > 0) { | |||
if (is_input) { | |||
GELOGI("[IMAS] Find addr %p need direct copy from user malloc input %p.", addr, buffer.data); | |||
if (rtMemcpy(addr, size, buffer.data, buffer.length, RT_MEMCPY_DEVICE_TO_DEVICE) != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "Non-zero copy data node copy failed"); | |||
return FAILED; | |||
} | |||
} | |||
GELOGI("No need to exeucte zero copy task because this addr %p need direct copy.", addr); | |||
continue; | |||
} | |||
@@ -2953,7 +2950,6 @@ const char *DavinciModel::GetRegisterStub(const string &binfile, const string &s | |||
} else { | |||
binfile_key = session_graph_id + "_" + binfile; | |||
} | |||
std::lock_guard<std::mutex> lock(tvm_bin_mutex_); | |||
auto it = tvm_bin_kernel_.find(binfile_key); | |||
if (it != tvm_bin_kernel_.end()) { | |||
return it->c_str(); | |||
@@ -3089,7 +3085,6 @@ void DavinciModel::StoreTbeHandle(const std::string &handle_key) { | |||
// Online mode FE may call rtFunctionRegister. | |||
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||
// Need protection of tvm_bin_mutex_. | |||
auto it = used_tbe_handle_map_.find(handle_key); | |||
if (it != used_tbe_handle_map_.end()) { | |||
// GE registered, increase reference. | |||
@@ -3109,9 +3104,9 @@ void DavinciModel::StoreTbeHandle(const std::string &handle_key) { | |||
void DavinciModel::CleanTbeHandle() { | |||
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | |||
std::lock_guard<std::mutex> lock(tvm_bin_mutex_); | |||
kernel_store.EraseTBEHandle(used_tbe_handle_map_); | |||
used_tbe_handle_map_.clear(); | |||
tvm_bin_kernel_.clear(); | |||
} | |||
/// | |||
@@ -3246,15 +3241,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
bool is_dynamic_batch = input_data.is_dynamic_batch; | |||
InitZeroCopyUtil(is_dynamic_batch, input_use_zero_copy, output_use_zero_copy); | |||
// Empty task, Just copy input to output, need direct copy. | |||
if (task_list_.empty() && (input_use_zero_copy || output_use_zero_copy)) { | |||
GELOGE(FAILED, "Empty task, Just copy input to output, need direct copy."); | |||
return FAILED; | |||
} | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START)); | |||
Status ret = | |||
input_use_zero_copy ? CopyModelData(input_data, output_data, is_dynamic_batch) : CopyInputData(input_data, true); | |||
Status ret = CopyModelData(input_data, output_data, is_dynamic_batch); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return INTERNAL_ERROR, "Copy input data to model failed."); | |||
GELOGI("current_data.index=%u", input_data.index); | |||
@@ -3271,7 +3259,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
if (!is_async_mode_) { | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_START)); | |||
ret = output_use_zero_copy ? SyncDataAndDump() : CopyOutputData(input_data.index, output_data); | |||
ret = CopyOutputData(input_data.index, output_data); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return INTERNAL_ERROR, "Copy Output data to user failed."); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_END)); | |||
} | |||
@@ -3344,17 +3332,6 @@ void DavinciModel::FreeWeightsMem() { | |||
} | |||
} | |||
uint32_t DavinciModel::GetGraphID(const std::string &session_graph_id) { | |||
std::string session_id = "_"; | |||
auto pos = session_graph_id.find(session_id); | |||
if (pos != std::string::npos) { | |||
size_t graph_id_length = session_graph_id.length() - pos - session_id.length(); | |||
std::string graph_id = session_graph_id.substr(pos + session_id.length(), graph_id_length); | |||
return static_cast<uint32_t>(std::strtol(graph_id.c_str(), nullptr, kDecimal)); | |||
} | |||
return 0; | |||
} | |||
Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) { | |||
GELOGI("TransAllVarData start: session_id:%lu, graph_id: %u.", session_id_, graph_id); | |||
rtContext_t ctx = nullptr; | |||
@@ -3387,6 +3364,7 @@ void DavinciModel::SetDataDumperArgs() { | |||
data_dumper_.SetModelName(name_); | |||
data_dumper_.SetModelId(model_id_); | |||
data_dumper_.SetMemory(runtime_param_); | |||
data_dumper_.SetOmName(om_name_); | |||
int32_t device_id = 0; | |||
rtError_t rt_ret = rtGetDevice(&device_id); | |||
@@ -187,6 +187,8 @@ class DavinciModel { | |||
// model name | |||
string Name() { return name_; } | |||
// om_name | |||
string OmName() { return om_name_; } | |||
// version | |||
uint32_t Version() const { return version_; } | |||
@@ -273,7 +275,7 @@ class DavinciModel { | |||
/// @brief For TVM Op, avoid Addr Reuse. | |||
/// @return void* | |||
/// | |||
static const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = ""); | |||
const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = ""); | |||
/// | |||
/// @ingroup ge | |||
@@ -471,6 +473,9 @@ class DavinciModel { | |||
Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | |||
Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | |||
std::vector<InputOutputDims> &output_dims); | |||
void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } | |||
// om file name | |||
void SetOmName(string om_name) { om_name_ = om_name; } | |||
private: | |||
// memory address of weights | |||
@@ -560,6 +565,8 @@ class DavinciModel { | |||
Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize); | |||
void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input); | |||
Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats); | |||
Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | |||
@@ -752,8 +759,6 @@ class DavinciModel { | |||
void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); | |||
uint32_t GetGraphID(const std::string &session_graph_id); | |||
Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | |||
Status CopyVarData(ComputeGraphPtr &graph); | |||
@@ -771,6 +776,10 @@ class DavinciModel { | |||
uint32_t model_id_; | |||
uint32_t runtime_model_id_; | |||
string name_; | |||
// used for inference data dump | |||
string om_name_; | |||
uint32_t version_; | |||
GeModelPtr ge_model_; | |||
@@ -860,8 +869,8 @@ class DavinciModel { | |||
std::set<uint32_t> hcom_streams_; | |||
RuntimeParam runtime_param_; | |||
static std::mutex tvm_bin_mutex_; // lock for tvm maps. | |||
static std::set<std::string> tvm_bin_kernel_; | |||
static std::mutex tvm_bin_mutex_; | |||
std::set<std::string> tvm_bin_kernel_; | |||
std::map<std::string, uint32_t> used_tbe_handle_map_; | |||
@@ -884,6 +893,7 @@ class DavinciModel { | |||
std::map<const void *, void *> knonw_output_data_info_; | |||
vector<uint64_t> batch_size_; | |||
bool is_new_model_desc_{false}; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ |
@@ -325,6 +325,12 @@ Status ModelManager::DeleteModel(uint32_t id) { | |||
auto it = model_map_.find(id); | |||
auto hybrid_model_it = hybrid_model_map_.find(id); | |||
if (it != model_map_.end()) { | |||
uint64_t session_id = it->second->GetSessionId(); | |||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(id); | |||
auto iter_aicpu_kernel = model_aicpu_kernel_.find(model_key); | |||
if (iter_aicpu_kernel != model_aicpu_kernel_.end()) { | |||
(void)model_aicpu_kernel_.erase(iter_aicpu_kernel); | |||
} | |||
(void)model_map_.erase(it); | |||
} else if (hybrid_model_it != hybrid_model_map_.end()) { | |||
(void)hybrid_model_map_.erase(hybrid_model_it); | |||
@@ -685,11 +691,14 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||
Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc, | |||
vector<InputOutputDescInfo> &output_desc, | |||
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats) { | |||
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats, | |||
bool new_model_desc) { | |||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, | |||
"GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id); | |||
davinci_model->SetModelDescVersion(new_model_desc); | |||
return davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats); | |||
} | |||
@@ -820,6 +829,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
return FAILED; | |||
} | |||
davinci_model->SetDeviceId(device_id); | |||
davinci_model->SetOmName(model.om_name); | |||
/// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. | |||
/// These session_ids come from the same model, so the values of session_id are the same. | |||
@@ -178,7 +178,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
ge::Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc, | |||
std::vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &inputFormats, | |||
std::vector<uint32_t> &outputFormats); | |||
std::vector<uint32_t> &outputFormats, bool new_model_desc = false); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Get dynamic batch_info | |||
@@ -47,7 +47,8 @@ Status EndGraphTaskInfo::Distribute() { | |||
GE_CHECK_NOTNULL(davinci_model_); | |||
auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel(); | |||
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || | |||
all_dump_model.find(davinci_model_->Name()) != all_dump_model.end()) { | |||
all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() || | |||
all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) { | |||
GELOGI("Start to call rtEndGraphEx"); | |||
rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
@@ -153,7 +153,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); | |||
return FAILED;) | |||
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) { | |||
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
dump_args_ = input_output_addr_; | |||
} | |||
@@ -63,7 +63,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||
return ret; | |||
} | |||
domi::KernelDef kernel_def = task_def.kernel(); | |||
const domi::KernelDef &kernel_def = task_def.kernel(); | |||
block_dim_ = kernel_def.block_dim(); | |||
args_size_ = kernel_def.args_size(); | |||
// get opcontext stored in model | |||
@@ -92,7 +92,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||
string session_graph_model_id; | |||
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); | |||
// get bin_file_key | |||
const char *bin_file_key = DavinciModel::GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||
// new aicpu kernel(rtCpuKernelLaunch) no need to check function | |||
if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) { | |||
rtError_t rt_ret; | |||
@@ -494,7 +494,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
// When inferencing, stub_func_ is different from dynamic-registration to runtime, and needs to be modified. | |||
string session_graph_model_id; | |||
davinci_model_->GetUniqueId(op_desc, session_graph_model_id); | |||
const char *bin_file_key = DavinciModel::GetRegisterStub(op_desc->GetName(), session_graph_model_id); | |||
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc->GetName(), session_graph_model_id); | |||
rtError_t rt_ret = rtQueryFunctionRegistered(const_cast<char *>(bin_file_key)); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
stub_func_ = const_cast<char *>(bin_file_key); | |||
@@ -549,7 +549,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne | |||
return FAILED; | |||
} | |||
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) { | |||
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
dump_args_ = static_cast<char *>(args_) + offset; | |||
} | |||
@@ -818,7 +819,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
return RT_FAILED; | |||
} | |||
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) { | |||
if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | |||
op_desc->GetName())) { | |||
dump_flag_ = RT_KERNEL_DUMPFLAG; | |||
dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead); | |||
} | |||
@@ -396,8 +396,6 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge | |||
if (save_ret != SUCCESS) { | |||
GELOGW("Fail to save cache."); | |||
} | |||
// release rts generate context | |||
RtContextUtil::GetInstance().DestroyrtContexts(); | |||
GEEVENT("[GEPERFTRACE] GE PreRun End"); | |||
return SUCCESS; | |||
} | |||
@@ -420,6 +418,8 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: | |||
ret = IncreBuild(graph_node, ge_model); | |||
if (ret != SUCCESS) { | |||
ret = PreRun(graph_node, inputs, ge_root_model, session_id); | |||
// release rts generate context | |||
RtContextUtil::GetInstance().DestroyrtContexts(); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "PreRun Failed."); | |||
return ret; | |||
@@ -2165,6 +2165,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { | |||
GeModelPtr ge_model = nullptr; | |||
if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) { | |||
ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id); | |||
// release rts generate context | |||
RtContextUtil::GetInstance().DestroyrtContexts(); | |||
if (ret != SUCCESS) { | |||
graph_node->SetRunFlag(false); | |||
ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit.."); | |||
@@ -91,7 +91,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen | |||
std::string var_key = VarKey(var_name, tensor_desc); | |||
GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); | |||
if (var_addr_mgr_map_.count(var_key) == 0) { | |||
uint64_t logic_address = VarManager::Instance(0)->GetVarMemLogicBase() + | |||
uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() + | |||
reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||
GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), | |||
TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), | |||
@@ -105,9 +105,8 @@ void ge::GraphPartitioner::SetMergedGraphId(ge::ComputeGraphPtr &output_merged_c | |||
Status ge::GraphPartitioner::RemoveNodeAndEdgeBetweenEndPld(ge::ComputeGraphPtr &output_merged_compute_graph, | |||
const std::vector<SubGraphInfoPtr> &sub_graph_list) { | |||
ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>("mergedGraph"); | |||
output_merged_compute_graph = new_sub_graph; | |||
if ((new_sub_graph == nullptr) || (MergeAllSubGraph(output_merged_compute_graph, sub_graph_list) != SUCCESS)) { | |||
if ((output_merged_compute_graph == nullptr) || | |||
(MergeAllSubGraph(output_merged_compute_graph, sub_graph_list) != SUCCESS)) { | |||
GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MergeAllSubGraph failed."); | |||
return FAILED; | |||
} | |||
@@ -229,6 +228,9 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co | |||
return FAILED; | |||
} | |||
} | |||
ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>(original_compute_graph->GetName()); | |||
GE_CHECK_NOTNULL(new_sub_graph); | |||
output_merged_compute_graph = new_sub_graph; | |||
GE_TIMESTAMP_START(MergeGraphRemoveNode); | |||
if (RemoveNodeAndEdgeBetweenEndPld(output_merged_compute_graph, sub_graph_list) != ge::SUCCESS) { | |||
GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: merging sub-graphs failed"); | |||
@@ -70,6 +70,7 @@ OpDescPtr SameTransdataBreadthFusionPass::GetCastOp(const GeTensorDesc &in_desc, | |||
cast_op_name << "fusion_cast_" << fusion_cast_op_count++; | |||
auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST); | |||
auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op); | |||
node_op.BreakConnect(); | |||
if (cast_op == nullptr) { | |||
GELOGE(INTERNAL_ERROR, "new fusion cast op failed!"); | |||
return nullptr; | |||
@@ -501,6 +501,7 @@ OpDescPtr TransOpWithoutReshapeFusionPass::GetCastOp(const GeTensorDesc &cast_in | |||
cast_op_name << "fusion_cast_op_" << fusion_cast_op_count++; | |||
auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST); | |||
auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op); | |||
node_op.BreakConnect(); | |||
if (cast_op == nullptr) { | |||
GELOGE(INTERNAL_ERROR, "new cast op failed!"); | |||
return nullptr; | |||
@@ -19,8 +19,6 @@ | |||
#include <set> | |||
#include <string> | |||
#include <utility> | |||
#include "common/formats/format_transfers/format_transfer_fractal_nz.h" | |||
#include "common/formats/format_transfers/format_transfer_fractal_z.h" | |||
#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" | |||
#include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" | |||
#include "common/formats/format_transfers/format_transfer_transpose.h" | |||
@@ -34,6 +32,7 @@ | |||
#include "graph/common/transop_util.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "graph/ge_context.h" | |||
#include "graph/shape_refiner.h" | |||
#include "graph/manager/graph_var_manager.h" | |||
#include "graph/manager/util/rt_context_util.h" | |||
#include "graph/optimize/graph_optimize.h" | |||
@@ -123,9 +122,6 @@ static std::map<std::string, ge::DataType> output_type_str_to_datatype = { | |||
{"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; | |||
const char *const kMbatchSwitchnName = "mbatch-switch-name"; | |||
const int64_t kGemmNdShapeSize = 2; | |||
const int64_t kGemmAlignSize32 = 32; | |||
const int64_t kGemmAlignSize16 = 16; | |||
OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) { | |||
GeTensorPtr tensor = MakeShared<GeTensor>(); | |||
@@ -1135,114 +1131,9 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No | |||
return SUCCESS; | |||
} | |||
Status ProcessGemmFractalZ(GeShape &src_shape, std::vector<int64_t> &dst_shape_vec) { | |||
dst_shape_vec.clear(); | |||
if (src_shape.GetDims().size() != kGemmNdShapeSize) { | |||
GELOGE(INTERNAL_ERROR, "gemm shape size must be 2"); | |||
return FAILED; | |||
} | |||
dst_shape_vec.push_back(formats::Ceil(src_shape.GetDim(0), kGemmAlignSize32)); | |||
dst_shape_vec.push_back(formats::Ceil(src_shape.GetDim(1), kGemmAlignSize16)); | |||
dst_shape_vec.push_back(kGemmAlignSize16); | |||
dst_shape_vec.push_back(kGemmAlignSize32); | |||
return SUCCESS; | |||
} | |||
Status SetInOutForGemm(GeTensorDescPtr &input, GeTensorDescPtr &output, GeShape shape, Format format) { | |||
input->SetShape(shape); | |||
input->SetFormat(format); | |||
output->SetShape(shape); | |||
output->SetFormat(format); | |||
int64_t input_shape_size = 0; | |||
int64_t output_shape_size = 0; | |||
ge::graphStatus input_graph_status = ge::TensorUtils::GetTensorSizeInBytes(*input, input_shape_size); | |||
ge::graphStatus output_graph_status = ge::TensorUtils::GetTensorMemorySizeInBytes(*output, output_shape_size); | |||
if ((input_graph_status != ge::GRAPH_SUCCESS) && (output_graph_status != ge::GRAPH_SUCCESS)) { | |||
GELOGE(GRAPH_FAILED, "GetTensorSize failed!"); | |||
return FAILED; | |||
} | |||
ge::TensorUtils::SetSize(*input, input_shape_size); | |||
ge::TensorUtils::SetSize(*output, output_shape_size); | |||
return SUCCESS; | |||
} | |||
Status ProcessSingleOpInput(NodePtr &node_ptr, string &single_op_input_format) { | |||
ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_input_format); | |||
auto op_desc = node_ptr->GetOpDesc(); | |||
auto data_input = op_desc->MutableInputDesc(0); | |||
auto data_output = op_desc->MutableOutputDesc(0); | |||
ge::Format src_format = data_input->GetFormat(); | |||
ge::DataType src_dt = data_input->GetDataType(); | |||
ge::GeShape src_shape = data_input->GetShape(); | |||
std::vector<int64_t> dst_shape_vec; | |||
if (input_format == FORMAT_FRACTAL_NZ) { | |||
formats::FormatTransferFractalNz transfer; | |||
if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Op [%s] trans FZ Shape failed.", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
ge::GeShape dst_shape(dst_shape_vec); | |||
if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_NZ) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_NZ desc failed.", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
} else if (input_format == FORMAT_FRACTAL_Z) { | |||
if (ProcessGemmFractalZ(src_shape, dst_shape_vec) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Op [%s] trans FRACTAL_Z Shape failed.", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
ge::GeShape dst_shape(dst_shape_vec); | |||
if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_Z) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_Z desc failed.", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
} | |||
// Gemm shape and format should be set at this stage, temporary solution. | |||
auto out_anchor = node_ptr->GetOutDataAnchor(0); | |||
for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { | |||
GE_CHECK_NOTNULL(in_anchor); | |||
auto index = static_cast<uint32_t>(in_anchor->GetIdx()); | |||
ge::NodePtr next_node = in_anchor->GetOwnerNode(); | |||
GE_CHECK_NOTNULL(next_node); | |||
auto next_op_desc = next_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(next_op_desc); | |||
auto input_desc = next_op_desc->MutableInputDesc(index); | |||
GE_CHECK_NOTNULL(input_desc); | |||
input_desc->SetFormat(input_format); | |||
input_desc->SetShape(data_output->GetShape()); | |||
} | |||
return SUCCESS; | |||
} | |||
Status ProcessSingleOpOutput(OpDescPtr &op_desc, string &single_op_output_format) { | |||
ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_output_format); | |||
auto data_input = op_desc->MutableInputDesc(0); | |||
ge::Format src_format = data_input->GetFormat(); | |||
ge::DataType src_dt = data_input->GetDataType(); | |||
ge::GeShape src_shape = data_input->GetShape(); | |||
std::vector<int64_t> dst_shape_vec; | |||
if (input_format == FORMAT_FRACTAL_NZ) { | |||
formats::FormatTransferFractalNz transfer; | |||
if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Op [%s] trans FZ Shape failed.", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
ge::GeShape dst_shape(dst_shape_vec); | |||
data_input->SetShape(dst_shape); | |||
data_input->SetFormat(FORMAT_FRACTAL_NZ); | |||
} | |||
return SUCCESS; | |||
} | |||
Status ProcessDataNodeDynShape(NodePtr &node_ptr, bool &is_single_op) { | |||
Status ProcessDataNodeDynShape(NodePtr &node_ptr) { | |||
auto op_desc = node_ptr->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
std::string single_op_input_format; | |||
if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_input_format", single_op_input_format))) { | |||
if (ProcessSingleOpInput(node_ptr, single_op_input_format) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Process single op input [%s] failed.", node_ptr->GetName().c_str()); | |||
return FAILED; | |||
} | |||
} | |||
bool set_fp16 = false; | |||
if (!ge::AttrUtils::GetBool(node_ptr->GetOpDesc(), "input_fp16", set_fp16) || !set_fp16) { | |||
return SUCCESS; | |||
@@ -1375,16 +1266,9 @@ bool NeedUpdateOutputByOutputTypeParm(std::string &output_type, NodePtr &src_nod | |||
return false; | |||
} | |||
Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type, bool &is_single_op) { | |||
Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type) { | |||
auto op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
std::string single_op_output_format; | |||
if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_output_format", single_op_output_format))) { | |||
if (ProcessSingleOpOutput(op_desc, single_op_output_format) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Process single op output [%s] failed.", node->GetName().c_str()); | |||
return FAILED; | |||
} | |||
} | |||
ge::DataType output_data_type = ge::DT_FLOAT; | |||
for (const auto &in_anchor : node->GetAllInDataAnchors()) { | |||
@@ -1717,7 +1601,8 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) { | |||
auto format = desc.GetFormat(); | |||
auto origin_format = desc.GetOriginFormat(); | |||
bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); | |||
if (is_internal) { | |||
bool need_check_internal_format = (!options_.is_single_op) && is_internal; | |||
if (need_check_internal_format) { | |||
GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not support.", | |||
TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::FormatToSerialString(origin_format).c_str()); | |||
return FAILED; | |||
@@ -2164,6 +2049,7 @@ Status GraphPrepare::GenerateInfershapeGraph(ConstGraphPtr graph) { | |||
GELOGE(ret, "Run ge_passes infershape for preprocess failed, ret:%u.", ret); | |||
return ret; | |||
} | |||
ShapeRefiner::ClearContextMap(); | |||
return SUCCESS; | |||
} | |||
@@ -2389,6 +2275,7 @@ Status GraphPrepare::InferShapeForPreprocess() { | |||
} | |||
} | |||
} | |||
ShapeRefiner::ClearContextMap(); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Run ge_passes infershape for preprocess failed, ret:%u.", ret); | |||
return ret; | |||
@@ -2821,14 +2708,14 @@ Status GraphPrepare::UpdateInputOutputByOptions() { | |||
} | |||
if (node_ptr->GetType() == DATA) { | |||
if (ProcessDataNodeDynShape(node_ptr, options_.is_single_op) != SUCCESS) { | |||
if (ProcessDataNodeDynShape(node_ptr) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Process data node failed"); | |||
return FAILED; | |||
} | |||
} | |||
if (node_ptr->GetType() == ge::NETOUTPUT) { | |||
if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype, options_.is_single_op) != SUCCESS) { | |||
if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Process netoutput node failed"); | |||
return FAILED; | |||
} | |||
@@ -389,8 +389,8 @@ Status AippOp::SetDefaultParams() { | |||
GELOGI("parse aipp params:input_format:%s, csc_switch:%d.", | |||
domi::AippOpParams::InputFormat_Name(aipp_params_->input_format()).c_str(), aipp_params_->csc_switch()); | |||
GELOGI("parse aipp params:mean_chn_0:%d, mean_chn_1:%d, mean_chn_2:%d.", aipp_params_->mean_chn_0(), | |||
aipp_params_->mean_chn_1(), aipp_params_->mean_chn_2()); | |||
GELOGI("parse aipp params:mean_chn_0:%d, mean_chn_1:%d, mean_chn_2:%d, mean_chn_3:%d.", aipp_params_->mean_chn_0(), | |||
aipp_params_->mean_chn_1(), aipp_params_->mean_chn_2(), aipp_params_->mean_chn_3()); | |||
GELOGI("parse aipp params:min_chn_0:%f, min_chn_1:%f, min_chn_2:%f.", aipp_params_->min_chn_0(), | |||
aipp_params_->min_chn_1(), aipp_params_->min_chn_2()); | |||
@@ -40,6 +40,23 @@ namespace ge { | |||
namespace { | |||
const char *const kMbatchSwitchnName = "mbatch-switch-name"; | |||
} // namespace | |||
static void ConvertShape2Nhwc(Format &format, vector<int64_t> &shape_vec) { | |||
if ((format == FORMAT_NHWC) || (shape_vec.size() != static_cast<size_t>(NORMAL_TENSOR_SIZE))) { | |||
return; | |||
} | |||
if (format != FORMAT_NCHW) { | |||
GELOGW("The format is not NCHW, current format is %s", TypeUtils::FormatToSerialString(format).c_str()); | |||
return; | |||
} | |||
vector<int64_t> shape_vec_tmp; | |||
shape_vec.swap(shape_vec_tmp); | |||
shape_vec.push_back(shape_vec_tmp[NCHW_DIM_N]); | |||
shape_vec.push_back(shape_vec_tmp[NCHW_DIM_H]); | |||
shape_vec.push_back(shape_vec_tmp[NCHW_DIM_W]); | |||
shape_vec.push_back(shape_vec_tmp[NCHW_DIM_C]); | |||
return; | |||
} | |||
Status InsertNewOpUtil::Init() { | |||
insert_op_conf_.reset((new (std::nothrow) domi::InsertNewOps())); | |||
GE_CHECK_NOTNULL(insert_op_conf_); | |||
@@ -223,11 +240,13 @@ Status InsertNewOpUtil::UpdatePrevNodeByAipp(NodePtr &node, std::set<NodePtr> &s | |||
GELOGE(FAILED, "UpdateOutputDesc fail, graph_ret:%d", graph_ret); | |||
return FAILED; | |||
} | |||
GELOGI("Get size [%ld] from aipp [%s].", size, aipp_op_desc->GetName().c_str()); | |||
GELOGI("Get input size [%ld] from aipp [%s].", size, aipp_op_desc->GetName().c_str()); | |||
if (size == 0) { | |||
GELOGE(FAILED, "Can not get size from aipp [%s]", aipp_op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
// Save the input size of aipp node, which will be used in dumping aipp node or fused aipp node | |||
(void)AttrUtils::SetInt(aipp_input, ATTR_NAME_INPUT_ORIGIN_SIZE, size); | |||
auto in_data_anchor = node->GetInDataAnchor(0); | |||
GE_CHECK_NOTNULL(in_data_anchor); | |||
@@ -305,6 +324,8 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt | |||
auto data_opdesc = data->GetOpDesc(); | |||
GE_CHECK_NOTNULL(data_opdesc); | |||
Format old_format = data_opdesc->MutableOutputDesc(0)->GetFormat(); | |||
auto ret = data_opdesc->UpdateOutputDesc(0, *input_desc); | |||
if (ret != GRAPH_SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "Failed to update data %s output using switchn %s", data->GetName().c_str(), | |||
@@ -317,9 +338,34 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt | |||
switchn->GetName().c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
// Update attr _mbatch_origin_input_dims for data when it is linked to aipp | |||
UpdateMultiBatchInputDims(data_opdesc, old_format); | |||
return SUCCESS; | |||
} | |||
void InsertNewOpUtil::UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format) { | |||
if (!data_opdesc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) { | |||
GELOGW("Failed to acquire _mbatch_origin_input_dims attr from node [%s]", data_opdesc->GetName().c_str()); | |||
return; | |||
} | |||
auto new_data_dims = data_opdesc->GetOutputDesc(0).GetShape().GetDims(); | |||
vector<int64_t> origin_input_dims; | |||
(void)AttrUtils::GetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||
// Convert origin_input_dims to NHWC because data format is set to NHWC when it is linked to aipp. | |||
ConvertShape2Nhwc(old_format, origin_input_dims); | |||
if (new_data_dims.size() != origin_input_dims.size()) { | |||
return; | |||
} | |||
for (size_t i = 0; i < origin_input_dims.size(); ++i) { | |||
// Need to update shape when aipp has crop function because H,W is different, ignore -1. | |||
if (origin_input_dims[i] > 0) { | |||
origin_input_dims[i] = new_data_dims[i]; | |||
} | |||
} | |||
(void)AttrUtils::SetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims); | |||
return; | |||
} | |||
Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std::set<NodePtr>> &data_next_node_map) { | |||
GELOGI("Start to get data and next node %s.", node->GetName().c_str()); | |||
OpDescPtr data_op = node->GetOpDesc(); | |||
@@ -420,15 +466,18 @@ Status InsertNewOpUtil::RecordAIPPInfoToData(const ComputeGraphPtr &graph) { | |||
GetInputOutputInfo(data_node, aipp_it, input, output); | |||
input_dims.emplace_back(input); | |||
output_dims.emplace_back(output); | |||
// When static aipp is set, need to get the model input dims which processed by aipp | |||
GE_RETURN_IF_ERROR(SetModelInputDims(data_node, aipp_it)); | |||
} | |||
if (!AttrUtils::SetListStr(data_node->GetOpDesc(), ATTR_NAME_AIPP_INPUTS, input_dims)) { | |||
GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_AIPP_INPUTS.c_str()); | |||
GELOGE(FAILED, "SetListStr of %s failed.", ATTR_NAME_AIPP_INPUTS.c_str()); | |||
return FAILED; | |||
} | |||
if (!AttrUtils::SetListStr(data_node->GetOpDesc(), ATTR_NAME_AIPP_OUTPUTS, output_dims)) { | |||
GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_AIPP_OUTPUTS.c_str()); | |||
GELOGE(FAILED, "SetListStr of %s failed.", ATTR_NAME_AIPP_OUTPUTS.c_str()); | |||
return FAILED; | |||
} | |||
} | |||
@@ -473,4 +522,41 @@ Status InsertNewOpUtil::GetInputOutputInfo(NodePtr &data_node, NodePtr &aipp_nod | |||
data_node->GetName().c_str(), aipp_node->GetName().c_str(), input.c_str(), output.c_str()); | |||
return SUCCESS; | |||
} | |||
Status InsertNewOpUtil::SetModelInputDims(NodePtr &data_node, NodePtr &aipp_node) { | |||
GE_CHECK_NOTNULL(data_node); | |||
GE_CHECK_NOTNULL(aipp_node); | |||
OpDescPtr data_opdesc = data_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(data_opdesc); | |||
OpDescPtr aipp_opdesc = aipp_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(aipp_opdesc); | |||
// In dynamic bacth/hw scenario, the new model input dims only need be set once | |||
if (data_node->GetOpDesc()->HasAttr(ATTR_NAME_INPUT_DIMS)) { | |||
GELOGD("Data %s already has attribute %s", data_node->GetOpDesc()->GetName().c_str(), ATTR_NAME_INPUT_DIMS.c_str()); | |||
return SUCCESS; | |||
} | |||
vector<int64_t> model_input_dims; | |||
vector<int64_t> origin_input_dims; | |||
if (AttrUtils::GetListInt(aipp_opdesc, ATTR_NAME_INPUT_DIMS, model_input_dims) && !model_input_dims.empty()) { | |||
// When dynamic bacth/hw is set, N or HW need to be set to -1 | |||
if (AttrUtils::GetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims) && | |||
!origin_input_dims.empty()) { | |||
GELOGI("In dynamic bacth/hw scenario, N or HW need to be set to -1. model_input_dims: %s, origin_input_dims: %s", | |||
formats::JoinToString(model_input_dims).c_str(), formats::JoinToString(origin_input_dims).c_str()); | |||
for (size_t i = 0; i < origin_input_dims.size(); ++i) { | |||
// N or HW need to be set to -1 | |||
if (origin_input_dims[i] < 0) { | |||
model_input_dims[i] = origin_input_dims[i]; | |||
} | |||
} | |||
} | |||
GELOGD("After set H/W to -1, the model input dims: %s.", formats::JoinToString(model_input_dims).c_str()); | |||
if (!AttrUtils::SetListInt(data_opdesc, ATTR_NAME_INPUT_DIMS, model_input_dims)) { | |||
GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_INPUT_DIMS.c_str()); | |||
return FAILED; | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -61,11 +61,13 @@ class InsertNewOpUtil { | |||
std::unique_ptr<domi::InsertNewOps> insert_op_conf_; | |||
void UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format); | |||
Status UpdatePrevNodeByAipp(NodePtr &node, std::set<NodePtr> &switchns); | |||
Status UpdateDataBySwitchN(const NodePtr &switchn, const NodePtr &data); | |||
Status GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std::set<NodePtr>> &data_next_node_map); | |||
Status GetAllAipps(const NodePtr &node, std::vector<NodePtr> &aipps); | |||
Status GetInputOutputInfo(NodePtr &data_node, NodePtr &aipp_node, std::string &input, std::string &output); | |||
Status SetModelInputDims(NodePtr &data_node, NodePtr &aipp_node); | |||
}; | |||
} // namespace ge | |||
@@ -44,6 +44,7 @@ const int kSwitchNPredIndex = 1; | |||
const int kDataOutIndex = 0; | |||
const int kDataInIndex = 0; | |||
const int kMergeDataOutIndex = 0; | |||
const int kStaticOutput = -1; | |||
const size_t kMaxShapesCount = 100; | |||
const size_t kMinShapesCount = 2; | |||
@@ -947,15 +948,18 @@ Status GetDynamicOutputShape(ComputeGraphPtr &graph) { | |||
GELOGE(PARAM_INVALID, "Graph is null ,para is invalid"); | |||
return PARAM_INVALID; | |||
} | |||
for (auto &node : graph->GetAllNodes()) { | |||
for (auto &node : graph->GetDirectNode()) { | |||
if (node->GetType() == NETOUTPUT) { | |||
auto netoutput_desc = node->GetOpDesc(); | |||
auto inputnode_to_netoutput = node->GetInAllNodes(); | |||
std::vector<size_t> dynamic_output_index; | |||
for (size_t j = 0; j < inputnode_to_netoutput.size(); j++) { | |||
bool ret = false; | |||
(void)AttrUtils::GetBool(inputnode_to_netoutput.at(j)->GetOpDesc(), ATTR_INSERT_BY_MBATCH, ret); | |||
if (inputnode_to_netoutput.at(j)->GetType() == MERGE && ret) { | |||
GELOGI("Find the merge node %s with mbatch attr", inputnode_to_netoutput.at(j)->GetName().c_str()); | |||
GELOGI("Find the merge node %s with mbatch attr and the index is %zu", | |||
inputnode_to_netoutput.at(j)->GetName().c_str(), j); | |||
dynamic_output_index.emplace_back(j); | |||
for (size_t i = 0; i < inputnode_to_netoutput.at(j)->GetInNodes().size(); i++) { | |||
auto input_desc = inputnode_to_netoutput.at(j)->GetOpDesc(); | |||
auto input_tensor_desc = input_desc->GetInputDesc(i); | |||
@@ -967,6 +971,17 @@ Status GetDynamicOutputShape(ComputeGraphPtr &graph) { | |||
} | |||
} | |||
if (dynamic_output_dims.size() > 0) { | |||
for (size_t k = 0; k < inputnode_to_netoutput.size(); k++) { | |||
auto it = std::find(dynamic_output_index.begin(), dynamic_output_index.end(), k); | |||
if (it != dynamic_output_index.end()) { | |||
continue; | |||
} | |||
auto tensor_desc = netoutput_desc->GetInputDesc(k); | |||
auto shape = tensor_desc.GetShape().ToString(); | |||
std::string static_output_shape = std::to_string(kStaticOutput) + "," + std::to_string(k) + "," + shape; | |||
GELOGI("The static output shape msg is %s", static_output_shape.c_str()); | |||
dynamic_output_dims.emplace_back(static_output_shape); | |||
} | |||
if (!AttrUtils::SetListStr(netoutput_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims)) { | |||
GELOGE(FAILED, "Set dynamic output dims attr failed"); | |||
return FAILED; | |||
@@ -31,6 +31,7 @@ | |||
namespace ge { | |||
namespace { | |||
const size_t kConcatV2InputNum = 3; | |||
const int kSupportEmptyTensorRank = 1; | |||
const std::set<DataType> concatv2_supported_type = {DT_INT32, DT_FLOAT}; | |||
template <typename T> | |||
@@ -39,7 +40,12 @@ void GetOutputData(std::vector<T> &y_data, int64_t loop, size_t &input_size, | |||
for (int64_t i = 0; i < loop; i++) { | |||
for (size_t k = 0; k < input_size; k++) { | |||
GeShape datak_shape = input.at(k)->GetTensorDesc().GetShape(); | |||
const T *datak = reinterpret_cast<const T *>(input.at(k)->GetData().data()); | |||
auto buffer = input.at(k)->GetData(); | |||
const T *datak = reinterpret_cast<const T *>(buffer.data()); | |||
if (datak == nullptr || buffer.size() == 0) { | |||
GELOGW("input[%zu] is with no data", k); | |||
continue; | |||
} | |||
int64_t gapk = datak_shape.GetShapeSize() / loop; // [2,3] is 6/loop | |||
for (int64_t j = 0; j < gapk; j++) { | |||
y_data.push_back(datak[j + gapk * i]); | |||
@@ -63,7 +69,8 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge: | |||
return PARAM_INVALID; | |||
} | |||
int tidx = -1; | |||
Status ret = ConcatV2PreCompute(input, tidx); | |||
ConstGeTensorPtr tensor = nullptr; | |||
Status ret = ConcatV2PreCompute(input, tidx, tensor); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
@@ -71,9 +78,8 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge: | |||
size_t input_size = input.size(); // N + 1 | |||
input_size--; // N | |||
ConstGeTensorPtr tensor0 = input.at(0); | |||
GE_CHECK_NOTNULL(tensor0); | |||
DataType data_type = tensor0->GetTensorDesc().GetDataType(); | |||
GE_CHECK_NOTNULL(tensor); | |||
DataType data_type = tensor->GetTensorDesc().GetDataType(); | |||
uint32_t length = 0; | |||
if (!TypeUtils::GetDataTypeLength(data_type, length)) { | |||
GELOGW("Can't GetDataTypeLength of data_type: %s", TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||
@@ -91,7 +97,7 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge: | |||
return MEMALLOC_FAILED; | |||
} | |||
GeShape data0_shape = tensor0->GetTensorDesc().GetShape(); | |||
GeShape data0_shape = tensor->GetTensorDesc().GetShape(); | |||
int64_t loop = 1; | |||
for (int i = 0; i < tidx; i++) { | |||
loop *= data0_shape.GetDim(i); | |||
@@ -110,29 +116,33 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge: | |||
return SUCCESS; | |||
} | |||
Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx) { | |||
Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx, | |||
ConstGeTensorPtr &tensor) { | |||
size_t input_size = input.size(); | |||
// N >= 2 and N + 1 >= 3 | |||
if (input_size < kConcatV2InputNum) { | |||
GELOGI("The number of input for ConcatV2 must not be less than %zu.", kConcatV2InputNum); | |||
return NOT_CHANGED; | |||
} | |||
bool has_empty_tensor = false; | |||
input_size--; | |||
for (size_t i = 0; i < input_size; i++) { | |||
if (input[i] == nullptr) { | |||
GELOGI("Input%zu must not be null.", i); | |||
return NOT_CHANGED; | |||
} | |||
if (input.at(i)->GetData().size() == 0) { | |||
GELOGI("Check data size fail. input%zu size is 0.", i); | |||
return NOT_CHANGED; | |||
GELOGW("input[%zu] is with no data.", i); | |||
has_empty_tensor = true; | |||
continue; | |||
} | |||
if (tensor == nullptr) { | |||
tensor = input.at(i); // get first valid tensor with data | |||
} | |||
} | |||
input_size--; | |||
ConstGeTensorPtr tensor0 = input.at(0); | |||
GE_CHECK_NOTNULL(tensor0); | |||
DataType data_type = tensor0->GetTensorDesc().GetDataType(); | |||
GE_CHECK_NOTNULL(tensor); | |||
DataType data_type = tensor->GetTensorDesc().GetDataType(); | |||
for (size_t i = 1; i < input_size; i++) { | |||
if (data_type != input.at(i)->GetTensorDesc().GetDataType()) { | |||
GELOGI("Data type of N inputs for ConcatV2 not the same, check input %zu failed.", i); | |||
@@ -149,13 +159,18 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &i | |||
ConstGeTensorPtr tensor_axis = input.at(input_size); | |||
GE_CHECK_NOTNULL(tensor_axis); | |||
const int *axis = reinterpret_cast<const int *>(tensor_axis->GetData().data()); | |||
tidx = axis[0]; // [-rank(values), rank(values)) | |||
int dims = static_cast<int>(tensor0->GetTensorDesc().GetShape().GetDimNum()); // rank | |||
GE_CHECK_NOTNULL(axis); | |||
tidx = axis[0]; // [-rank(values), rank(values)) | |||
int rank = static_cast<int>(tensor->GetTensorDesc().GetShape().GetDimNum()); // rank | |||
if (tidx < 0) { | |||
tidx += dims; | |||
tidx += rank; | |||
} | |||
if (tidx < 0 || tidx > dims) { | |||
GELOGI("ConcatV2 tidx not legal."); | |||
// 1. tidx should in range [0,rank) | |||
// 2. empty tensor only support case: [n],[m],[] | |||
// case: [[],[]] ,[[],[]] ,[] or other case when rank >=2 is not supported | |||
if (tidx < 0 || tidx >= rank || (has_empty_tensor && rank > kSupportEmptyTensorRank)) { | |||
GELOGW("ConcatV2 info: tidx[%d]_rank[%d]_has_empty_tensor[bool:%d] cannot be supported, skip fold.", tidx, rank, | |||
has_empty_tensor); | |||
return NOT_CHANGED; | |||
} | |||
@@ -28,7 +28,7 @@ class ConcatV2Kernel : public Kernel { | |||
std::vector<GeTensorPtr> &v_output) override; | |||
private: | |||
Status ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx); | |||
Status ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx, ConstGeTensorPtr &tensor); | |||
}; | |||
} // namespace ge | |||
@@ -46,6 +46,8 @@ namespace ge { | |||
namespace { | |||
const int kDecimal = 10; | |||
const int kSocVersionLen = 50; | |||
const int kDefaultDeviceIdForTrain = 0; | |||
const int kDefaultDeviceIdForInfer = -1; | |||
} // namespace | |||
static std::shared_ptr<GELib> instancePtr_ = nullptr; | |||
@@ -194,8 +196,12 @@ Status GELib::SystemInitialize(const map<string, string> &options) { | |||
// In train and infer, profiling is always needed. | |||
InitOptions(options); | |||
InitProfiling(this->options_); | |||
if (is_train_mode_) { | |||
// 1.`is_train_mode_` means case: train | |||
// 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer | |||
// these two case need call `InitSystemWithOptions->rtGetDeviceIndexByPhyId` | |||
// to convert phy device id to logical device id | |||
// note:rtGetDeviceIndexByPhyId return `0` logical id when input phy device id is `0` | |||
if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { | |||
status = InitSystemWithOptions(this->options_); | |||
} else { | |||
status = InitSystemWithoutOptions(); | |||
@@ -237,7 +243,7 @@ void GELib::InitOptions(const map<string, string> &options) { | |||
if (iter != options.end()) { | |||
this->options_.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal); | |||
} | |||
this->options_.device_id = 0; | |||
this->options_.device_id = is_train_mode_ ? kDefaultDeviceIdForTrain : kDefaultDeviceIdForInfer; | |||
iter = options.find(OPTION_EXEC_DEVICE_ID); | |||
if (iter != options.end()) { | |||
this->options_.device_id = static_cast<int32_t>(std::strtol(iter->second.c_str(), nullptr, kDecimal)); | |||
@@ -289,7 +295,8 @@ void GELib::InitOptions(const map<string, string> &options) { | |||
} | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOptions(Options &options) { | |||
GELOGI("Training init GELib. session Id:%ld, device id :%d ", options.session_id, options.device_id); | |||
std::string mode = is_train_mode_ ? "Training" : "Online infer"; | |||
GELOGI("%s init GELib. session Id:%ld, device id :%d ", mode.c_str(), options.session_id, options.device_id); | |||
GEEVENT("System init with options begin, job id %s", options.job_id.c_str()); | |||
std::lock_guard<std::mutex> lock(status_mutex_); | |||
GE_IF_BOOL_EXEC(is_system_inited && !is_shutdown, | |||
@@ -329,13 +336,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt | |||
is_system_inited = true; | |||
is_shutdown = false; | |||
GELOGI("Training init GELib success."); | |||
GELOGI("%s init GELib success.", mode.c_str()); | |||
return SUCCESS; | |||
} | |||
Status GELib::SystemShutdownWithOptions(const Options &options) { | |||
GELOGI("Training finalize GELib begin."); | |||
std::string mode = is_train_mode_ ? "Training" : "Online infer"; | |||
GELOGI("%s finalize GELib begin.", mode.c_str()); | |||
std::lock_guard<std::mutex> lock(status_mutex_); | |||
GE_IF_BOOL_EXEC(is_shutdown || !is_system_inited, | |||
@@ -353,8 +361,7 @@ Status GELib::SystemShutdownWithOptions(const Options &options) { | |||
is_system_inited = false; | |||
is_shutdown = true; | |||
GELOGI("Training finalize GELib success."); | |||
GELOGI("%s finalize GELib success.", mode.c_str()); | |||
return SUCCESS; | |||
} | |||
@@ -424,7 +431,7 @@ Status GELib::Finalize() { | |||
// Shut down profiling | |||
ShutDownProfiling(); | |||
if (is_train_mode_) { | |||
if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) { | |||
GELOGI("System ShutDown."); | |||
mid_state = SystemShutdownWithOptions(this->options_); | |||
if (mid_state != SUCCESS) { | |||
@@ -39,6 +39,7 @@ | |||
#include "ir_build/atc_ir_common.h" | |||
#include "omg/omg.h" | |||
#include "omg/parser/parser_factory.h" | |||
#include "omg/parser/parser_inner_ctx.h" | |||
#include "parser/common/register_tbe.h" | |||
#include "register/op_registry.h" | |||
#include "single_op_parser.h" | |||
@@ -178,8 +179,6 @@ DEFINE_string(compress_weight_conf, "", "Optional; the config file to compress w | |||
DEFINE_string(enable_single_stream, "", "Optional; enable single stream. true: enable; false(default): disable"); | |||
DEFINE_string(quant_optimize, "true", "Optional; enable quant optimize. true: enable; false(default): disable"); | |||
DEFINE_string(log, "default", "Optional; generate atc log. Support debug, info, warning, error, null"); | |||
DEFINE_string(dump_mode, "0", "Optional; generate infershape json,only support 1 , 0."); | |||
@@ -203,10 +202,7 @@ class GFlagUtils { | |||
"arguments explain:\n" | |||
" --model Model file\n" | |||
" --singleop Single op definition file. atc will generate offline " | |||
"model(s) for single op if --singleop is set. \n" | |||
" Note: Only output, soc_verion, core_type, aicore_num, auto_tune_mode, precision_mode, " | |||
"op_select_implmode, enable_small_channel, enable_compress_weight, compress_weight_conf " | |||
"enable_single_stream and log are valid in this mode \n" | |||
"model(s) for single op if --singleop is set.\n" | |||
" --weight Weight file. Required when framework is Caffe\n" | |||
" --framework Framework type(0:Caffe; 1:MindSpore; 3:Tensorflow)\n" | |||
" --output Output file path&name(needn't suffix, will add " | |||
@@ -253,6 +249,9 @@ class GFlagUtils { | |||
" --op_select_implmode Set op select implmode. Support high_precision, high_performance." | |||
"default: high_performance\n" | |||
"disable\n" | |||
" --optypelist_for_implmode Appoint which op to use op_select_implmode, used with op_select_implmode ." | |||
"Separate multiple nodes with commas (,). Use double quotation marks (\") to enclose each argument." | |||
"E.g.: \"node_name1,node_name2\"\n" | |||
" --head_stream Add head stream. 0(default): disable; 1: enable\n" | |||
" --soc_version The soc version. E.g.: \"Ascend310\"\n" | |||
" --core_type Set core type AiCore or VectorCore. VectorCore: use vector core. " | |||
@@ -270,8 +269,7 @@ class GFlagUtils { | |||
"Use double quotation marks (\") to enclose each argument." | |||
"E.g: \"imagesize1_height,imagesize1_width;imagesize2_height,imagesize2_width\"\n" | |||
" --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" | |||
" --enable_single_stream Enable single stream. true: enable; false(default): disable\n" | |||
" --quant_optimize Enable quant optimize. true(default): enable; false: disable\n"); | |||
" --enable_single_stream Enable single stream. true: enable; false(default): disable\n"); | |||
gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); | |||
// Using gflags to analyze input parameters | |||
@@ -656,13 +654,36 @@ void LoadCustomOpLib() { | |||
std::vector<OpRegistrationData> registrationDatas = OpRegistry::Instance()->registrationDatas; | |||
for (OpRegistrationData reg_data : registrationDatas) { | |||
bool ret = ge::OpRegistrationTbe::Instance()->Finalize(reg_data); | |||
if (ret) { | |||
OpRegistry::Instance()->Register(reg_data); | |||
if (reg_data.GetFrameworkType() == static_cast<domi::FrameworkType>(FLAGS_framework)) { | |||
bool ret = ge::OpRegistrationTbe::Instance()->Finalize(reg_data); | |||
if (ret) { | |||
(void)OpRegistry::Instance()->Register(reg_data); | |||
} | |||
} | |||
} | |||
} | |||
void SaveCustomCaffeProtoPath() {
  GELOGI("Enter save custom caffe proto path.");

  // Prefer the location given by the ASCEND_OPP_PATH environment variable.
  const char *path_env = std::getenv("ASCEND_OPP_PATH");
  if (path_env != nullptr) {
    GELOGI("Get custom proto path from env : %s", path_env);
    ge::GetParserContext().custom_proto_path = std::string(path_env) + "/framework/custom/caffe/";
    return;
  }

  // Fallback: derive the path from the GELib binary location by stripping
  // the last two path components, then appending the ops directory.
  std::string base_dir = ge::GELib::GetPath();
  GELOGI("path_base is %s", base_dir.c_str());
  base_dir = base_dir.substr(0, base_dir.rfind('/'));
  base_dir = base_dir.substr(0, base_dir.rfind('/') + 1);
  ge::GetParserContext().custom_proto_path = base_dir + "ops/framework/custom/caffe/";
}
#endif | |||
Status CreateInputsForInference(const ge::Graph &graph, vector<ge::GeTensor> &inputs) { | |||
@@ -850,6 +871,7 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output | |||
atc_params.insert(std::pair<string, string>("is_output_adjust_hw_layout", FLAGS_is_output_adjust_hw_layout)); | |||
atc_params.insert(std::pair<string, string>("compress_weight_conf", FLAGS_compress_weight_conf)); | |||
atc_params.insert(std::pair<string, string>(string(ge::OUTPUT_DATATYPE), FLAGS_output_type)); | |||
atc_params.insert(std::pair<string, string>("output", output)); | |||
Status ret = | |||
ParseGraph(graph, atc_params, FLAGS_model.c_str(), FLAGS_weight.c_str(), (domi::FrameworkType)FLAGS_framework, | |||
@@ -982,6 +1004,8 @@ domi::Status GenerateOmModel() { | |||
// Load custom operator Library | |||
LoadCustomOpLib(); | |||
SaveCustomCaffeProtoPath(); | |||
ret = ge::CheckCustomAiCpuOpLib(); | |||
GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!"); | |||
@@ -1043,8 +1067,6 @@ domi::Status GenerateOmModel() { | |||
options.insert(std::pair<string, string>(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream)); | |||
options.insert(std::pair<string, string>(string(ge::QUANT_OPTIMIZE), FLAGS_quant_optimize)); | |||
SetDynamicBatchSizeOrImagesizeOptions(); | |||
if (!FLAGS_save_original_model.empty()) { | |||
@@ -273,10 +273,6 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single | |||
} else { | |||
op_desc->AddInputDesc(desc.name, ge_tensor_desc); | |||
} | |||
if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) { | |||
ge_tensor_desc.SetFormat(FORMAT_ND); | |||
ge_tensor_desc.SetOriginFormat(FORMAT_ND); | |||
} | |||
build_param.inputs.emplace_back(ge_tensor_desc); | |||
} | |||
@@ -292,10 +288,6 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single | |||
TensorUtils::SetInputTensor(ge_tensor_desc, false); | |||
TensorUtils::SetOutputTensor(ge_tensor_desc, true); | |||
op_desc->AddOutputDesc(ge_tensor_desc); | |||
if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) { | |||
ge_tensor_desc.SetFormat(FORMAT_ND); | |||
ge_tensor_desc.SetOriginFormat(FORMAT_ND); | |||
} | |||
build_param.outputs.emplace_back(ge_tensor_desc); | |||
} | |||
@@ -29,6 +29,8 @@ | |||
#include "common/types.h" | |||
#include "common/util.h" | |||
#include "common/util/error_manager/error_manager.h" | |||
#include "common/helper/model_helper.h" | |||
#include "common/ge/ge_util.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/omg/parser/parser_inner_ctx.h" | |||
#include "google/protobuf/io/zero_copy_stream_impl.h" | |||
@@ -419,10 +421,6 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const | |||
GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str()); | |||
return domi::FAILED; | |||
} | |||
if (out_node->GetType() == DATA) { | |||
GELOGE(domi::FAILED, "out_nodes [%s] can not be set input data, please check", user_out_nodes[i].first.c_str()); | |||
return domi::FAILED; | |||
} | |||
auto op_desc = out_node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
if (i < output_formats.size()) { | |||
@@ -441,24 +439,49 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const | |||
(void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second); | |||
} | |||
output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second)); | |||
output_nodes_name.push_back(out_node->GetName()); | |||
output_nodes_name.push_back(out_node->GetName() + ":" + std::to_string(user_out_nodes[i].second)); | |||
} | |||
// default output node (leaf) | |||
if (user_out_nodes.empty()) { | |||
for (ge::NodePtr node : compute_graph->GetDirectNode()) { | |||
if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) { | |||
Status ret = GetOutputLeaf(node, output_nodes_info, output_nodes_name); | |||
Status ret = GetOutputLeaf(node, output_nodes_info); | |||
GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); | |||
} | |||
} | |||
} | |||
GetOutputNodesNameAndIndex(output_nodes_info, output_nodes_name); | |||
compute_graph->SetGraphOutNodesInfo(output_nodes_info); | |||
domi::GetContext().net_out_nodes = output_nodes_name; | |||
return domi::SUCCESS; | |||
} | |||
Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info, | |||
std::vector<std::string> &output_nodes_name) { | |||
// Rebuild `output_nodes_name` from `output_nodes_info`, formatting each entry
// as "name:index" (TF path, no top names) or "name:index:top_name" (Caffe
// path, when out_top_names is populated by the parser).
void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
                                std::vector<std::string> &output_nodes_name) {
  output_nodes_name.clear();
  const auto &out_top_names = domi::GetContext().out_top_names;  // hoisted: one context lookup
  if (out_top_names.empty()) {
    // tf process, no top name.
    for (const auto &output_node_info : output_nodes_info) {  // const&: was by value, copied each pair
      const std::string node_name = output_node_info.first->GetName();
      int32_t index = output_node_info.second;
      output_nodes_name.push_back(node_name + ":" + std::to_string(index));
    }
    return;
  }
  // caffe process, need add top name after node_name:index
  for (size_t i = 0; i < output_nodes_info.size(); ++i) {
    const std::string node_name = output_nodes_info[i].first->GetName();
    int32_t index = output_nodes_info[i].second;
    if (i < out_top_names.size()) {
      output_nodes_name.push_back(node_name + ":" + std::to_string(index) + ":" + out_top_names[i]);
    } else {
      // more outputs than top names: fall back to the TF-style entry
      GELOGW("Get top name of node [%s] fail.", node_name.c_str());
      output_nodes_name.push_back(node_name + ":" + std::to_string(index));
    }
  }
}
Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info) { | |||
ge::OpDescPtr tmpDescPtr = node->GetOpDesc(); | |||
if (tmpDescPtr == nullptr) { | |||
GELOGE(domi::FAILED, "Get outnode op desc fail."); | |||
@@ -468,7 +491,6 @@ Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> | |||
if (node->GetType() != NETOUTPUT) { | |||
for (size_t index = 0; index < size; ++index) { | |||
output_nodes_info.push_back(std::make_pair(node, index)); | |||
output_nodes_name.push_back(node->GetName()); | |||
} | |||
} else { | |||
const auto in_anchors = node->GetAllInDataAnchors(); | |||
@@ -480,7 +502,6 @@ Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> | |||
} | |||
auto out_node = out_anchor->GetOwnerNode(); | |||
output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx())); | |||
output_nodes_name.push_back(out_node->GetName()); | |||
} | |||
} | |||
return SUCCESS; | |||
@@ -612,9 +633,16 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri | |||
Params::Instance()->SetTarget(target); | |||
// Create an empty computegraph | |||
ComputeGraphPtr compute_graph = nullptr; | |||
GE_MAKE_SHARED(compute_graph = std::make_shared<ComputeGraph>(kGraphDefaultName + "_" + CurrentTimeInStr()), | |||
return FAILED); | |||
std::string om_name; | |||
ParseAtcParms(atc_params, "output", om_name); | |||
ModelHelper model_helper; | |||
string graph_name = ""; | |||
Status name_ret = model_helper.GetBaseNameFromFileName(om_name, graph_name); | |||
if (name_ret != SUCCESS) { | |||
graph_name = kGraphDefaultName + "_" + CurrentTimeInStr(); | |||
} | |||
ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(graph_name); | |||
GE_CHECK_NOTNULL(compute_graph); | |||
graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); | |||
// initialize omgContext | |||
@@ -664,8 +692,6 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri | |||
GELOGI("The pre-checking report has been saved to %s.", check_report.c_str()); | |||
} | |||
// Prevent data residue in multiple calls | |||
PreChecker::Instance().Clear(); | |||
GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC model parse ret fail."); | |||
std::string input_fp16_nodes; | |||
@@ -693,12 +719,19 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri | |||
graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); | |||
auto weights_parser = WeightsParserFactory::Instance()->CreateWeightsParser(type); | |||
ret = weights_parser->Parse(weights_file, graph); | |||
GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail."); | |||
// IN ONLY_PRE_CHECK mode, generate pre inspection report only. | |||
if (run_mode == ONLY_PRE_CHECK) { | |||
if (PreChecker::Instance().HasError() || run_mode == ONLY_PRE_CHECK) { | |||
std::string check_report; | |||
ParseAtcParms(atc_params, "check_report", check_report); | |||
GE_RETURN_WITH_LOG_IF_ERROR(PreChecker::Instance().Save(check_report), "Generate pre-checking report failed."); | |||
GEEVENT("The pre-checking report has been saved to %s.", check_report.c_str()); | |||
return SUCCESS; | |||
} | |||
// Prevent data residue in multiple calls | |||
PreChecker::Instance().Clear(); | |||
GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail."); | |||
GELOGI("ATC parser success."); | |||
@@ -41,17 +41,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFr | |||
uintptr_t resource_id; | |||
// runtime uses NULL to denote a default stream for each device | |||
if (stream == nullptr) { | |||
// use device id as resource key instead | |||
int32_t dev_id = 0; | |||
auto rt_err = rtGetDevice(&dev_id); | |||
// get current context | |||
rtContext_t rt_cur_ctx = nullptr; | |||
auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); | |||
if (rt_err != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Get current device id failed. ret = %d", static_cast<int>(rt_err)); | |||
GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err)); | |||
return RT_FAILED; | |||
} | |||
GELOGI("GetOpFromModel with default stream. device id = %d", dev_id); | |||
resource_id = static_cast<uintptr_t>(dev_id); | |||
// use current context as resource key instead | |||
GELOGI("use context as resource key instead when default stream"); | |||
resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx); | |||
} else { | |||
GELOGI("use stream as resource key instead when create stream"); | |||
resource_id = reinterpret_cast<uintptr_t>(stream); | |||
} | |||
@@ -0,0 +1,6 @@ | |||
inc_path := $(shell pwd)/inc/external/ | |||
out_path := $(shell pwd)/out/atc/lib64/stub/ | |||
stub_path := $(shell pwd)/framework/domi/stub/ | |||
mkdir_stub := $(shell mkdir -p $(out_path)) | |||
local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path)) |
@@ -0,0 +1,4 @@ | |||
################################################################################### | |||
this directory (stub) contains the generated stub files
gen_stubapi.py is used for retrieving APIs and generating stub functions
################################################################################### |
@@ -0,0 +1,573 @@ | |||
import os | |||
import re | |||
import sys | |||
import logging | |||
logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s', | |||
level=logging.INFO) | |||
""" | |||
this attr is used for symbol table visible | |||
""" | |||
GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' | |||
""" | |||
generate stub func body by return type | |||
""" | |||
RETURN_STATEMENTS = { | |||
'graphStatus': ' return GRAPH_SUCCESS;', | |||
'Status': ' return SUCCESS;', | |||
'Graph': ' return Graph();', | |||
'Graph&': ' return *this;', | |||
'Format': ' return Format();', | |||
'Format&': ' return *this;', | |||
'Shape': ' return Shape();', | |||
'Shape&': ' return *this;', | |||
'TensorDesc': ' return TensorDesc();', | |||
'TensorDesc&': ' return *this;', | |||
'Tensor': ' return Tensor();', | |||
'Tensor&': ' return *this;', | |||
'Operator': ' return Operator();', | |||
'Operator&': ' return *this;', | |||
'Ptr': ' return nullptr;', | |||
'std::string': ' return "";', | |||
'std::string&': ' return "";', | |||
'string': ' return "";', | |||
'int': ' return 0;', | |||
'DataType': ' return DT_FLOAT;', | |||
'InferenceContextPtr': ' return nullptr;', | |||
'SubgraphBuilder': ' return nullptr;', | |||
'OperatorImplPtr': ' return nullptr;', | |||
'OutHandler': ' return nullptr;', | |||
'std::vector<std::string>': ' return {};', | |||
'std::vector<int64_t>': ' return {};', | |||
'std::map': ' return {};', | |||
'uint32_t': ' return 0;', | |||
'int64_t': ' return 0;', | |||
'uint64_t': ' return 0;', | |||
'size_t': ' return 0;', | |||
'float': ' return 0.0f;', | |||
'bool': ' return false;', | |||
} | |||
""" | |||
max code len per line in hua_wei software programming specifications | |||
""" | |||
max_code_len_per_line = 100 | |||
""" | |||
white_list_for_debug, include_dir_key_words is to | |||
determines which header files to generate cc files from | |||
when DEBUG on | |||
""" | |||
white_list_for_debug = ["operator.h", "tensor.h", | |||
"graph.h", "operator_factory.h", | |||
"ge_ir_build.h"] | |||
include_dir_key_words = ["ge", "graph"] | |||
DEBUG = True | |||
def need_generate_func(func_line):
    """Return False for declarations that need no stub body.

    Defaulted/deleted special members and typedef/using aliases are pure
    declarations, so no .cc definition is generated for them.

    :param func_line: the assembled declaration line
    :return: True when a stub body should be generated
    """
    stripped = func_line.strip()
    if stripped.endswith("default") or stripped.endswith("delete"):
        return False
    if stripped.startswith("typedef") or stripped.startswith("using"):
        return False
    return True
def file_endswith_white_list_suffix(file):
    """Decide whether `file` should be converted into a stub .cc file.

    With DEBUG on, only headers whose names appear in white_list_for_debug
    qualify; otherwise every file does.

    :param file: path of the candidate header
    :return: True when the file should be processed
    """
    if not DEBUG:
        return True
    return any(file.endswith(suffix) for suffix in white_list_for_debug)
""" | |||
belows are patterns used for analyse .h file | |||
""" | |||
# pattern function | |||
pattern_func = re.compile(r"""(^[\s]*) #leading with space,we will find and delete after | |||
([a-zA-Z~_] # void int likely | |||
.* | |||
[)] #we find ) | |||
(?!.*{) # we do not want the case int abc() const { return 1;} | |||
.*) | |||
(;.*) #we want to find ; and after for we will replace these later | |||
\n$ | |||
""", re.VERBOSE | re.MULTILINE | re.DOTALL) | |||
# pattern comment | |||
pattern_comment = re.compile(r'^\s*//') | |||
pattern_comment_2_start = re.compile(r'^\s*/[*]') | |||
pattern_comment_2_end = re.compile(r'[*]/\s*$') | |||
# pattern define | |||
pattern_define = re.compile(r'^\s*#define') | |||
pattern_define_return = re.compile(r'\\\s*$') | |||
# blank line | |||
pattern_blank_line = re.compile(r'^\s*$') | |||
# virtual,explicit,friend,static | |||
pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)') | |||
# lead space | |||
pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]') | |||
# functions will have patterns such as func ( or func( | |||
# but operator is an exception; the class name is preceded by an operator, and the above mode does not exist | |||
# format like :"operator = ()" | |||
pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]') | |||
# template | |||
pattern_template = re.compile(r'^\s*template') | |||
pattern_template_end = re.compile(r'>\s*$') | |||
# namespace | |||
pattern_namespace = re.compile(r'namespace.*{') | |||
# class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with | |||
pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+<?)(?!.*;)' % GE_ATTR) | |||
# {} | |||
pattern_start = re.compile('{') | |||
pattern_end = re.compile('}') | |||
line_index = 0 | |||
class H2CC(object): | |||
def __init__(self, input_file, output_file, shared_includes_content): | |||
""" | |||
:param input_file: | |||
:param output_file: | |||
:param shared_includes_content: | |||
""" | |||
self.input_file = input_file | |||
self.output_file = output_file | |||
self.shared_includes_content = shared_includes_content | |||
self.line_index = 0 | |||
self.input_fd = open(self.input_file, 'r') | |||
self.input_content = self.input_fd.readlines() | |||
self.output_fd = open(self.output_file, 'w') | |||
# The state may be normal_now(in the middle of {}),class_now,namespace_now | |||
self.stack = [] | |||
self.stack_class = [] | |||
self.stack_template = [] | |||
# record funcs generated by h2cc func | |||
self.func_list_exist = [] | |||
def __del__(self): | |||
self.input_fd.close() | |||
self.output_fd.close() | |||
del self.stack | |||
del self.stack_class | |||
del self.stack_template | |||
del self.func_list_exist | |||
def just_skip(self): | |||
# skip blank line or comment | |||
if pattern_blank_line.search(self.input_content[self.line_index]) or pattern_comment.search( | |||
self.input_content[self.line_index]): # /n or comment using // | |||
self.line_index += 1 | |||
if pattern_comment_2_start.search(self.input_content[self.line_index]): # comment using /* | |||
while not pattern_comment_2_end.search(self.input_content[self.line_index]): # */ | |||
self.line_index += 1 | |||
self.line_index += 1 | |||
# skip define | |||
if pattern_define.search(self.input_content[self.line_index]): | |||
while pattern_blank_line.search(self.input_content[self.line_index]) or pattern_define_return.search( | |||
self.input_content[self.line_index]): | |||
self.line_index += 1 | |||
self.line_index += 1 | |||
def write_inc_content(self): | |||
for shared_include_content in self.shared_includes_content: | |||
self.output_fd.write(shared_include_content) | |||
def h2cc(self): | |||
""" | |||
:return: | |||
""" | |||
logging.info("start generate cc_file[%s] from h_file[%s]", self.output_file, self.input_file) | |||
global pattern_comment | |||
global pattern_comment_2_start | |||
global pattern_comment_2_end | |||
global pattern_blank_line | |||
global pattern_func | |||
global pattern_keyword | |||
global pattern_leading_space | |||
global pattern_func_name | |||
global pattern_template | |||
global pattern_template_end | |||
global pattern_namespace | |||
global pattern_class | |||
global pattern_start | |||
global pattern_end | |||
global line_index | |||
# write inc content | |||
self.write_inc_content() | |||
# core processing cycle, process the input .h file by line | |||
while self.line_index < len(self.input_content): | |||
# handle comment and blank line | |||
self.just_skip() | |||
# match namespace | |||
self.handle_namespace() | |||
# match template | |||
template_string = self.handle_template() | |||
# match class | |||
line = self.input_content[self.line_index] | |||
match_class = pattern_class.search(line) | |||
match_start = pattern_start.search(line) | |||
handle_class_result = self.handle_class(template_string, line, match_start, match_class) | |||
if handle_class_result == "continue": | |||
continue | |||
# match "}" | |||
handle_stack_result = self.handle_stack(match_start) | |||
if handle_stack_result == "continue": | |||
continue | |||
# handle func | |||
handle_func1_result, line, start_i = self.handle_func1(line) | |||
if handle_func1_result == "continue": | |||
continue | |||
# here means func is found | |||
# delete key word | |||
line = pattern_keyword.sub('', line) | |||
logging.info("line[%s]", line) | |||
# Class member function | |||
# if friend we will not add class name | |||
friend_match = re.search('friend ', line) | |||
if len(self.stack_class) > 0 and not friend_match: | |||
line, func_name = self.handle_class_member_func(line, template_string) | |||
# Normal functions | |||
else: | |||
line, func_name = self.handle_normal_func(line, template_string) | |||
need_generate = need_generate_func(line) | |||
# func body | |||
line += self.implement_function(line) | |||
# comment | |||
line = self.gen_comment(start_i) + line | |||
# write to out file | |||
self.write_func_content(line, func_name, need_generate) | |||
# next loop | |||
self.line_index += 1 | |||
logging.info('Added %s functions', len(self.func_list_exist)) | |||
logging.info('Successfully converted,please see ' + self.output_file) | |||
def handle_func1(self, line):
    """Try to recognize a function declaration starting at the current line.

    Joins a parameter list that spans several physical lines into one
    logical line, and skips inline-defined functions (declaration followed
    by '{').

    :param line: current source line from self.input_content
    :return: tuple (action, line, start_i); action is "continue" when the
        caller should move on to the next line, "pass" when a declaration
        was recognized; start_i is the index of the first line of the
        declaration (None when the line holds no '(' at all)
    """
    find1 = re.search('[(]', line)
    if not find1:
        self.line_index += 1
        return "continue", line, None
    find2 = re.search('[)]', line)
    start_i = self.line_index
    space_match = pattern_leading_space.search(line)
    # deal with a parameter list broken across lines:
    #   int abc(int a,
    #           int b)
    if find1 and (not find2):
        self.line_index += 1
        line2 = self.input_content[self.line_index]
        if space_match:
            line2 = re.sub('^' + space_match.group(1), '', line2)
        line += line2
        while self.line_index < len(self.input_content) and (not re.search('[)]', line2)):
            self.line_index += 1
            line2 = self.input_content[self.line_index]
            # bug fix: guard the dereference -- when the first line has no
            # leading-space match, space_match is None and the original code
            # raised AttributeError here
            if space_match:
                line2 = re.sub('^' + space_match.group(1), '', line2)
            line += line2
        match_start = pattern_start.search(self.input_content[self.line_index])
        match_end = pattern_end.search(self.input_content[self.line_index])
        if match_start:  # like ") {" or ") {}" in the last line
            if not match_end:
                self.stack.append('normal_now')
            # (removed a dead loop that only counted from start_i upwards)
            self.line_index += 1
            return "continue", line, start_i
    logging.info("line[%s]", line)
    # '  int abc();' -> 'int abc()\n'
    (line, match) = pattern_func.subn(r'\2\n', line)
    logging.info("line[%s]", line)
    # deal with a return type on its own line:
    #   int
    #   abc(int a, int b)
    if re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]):
        line = self.input_content[start_i - 1] + line
    line = line.lstrip()
    if not match:
        self.line_index += 1
        return "continue", line, start_i
    return "pass", line, start_i
def handle_stack(self, match_start):
    """Maintain the brace/scope stack for the current line.

    :param match_start: regex match for an opening '{' on this line (or None)
    :return: "continue" when the line was consumed by scope bookkeeping,
        "pass" when the caller should keep processing it
    """
    current = self.input_content[self.line_index]
    match_end = pattern_end.search(current)
    if match_start:
        self.stack.append('normal_now')
    if match_end:
        # a closing brace terminates the innermost scope
        scope = self.stack.pop()
        if scope == 'namespace_now':
            # namespace closing braces are echoed into the generated file
            self.output_fd.write(current + '\n')
        elif scope == 'class_now':
            self.stack_class.pop()
            self.stack_template.pop()
    inside_plain_body = len(self.stack) > 0 and self.stack[-1] == 'normal_now'
    if match_start or match_end or inside_plain_body:
        self.line_index += 1
        return "continue"
    return "pass"
def handle_class(self, template_string, line, match_start, match_class):
    """Record a class declaration and advance past its opening brace.

    :param template_string: template<> prefix collected just before the class
    :param line: the line on which pattern_class matched
    :param match_start: regex match for '{' on that line (or None)
    :param match_class: regex match produced by pattern_class (or None)
    :return: "continue" when a class was consumed, otherwise "pass"
    """
    if not match_class:
        return "pass"
    # a class starts here: remember its template prefix and scope kind
    self.stack_template.append(template_string)
    self.stack.append('class_now')
    class_name = match_class.group(3)
    # class template specialization such as: class A<u, Node<u> >
    if '<' in class_name:
        open_idx = line.index('<')
        depth = 1
        close_idx = open_idx
        # scan for the matching '>' of the outermost '<'
        for close_idx in range(open_idx + 1, len(line)):
            ch = line[close_idx]
            if ch == '<':
                depth += 1
            elif ch == '>':
                depth -= 1
                if depth == 0:
                    break
        class_name += line[open_idx + 1:close_idx + 1]
    logging.info('class_name[%s]', class_name)
    self.stack_class.append(class_name)
    # move forward to the line that opens the class body
    while not match_start:
        self.line_index += 1
        line = self.input_content[self.line_index]
        match_start = pattern_start.search(line)
    self.line_index += 1
    return "continue"
def handle_template(self):
    """Collect a (possibly multi-line) template<...> prefix.

    Advances self.line_index past the template header when one is found.

    :return: the accumulated template text, or '' when the current line
        does not start a template
    """
    current = self.input_content[self.line_index]
    if not pattern_template.search(current):
        return ''
    template_string = current
    # keep appending lines until the closing of the template header
    while not pattern_template_end.search(current):
        self.line_index += 1
        current = self.input_content[self.line_index]
        template_string += current
    self.line_index += 1
    return template_string
def handle_namespace(self):
    """Echo a namespace opening line into the output and track its scope."""
    current = self.input_content[self.line_index]
    if pattern_namespace.search(current):
        # namespaces are reproduced verbatim in the generated file
        self.output_fd.write(current + '\n')
        self.stack.append('namespace_now')
        self.line_index += 1
def handle_normal_func(self, line, template_string):
    """Normalize a free (non-member) function declaration.

    :param line: the declaration line (already keyword-stripped)
    :param template_string: template<> prefix attached to the function, or ''
    :return: tuple (line, func_name) where line is the rewritten declaration
        and func_name is everything up to and including the closing ')'
    """
    self.stack_template.append(template_string)
    prefix = ''
    if self.stack_template[-1]:
        prefix = re.sub(r'\s*template', 'template', self.stack_template[-1])
        # drop template default arguments:
        # '<class T = a, class U = A(3)>' -> '<class T, class U>'
        for pattern, repl in ((r'\s*=.*>(\s*)$', r'>\1'),
                              (r'\s*=.*,', ','),
                              (r'\s*=.*', '')):
            prefix = re.sub(pattern, repl, prefix)
    # drop default parameter values from the declaration itself
    line = re.sub(r'\s*=.*,', ',', line)
    line = re.sub(r'\s*=.*\)', ')', line)
    line = prefix + line
    self.stack_template.pop()
    func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
    logging.info("line[%s]", line)
    logging.info("func_name[%s]", func_name)
    return line, func_name
def handle_class_member_func(self, line, template_string):
    """Qualify a member-function declaration with its enclosing class name.

    Produces 'ReturnType Class<T>::func(...)' from a declaration found inside
    a class body, stripping default arguments and pure-virtual '= 0'.

    :param line: the declaration line (already keyword-stripped)
    :param template_string: template<> prefix attached to the function itself
    :return: tuple (line, func_name) -- the rewritten definition line and the
        signature up to the closing ')'
    """
    template_line = ''
    x = ''
    if template_string != '':
        # normalize the function's own template<> prefix and drop defaults
        template_string = re.sub(r'\s*template', 'template', template_string)
        template_string = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_string)
        template_string = re.sub(r'\s*=.*,', ',', template_string)
        template_string = re.sub(r'\s*=.*', '', template_string)
    if self.stack_template[-1] != '':
        # bug fix: the original referenced the bare name 'stack_template'
        # (NameError at runtime) -- it is an instance attribute
        if not (re.search(r'<\s*>', self.stack_template[-1])):
            template_line = re.sub(r'^\s*template', 'template', self.stack_template[-1])
            if not (re.search(r'<.*>', self.stack_class[-1])):
                # for x we get like template<class T, typename U> -> <T, U>
                x = re.sub(r'template\s*<', '<', template_line)  # remove template -> <class T, typename U>
                x = re.sub(r'\n', '', x)
                x = re.sub(r'\s*=.*,', ',', x)
                x = re.sub(r'\s*=.*\>', '>', x)
                x = x.rstrip()  # remove trailing whitespace / newlines
                x = re.sub(r'(class|typename)\s+|(<class>|<typename>\s*class)', '',
                           x)  # remove class,typename -> <T, U>
                x = re.sub(r'<\s+', '<', x)
                x = re.sub(r'\s+>', '>', x)
                x = re.sub(r'\s+,', ',', x)
                x = re.sub(r',\s+', ', ', x)
    # strip pure-virtual '= 0' and default argument values
    line = re.sub(r'\s*=\s+0', '', line)
    line = re.sub(r'\s*=\s+.*,', ',', line)
    line = re.sub(r'\s*=\s+.*\)', ')', line)
    logging.info("x[%s]\nline[%s]", x, line)
    # if the resulting 'void ABC::foo()' line is too long,
    # break it into two lines: 'void ABC::\n foo()'
    temp_line = pattern_func_name.sub(self.stack_class[-1] + x + '::' + r'\1(', line, count=1)
    if len(temp_line) > max_code_len_per_line:
        line = pattern_func_name.sub(self.stack_class[-1] + x + '::\n' + r'\1(', line, count=1)
    else:
        line = temp_line
    logging.info("line[%s]", line)
    # prepend the class template header (defaults removed), then the
    # function's own template header
    template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line)
    template_line = re.sub(r'\s*=.*,', ',', template_line)
    template_line = re.sub(r'\s*=.*', '', template_line)
    line = template_line + template_string + line
    func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
    logging.info("line[%s]", line)
    logging.info("func_name[%s]", func_name)
    return line, func_name
def write_func_content(self, content, func_name, need_generate):
    """Append a generated stub to the output file, once per function name.

    :param content: the full stub text (signature + body) to write
    :param func_name: de-duplication key for the function
    :param need_generate: False when the caller decided to skip this function
    """
    if need_generate and func_name not in self.func_list_exist:
        self.output_fd.write(content)
        self.func_list_exist.append(func_name)
        logging.info('add func:[%s]', func_name)
def gen_comment(self, start_i):
    """Copy the comment block sitting directly above a declaration.

    :param start_i: index of the declaration's first line
    :return: the comment text (possibly empty), left-trimmed per line
    """
    comment_line = ''
    k = start_i - 1  # the line right above the declaration
    # a template<...> header may sit between the comment and the function
    if pattern_template.search(self.input_content[k]):
        k -= 1
    if pattern_comment_2_end.search(self.input_content[k]):
        # block comment '/* ... */': walk upwards to its opening line
        comment_line = self.input_content[k].lstrip()
        while not pattern_comment_2_start.search(self.input_content[k]):
            k -= 1
            comment_line = self.input_content[k].lstrip() + comment_line
        return comment_line
    # otherwise collect consecutive '//' lines going upwards
    for j in range(k, 0, -1):
        c_line = self.input_content[j]
        if not pattern_comment.search(c_line):
            break
        comment_line = re.sub(r'\s*//', '//', c_line) + comment_line
    return comment_line
@staticmethod
def implement_function(func):
    """Build a dummy function body returning a type-appropriate default.

    The first token(s) of the normalized declaration are inspected to select
    a return statement from the module-level RETURN_STATEMENTS table.

    :param func: normalized declaration text, e.g. 'int foo(int a)'
    :return: the generated body text, e.g. '{\\n  ...\\n}\\n\\n'
    """
    function_def = '{\n'
    all_items = func.split()
    if not all_items:
        # robustness: nothing to inspect (blank declaration) -- the original
        # raised IndexError here; emit an empty body instead
        return function_def + '\n}\n\n'
    start = 0
    return_type = all_items[start]
    # skip a leading 'const' qualifier (guard against 'const' alone)
    if return_type == "const" and len(all_items) > start + 1:
        start += 1
        return_type = all_items[start]
    # all standard containers share one default-construction statement
    if return_type.startswith(('std::map', 'std::set', 'std::vector')):
        return_type = "std::map"
    # any pointer return type collapses to the shared null-return statement
    if return_type.endswith('*') or (len(all_items) > start + 1 and all_items[start + 1].startswith('*')):
        return_type = "Ptr"
    if len(all_items) > start + 1 and all_items[start + 1].startswith('&'):
        return_type += "&"
    if return_type in RETURN_STATEMENTS:  # idiomatic membership test
        function_def += RETURN_STATEMENTS[return_type]
    else:
        logging.warning("Unhandled return type[%s]", return_type)
    function_def += '\n'
    function_def += '}\n'
    function_def += '\n'
    return function_def
def collect_header_files(path):
    """Recursively collect .h files under a directory.

    :param path: root directory to walk; must contain at least one '/'
        (callers pass a path ending in '/') so the include path can be made
        relative to it
    :return: tuple (header_files, shared_includes_content) -- the header
        paths with '/' separators, and matching '#include "..."' lines
    """
    header_files = []
    shared_includes_content = []
    for root, _, files in os.walk(path):
        files.sort()  # deterministic order across platforms
        for file_name in files:
            # skip anything git-related (idiomatic 'in' instead of .find() >= 0)
            if 'git' in file_name:
                continue
            if not file_name.endswith('.h'):
                continue
            file_path = os.path.join(root, file_name).replace('\\', '/')
            header_files.append(file_path)
            # include path is taken relative to the last '/' in the root path
            include_str = '#include "{}"\n'.format(file_path[path.rindex('/') + 1:])
            shared_includes_content.append(include_str)
    return header_files, shared_includes_content
def generate_stub_file(inc_dir, out_cc_dir):
    """Generate one stub .cc file per whitelisted header under inc_dir.

    :param inc_dir: directory tree to scan for headers
    :param out_cc_dir: destination directory (with trailing '/') for the
        generated .cc files
    :return: None
    """
    target_header_files, shared_includes_content = collect_header_files(inc_dir)
    for header_file in target_header_files:
        if not file_endswith_white_list_suffix(header_file):
            continue
        # latent bug fix: the original pattern '.h*$' left the dot unescaped
        # and quantified the 'h'; anchor the literal '.h' extension instead
        cc_file = re.sub(r'\.h$', '.cc', header_file)
        h_2_cc = H2CC(header_file, out_cc_dir + cc_file[cc_file.rindex('/') + 1:], shared_includes_content)
        h_2_cc.h2cc()
def gen_code(inc_dir, out_cc_dir):
    """Generate stub sources for every configured include-directory keyword.

    :param inc_dir: root directory holding the header trees
    :param out_cc_dir: directory that receives the generated .cc files
    :return: None
    """
    # normalize both paths so they end with exactly one '/'
    inc_dir = inc_dir if inc_dir.endswith('/') else inc_dir + '/'
    out_cc_dir = out_cc_dir if out_cc_dir.endswith('/') else out_cc_dir + '/'
    for keyword in include_dir_key_words:
        generate_stub_file(inc_dir + keyword, out_cc_dir)
if __name__ == '__main__':
    # Command-line entry point:
    #   argv[1] - root directory containing the header trees
    #   argv[2] - output directory for the generated stub .cc files
    inc_dir = sys.argv[1]
    out_cc_dir = sys.argv[2]
    gen_code(inc_dir, out_cc_dir)
@@ -17,9 +17,10 @@ | |||
syntax = "proto3"; | |||
import "om.proto"; | |||
package domi; | |||
message FusionModelDef { | |||
string version = 1; | |||
repeated OpDef fusion_op = 2; | |||
} | |||
} |
@@ -1,3 +1,19 @@ | |||
/** | |||
* Copyright 2019 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "register/op_kernel_registry.h" | |||
namespace ge { | |||
@@ -1029,9 +1029,9 @@ REG_OP(BesselI1e) | |||
* y: A Tensor of type UnaryDataType. | |||
* @attention Constraints: | |||
* @li "base" is supposed to be greater than 0. Retaining the default \n | |||
* @li "base" is supposed to be greater than 0. Retaining the default | |||
* value "-1" sets "base" to "e". | |||
* @li If the input value of operator Log is within the range (0, 0.01] or \n | |||
* @li If the input value of operator Log is within the range (0, 0.01] or | |||
* [0.95, 1.05], the output accuracy is subject to change. | |||
* @par Third-party framework compatibility | |||
@@ -1047,11 +1047,11 @@ REG_OP(Log) | |||
.OP_END_FACTORY_REG(Log) | |||
/** | |||
* @brief Returns x1 * x2 element-wise.\n | |||
* @brief Returns x1 * x2 element-wise. | |||
* y = x1 * x2 | |||
* @par Inputs: | |||
* @li x1: A Tensor. Must be one of the following types: float16, float32,\n | |||
* @li x1: A Tensor. Must be one of the following types: float16, float32, | |||
* float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. | |||
* @li x2: A Tensor. Must be one of the following types: float16, float32, | |||
* float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128. | |||
@@ -1079,7 +1079,7 @@ REG_OP(Mul) | |||
.OP_END_FACTORY_REG(Mul) | |||
/** | |||
* @brief Computes the gradient of the square root of "x" with regard to its\n | |||
* @brief Computes the gradient of the square root of "x" with regard to its | |||
* input. grad = dy * 0.5/y, where y = sqrt(x), and "dy" is the corresponding | |||
* input gradient. | |||
@@ -3022,6 +3022,7 @@ REG_OP(CosineEmbeddingLoss) | |||
*@brief Kullback-Leibler divergence. | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li x: Tensor of arbitrary shape. | |||
*@li target: Tensor of the same shape and dtype as x. | |||
@@ -934,7 +934,6 @@ REG_OP(EncodeJpeg) | |||
/** | |||
*@brief PNG-encode an image. | |||
*@par Inputs: | |||
*Input image must be unit8 or uint16 type. Inputs include: \n | |||
*image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] \n | |||
@@ -1224,6 +1223,16 @@ REG_OP(CombinedNonMaxSuppression) | |||
.ATTR(clip_boxes, Bool, true) | |||
.OP_END_FACTORY_REG(CombinedNonMaxSuppression) | |||
REG_OP(SpatialTransformerD) | |||
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.ATTR(output_size, ListInt, {-1, -1}) | |||
.ATTR(default_theta, ListFloat, {}) | |||
.ATTR(align_corners, Bool, false) | |||
.ATTR(use_default_theta, ListBool, {}) | |||
.OP_END_FACTORY_REG(SpatialTransformerD) | |||
} // namespace ge | |||
#endif // GE_OP_MAGE_OPS_H_ |
@@ -93,31 +93,49 @@ REG_OP(MatMulV2) | |||
*@par Inputs: | |||
*Five inputs, including: | |||
*@li a: A matrix Tensor. 4D. Must be one of the following types:\n float16, int8. Has format [FRACTAL_NZ]. | |||
*@li b: A matrix Tensor. 4D. Must be one of the following types:\n float16, int8. When type is int8, has format [FRACTAL_Z], \n otherwise has format [FRACTAL_NZ]. | |||
*@li c: A matrix Tensor. 2D or higher. Must be one of the following types: \n float16, int32, float32. When type is int32, has format [ND], \n otherwise has format [FRACTAL_NZ]. | |||
*@li alpha: A 1D Tensor. The shape of alpha is [1].\n Must be one of the following types: float16, int32, float32. Has format [ND]. | |||
*@li beta: A 1D Tensor. The shape of beta is [1].\n Must be one of the following types: float16, int32, float32. Has format [ND]. | |||
*@li a: A matrix Tensor. Must be one of the following types: float16, int8. | |||
* Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). | |||
*@li b: A matrix Tensor. Must be one of the following types: float16, int8. | |||
* Has format [ND, FRACTAL_NZ, FRACTAL_Z]. 2D(ND) or 4D(FRACTAL_NZ, FRACTAL_Z). | |||
*@li c: A matrix Tensor. Must be one of the following types: float16, int32, | |||
* float32. has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ). | |||
*@li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the following | |||
* types: float16, int32, float32. Has format [ND]. | |||
*@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following | |||
* types: float16, int32, float32. Has format [ND]. | |||
* The format of a, b, c has restriction:\n | |||
* When type of a is int8 and type of c is int32, the format of a, b, c should | |||
* all be ND, or a is FRACTAL_NZ and b is FRACTAL_Z and c is ND.\n | |||
* When type of a is int8 and type of c is float32, the format of a, b, c should | |||
* all be ND or a is FRACTAL_NZ and b is FRACTAL_Z and c is FRACTAL_NZ.\n | |||
* When type of a is float16 and type of c is float16, the format of a, b, c | |||
* should all be ND or FRACTAL_NZ.\n | |||
* When type of a is float16 and type of c is float32, the format of a, b, c | |||
* should all be ND or FRACTAL_NZ. | |||
*@par Attributes: | |||
*Two attributes, including: | |||
*@li transpose_a: Optional. A bool.\n If True, changes the shape of "a" from [M, K] to [K, M].\n Reserved parameters, not used for now. | |||
*@li transpose_b: Optional. A bool.\n If True, changes the shape of "b" from [M, K] to [K, M].\n Reserved parameters, not used for now. | |||
*@li transpose_a: Optional. A bool. If True, changes the shape of "a" from | |||
* [M, K] to [K, M]. | |||
*@li transpose_b: Optional. A bool. If True, changes the shape of "b" from | |||
* [K, N] to [N, K]. | |||
*@par Outputs: | |||
*@out: The result matrix Tensor. 4D. Must be one of the following types:\n float16, float32, int32. Has format [FRACTAL_NZ]. | |||
*y: The result matrix Tensor. Must be one of the following types: float16, | |||
* float32, int32. Has format [ND, FRACTAL_NZ], the format should be equal to a. | |||
* 2D(ND) or 4D(FRACTAL_NZ). | |||
*/ | |||
REG_OP(Gemm) | |||
REG_OP(GEMM) | |||
.INPUT(a, TensorType({DT_FLOAT16, DT_INT8})) | |||
.INPUT(b, TensorType({DT_FLOAT16, DT_INT8})) | |||
.INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(out, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.ATTR(transpose_a, Bool, false) | |||
.ATTR(transpose_b, Bool, false) | |||
.OP_END_FACTORY_REG(Gemm) | |||
.OP_END_FACTORY_REG(GEMM) | |||
/** | |||
*@brief Multiplies matrix "a" by matrix "b", producing "a * b". | |||
@@ -361,14 +361,14 @@ REG_OP(BatchNormGradExt2) | |||
*@par Inputs: | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | |||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||
*@li momentum: An optional string, input x's Scale factor | |||
*@li variance: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the variance used for inference. | |||
*@li momentum: A Tensor of type float32 or float16, represents the mean and the variance's scale factor | |||
*@li scale: An optional tensor of type float16 or float32, no use | |||
*@li offset: An optional tensor of type float16 or float32, no use | |||
*@par Attributes: | |||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | |||
*@li use_global_stats: mean inference mode , only can be "True". | |||
*@li mode: An optional input, not use | |||
*@li mode: An optional attr, not use | |||
*@par Outputs:\n | |||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" | |||
*/ | |||
@@ -391,7 +391,7 @@ REG_OP(BNInference) | |||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | |||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||
*@li momentum: An optional float, input x's Scale factor | |||
*@li momentum: A Tensor of type float32 or float16, the mean and the variance's Scale factor | |||
*@par Attributes: | |||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | |||
*@li use_global_stats: mean inference mode , only can be "True". | |||
@@ -420,13 +420,13 @@ REG_OP(BnHost) | |||
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. | |||
*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. | |||
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. | |||
*@li momentum: An optional float, input x's Scale factor | |||
*@li scale: An optional tensor of type float16 or float32, no use | |||
*@li offset: An optional tensor of type float16 or float32, no use | |||
*@par Attributes: | |||
*@li momentum: An optional float32 num, represents the mean and the variance's scale factor | |||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". | |||
*@li use_global_stats: mean inference mode , only can be "True". | |||
*@li mode: An optional inpout, not use | |||
*@li mode: An optional attr, not use | |||
*@par Outputs:\n | |||
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" | |||
*/ | |||
@@ -62,7 +62,7 @@ namespace ge { | |||
* data is 5D with shape [N, C1, Ho, Wo, C0], | |||
* where C is the same as that of the feature map and C0 is 16.\n | |||
* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * | |||
* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512.\n | |||
* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512. | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. | |||
@@ -119,7 +119,7 @@ REG_OP(DepthwiseConv2DBackpropFilter) | |||
* data is 5D with shape [N, C1, Ho, Wo, C0], | |||
* where C is the same as that of the feature map and C0 is 16.\n | |||
* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * | |||
* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512.\n | |||
* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512. | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. | |||
@@ -178,7 +178,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD) | |||
* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the | |||
* data is 5D with shape [N, C1, Ho, Wo, C0], | |||
* where C is the same as that of the feature map and C0 is 16.\n | |||
* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * | |||
* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * | |||
* Wf * C0 * C0 * 2) / (2 * Wo *C0).\n | |||
* @par Third-party framework compatibility | |||
@@ -235,7 +235,7 @@ REG_OP(DepthwiseConv2DBackpropInput) | |||
* Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the | |||
* data is 5D with shape [N, C1, Ho, Wo, C0], | |||
* where C is the same as that of the feature map and C0 is 16.\n | |||
* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * | |||
* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf * | |||
* Wf * C0 * C0 * 2) / (2 * Wo *C0).\n | |||
* @par Third-party framework compatibility | |||
@@ -459,45 +459,44 @@ REG_OP(Conv2DBackpropInputD) | |||
*@brief Computes the Deconvolution with respect to the input. | |||
*@par Inputs: | |||
* Three inputs: | |||
* @li x: A Tensor. Must have the same type as "filter". 4D with shape | |||
* [batch, out_height, out_width, out_channels] | |||
* or [batch, out_channels, out_height, out_width]. Gradients with respect | |||
* @li x: A Tensor of type float16 or int8. 4D with shape | |||
* [batch, out_channels, out_height, out_width]. Gradients with respect | |||
* to the output of the convolution. | |||
* @li filter: A Tensor of type float16. | |||
* 4D with shape [filter_height, filter_width, in_channels, out_channels], | |||
* or [out_channels, filter_height, filter_width, in_channels], | |||
* or [out_channels, in_channel, filter_height, filter_width]. | |||
* @li filter: A Tensor. Must have the same type as "x". | |||
* 4D with shape [out_channels, in_channel, filter_height, filter_width].\n | |||
* Two optional inputs: | |||
* @li bias: An optional tensor of type float16 | |||
* @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved.\n | |||
* @li bias: An optional tensor. Must have the same type as "y". | |||
* @li offset_w: An optional 1D tensor for quantized deconvolution. | |||
* Type is int8. Reserved.\n | |||
*@par Attributes: | |||
* Six attributes: | |||
* @li strides: A tuple or list of 2 integers. The stride of the sliding window | |||
* for H/W dimension. | |||
* @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] | |||
* padding on the feature map | |||
* padding on the feature map. | |||
* @li dilations: A tuple or list of 4 integers. The dilation factor for each | |||
* dimension of input. Must be [1, 1, 1, 1]. | |||
* @li groups: Number of blocked connections from input channels to \n | |||
output channels. | |||
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC".\n | |||
* @li groups: Number of blocked connections from input channels to | |||
output channels. Defaults to "1". | |||
* @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n | |||
Specify the data format of the input and output data. | |||
* @li offset_x: An optional integer for quantized deconvolution. | |||
* @li offset_x: An optional integer for quantized deconvolution. Defaults to "0". | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as "filter". 4D tensor with shape | |||
* [batch, height, width, channels] or [batch, channels, height, width]. | |||
* y: A Tensor. 4D tensor with shape [batch, channels, height, width]. | |||
* When type of x is float16, the type of y must be float16. | |||
* When type of x is int8, the type of y must be int32. | |||
*/ | |||
REG_OP(Deconvolution) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) | |||
.INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32})) | |||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||
.ATTR(strides, ListInt, {1, 1, 1, 1}) | |||
.ATTR(pads, ListInt, {0, 0, 0, 0}) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32})) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||
.ATTR(groups, Int, 1) | |||
.ATTR(data_format, String, "NHWC") | |||
.ATTR(data_format, String, "NCHW") | |||
.ATTR(offset_x, Int, 0) | |||
.OP_END_FACTORY_REG(Deconvolution) | |||
/** | |||
@@ -554,7 +553,7 @@ REG_OP(Conv2DBackpropFilter) | |||
* @li groups: Number of blocked connections from input channels to output channels. | |||
* @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". Specify the data format of the input and output data. | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as x | |||
* y: A Tensor. Type is float32 | |||
*@par Third-party framework compatibility | |||
* Compatible with Tensorflow's conv2d_backprop_filter | |||
*/ | |||
@@ -586,8 +585,6 @@ REG_OP(Conv2DBackpropFilterD) | |||
|---------|---------|---------|----------|-------- | |||
| float32 | float32 | float32 | _ | float32 | |||
|---------|---------|---------|----------|-------- | |||
| float64 | float64 | float64 | _ | float64 | |||
|---------|---------|---------|----------|-------- | |||
| int8 | int8 | int32 | int8 | int32 | |||
-----------|---------|---------|---------|----------|-------- | |||
Format | NCHW | NCHW | ND | ND | NCHW | |||
@@ -607,7 +604,7 @@ REG_OP(Conv2DBackpropFilterD) | |||
* for dilated convolution. Has the same dimension order and value as "strides". | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Input channels and output channels must both be divisible by | |||
* "groups". Must be set to 1. | |||
* "groups". | |||
* @li offset_x: An optional integer for quantized convolution. | |||
* @li data_format: An optional string from: "NHWC", "NCHW". Specifying the | |||
* data format of the input and output images. Reserved. | |||
@@ -642,7 +639,7 @@ REG_OP(Conv2DBackpropFilterD) | |||
* @verbatim | |||
Output | Restrictions | |||
------------------|---------------------------------------------- | |||
W dimension == 1 | HxW(input) == HxW(filter) == 1x1,2x2...11x11. | |||
W dimension == 1 | HxW(input) == HxW(filter) | |||
H dimension == 1 | | |||
------------------|---------------------------------------------- | |||
W dimension == 1 | Not supported | |||
@@ -659,11 +656,11 @@ REG_OP(Conv2DBackpropFilterD) | |||
*@li Compatible with the Caffe operator 2D "Convolution". | |||
*/ | |||
REG_OP(Conv2D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) | |||
.INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.REQUIRED_ATTR(pads, ListInt) | |||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||
@@ -186,7 +186,7 @@ REG_OP(ROIAlignGrad) | |||
* Three inputs, including: \n | |||
*@li features: A 5HD Tensor of type float32 or float16. | |||
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, | |||
* "x0", "x1", "y0", and "y1". | |||
* "x0", "y0", "x1", and "y1". | |||
*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved. | |||
*@par Attributes: | |||
@@ -219,7 +219,7 @@ REG_OP(MaxPool3D) | |||
* @attention Constraints: | |||
* @li Computing gradients of global pooling is not supported, which means | |||
* "ksize < x1". | |||
* @li "ksiez" is in the range [1, 255]. "strides" is in the range [1, 63] | |||
* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] | |||
* @par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator MaxPoolGrad. | |||
@@ -239,10 +239,9 @@ REG_OP(MaxPoolGrad) | |||
* @brief Computes second-order gradients of the maxpooling function. | |||
* @par Inputs: | |||
* @li x1: Original forward input tensor. Supported type:float, double, int32, | |||
* uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||
* @li x2: Has the same type and format as input "x1". | |||
* @li grad:Has the same type and format as input "x1". | |||
* @li x1: Original forward input tensor of type RealNumberType | |||
* @li x2: Original forward output tensor of type RealNumberType | |||
* @li grad: Gradient tensor of type RealNumberType | |||
* @par Attributes: | |||
* @li ksize: A required list or tuple, | |||
@@ -258,9 +257,12 @@ REG_OP(MaxPoolGrad) | |||
* @li "x1" and "grads" must have the same shape. | |||
* @li "x2" and "y" must have the same shape. Otherwise, an error is reported. | |||
* @li "x1", "x2", "grads", and "y" must be 5D tensors. | |||
* @li ksize[H] and ksize[W] is in the range [1, 255]. | |||
* @li strides[H] and strides[W] is in the range [1, 63]. | |||
* @li Other dimensions of ksize and strides is 1. | |||
* @par Outputs: | |||
* @li y: Has the same type and format as input "x1". | |||
* @li y: Result tensor of type RealNumberType | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the TensorFlow operator MaxPoolGradGrad. | |||
@@ -399,18 +401,15 @@ REG_OP(MaxPoolGradWithArgmax) | |||
* @brief Computes second-order gradients of the maxpooling function. | |||
* @par Inputs: | |||
* @li x: Original forward input tensor. Supported type: float, double, int32, | |||
* uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||
* @li grad: Gradient tensor. Supported type: float, double, int32, | |||
* uint8, int16, int8, int64, uint16, half, uint32, uint64. | |||
* @li argmax: An tensor of type int32 or int64. | |||
* @li x: Original forward input tensor of type RealNumberType | |||
* @li grad: Gradient tensor of type RealNumberType | |||
* @li argmax: An tensor of type IndexNumberType | |||
* @par Attributes: | |||
* @li ksize: A required list, specifying the size of the sliding window. | |||
* @li strides: A required list, specifying the stride of the sliding window. | |||
* @li padding: A required string, window sliding mode. Either SAME or VALID. | |||
* @par Outputs: | |||
* @li y:Result tensor. Supported type: float, double, int32, | |||
* uint8, int16, int8, int64, uint16, half, uint32, uint64 | |||
* @li y:Result tensor of type RealNumberType | |||
* @attention Constraints: | |||
* @li Only the cloud platform is supported. | |||
@@ -41,7 +41,7 @@ namespace ge { | |||
*@li beta1: A scalar. Has the same type as "var". | |||
*@li beta2: A scalar. Has the same type as "var". | |||
*@li epsilon: A scalar. Has the same type as "var". | |||
*@li grad: A tensor for the gradient. Has the same type as "var". | |||
*@li grad: A tensor for the gradient. Has the same type as "var". | |||
* | |||
*@par Attributes: | |||
* use_locking: An optional bool. Defaults to "False". | |||
@@ -465,7 +465,7 @@ REG_OP(ApplyKerasMomentumD) | |||
/** | |||
*@brief Updates '*var' according to the Adam algorithm.. | |||
*@brief Updates '*var' according to the Adam algorithm. | |||
* lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t) | |||
* m_t := beta_1 * m_{t-1} + (1 - beta_1) * g | |||
* v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g | |||
@@ -866,7 +866,7 @@ REG_OP(ApplyCenteredRMSProp) | |||
.OUTPUT(var, TensorType::NumberType()) | |||
.ATTR(use_locking, Bool, false) | |||
.OP_END_FACTORY_REG(ApplyCenteredRMSProp) | |||
/** | |||
*@brief Updates "var" according to the centered RMSProp algorithm. | |||
* The centered RMSProp algorithm uses an estimate of the centered second moment | |||
@@ -1262,7 +1262,7 @@ REG_OP(DataFormatDimMap) | |||
.OP_END_FACTORY_REG(DataFormatDimMap) | |||
/** | |||
* @brief Implements stochastic gradient descent (optionally with momentum).\n | |||
* @brief Implements stochastic gradient descent (optionally with momentum). | |||
* Nesterov momentum is based on the formula from | |||
* On the importance of initialization and momentum in deep learning.\n | |||
@@ -1508,7 +1508,7 @@ REG_OP(ApplyProximalAdagradD) | |||
*@par Attributes: | |||
*use_locking: An optional bool. Defaults to "False".\n | |||
* If "True", updating of the var and accum tensors will be protected by a lock; \n | |||
* If "False", the behavior is undefined, but may exhibit less contention. | |||
* If "False", the behavior is undefined, but may exhibit less contention. | |||
*@par Outputs: | |||
*var: A mutable Tensor. Has the same type as "var". | |||
@@ -2172,13 +2172,13 @@ REG_OP(SparseApplyFtrl) | |||
* Should be a Variable Tensor. | |||
* @li grad: A Tensor of the same type as "var", for the gradient. | |||
* @li indices: A vector of indices into the first dimension of var and accum. | |||
* @par Attributes: | |||
* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. | |||
* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. | |||
* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. | |||
* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. | |||
* @par Attributes: | |||
* use_locking: An optional bool. Defaults to "False". | |||
* @li use_locking: An optional bool. Defaults to "False". | |||
* If "True", updating of the "var" and "accum" tensors will be | |||
* protected by a lock; otherwise the behavior is undefined, | |||
* but may exhibit less contention. | |||
@@ -2314,6 +2314,7 @@ REG_OP(SparseApplyFtrlV2D) | |||
* var <- var - mom\n | |||
* | |||
* @par Inputs: | |||
* Nine inputs, including: | |||
* @li var: A mutable tensor. Must be one of the data types defined in\n | |||
* TensorType::NumberType(). Should be from a Variable(). | |||
* @li ms: A mutable tensor. Must have the same type as "var". Should be from a | |||
@@ -2367,6 +2368,7 @@ REG_OP(SparseApplyRMSProp) | |||
* var <- var - mom | |||
* | |||
* @par Inputs: | |||
* Six inputs, including: | |||
* @li var: A mutable tensor. Must be one of the data types defined in | |||
* TensorType::NumberType(). Should be from a Variable(). | |||
* @li ms: A mutable tensor. Must have the same type as "var". Should be from a | |||
@@ -2418,6 +2420,7 @@ REG_OP(SparseApplyRMSPropD) | |||
* accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n | |||
* | |||
* @par Inputs: | |||
* Eight inputs, including: | |||
* @li var: A mutable tensor. Must be one of the data types defined in\n | |||
* TensorType::NumberType(). Should be from a Variable(). | |||
* @li accum: A mutable tensor. Must have the same type as "var". Should be from a | |||
@@ -2468,6 +2471,7 @@ REG_OP(SparseApplyAdadelta) | |||
* accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n | |||
* | |||
* @par Inputs: | |||
* Seven inputs, including: | |||
* @li var: A mutable tensor. Must be one of the data types defined in | |||
* TensorType::NumberType(). Should be from a Variable(). | |||
* @li accum: A mutable tensor. Must have the same type as "var". Should be from a | |||
@@ -203,11 +203,11 @@ REG_OP(Sigmoid) | |||
* @brief Computes z = (y - y*y)*dy. | |||
* @par Inputs: | |||
* @li y: the input is tensor , dtype is UnaryDataType. | |||
* @li dy the input is tensor , dtype is UnaryDataType. | |||
* @li y: The input is Tensor, dtype is UnaryDataType. | |||
* @li dy: The input is Tensor, dtype is UnaryDataType. | |||
* @par Outputs: | |||
* z: the shape of output, dtype is UnaryDataType. | |||
* z: The shape of output, dtype is UnaryDataType. | |||
*/ | |||
REG_OP(SigmoidGrad) | |||
.INPUT(y, TensorType(UnaryDataType)) | |||
@@ -21,17 +21,17 @@ | |||
namespace ge { | |||
/** | |||
* @brief Dequantizes the input tensor into a float tensor.\n | |||
* [input_min_range, input_max_range] are scalar floats that specify the range | |||
* for "output_data". \n | |||
* @brief Dequantizes the input tensor into a float tensor. | |||
* [min_range, max_range] are float32 tensors that specify the range | |||
* for "y". \n | |||
* The "mode" attribute controls exactly which calculations are used to convert\n | |||
* the float values to their quantized equivalents. | |||
* @par Inputs: | |||
* @li input_data: A Tensor. Must be one of the following types: int8, uint8, | |||
* @li x: A Tensor. Must be one of the following types: int8, uint8, | |||
* int32. | |||
* @li input_min_range: A Tensor of type float32. | |||
* @li min_range: A Tensor of type float32. | |||
* Specifies the minimum scalar value possibly produced for the input. | |||
* @li input_max_range: A Tensor of type float32. | |||
* @li max_range: A Tensor of type float32. | |||
* Specifies the maximum scalar value possibly produced for the input. | |||
* @par Attributes: | |||
@@ -39,11 +39,11 @@ namespace ge { | |||
* Defaults to "MIN_COMBINED". | |||
* @par Outputs: | |||
* output_data: A dictionary of type float32. | |||
* y: A dictionary of type float32. | |||
* @attention Constraints: | |||
* @li "input_min_range" and "input_max_range" have the same shapes. | |||
* @li "input_data" and "output_data" have the same shapes. | |||
* @li "min_range" and "max_range" have the same shapes. | |||
* @li "x" and "y" have the same shapes. | |||
* @par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator Dequantize. | |||
@@ -149,7 +149,7 @@ REG_OP(TileD) | |||
* @li indices: A Tensor of type IndexNumberType. | |||
* @par Outputs: | |||
* output: A Tensor of type BasicType. | |||
* y: A Tensor of type BasicType. | |||
* @see GatherNd() | |||
* @attention Constraints: | |||
@@ -767,6 +767,7 @@ REG_OP(SliceD) | |||
* dimension. | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A 1D or higher tensor of type float16, with the last dimension at | |||
* least "k". | |||
* Specifies the data to sort. | |||
@@ -789,7 +790,7 @@ REG_OP(SliceD) | |||
* @li indices: A Tensor of type int32, specifying the indices of sorted data. | |||
* @attention Constraints: | |||
* @li k =< 4096 | |||
* @li k =< 5120 | |||
* @li Size of the last dimension =< 65500 | |||
* @li sorted = true | |||
* @li Don't support to get score on the platform of Ascend310 | |||
@@ -813,6 +814,7 @@ REG_OP(TopKD) | |||
* dimension. | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A 1D or higher tensor of type BasicType, with the last dimension | |||
* at least "k". | |||
* @li k: A 0D Tensor of type int32.\n | |||
@@ -902,8 +904,8 @@ REG_OP(ScatterNdD) | |||
* @li x2: A 1D Tensor of type int32. A batch_size tensor of class ids. | |||
* @par Attributes: | |||
* @li k: A required int32, specifying the number of top elements to look at for | |||
* computing precision. | |||
* @li k: A required IndexNumberType, specifying the number of top elements to | |||
* look at for computing precision. | |||
* @par Outputs: | |||
* y: A Tensor of type bool. | |||
@@ -1000,6 +1002,7 @@ REG_OP(StridedSliceAssign) | |||
* "strides", etc. work exactly as in "StridedSlice". | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li var: A mutable ND Tensor of type BasicType. | |||
* @li input_value: A mutable ND "Tensor" of type BasicType. | |||
@@ -1335,7 +1338,7 @@ REG_OP(InplaceSubD) | |||
.OP_END_FACTORY_REG(InplaceSubD) | |||
/** | |||
* @brief Applies sparse addition to input "x" using individual values or slices\n | |||
* @brief Applies sparse addition to input "x" using individual values or slices | |||
* from "updates" according to "indices". The updates are non-aliasing: "x" is\n | |||
* only modified in-place if no other operations will use it. Otherwise, a copy\n | |||
* of "x" is made. This operation has a gradient with respect to both "x" and | |||
@@ -1372,7 +1375,7 @@ REG_OP(ScatterNonAliasingAdd) | |||
* @li x: A Tensor of type RealNumberType. | |||
* @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix | |||
* of "x.shape". | |||
* @li k: A Tensor. | |||
* @li num_segments: A Tensor of type IndexNumberType. | |||
* @par Outputs: | |||
* y: A Tensor of type RealNumberType. | |||
@@ -1419,13 +1422,13 @@ REG_OP(UnsortedSegmentMinD) | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li x: A Tensor of type RealNumberType. | |||
* @li x: A Tensor of type NumberType. | |||
* @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix | |||
* of "x.shape". | |||
* @li k: A Tensor. | |||
* @li num_segments: A Tensor of type IndexNumberType. | |||
* @par Outputs: | |||
* y: A Tensor of type RealNumberType. | |||
* y: A Tensor of type NumberType. | |||
* @see UnsortedSegmentSum(), UnsortedSegmentMin(), | |||
@@ -20,19 +20,38 @@ | |||
#include "graph/operator_reg.h" | |||
namespace ge { | |||
/** | |||
*@brief Convert tensor format from HWCN to C1HWNCoC0. | |||
*@par Inputs: | |||
*x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN. | |||
*@par Outputs: | |||
*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0. | |||
*/ | |||
REG_OP(DepthwiseWeight4DTo6D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | |||
.OP_END_FACTORY_REG(DepthwiseWeight4DTo6D) | |||
/** | |||
*@brief Convert tensor format from C1HWNCoC0 to HWCN. | |||
*@par Inputs: | |||
*x: A Tensor. Must be 6D Tensor of type float16, float32, int32, uint16, with format C1HWNCoC0. | |||
*@par Attributes: | |||
*channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN. | |||
*@par Outputs: | |||
*y: A 4D Tensor. Has the same type as "x", with format HWCN. | |||
*/ | |||
REG_OP(DepthwiseWeight6DTo4D) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16})) | |||
.ATTR(channel_size, Int, 16) | |||
.OP_END_FACTORY_REG(DepthwiseWeight6DTo4D) | |||
/** | |||
*@brief Permutes the dimensions according to perm.\n | |||
The returned tensor's dimension i will correspond to the input dimension perm[i]. | |||
@@ -390,20 +409,20 @@ REG_OP(SpaceToBatchD) | |||
.OP_END_FACTORY_REG(SpaceToBatchD) | |||
/** | |||
* @brief Unpacks the given dimension of a rank-R tensor "x" into rank-(R-1) | |||
* @brief Unpacks the given dimension of a rank-R Tensor "x" into rank-(R-1) | |||
* tensors. | |||
* @par Inputs: | |||
* x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0. | |||
* @par Attributes: | |||
* @li num: An optional int, specifying the number of tensors to be unpacked to. | |||
* @li num: A required int, specifying the number of tensors to be unpacked to. | |||
* Defaults to "None". | |||
* @li axis: A required int, specifying the axis to unpack along. The value range | |||
* @li axis: An optional int, specifying the axis to unpack along. The value range | |||
* is [-R, R). | |||
* @par Outputs: | |||
* y: The list of Tensor objects unpacked from "x", of type BasicType. | |||
* y: Dynamic output. The list of Tensor objects unpacked from "x", of type BasicType. | |||
* @attention Constraints: | |||
* @li If "num" is not specified, it is inferred from the shape of "x". | |||
@@ -434,11 +453,11 @@ REG_OP(Unpack) | |||
* dimension of images. | |||
* @li strides: A required list or tuple. How far the centers of two consecutive | |||
* patches are in the images. Must be: [1, stride_rows, stride_cols, 1]. | |||
* @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1]. \n | |||
* This is the input stride, specifying how far two consecutive patch \n | |||
* @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1].\n | |||
* This is the input stride, specifying how far two consecutive patch\n | |||
* samples are in the input. Equivalent to extracting patches | |||
* with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *\n | |||
* (rates - 1), followed by subsampling them spatially by a factor of rates. \n | |||
* (rates - 1), followed by subsampling them spatially by a factor of rates.\n | |||
* This is equivalent to rate in dilated (a.k.a. Atrous) convolutions. | |||
* @li padding: A required string. The type of padding algorithm to use. | |||
@@ -59,6 +59,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { | |||
domi::ParseParamFunc GetParseParamFunc(const std::string &op_type); | |||
domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &op_type); | |||
domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type); | |||
domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); | |||
@@ -73,6 +75,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { | |||
std::unordered_map<std::string, std::set<std::string>> op_ori_optype_map_; | |||
std::unordered_map<std::string, domi::ImplyType> op_run_mode_map_; | |||
std::unordered_map<std::string, ParseParamFunc> opParseParamsFnMap_; | |||
std::unordered_map<std::string, ParseParamByOpFunc> parse_params_by_op_func_map_; | |||
std::unordered_map<std::string, FusionParseParamFunc> fusionOpParseParamsFnMap_; | |||
std::unordered_map<std::string, ParseSubgraphFunc> op_types_to_parse_subgraph_post_func_; | |||
std::unordered_map<std::string, std::vector<RemoveInputConfigure>> remove_input_configure_map_; | |||
@@ -100,6 +100,14 @@ RTS_API rtError_t rtCtxGetCurrent(rtContext_t *ctx); | |||
/** | |||
* @ingroup rt_context | |||
* @brief returns the primary context of device. | |||
* @param [out] ctx returned context | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtGetPriCtxByDeviceId(int32_t device, rtContext_t *ctx); | |||
/** | |||
* @ingroup rt_context | |||
* @brief returns the device ID for the current context | |||
* @param [out] device returned device id | |||
* @return RT_ERROR_NONE for ok | |||
@@ -277,6 +277,7 @@ extern int dlog_setlevel(int moduleId, int level, int enableEvent); | |||
/** | |||
* @ingroup slog | |||
* @brief CheckLogLevel: check module level enable or not | |||
* users no need to call it because all dlog interface(include inner interface) has already called | |||
* | |||
* @param [in]moduleId: module id, eg: CCE | |||
* @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG | |||
@@ -291,46 +292,76 @@ extern int CheckLogLevel(int moduleId, int logLevel); | |||
* @param [in]moduleId: module id, eg: CCE | |||
* @param [in]fmt: log content | |||
*/ | |||
#define dlog_error(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
#define dlog_error(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} while (0) | |||
/** | |||
* @ingroup slog | |||
* @brief dlog_warn: print warning log | |||
* call CheckLogLevel in advance to optimize performance, call interface with fmt input take time | |||
* | |||
* @param [in]moduleId: module id, eg: CCE | |||
* @param [in]fmt: log content | |||
*/ | |||
#define dlog_warn(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||
#define dlog_warn(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} while (0) | |||
#else | |||
#define dlog_warn(moduleId, fmt, ...) \ | |||
do { \ | |||
if(CheckLogLevel(moduleId, DLOG_WARN) == 1) { \ | |||
DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
#endif | |||
/** | |||
* @ingroup slog | |||
* @brief dlog_info: print info log | |||
* call CheckLogLevel in advance to optimize performance, call interface with fmt input take time | |||
* | |||
* @param [in]moduleId: module id, eg: CCE | |||
* @param [in]fmt: log content | |||
*/ | |||
#define dlog_info(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||
#define dlog_info(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} while (0) | |||
#else | |||
#define dlog_info(moduleId, fmt, ...) \ | |||
do { \ | |||
if(CheckLogLevel(moduleId, DLOG_INFO) == 1) { \ | |||
DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
#endif | |||
/** | |||
* @ingroup slog | |||
* @brief dlog_debug: print debug log | |||
* call CheckLogLevel in advance to optimize performance, call interface with fmt input take time | |||
* | |||
* @param [in]moduleId: module id, eg: CCE | |||
* @param [in]fmt: log content | |||
*/ | |||
#define dlog_debug(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||
#define dlog_debug(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} while (0) | |||
#else | |||
#define dlog_debug(moduleId, fmt, ...) \ | |||
do { \ | |||
if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) { \ | |||
DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
#endif | |||
/** | |||
* @ingroup slog | |||
@@ -339,9 +370,9 @@ extern int CheckLogLevel(int moduleId, int logLevel); | |||
* @param [in]moduleId: module id, eg: CCE | |||
* @param [in]fmt: log content | |||
*/ | |||
#define dlog_event(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
#define dlog_event(moduleId, fmt, ...) \ | |||
do { \ | |||
DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} while (0) | |||
/** | |||
@@ -352,10 +383,19 @@ extern int CheckLogLevel(int moduleId, int logLevel); | |||
* @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) | |||
* @param [in]fmt: log content | |||
*/ | |||
#define Dlog(moduleId, level, fmt, ...) \ | |||
do { \ | |||
DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||
#define Dlog(moduleId, level, fmt, ...) \ | |||
do { \ | |||
DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} while (0) | |||
#else | |||
#define Dlog(moduleId, level, fmt, ...) \ | |||
do { \ | |||
if(CheckLogLevel(moduleId, level) == 1) { \ | |||
DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
#endif | |||
/** | |||
* @ingroup slog | |||
@@ -366,10 +406,19 @@ extern int CheckLogLevel(int moduleId, int logLevel); | |||
* @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) | |||
* @param [in]fmt: log content | |||
*/ | |||
#define DlogSub(moduleId, submodule, level, fmt, ...) \ | |||
do { \ | |||
DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ | |||
#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||
#define DlogSub(moduleId, submodule, level, fmt, ...) \ | |||
do { \ | |||
DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ | |||
} while (0) | |||
#else | |||
#define DlogSub(moduleId, submodule, level, fmt, ...) \ | |||
do { \ | |||
if(CheckLogLevel(moduleId, level) == 1) { \ | |||
DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
#endif | |||
/** | |||
* @ingroup slog | |||
@@ -381,11 +430,19 @@ extern int CheckLogLevel(int moduleId, int logLevel); | |||
* @param [in]kvNum: key-value element num in array | |||
* @param [in]fmt: log content | |||
*/ | |||
#define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...) \ | |||
do { \ | |||
DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD | |||
#define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...) \ | |||
do { \ | |||
DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} while (0) | |||
#else | |||
#define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...) \ | |||
do { \ | |||
if(CheckLogLevel(moduleId, level) == 1) { \ | |||
DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ | |||
} \ | |||
} while (0) | |||
#endif | |||
/** | |||
* @ingroup slog | |||
@@ -1,10 +1,9 @@ | |||
diff -Npur bounds_checking_function/CMakeLists.txt securec/CMakeLists.txt | |||
--- bounds_checking_function/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 | |||
+++ securec/CMakeLists.txt 2020-05-11 17:10:49.406735400 +0800 | |||
@@ -0,0 +1,19 @@ | |||
diff -Npur -x .git libboundscheck/CMakeLists.txt securec/CMakeLists.txt | |||
--- libboundscheck/CMakeLists.txt 1970-01-01 08:00:00.000000000 +0800 | |||
+++ securec/CMakeLists.txt 2020-09-19 16:53:48.689460700 +0800 | |||
@@ -0,0 +1,18 @@ | |||
+cmake_minimum_required(VERSION 3.14) | |||
+project(Securec) | |||
+set(CMAKE_BUILD_TYPE "Debug") | |||
+set(CMAKE_C_FLAGS_DEBUG "$ENV{CFLAGS} -fPIC -O0 -Wall -Wno-deprecated-declarations -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)='") | |||
+set(CMAKE_C_FLAGS_RELEASE "$ENV{CFLAGS} -fPIC -O3 -Wall -Wno-deprecated-declarations") | |||
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | |||