diff --git a/CMakeLists.txt b/CMakeLists.txt index 41aad1a9..9a9a7a9d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ if (ENABLE_OPEN_SRC) message(FATAL_ERROR "Running on a unsupported architecture: ${SYSTEM_TYPE}, build terminated") endif() set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) + set(STATIC_ACL_LIB ${GE_LIB_PATH}) find_module(slog libslog.so ${GE_LIB_PATH}) find_module(mmpa libmmpa.so ${GE_LIB_PATH}) find_module(msprof libmsprof.so ${GE_LIB_PATH}) @@ -53,7 +54,6 @@ if (ENABLE_OPEN_SRC) find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH}) - find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) else() if(DEFINED ENV{ASCEND_CUSTOM_PATH}) set(ASCEND_DIR $ENV{ASCEND_CUSTOM_PATH}) @@ -66,6 +66,7 @@ if (ENABLE_OPEN_SRC) set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64) set(ASCEND_ATC_DIR ${ASCEND_DIR}/atc/lib64) set(ASCEND_ACL_DIR ${ASCEND_DIR}/acllib/lib64) + set(STATIC_ACL_LIB ${ASCEND_ACL_DIR}) find_module(slog libslog.so ${ASCEND_ATC_DIR}) find_module(mmpa libmmpa.so ${ASCEND_ATC_DIR}) if(PLATFORM STREQUAL "train") @@ -88,7 +89,6 @@ if (ENABLE_OPEN_SRC) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) - find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") find_module(msprof libmsprof.so ${ASCEND_DRIVER_SHARE_DIR}) elseif(PRODUCT STREQUAL "flr1") @@ -111,7 +111,6 @@ if (ENABLE_OPEN_SRC) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) - find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() message(FATAL_ERROR "PLATFORM param is invalid, should be train or 
inference, build terminated") endif() diff --git a/common/graph/stub/Makefile b/common/graph/stub/Makefile deleted file mode 100644 index f339fa33..00000000 --- a/common/graph/stub/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -inc_path := $(shell pwd)/metadef/inc/external/ -out_path := $(shell pwd)/out/graph/lib64/stub/ -stub_path := $(shell pwd)/metadef/graph/stub/ - -mkdir_stub := $(shell mkdir -p $(out_path)) -graph_local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path)) diff --git a/common/graph/stub/gen_stubapi.py b/common/graph/stub/gen_stubapi.py deleted file mode 100644 index 7263ff17..00000000 --- a/common/graph/stub/gen_stubapi.py +++ /dev/null @@ -1,578 +0,0 @@ -import os -import re -import sys -import logging - -logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s', - level=logging.INFO) - -""" - this attr is used for symbol table visible -""" -GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' - -""" - generate stub func body by return type -""" -RETURN_STATEMENTS = { - 'graphStatus': ' std::cout << "[ERROR]: stub library libgraph or libge_compiler cannot be used for execution, please check your "\n ' - ' << "environment variables and compilation options to make sure you use the correct library."\n' - ' << std::endl;\n' - ' return ACL_ERROR_COMPILING_STUB_MODE;', - 'Status': ' return SUCCESS;', - 'Graph': ' return Graph();', - 'Graph&': ' return *this;', - 'Format': ' return Format();', - 'Format&': ' return *this;', - 'Shape': ' return Shape();', - 'Shape&': ' return *this;', - 'TensorDesc': ' return TensorDesc();', - 'TensorDesc&': ' return *this;', - 'Tensor': ' return Tensor();', - 'Tensor&': ' return *this;', - 'Operator': ' return Operator();', - 'Operator&': ' return *this;', - 'Ptr': ' return nullptr;', - 'std::string': ' return "";', - 'std::string&': ' return "";', - 'string': ' return "";', - 'int': ' return 0;', - 'DataType': ' return DT_FLOAT;', - 
'InferenceContextPtr': ' return nullptr;', - 'SubgraphBuilder': ' return nullptr;', - 'OperatorImplPtr': ' return nullptr;', - 'OutHandler': ' return nullptr;', - 'std::vector': ' return {};', - 'std::vector': ' return {};', - 'std::map': ' return {};', - 'uint32_t': ' return 0;', - 'int64_t': ' return 0;', - 'uint64_t': ' return 0;', - 'size_t': ' return 0;', - 'float': ' return 0.0f;', - 'bool': ' return false;', -} - -""" - max code len per line in hua_wei software programming specifications -""" -max_code_len_per_line = 100 - -""" - white_list_for_debug, include_dir_key_words is to - determines which header files to generate cc files from - when DEBUG on -""" -white_list_for_debug = ["attr_value.h", "operator.h", "tensor.h", "graph.h", "operator_factory.h", "inference_context.h", - "ge_ir_build.h", "ge_api.h", "ascend_string.h", "gnode.h"] -include_dir_key_words = ["ge", "graph"] -DEBUG = True - - -def need_generate_func(func_line): - """ - :param func_line: - :return: - """ - if func_line.strip().endswith("default") or func_line.strip().endswith("delete") \ - or func_line.strip().startswith("typedef") or func_line.strip().startswith("using"): - return False - return True - - -def file_endswith_white_list_suffix(file): - """ - :param file: - :return: - """ - if DEBUG: - for suffix in white_list_for_debug: - if file.endswith(suffix): - return True - return False - else: - return True - - -""" - belows are patterns used for analyse .h file -""" -# pattern function -pattern_func = re.compile(r"""(^[\s]*) #leading with space,we will find and delete after -([a-zA-Z~_] # void int likely -.* -[)] #we find ) -(?!.*{) # we do not want the case int abc() const -.*) -(;.*) #we want to find ; and after for we will replace these later -\n$ -""", re.VERBOSE | re.MULTILINE | re.DOTALL) - -# pattern comment -pattern_comment = re.compile(r'^\s*//') -pattern_comment_2_start = re.compile(r'^\s*/[*]') -pattern_comment_2_end = re.compile(r'[*]/\s*$') -# pattern define 
-pattern_define = re.compile(r'^\s*#define') -pattern_define_return = re.compile(r'\\\s*$') -# blank line -pattern_blank_line = re.compile(r'^\s*$') -# virtual,explicit,friend,static -pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)') -# lead space -pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]') -# functions will have patterns such as func ( or func( -# but operator is an exception; the class name is preceded by an operator, and the above mode does not exist -# format like :"operator = ()" -pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]') -# template -pattern_template = re.compile(r'^\s*template') -pattern_template_end = re.compile(r'>\s*$') -# namespace -pattern_namespace = re.compile(r'namespace.*{') -# class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with -pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+ 0 and not friend_match: - line, func_name = self.handle_class_member_func(line, template_string) - # Normal functions - else: - line, func_name = self.handle_normal_func(line, template_string) - - need_generate = need_generate_func(line) - # func body - line += self.implement_function(line) - # comment - line = self.gen_comment(start_i) + line - # write to out file - self.write_func_content(line, func_name, need_generate) - # next loop - self.line_index += 1 - - logging.info('Added %s functions', len(self.func_list_exist)) - logging.info('Successfully converted,please see ' + self.output_file) - - def handle_func1(self, line): - """ - :param line: - :return: - """ - find1 = re.search('[(]', line) - if not find1: - self.line_index += 1 - return "continue", line, None - find2 = re.search('[)]', line) - start_i = self.line_index - space_match = pattern_leading_space.search(line) - # deal with - # int abc(int a, - # int b) - if find1 and (not find2): - self.line_index += 1 - line2 = self.input_content[self.line_index] - if 
space_match: - line2 = re.sub('^' + space_match.group(1), '', line2) - line += line2 - while self.line_index < len(self.input_content) and (not re.search('[)]', line2)): - self.line_index += 1 - line2 = self.input_content[self.line_index] - line2 = re.sub('^' + space_match.group(1), '', line2) - line += line2 - - match_start = pattern_start.search(self.input_content[self.line_index]) - match_end = pattern_end.search(self.input_content[self.line_index]) - if match_start: # like ) { or ) {} int the last line - if not match_end: - self.stack.append('normal_now') - ii = start_i - while ii <= self.line_index: - ii += 1 - self.line_index += 1 - return "continue", line, start_i - logging.info("line[%s]", line) - # ' int abc();'->'int abc()' - (line, match) = pattern_func.subn(r'\2\n', line) - logging.info("line[%s]", line) - # deal with case: - # 'int \n abc(int a, int b)' - if re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]): - line = self.input_content[start_i - 1] + line - line = line.lstrip() - if not match: - self.line_index += 1 - return "continue", line, start_i - return "pass", line, start_i - - def handle_stack(self, match_start): - """ - :param match_start: - :return: - """ - line = self.input_content[self.line_index] - match_end = pattern_end.search(line) - if match_start: - self.stack.append('normal_now') - if match_end: - top_status = self.stack.pop() - if top_status == 'namespace_now': - self.output_fd.write(line + '\n') - elif top_status == 'class_now': - self.stack_class.pop() - self.stack_template.pop() - if match_start or match_end: - self.line_index += 1 - return "continue" - - if len(self.stack) > 0 and self.stack[-1] == 'normal_now': - self.line_index += 1 - return "continue" - return "pass" - - def handle_class(self, template_string, line, match_start, match_class): - """ - :param template_string: - :param line: - :param match_start: - :param match_class: - :return: - """ - if match_class: # we face a class - 
self.stack_template.append(template_string) - self.stack.append('class_now') - class_name = match_class.group(3) - - # class template specializations: class A > - if '<' in class_name: - k = line.index('<') - fit = 1 - for ii in range(k + 1, len(line)): - if line[ii] == '<': - fit += 1 - if line[ii] == '>': - fit -= 1 - if fit == 0: - break - class_name += line[k + 1:ii + 1] - logging.info('class_name[%s]', class_name) - self.stack_class.append(class_name) - while not match_start: - self.line_index += 1 - line = self.input_content[self.line_index] - match_start = pattern_start.search(line) - self.line_index += 1 - return "continue" - return "pass" - - def handle_template(self): - line = self.input_content[self.line_index] - match_template = pattern_template.search(line) - template_string = '' - if match_template: - match_template_end = pattern_template_end.search(line) - template_string = line - while not match_template_end: - self.line_index += 1 - line = self.input_content[self.line_index] - template_string += line - match_template_end = pattern_template_end.search(line) - self.line_index += 1 - return template_string - - def handle_namespace(self): - line = self.input_content[self.line_index] - match_namespace = pattern_namespace.search(line) - if match_namespace: # we face namespace - self.output_fd.write(line + '\n') - self.stack.append('namespace_now') - self.line_index += 1 - - def handle_normal_func(self, line, template_string): - template_line = '' - self.stack_template.append(template_string) - if self.stack_template[-1] != '': - template_line = re.sub(r'\s*template', 'template', self.stack_template[-1]) - # change '< class T = a, class U = A(3)>' to '' - template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line) - template_line = re.sub(r'\s*=.*,', ',', template_line) - template_line = re.sub(r'\s*=.*', '', template_line) - line = re.sub(r'\s*=.*,', ',', line) - line = re.sub(r'\s*=.*\)', ')', line) - line = template_line + line - 
self.stack_template.pop() - func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group() - logging.info("line[%s]", line) - logging.info("func_name[%s]", func_name) - return line, func_name - - def handle_class_member_func(self, line, template_string): - template_line = '' - x = '' - if template_string != '': - template_string = re.sub(r'\s*template', 'template', template_string) - template_string = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_string) - template_string = re.sub(r'\s*=.*,', ',', template_string) - template_string = re.sub(r'\s*=.*', '', template_string) - if self.stack_template[-1] != '': - if not (re.search(r'<\s*>', stack_template[-1])): - template_line = re.sub(r'^\s*template', 'template', stack_template[-1]) - if not (re.search(r'<.*>', self.stack_class[-1])): - # for x we get like template -> - x = re.sub(r'template\s*<', '<', template_line) # remove template -> - x = re.sub(r'\n', '', x) - x = re.sub(r'\s*=.*,', ',', x) - x = re.sub(r'\s*=.*\>', '>', x) - x = x.rstrip() # remove \n - x = re.sub(r'(class|typename)\s+|(|\s*class)', '', - x) # remove class,typename -> - x = re.sub(r'<\s+', '<', x) - x = re.sub(r'\s+>', '>', x) - x = re.sub(r'\s+,', ',', x) - x = re.sub(r',\s+', ', ', x) - line = re.sub(r'\s*=\s+0', '', line) - line = re.sub(r'\s*=\s+.*,', ',', line) - line = re.sub(r'\s*=\s+.*\)', ')', line) - logging.info("x[%s]\nline[%s]", x, line) - # if the function is long, void ABC::foo() - # breaks into two lines void ABC::\n foo() - temp_line = pattern_func_name.sub(self.stack_class[-1] + x + '::' + r'\1(', line, count=1) - if len(temp_line) > max_code_len_per_line: - line = pattern_func_name.sub(self.stack_class[-1] + x + '::\n' + r'\1(', line, count=1) - else: - line = temp_line - logging.info("line[%s]", line) - # add template as the above if there is one - template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line) - template_line = re.sub(r'\s*=.*,', ',', template_line) - template_line = re.sub(r'\s*=.*', '', 
template_line) - line = template_line + template_string + line - func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group() - logging.info("line[%s]", line) - logging.info("func_name[%s]", func_name) - return line, func_name - - def write_func_content(self, content, func_name, need_generate): - if not (func_name in self.func_list_exist) and need_generate: - self.output_fd.write(content) - self.func_list_exist.append(func_name) - logging.info('add func:[%s]', func_name) - - def gen_comment(self, start_i): - comment_line = '' - # Function comments are on top of function declarations, copy them over - k = start_i - 1 # one line before this func start - if pattern_template.search(self.input_content[k]): - k -= 1 - if pattern_comment_2_end.search(self.input_content[k]): - comment_line = self.input_content[k].lstrip() - while not pattern_comment_2_start.search(self.input_content[k]): - k -= 1 - comment_line = self.input_content[k].lstrip() + comment_line - else: - for j in range(k, 0, -1): - c_line = self.input_content[j] - if pattern_comment.search(c_line): - c_line = re.sub(r'\s*//', '//', c_line) - comment_line = c_line + comment_line - else: - break - return comment_line - - @staticmethod - def implement_function(func): - function_def = '' - function_def += '{\n' - - all_items = func.split() - start = 0 - return_type = all_items[start] - if return_type == "const": - start += 1 - return_type = all_items[start] - if return_type.startswith(('std::map', 'std::set', 'std::vector')): - return_type = "std::map" - if return_type.endswith('*') or (len(all_items) > start + 1 and all_items[start + 1].startswith('*')): - return_type = "Ptr" - if len(all_items) > start + 1 and all_items[start + 1].startswith('&'): - return_type += "&" - if RETURN_STATEMENTS.__contains__(return_type): - function_def += RETURN_STATEMENTS[return_type] - else: - logging.warning("Unhandled return type[%s]", return_type) - - function_def += '\n' - function_def += '}\n' - function_def += 
'\n' - return function_def - - -def collect_header_files(path): - """ - :param path: - :return: - """ - header_files = [] - shared_includes_content = [] - for root, dirs, files in os.walk(path): - files.sort() - for file in files: - if file.find("git") >= 0: - continue - if not file.endswith('.h'): - continue - file_path = os.path.join(root, file) - file_path = file_path.replace('\\', '/') - header_files.append(file_path) - include_str = '#include "{}"\n'.format(file_path[path.rindex('/') + 1:]) - shared_includes_content.append(include_str) - # for acl error code - shared_includes_content.append('#include \n') - shared_includes_content.append('const int ACL_ERROR_COMPILING_STUB_MODE = 100039;\n') - return header_files, shared_includes_content - - -def generate_stub_file(inc_dir, out_cc_dir): - """ - :param inc_dir: - :param out_cc_dir: - :return: - """ - target_header_files, shared_includes_content = collect_header_files(inc_dir) - for header_file in target_header_files: - if not file_endswith_white_list_suffix(header_file): - continue - cc_file = re.sub('.h*$', '.cc', header_file) - h_2_cc = H2CC(header_file, out_cc_dir + cc_file[cc_file.rindex('/') + 1:], shared_includes_content) - h_2_cc.h2cc() - - -def gen_code(inc_dir, out_cc_dir): - """ - :param inc_dir: - :param out_cc_dir: - :return: - """ - if not inc_dir.endswith('/'): - inc_dir += '/' - if not out_cc_dir.endswith('/'): - out_cc_dir += '/' - for include_dir_key_word in include_dir_key_words: - generate_stub_file(inc_dir + include_dir_key_word, out_cc_dir) - - -if __name__ == '__main__': - inc_dir = sys.argv[1] - out_cc_dir = sys.argv[2] - gen_code(inc_dir, out_cc_dir) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index a3f9a865..4f162fd3 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -719,10 +719,24 @@ target_link_libraries(ge_compiler ############ libascendcl.so ############ file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/dummy.c CONTENT "") -add_library(dummy_obj OBJECT 
${CMAKE_BINARY_DIR}/dummy.c) -set(DUMMY_OBJ $) +#add_library(dummy_obj OBJECT ${CMAKE_BINARY_DIR}/dummy.c) +#set(DUMMY_OBJ $) -add_library(opensrc_ascendcl SHARED ${DUMMY_OBJ}) +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object) + +if(EXISTS ${STATIC_ACL_LIB}/libascendcl.a) + execute_process( + COMMAND ar x ${STATIC_ACL_LIB}/libascendcl.a + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object + ) + file(GLOB OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object/*.o) +else() + set(OBJECT_LIST ${CMAKE_BINARY_DIR}/dummy.c) +endif() + +add_library(opensrc_ascendcl SHARED + ${OBJECT_LIST} +) target_compile_options(opensrc_ascendcl PRIVATE -O2 -fvisibility=hidden diff --git a/ge/analyzer/analyzer.cc b/ge/analyzer/analyzer.cc index 29181384..b7d09bea 100755 --- a/ge/analyzer/analyzer.cc +++ b/ge/analyzer/analyzer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,34 +40,32 @@ const std::string kFilePath = "./"; const std::string kAnalyzeFile = "ge_check_op.json"; const std::string kUnknownShape = "unknownshape"; -const std::string kUnsupport = "unsupport"; +const std::string kUnsupport = "unsupport"; const std::string kSessionId = "session_id"; -const std::string kGraphId = "graph_id"; -const std::string kOpInfo = "op_info"; +const std::string kGraphId = "graph_id"; +const std::string kOpInfo = "op_info"; const std::string kErrorType = "error_type"; -const std::string kOpName = "name"; -const std::string kOpType = "type"; -const std::string kReason = "reason"; -const std::string kInput = "input"; -const std::string kOutput = "output"; -const std::string kShape = "shape"; -const std::string kDataType = "data_type"; -const std::string kLayout = "layout"; -const std::string kResult = "result"; -const std::string kOp = "op"; - -std::map errors_map { - {PARSER, "paser_error"}, - {INFER_SHAPE, "infer_shape_error"}, - {CHECKSUPPORT, "check_support_error"}, - {GRAPH_OPTIMIZE, "graph_optimize_error"}, - {GRAPH_PARTION, "graph_partion_error"}, - {GRAPH_BUILDER, "graph_builder_error"} -}; -} - -Analyzer* Analyzer::GetInstance() { +const std::string kOpName = "name"; +const std::string kOpType = "type"; +const std::string kReason = "reason"; +const std::string kInput = "input"; +const std::string kOutput = "output"; +const std::string kShape = "shape"; +const std::string kDataType = "data_type"; +const std::string kLayout = "layout"; +const std::string kResult = "result"; +const std::string kOp = "op"; + +std::map errors_map{{PARSER, "paser_error"}, + {INFER_SHAPE, "infer_shape_error"}, + {CHECKSUPPORT, "check_support_error"}, + {GRAPH_OPTIMIZE, "graph_optimize_error"}, + {GRAPH_PARTION, "graph_partion_error"}, + {GRAPH_BUILDER, "graph_builder_error"}}; +} // namespace + +Analyzer *Analyzer::GetInstance() { static Analyzer instance; return &instance; } @@ -77,8 +75,9 @@ Status Analyzer::BuildJsonObject(uint64_t session_id, 
uint64_t graph_id) { std::lock_guard lg(mutex_); auto iter = graph_infos_.find(session_id); if (iter == graph_infos_.end()) { - std::shared_ptr graph_info(new(std::nothrow) GraphInfo()); - GE_CHECK_NOTNULL(graph_info); + auto p = new (std::nothrow) GraphInfo(); + GE_CHECK_NOTNULL(p); + std::shared_ptr graph_info(p); std::map> graph_map; graph_map[graph_id] = graph_info; graph_info->session_id = session_id; @@ -87,8 +86,9 @@ Status Analyzer::BuildJsonObject(uint64_t session_id, uint64_t graph_id) { } else { auto iter1 = (iter->second).find(graph_id); if (iter1 == (iter->second).end()) { - std::shared_ptr graph_info(new(std::nothrow) GraphInfo()); - GE_CHECK_NOTNULL(graph_info); + auto p = new (std::nothrow) GraphInfo(); + GE_CHECK_NOTNULL(p); + std::shared_ptr graph_info(p); graph_info->session_id = session_id; graph_info->graph_id = graph_id; (iter->second).insert({graph_id, graph_info}); @@ -100,14 +100,7 @@ Status Analyzer::BuildJsonObject(uint64_t session_id, uint64_t graph_id) { } ge::Status Analyzer::Initialize() { - // Initialize file - string real_path = RealPath(kFilePath.c_str()); - if (real_path.empty()) { - GELOGE(FAILED, "File path is invalid."); - return FAILED; - } - json_file_name_ = real_path + "/" + kAnalyzeFile; - + ClearHistoryFile(); return SUCCESS; } @@ -145,7 +138,6 @@ void Analyzer::DestroyGraphJsonObject(uint64_t session_id, uint64_t graph_id) { if (iter1 == (iter->second).end()) { GELOGW("Can not find the graph json object by session_id[%lu] and graph_id[%lu]. Do nothing.", session_id, graph_id); - return; } (iter->second).erase(iter1); } @@ -182,8 +174,15 @@ ge::Status Analyzer::CreateAnalyzerFile() { return SUCCESS; } GELOGD("start to create analyzer file!"); - + // Check whether the manifest exists, if not, create it. 
+ string real_path = RealPath(kFilePath.c_str()); + if (real_path.empty()) { + GELOGE(FAILED, "File path is invalid."); + return FAILED; + } std::lock_guard lg(file_mutex_); + json_file_name_ = real_path + "/" + kAnalyzeFile; + GELOGD("Created analyzer file:[%s]", json_file_name_.c_str()); int fd = open(json_file_name_.c_str(), O_WRONLY | O_CREAT | O_TRUNC, kFileAuthority); if (fd < 0) { GELOGE(INTERNAL_ERROR, "Fail to open the file: %s.", json_file_name_.c_str()); @@ -199,27 +198,25 @@ ge::Status Analyzer::CreateAnalyzerFile() { return SUCCESS; } -ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_id) { +ge::Status Analyzer::SaveAnalyzerDataToFile() { GELOGD("start to save analyze file!"); - - auto graph_info = GetJsonObject(session_id, graph_id); - GE_CHECK_NOTNULL(graph_info); - if (graph_info->op_info.size() == 0) { - GELOGD("session_id:%lu graph_id:%lu does not owner op info, break it!", session_id, graph_id); - return SUCCESS; - } std::lock_guard lg(file_mutex_); - json_file_.open(json_file_name_, std::ios::app); + json_file_.open(json_file_name_, std::ios::out); if (!json_file_.is_open()) { GELOGE(FAILED, "analyzer file does not exist[%s]", json_file_name_.c_str()); return PARAM_INVALID; } - json jsn; - GraphInfoToJson(jsn, *graph_info); - json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; - json_file_.close(); + std::lock_guard lk(mutex_); + for (auto &ele : graph_infos_) { + for (auto &ele2 : ele.second) { + json jsn; + GraphInfoToJson(jsn, *(ele2.second)); + json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; + } + } + json_file_.close(); return SUCCESS; } @@ -240,7 +237,13 @@ ge::Status Analyzer::DoAnalyze(DataInfo &data_info) { return FAILED; } // create json file - return CreateAnalyzerFile(); + status = CreateAnalyzerFile(); + if (status != SUCCESS) { + GELOGE(status, "create analyzer file failed!"); + return status; + } + // save data to file + return SaveAnalyzerDataToFile(); } ge::Status 
Analyzer::SaveOpInfo(ge::OpDescPtr desc, DataInfo &data_info, @@ -253,18 +256,18 @@ ge::Status Analyzer::SaveOpInfo(ge::OpDescPtr desc, DataInfo &data_info, op_info.error_type = iter->second; op_info.op_name = desc->GetName(); op_info.op_type = desc->GetType(); - op_info.reason = data_info.reason; + op_info.reason = data_info.reason; for (const auto &ptr : desc->GetAllInputsDescPtr()) { TensorInfo tensor_info; - tensor_info.shape = ptr->GetShape().GetDims(); + tensor_info.shape = ptr->GetShape().GetDims(); tensor_info.d_type = ge::TypeUtils::DataTypeToSerialString(ptr->GetDataType()); tensor_info.layout = ge::TypeUtils::FormatToSerialString(ptr->GetFormat()); op_info.input_info.emplace_back(tensor_info); } for (const auto &ptr : desc->GetAllOutputsDescPtr()) { TensorInfo tensor_info; - tensor_info.shape = ptr->GetShape().GetDims(); + tensor_info.shape = ptr->GetShape().GetDims(); tensor_info.d_type = ge::TypeUtils::DataTypeToSerialString(ptr->GetDataType()); tensor_info.layout = ge::TypeUtils::FormatToSerialString(ptr->GetFormat()); op_info.output_info.emplace_back(tensor_info); @@ -274,13 +277,13 @@ ge::Status Analyzer::SaveOpInfo(ge::OpDescPtr desc, DataInfo &data_info, return SUCCESS; } -void Analyzer::TensorInfoToJson(json& j, const TensorInfo &tensor_info) { +void Analyzer::TensorInfoToJson(json &j, const TensorInfo &tensor_info) { j[kShape] = tensor_info.shape; j[kDataType] = tensor_info.d_type; j[kLayout] = tensor_info.layout; } -void Analyzer::OpInfoToJson(json& j, const OpInfo &op_info) { +void Analyzer::OpInfoToJson(json &j, const OpInfo &op_info) { j[kErrorType] = op_info.error_type; j[kOpName] = op_info.op_name; j[kOpType] = op_info.op_type; @@ -297,7 +300,7 @@ void Analyzer::OpInfoToJson(json& j, const OpInfo &op_info) { } } -void Analyzer::GraphInfoToJson(json& j, const GraphInfo &graph_info) { +void Analyzer::GraphInfoToJson(json &j, const GraphInfo &graph_info) { GELOGD("start to buff graph info!"); j[kSessionId] = graph_info.session_id; j[kGraphId] 
= graph_info.graph_id; @@ -309,4 +312,4 @@ void Analyzer::GraphInfoToJson(json& j, const GraphInfo &graph_info) { } j[kOp] = json_op_infos; } -} // namespace ge +} // namespace ge diff --git a/ge/analyzer/analyzer.h b/ge/analyzer/analyzer.h index fd89b150..1afeeca3 100755 --- a/ge/analyzer/analyzer.h +++ b/ge/analyzer/analyzer.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,12 +33,12 @@ namespace ge { namespace analyzer { enum AnalyzeType { - PARSER = 0, - INFER_SHAPE = 1, - CHECKSUPPORT = 2, + PARSER = 0, + INFER_SHAPE = 1, + CHECKSUPPORT = 2, GRAPH_OPTIMIZE = 3, - GRAPH_PARTION = 4, - GRAPH_BUILDER = 5, + GRAPH_PARTION = 4, + GRAPH_BUILDER = 5, }; struct TensorInfo { @@ -66,8 +66,7 @@ struct DataInfo { DataInfo() = default; ~DataInfo() = default; - DataInfo(uint64_t sess, uint64_t graph, AnalyzeType type, - ge::NodePtr node, std::string error_info) { + DataInfo(uint64_t sess, uint64_t graph, AnalyzeType type, ge::NodePtr node, std::string error_info) { session_id = sess; graph_id = graph; analyze_type = type; @@ -80,10 +79,10 @@ struct DataInfo { ge::NodePtr node_ptr{nullptr}; std::string reason; }; -} +} // namespace analyzer class Analyzer { -public: + public: /** * @ingroup ge * @brief: get analyzer instance. 
@@ -157,39 +156,33 @@ public: */ ge::Status DoAnalyze(analyzer::DataInfo &data_info); - /** - * @ingroup ge - * @brief: Buff analyzed data and output to json file - * @param [in]: session id , graph id - * @return: 0: SUCCESS other: FAILED - */ - ge::Status SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_id); - Analyzer(const Analyzer &) = delete; - Analyzer& operator=(const Analyzer&) = delete; + Analyzer &operator=(const Analyzer &) = delete; Analyzer(Analyzer &&) = delete; - Analyzer& operator=(Analyzer &&) = delete; -private: - void TensorInfoToJson(nlohmann::json& j, const analyzer::TensorInfo &tensor_info); - void OpInfoToJson(nlohmann::json& j, const analyzer::OpInfo &op_info); - void GraphInfoToJson(nlohmann::json& j, const analyzer::GraphInfo &graph_info); + Analyzer &operator=(Analyzer &&) = delete; + + private: + void TensorInfoToJson(nlohmann::json &j, const analyzer::TensorInfo &tensor_info); + void OpInfoToJson(nlohmann::json &j, const analyzer::OpInfo &op_info); + void GraphInfoToJson(nlohmann::json &j, const analyzer::GraphInfo &graph_info); + ge::Status SaveAnalyzerDataToFile(); ge::Status SaveOpInfo(ge::OpDescPtr desc, analyzer::DataInfo &data_info, - std::shared_ptr graph_info); + std::shared_ptr graph_info); void ClearHistoryFile(); ge::Status CreateAnalyzerFile(); - explicit Analyzer() {}; + explicit Analyzer(){}; ~Analyzer() = default; -private: + private: std::map>> graph_infos_; - std::recursive_mutex mutex_; // protect graph_infos_ - std::mutex file_mutex_; // protect json_file_ + std::recursive_mutex mutex_; // protect graph_infos_ + std::mutex file_mutex_; // protect json_file_ std::ofstream json_file_; std::string json_file_name_; std::atomic_bool is_json_file_create_{false}; }; -} // namespace ge -#endif // DOMI_ANALYZER_ANANLYZER_H_ +} // namespace ge +#endif // DOMI_ANALYZER_ANANLYZER_H_ diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index 522985fa..7c4cf9c8 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc 
@@ -384,7 +384,7 @@ Status Session::RunGraphAsync(uint32_t graph_id, const std::vectorSessionManagerObj().RunGraphAsync(sessionId_, graph_id, inputs, callback); if (ret != SUCCESS) { diff --git a/ge/client/ge_prof.cc b/ge/client/ge_prof.cc index 45a315b7..ad9cc9eb 100644 --- a/ge/client/ge_prof.cc +++ b/ge/client/ge_prof.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,21 +39,12 @@ const std::string kDeviceIdList = "devIdList"; const std::string kAicoreMetrics = "aicoreMetrics"; const std::map kProfAicoreMetricsToString = { -<<<<<<< HEAD:ge/client/ge_prof.cc - {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, - {ge::kAicorePipeline, "AICORE_PIPELINE"}, - {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, - {ge::kAicoreMemory, "AICORE_MEMORY"}, - {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, - {ge::kAicoreStall, "AICORE_STALL"}}; -======= {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, {ge::kAicorePipeline, "AICORE_PIPELINE"}, {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, {ge::kAicoreMemory, "AICORE_MEMORY"}, {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, {ge::kAicoreStall, "AICORE_STALL"}}; ->>>>>>> cd365aa247c64e30487d1e71e4f724a889848f80:src/ge/client/ge_prof.cc } // namespace static bool g_graph_prof_init_ = false; @@ -174,7 +165,7 @@ bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector(profiler_config->config.aicoreMetrics)); + kProfAicoreMetricsToString.find(static_cast(profiler_config->config.aicoreMetrics)); if (iter == kProfAicoreMetricsToString.end()) { GELOGW("The prof aicore metrics is invalid."); return false; @@ -333,17 +324,10 @@ Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { return GE_PROF_NOT_INIT; } - for (uint32_t i = 0; i < 
profiler_config->config.devNums; i++) { - uint64_t data_type_config; - Status status = ProfGetDataTypeConfig(profiler_config->config.devIdList[i], data_type_config); - if (status != SUCCESS) { - GELOGE(status, "Prof get data type config failed, prof result = %d", status); - return status; - } - if (data_type_config != profiler_config->config.dataTypeConfig) { - GELOGE(FAILED, "data type config verify failed"); - return FAILED; - } + Status ret = ProfStopProfiling(&profiler_config->config); + if (ret != SUCCESS) { + GELOGE(ret, "Stop profiling failed, prof result = %d", ret); + return ret; } std::vector prof_params; @@ -360,22 +344,12 @@ Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { command.module_index = profiler_config->config.dataTypeConfig; GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str(), command.module_index); -<<<<<<< HEAD:ge/client/ge_prof.cc - Status ret = graph_loader.CommandHandle(command); -======= ret = graph_loader.CommandHandle(command); ->>>>>>> cd365aa247c64e30487d1e71e4f724a889848f80:src/ge/client/ge_prof.cc if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command failed"); return FAILED; } - ret = ProfStopProfiling(&profiler_config->config); - if (ret != SUCCESS) { - GELOGE(ret, "Stop profiling failed, prof result = %d", ret); - return ret; - } - GELOGI("Successfully execute GraphProfStopProfiling."); return SUCCESS; } diff --git a/ge/client/proto/ge_api.proto b/ge/client/proto/ge_api.proto index 26d705fe..ac5b3b3a 100644 --- a/ge/client/proto/ge_api.proto +++ b/ge/client/proto/ge_api.proto @@ -1 +1,104 @@ -../../proto/ge_api.proto \ No newline at end of file +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; +package ge.api_pb; + +import "ge_ir.proto"; + +// GE initialize +message GEInitialize { + map options = 1; +}; + +// initialize response +message GEInitializeResponse { + uint32 status = 1; + uint32 clientId = 2; +}; + +// GE finalize +message GEFinalize { + bool final = 1; + uint32 clientId = 2; +}; + +message GEFinalizeResponse { + uint32 status = 1; +}; + +// GE Session +message CreateSession{ + map options = 1; +}; + +message CreateSessionResponse { + uint32 status = 1; + uint64 sessionId = 2; +}; + +//GE AddGraph +//model serialize :: serializegraph +message SessionAddGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; + ge.proto.GraphDef graph = 3; +}; + +message SessionAddGraphResponse { + uint32 status = 1; +}; + +//GE SessionRemoveGraph +message SessionRemoveGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; +}; + +message SessionRemoveGraphResponse { + uint32 status = 1; +}; + +message SessionRunGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; + repeated ge.proto.TensorDef tensor = 3; +}; + +message SessionBuildGraph{ + uint32 graphId = 1; + uint64 sessionId = 2; + repeated ge.proto.TensorDef tensor = 3; + string savePath = 4; +}; + +message SessionRunGraphResponse { + uint32 status = 1; + repeated ge.proto.TensorDef tensor = 2; +}; + +message SessionBuildGraphResponse { + uint32 status = 1; +}; + +message DestroySession{ + bool final = 1; + uint64 sessionId = 2; +}; + +message DestroySessionResponse { + uint32 status = 1; +}; diff --git a/ge/client/proto/ge_ir.proto b/ge/client/proto/ge_ir.proto index 
e7bfe0cb..87886c84 100644 --- a/ge/client/proto/ge_ir.proto +++ b/ge/client/proto/ge_ir.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package ge.proto; diff --git a/ge/client/proto/insert_op.proto b/ge/client/proto/insert_op.proto index bf918b20..a059e122 100644 --- a/ge/client/proto/insert_op.proto +++ b/ge/client/proto/insert_op.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + syntax = "proto3"; package domi; @@ -45,9 +61,6 @@ message AippOpParams { // ±êʶ¶ÔÄ£Ð͵ĵڼ¸¸öÊäÈë×öAIPP´¦Àí£¬ÀýÈçÄ£ÐÍÓÐÁ½¸öÊäÈ룬ÐèÒª¶ÔµÚ2¸öÊäÈë×öAIPP£¬ÔòÅäÖÃrelated_input_rankΪ1¡£ uint32 related_input_rank = 2; - // related_input_name is optional and the top name of data node which inserts aipp - string related_input_name = 6; - // input_edge_idx²ÎÊýΪ¿ÉÑ¡£¬ÀàÐÍΪÕûÐÍ£¬ÅäÖ÷¶Î§Îª>=0¡£ // ÅäÖøòÎÊýµÄ×÷Óã¬ÔÚÓÚ¶ÔDataËã×Ó²»Í¬µÄÊä³ö×ö²»Í¬µÄAIPP´¦Àí£¬Èç¹û¸Ã²ÎÊýûÓÐÅäÖã¬Ä¬È϶Ôrelated_input_rankÖ¸¶¨µÄÄ£ÐÍÊäÈëµÄËùÓÐÊä³ö±ß×öAIPP¡£ // ÅäÖÃÖµ <= DataËã×ÓÊä³ö±ßµÄ¸öÊý¡£ diff --git a/ge/client/proto/om.proto b/ge/client/proto/om.proto index e15e5f80..dd992191 100755 --- a/ge/client/proto/om.proto +++ b/ge/client/proto/om.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ + syntax = "proto3"; package domi; diff --git a/ge/client/proto/task.proto b/ge/client/proto/task.proto index d0c09840..50ea061b 100644 --- a/ge/client/proto/task.proto +++ b/ge/client/proto/task.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + syntax = "proto3"; package domi; diff --git a/ge/common/auth/file_saver.cc b/ge/common/auth/file_saver.cc index 60d99c0b..4aaf9c19 100755 --- a/ge/common/auth/file_saver.cc +++ b/ge/common/auth/file_saver.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -55,26 +55,9 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) { Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size == 0 || data == nullptr, return PARAM_INVALID); - mmSsize_t write_count; - uint32_t size_2g = ((uint32_t) 0x1 << 31); - uint32_t size_1g = ((uint32_t) 0x1 << 30); - // Write data - if (size > size_2g) { - auto seek = reinterpret_cast(const_cast(data)); - while (size > size_1g) { - write_count = mmWrite(fd, reinterpret_cast(seek), size_1g); - if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) { - GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno)); - return FAILED; - } - size -= size_1g; - seek += size_1g; - } - write_count = mmWrite(fd, reinterpret_cast(seek), size); - } else { - write_count = mmWrite(fd, const_cast(data), size); - } + // Write data + int32_t write_count = mmWrite(fd, const_cast(data), size); // -1: Failed to write to file; - 2: Illegal parameter if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) { GELOGE(FAILED, "Write data failed. 
mmpa_errorno = %d, %s", write_count, strerror(errno)); @@ -116,10 +99,10 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi ModelPartitionTable &model_partition_table, const std::vector &partition_datas) { - GE_CHK_BOOL_RET_STATUS(!partition_datas.empty() && model_partition_table.num != 0 - && model_partition_table.num == partition_datas.size(), FAILED, - "Invalid param:partition data size is (%u), model_partition_table.num is (%zu).", - model_partition_table.num, partition_datas.size()); + GE_CHK_BOOL_RET_STATUS( + !partition_datas.empty() && model_partition_table.num != 0 && model_partition_table.num == partition_datas.size(), + FAILED, "Invalid param:partition data size is (%u), model_partition_table.num is (%zu).", model_partition_table.num, + partition_datas.size()); // Open file int32_t fd = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(OpenFile(fd, file_path) != SUCCESS, return FAILED); @@ -127,18 +110,16 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi do { // Write file header GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - WriteData(static_cast(&file_header), sizeof(ModelFileHeader), fd) != SUCCESS, ret = FAILED; - break); + WriteData(static_cast(&file_header), sizeof(ModelFileHeader), fd) != SUCCESS, ret = FAILED; break); // Write model partition table uint32_t table_size = static_cast(SIZE_OF_MODEL_PARTITION_TABLE(model_partition_table)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - WriteData(static_cast(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break); + WriteData(static_cast(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break); // Write partition data for (const auto &partitionData : partition_datas) { - GELOGI("GC:size[%zu]", partitionData.size); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - WriteData(static_cast(partitionData.data), partitionData.size, fd) != SUCCESS, ret = FAILED; - break); + WriteData(static_cast(partitionData.data), partitionData.size, fd) != SUCCESS, ret = FAILED; 
+ break); } } while (0); // Close file @@ -151,9 +132,9 @@ Status FileSaver::SaveToBuffWithFileHeader(const ModelFileHeader &file_header, const std::vector &partitionDatas, ge::ModelBufferData &model) { GE_CHK_BOOL_RET_STATUS( - !partitionDatas.empty() && model_partition_table.num != 0 && model_partition_table.num == partitionDatas.size(), - FAILED, "Invalid param:partition data size is (%u), model_partition_table.num is (%zu).", - model_partition_table.num, partitionDatas.size()); + !partitionDatas.empty() && model_partition_table.num != 0 && model_partition_table.num == partitionDatas.size(), + FAILED, "Invalid param:partition data size is (%u), model_partition_table.num is (%zu).", model_partition_table.num, + partitionDatas.size()); uint32_t model_header_size = sizeof(ModelFileHeader); uint32_t table_size = static_cast(SIZE_OF_MODEL_PARTITION_TABLE(model_partition_table)); uint32_t total_size = model_header_size + table_size; diff --git a/ge/common/auth/file_saver.h b/ge/common/auth/file_saver.h index 79e2126e..d415746d 100644 --- a/ge/common/auth/file_saver.h +++ b/ge/common/auth/file_saver.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -74,10 +74,8 @@ class FileSaver { ModelPartitionTable &model_partition_table, const std::vector &partition_datas); - static Status SaveToBuffWithFileHeader(const ModelFileHeader &file_header, - ModelPartitionTable &model_partition_table, - const std::vector &partitionDatas, - ge::ModelBufferData& model); + static Status SaveToBuffWithFileHeader(const ModelFileHeader &file_header, ModelPartitionTable &model_partition_table, + const std::vector &partitionDatas, ge::ModelBufferData &model); static Status SaveToFile(const string &file_path, const void *data, int len); diff --git a/ge/common/base64.h b/ge/common/base64.h index fb6c1870..26819c88 100644 --- a/ge/common/base64.h +++ b/ge/common/base64.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,24 +25,23 @@ namespace ge { namespace { -const char* kBase64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; +const char *kBase64Chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; const char kEqualSymbol = '='; const size_t kBase64CharsNum = 64; const size_t kThreeByteOneGroup = 3; const size_t kFourByteOneGroup = 4; -} +} // namespace namespace base64 { -static inline bool IsBase64Char(const char &c) { - return (isalnum(c) || (c == '+') || (c == '/')); -} +static inline bool IsBase64Char(const char &c) { return (isalnum(c) || (c == '+') || (c == '/')); } static std::string EncodeToBase64(const std::string &raw_data) { size_t encode_length = raw_data.size() / kThreeByteOneGroup * kFourByteOneGroup; encode_length += raw_data.size() % kThreeByteOneGroup == 0 ? 
0 : kFourByteOneGroup; - size_t raw_data_index = 0 ; + size_t raw_data_index = 0; size_t encode_data_index = 0; std::string encode_data; encode_data.resize(encode_length); @@ -80,8 +79,7 @@ static std::string EncodeToBase64(const std::string &raw_data) { #pragma GCC diagnostic ignored "-Wunused-function" static Status DecodeFromBase64(const std::string &base64_data, std::string &decode_data) { if (base64_data.size() % kFourByteOneGroup != 0) { - GELOGE(PARAM_INVALID, "base64 data size must can be divided by 4, but given data size is %zu", - base64_data.size()); + GELOGE(PARAM_INVALID, "base64 data size must can be divided by 4, but given data size is %zu", base64_data.size()); return PARAM_INVALID; } decode_data.clear(); @@ -94,8 +92,7 @@ static Status DecodeFromBase64(const std::string &base64_data, std::string &deco for (std::size_t input_data_index = 0; input_data_index < base64_data_len; input_data_index += 4) { for (size_t i = 0; i < kFourByteOneGroup; ++i) { - if (base64_data[input_data_index + i] == kEqualSymbol && - input_data_index >= base64_data_len - 4 && i > 1) { + if (base64_data[input_data_index + i] == kEqualSymbol && input_data_index >= base64_data_len - 4 && i > 1) { byte_4[i] = kBase64CharsNum; } else if (IsBase64Char(base64_data[input_data_index + i])) { byte_4[i] = FindCharInBase64Chars(base64_data[input_data_index + i]); @@ -105,18 +102,18 @@ static Status DecodeFromBase64(const std::string &base64_data, std::string &deco } } decode_data += static_cast((byte_4[0] << 2u) + ((byte_4[1] & 0x30) >> 4u)); - if (byte_4[2] >= kBase64CharsNum){ + if (byte_4[2] >= kBase64CharsNum) { break; } else if (byte_4[3] >= kBase64CharsNum) { - decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); + decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); break; } - decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); - decode_data += static_cast(((byte_4[2] & 0x03) << 6u) + 
byte_4[3]); + decode_data += static_cast(((byte_4[1] & 0x0f) << 4u) + ((byte_4[2] & 0x3c) >> 2u)); + decode_data += static_cast(((byte_4[2] & 0x03) << 6u) + byte_4[3]); } return SUCCESS; } #pragma GCC diagnostic pop -} +} // namespace base64 } // namespace ge #endif // GE_COMMON_BASE64_H_ \ No newline at end of file diff --git a/ge/common/context/ctx.cc b/ge/common/context/ctx.cc index 9fe2f8c7..f6ae364d 100755 --- a/ge/common/context/ctx.cc +++ b/ge/common/context/ctx.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/cust_aicpu_kernel_store.cc b/ge/common/cust_aicpu_kernel_store.cc index 86881b0e..46eb484b 100755 --- a/ge/common/cust_aicpu_kernel_store.cc +++ b/ge/common/cust_aicpu_kernel_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,9 +20,7 @@ namespace ge { CustAICPUKernelStore::CustAICPUKernelStore() {} -void CustAICPUKernelStore::AddCustAICPUKernel(const CustAICPUKernelPtr &kernel) { - AddKernel(kernel); -} +void CustAICPUKernelStore::AddCustAICPUKernel(const CustAICPUKernelPtr &kernel) { AddKernel(kernel); } void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr &op_desc) const { GELOGI("LoadCustAICPUKernelBinToOpDesc in"); diff --git a/ge/common/cust_aicpu_kernel_store.h b/ge/common/cust_aicpu_kernel_store.h index 033a636b..6dff0435 100755 --- a/ge/common/cust_aicpu_kernel_store.h +++ b/ge/common/cust_aicpu_kernel_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/debug/memory_dumper.cc b/ge/common/debug/memory_dumper.cc index d2b8d674..1a7d9db8 100644 --- a/ge/common/debug/memory_dumper.cc +++ b/ge/common/debug/memory_dumper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/debug/memory_dumper.h b/ge/common/debug/memory_dumper.h index a71f86f4..4995f5f7 100755 --- a/ge/common/debug/memory_dumper.h +++ b/ge/common/debug/memory_dumper.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/dump/dump_manager.cc b/ge/common/dump/dump_manager.cc index 17019c5a..fbf9afe7 100644 --- a/ge/common/dump/dump_manager.cc +++ b/ge/common/dump/dump_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/dump/dump_manager.h b/ge/common/dump/dump_manager.h index 53a643f9..dbc89cc8 100644 --- a/ge/common/dump/dump_manager.h +++ b/ge/common/dump/dump_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc index e92ada05..8c4ff330 100755 --- a/ge/common/dump/dump_op.cc +++ b/ge/common/dump/dump_op.cc @@ -252,4 +252,4 @@ Status DumpOp::LaunchDumpOp() { } return SUCCESS; } -} // namesapce ge +} // namespace ge diff --git a/ge/common/dump/dump_op.h b/ge/common/dump/dump_op.h index d59962e6..b3042245 100755 --- a/ge/common/dump/dump_op.h +++ b/ge/common/dump/dump_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/dump/dump_properties.cc b/ge/common/dump/dump_properties.cc index a4540367..b6247c6e 100644 --- a/ge/common/dump/dump_properties.cc +++ b/ge/common/dump/dump_properties.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,14 +35,14 @@ const std::string kDumpStatusOpen = "on"; const uint32_t kAicoreOverflow = (0x1 << 0); const uint32_t kAtomicOverflow = (0x1 << 1); const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow); -} +} // namespace namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) { CopyFrom(other); } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=( - const DumpProperties &other) { + const DumpProperties &other) { CopyFrom(other); return *this; } @@ -97,7 +97,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti // The following is the new dump scenario of the fusion operator FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue( - const std::string &model, const std::set &layers) { + const std::string &model, const std::set &layers) { for (const std::string &layer : layers) { GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str()); } @@ -136,7 +136,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set DumpPrope } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set DumpProperties::GetPropertyValue( - const std::string &model) const { + const std::string &model) const { auto iter = model_dump_properties_map_.find(model); if (iter != model_dump_properties_map_.end()) { return iter->second; @@ -145,7 +145,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set DumpPrope } FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump( - const std::string &model, const std::string &om_name, const std::string &op_name) const { + const std::string &model, const std::string &om_name, const std::string &op_name) const { // if dump all if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) { return true; @@ -201,7 +201,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch( - const std::string dump_op_switch) { + const std::string &dump_op_switch) { dump_op_switch_ = dump_op_switch; } @@ -266,4 +266,4 @@ void DumpProperties::SetDumpDebugOptions() { GELOGI("ge.exec.enableDumpDebug is false or is not set."); } } -} // namespace +} // namespace ge diff --git a/ge/common/dump/dump_properties.h b/ge/common/dump/dump_properties.h index 682d2d08..7909d5a5 100644 --- a/ge/common/dump/dump_properties.h +++ b/ge/common/dump/dump_properties.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -65,7 +65,7 @@ class DumpProperties { const std::string &GetDumpStatus() const; - void SetDumpOpSwitch(const std::string dump_op_switch); + void SetDumpOpSwitch(const std::string &dump_op_switch); const std::string &GetDumpOpSwitch() const; @@ -77,9 +77,9 @@ class DumpProperties { uint32_t GetOpDebugMode() const { return op_debug_mode_; } - const std::string &GetEnableDump() const {return enable_dump_;} + const std::string &GetEnableDump() const { return enable_dump_; } - const std::string &GetEnableDumpDebug() const {return enable_dump_debug_;} + const std::string &GetEnableDumpDebug() const { return enable_dump_debug_; } private: void CopyFrom(const DumpProperties &other); @@ -99,6 +99,6 @@ class DumpProperties { bool is_op_debug_ = false; uint32_t op_debug_mode_ = 0; }; -} +} // namespace ge -#endif //GE_COMMON_DUMP_DUMP_PROPERTIES_H_ \ No newline at end of file +#endif // GE_COMMON_DUMP_DUMP_PROPERTIES_H_ \ No newline at end of file diff --git a/ge/common/dump/dump_server.cc b/ge/common/dump/dump_server.cc index 44e0f872..1f95dc3a 100644 --- a/ge/common/dump/dump_server.cc +++ b/ge/common/dump/dump_server.cc @@ -1,5 +1,5 @@ /** -* Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/fmk_error_codes.cc b/ge/common/fmk_error_codes.cc index ddb8089d..3ad8503a 100755 --- a/ge/common/fmk_error_codes.cc +++ b/ge/common/fmk_error_codes.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/datatype_transfer.cc b/ge/common/formats/format_transfers/datatype_transfer.cc index 725eed98..a603b2f4 100644 --- a/ge/common/formats/format_transfers/datatype_transfer.cc +++ b/ge/common/formats/format_transfers/datatype_transfer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,8 +89,8 @@ Status TransDataSrc2Fp16(const CastArgs &args, uint8_t *dst, const size_t data_s } Status CastKernel(const CastArgs &args, uint8_t *dst, const size_t data_size, const DataTypeTransMode trans_mode) { - static std::map> - transfer_handle = { + static std::map> transfer_handle = + { {kTransferWithDatatypeFloatToFloat16, TransDataSrc2Fp16}, {kTransferWithDatatypeFloatToInt32, TransDataSrc2Dst}, {kTransferWithDatatypeFloat16ToFloat, TransDataSrc2Dst}, @@ -107,7 +107,7 @@ Status CastKernel(const CastArgs &args, uint8_t *dst, const size_t data_size, co {kTransferWithDatatypeInt32ToInt64, TransDataSrc2Dst}, {kTransferWithDatatypeInt32ToDouble, TransDataSrc2Dst}, {kTransferWithDatatypeDoubleToInt32, TransDataSrc2Dst}, - }; + }; auto it = transfer_handle.find(trans_mode); if (it == transfer_handle.end()) { return UNSUPPORTED; diff --git a/ge/common/formats/format_transfers/datatype_transfer.h b/ge/common/formats/format_transfers/datatype_transfer.h index 22313e90..4d93fd6c 100755 --- a/ge/common/formats/format_transfers/datatype_transfer.h +++ b/ge/common/formats/format_transfers/datatype_transfer.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc index 12d13e44..40dc749d 100644 --- a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc +++ b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -105,8 +105,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size auto dst_offset = dst_idx * size; // The memcpy_s/memset_s argument `dstMax` must be less than 2G auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h index 8ff704eb..d2156018 100644 --- a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h +++ b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc index 4060a3b2..76d8696a 100644 --- a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc +++ b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h" #include diff --git a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h index 6a31a746..41581dec 100755 --- a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h +++ b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWCN_FRACTAL_Z_3D_H_ #define GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWCN_FRACTAL_Z_3D_H_ diff --git a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc index 457469c7..9de2e3a0 100644 --- a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h" #include diff --git a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h index 728cfbdc..1c4986b8 100755 --- a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h +++ b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWNC_FRACTAL_Z_3D_TRANSPOSE_H_ #define GE_COMMON_FORMATS_FORMAT_TRANSFERS_FORMAT_TRANSFER_DHWNC_FRACTAL_Z_3D_TRANSPOSE_H_ diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc index cb4de6b5..65798f29 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -154,8 +154,9 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con for (int64_t w1_idx = 0; w1_idx < num_w1; w1_idx++) { auto dst_offset = (h1h0_head + w1_idx * h1h0w0) * size; auto src_offset = (src_h_head + w1_idx * w0) * size; - auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? - dst_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -168,8 +169,9 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con auto src_w_idx = w1_head + w0_idx; auto dst_offset = (h1h0_head + num_w1 * h1h0w0 + w0_idx) * size; auto src_offset = (src_h_head + src_w_idx) * size; - auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? - dst_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) + ? 
dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -225,8 +227,9 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con for (int64_t w1_idx = 0; w1_idx < num_w1; w1_idx++) { auto src_offset = (h1h0_head + w1_idx * h1h0w0) * size; auto dst_offset = (dst_h_head + w1_idx * w0) * size; - auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? - dst_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -239,8 +242,9 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con auto dst_w_idx = w1_head + w0_idx; auto src_offset = (h1h0_head + num_w1 * h1h0w0 + w0_idx) * size; auto dst_offset = (dst_h_head + dst_w_idx) * size; - auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? - dst_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) + ? 
dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.h b/ge/common/formats/format_transfers/format_transfer_fractal_nz.h index 68abdbc8..49e82884 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.h +++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc index 0e941486..f2ec29da 100644 --- a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -124,11 +124,11 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) { std::shared_ptr dst(new (std::nothrow) uint8_t[dst_size], std::default_delete()); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - dst == nullptr, - GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); - return OUT_OF_MEMORY;); + dst == nullptr, + GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", + TypeUtils::FormatToSerialString(args.src_format).c_str(), + TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); + return OUT_OF_MEMORY;); for (int64_t vfi = 0; vfi < vf_cnt; vfi++) { // vertical fractal matrix base index @@ -152,8 +152,8 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) { auto idx = gfi * fractal_ele_cnt + col * c0 + row; auto offset = idx * size; auto protected_size = dst_size - offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - offset + : static_cast(SECUREC_MEM_MAX_LEN); errno_t ret = EOK; if (need_pad_zero) { ret = memset_s(dst.get() + offset, static_cast(protected_size), 0, static_cast(size)); @@ -209,11 +209,11 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) { std::shared_ptr dst(new (std::nothrow) uint8_t[dst_size], std::default_delete()); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - dst == nullptr, - GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); - return OUT_OF_MEMORY;); + dst == nullptr, + GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", + TypeUtils::FormatToSerialString(args.src_format).c_str(), + TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); + return OUT_OF_MEMORY;); for (int64_t c1i = 0; c1i < c1; c1i++) { for (int64_t hi = 0; hi < h; hi++) { @@ -223,8 +223,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) { int64_t dst_idx = c1i * hwn1n0c0 + hi * wn1n0c0 + wi * n1n0c0 + n1n0i * c0 + c0i; int64_t dst_offset = dst_idx * data_size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto pad_zero = ((c1i * c0 + c0i) >= c) || (n1n0i >= n); errno_t ret = EOK; if (pad_zero) { @@ -284,11 +284,11 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) { std::shared_ptr dst(new (std::nothrow) uint8_t[dst_size], std::default_delete()); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - dst == nullptr, - GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", - TypeUtils::FormatToSerialString(args.src_format).c_str(), - TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); - return OUT_OF_MEMORY;); + dst == nullptr, + GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld", + TypeUtils::FormatToSerialString(args.src_format).c_str(), + TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size); + return OUT_OF_MEMORY;); for (int64_t c1i = 0; c1i < c1; c1i++) { for (int64_t hi = 0; hi < h; hi++) { @@ -298,8 +298,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) { int64_t dst_idx = c1i * hwn1n0c0 + hi * wn1n0c0 + wi * n1n0c0 + n1n0i * c0 + c0i; int64_t dst_offset = dst_idx * data_size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto pad_zero = ((c1i * c0 + c0i) >= c) || (n1n0i >= n); errno_t ret = EOK; if (pad_zero) { diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_z.h b/ge/common/formats/format_transfers/format_transfer_fractal_z.h index d640eb60..5ae83303 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_z.h +++ b/ge/common/formats/format_transfers/format_transfer_fractal_z.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc index 009bce2b..d5507765 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_zz.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -158,8 +158,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con auto src_offset = (src_h_head + w1_idx * w0) * size; auto dst_offset = (h0_head + w1_idx * h0w0) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -174,8 +174,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con auto src_offset = (src_h_head + src_w_idx) * size; auto dst_offset = (w0_head + w0_idx) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -236,8 +236,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con auto src_offset = (h0_head + w1_idx * h0w0) * size; auto dst_offset = (dst_h_head + w1_idx * w0) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size * w0)); if (ret != EOK) { @@ -252,8 +252,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con auto dst_w_idx = w1_head + w0_idx; auto dst_offset = (dst_h_head + dst_w_idx) * size; auto protected_size = dst_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_zz.h b/ge/common/formats/format_transfers/format_transfer_fractal_zz.h index c1898e5b..93f40920 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_zz.h +++ b/ge/common/formats/format_transfers/format_transfer_fractal_zz.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc index 2076f6f9..b0eebcfa 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc +++ b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,8 +104,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in int64_t src_idx = c1_idx * hwncc0 + h_idx * wncc0 + w_idx * ncc0 + nc_idx * c0 + c0_idx; auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; - auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? - total_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) + ? 
total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -139,7 +140,7 @@ Status FormatTransferFracZHwcn::TransFormat(const TransArgs &args, TransResult & } GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, - ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); + ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); return PARAM_INVALID; } GELOGD("Begin to trans format from FracZ to HWCN, src shape %s, data type %s, dst shape %s, memory size %ld", diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h index 4cc393d3..a7efbfcb 100644 --- a/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h +++ b/ge/common/formats/format_transfers/format_transfer_fracz_hwcn.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc index 042559ca..9f8d9e39 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -104,8 +104,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in int64_t src_idx = c1_idx * hwncc0 + h_idx * wncc0 + w_idx * ncc0 + nc_idx * c0 + c0_idx; auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; - auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? - total_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -139,7 +140,7 @@ Status FormatTransferFracZNchw::TransFormat(const TransArgs &args, TransResult & } GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, - ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); + ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); return PARAM_INVALID; } GELOGD("Begin to trans format from FracZ to NCHW, src shape %s, data type %s, dst shape %s, memory size %ld", diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h index 9b22a7e0..af2cedd0 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc index 98bd1807..9a1e5f3b 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,8 +104,9 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size int64_t src_idx = c1_idx * hwncc0 + h_idx * wncc0 + w_idx * ncc0 + nc_idx * c0 + c0_idx; auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; - auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) ? - total_size - dst_offset : static_cast(SECUREC_MEM_MAX_LEN); + auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) + ? 
total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { @@ -138,7 +139,7 @@ Status FormatTransferFracZNhwc::TransFormat(const TransArgs &args, TransResult & } GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size, - ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); + ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str()); return PARAM_INVALID; } GELOGD("Begin to trans format from FracZ to NHWC, src shape %s, data type %s, dst shape %s, memory size %ld", diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h index efeb2506..41654304 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nhwc.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc index d2f8cf30..7101256a 100755 --- a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -122,8 +122,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in int64_t dst_idx = c0_idx + co_head_addr; auto dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); int64_t c_idx = c0_idx + c1_idx * c0; int64_t src_idx = h_idx * wcn + w_idx * cn + c_idx * n + n_idx; auto src_offset = src_idx * size; @@ -141,7 +141,7 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in } } else { auto ret = - memset_s(dst.get() + dst_offset, static_cast(protected_size), 0, static_cast(size)); + memset_s(dst.get() + dst_offset, static_cast(protected_size), 0, static_cast(size)); if (ret != EOK) { GELOGE(INTERNAL_ERROR, "Failed to set to 0 to C1HWNCoC0[%ld, %ld, %ld, %ld, %ld, %ld] offset %ld, " diff --git a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h index 079cb449..81d7358e 100644 --- a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h +++ b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc index 31744d86..57ab1266 100755 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,8 +102,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h index 453c843e..6d599933 100755 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc index ee3f9917..e68e54de 100755 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,8 +102,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in auto src_offset = src_idx * size; auto dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset, static_cast(protected_size), args.data + src_offset, static_cast(size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h index 8b456019..8ff60bb1 100755 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc index 6f065fc5..2039da47 100644 --- a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc +++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h index d9a3490c..4a0fce95 100755 --- a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h +++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc index ebc15da7..b4e92cbc 100755 --- a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -115,8 +115,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in int64_t dst_index = c0_idx + w_head_addr; int64_t dst_offset = dst_index * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); int64_t cIdx = c0_idx + c1_idx * c0; int64_t srcIdx = n_idx * chw + cIdx * hw + h_idx * w + w_idx; auto src_offset = srcIdx * size; @@ -133,7 +133,7 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in } } else { auto ret = - memset_s(dst.get() + dst_offset, static_cast(protected_size), 0, static_cast(size)); + memset_s(dst.get() + dst_offset, static_cast(protected_size), 0, static_cast(size)); if (ret != EOK) { GELOGE(INTERNAL_ERROR, "Failed to set to 0 to " @@ -173,10 +173,10 @@ Status FormatTransferNchwNc1hwc0::TransFormat(const TransArgs &args, TransResult return PARAM_INVALID; } GELOGD( - "Begin to trans format from NCHW to NC1HWC0, src shape %s, data type " - "%s, dst shape %s memory size %ld", - ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), - ShapeToString(args.dst_shape).c_str(), total_size); + "Begin to trans format from NCHW to NC1HWC0, src shape %s, data type " + "%s, dst shape %s memory size %ld", + ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), + ShapeToString(args.dst_shape).c_str(), total_size); if (GetDstDataAfterTrans(args, result, size, total_size) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to get data after trans, src shape %s, data type %s, dst shape %s, memory size %ld", ShapeToString(args.src_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str(), diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h 
index dd31574d..c6269579 100644 --- a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h +++ b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc index 3ae7a924..a5be94ff 100755 --- a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -119,8 +119,8 @@ Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const in int64_t dst_idx = c0_idx + w_head_addr; int64_t dst_offset = dst_idx * size; auto protected_size = total_size - dst_offset < static_cast(SECUREC_MEM_MAX_LEN) - ? total_size - dst_offset - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
total_size - dst_offset + : static_cast(SECUREC_MEM_MAX_LEN); int64_t c_idx = c0_idx + c1_idx * c0; int64_t src_idx = n_idx * hwc + h_idx * wc + w_idx * c + c_idx; auto src_offset = src_idx * size; diff --git a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h index 47c0d50e..fb190f54 100755 --- a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h +++ b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.cc b/ge/common/formats/format_transfers/format_transfer_transpose.cc index 19f54040..3be4d67d 100755 --- a/ge/common/formats/format_transfers/format_transfer_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_transpose.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,22 +28,22 @@ namespace ge { namespace formats { namespace { std::map>> perm_args{ - {FORMAT_NCHW, - {{FORMAT_NHWC, std::vector({0, 2, 3, 1})}, - {FORMAT_HWCN, std::vector({2, 3, 1, 0})}, - {FORMAT_CHWN, std::vector({1, 2, 3, 0})}}}, - {FORMAT_NHWC, - {{FORMAT_NCHW, std::vector({0, 3, 1, 2})}, - {FORMAT_CHWN, std::vector({3, 1, 2, 0})}, - {FORMAT_HWCN, std::vector({1, 2, 3, 0})}}}, - {FORMAT_HWCN, - {{FORMAT_NCHW, std::vector({3, 2, 0, 1})}, - {FORMAT_NHWC, std::vector({3, 0, 1, 2})}, - {FORMAT_CHWN, std::vector({2, 0, 1, 3})}}}, - {FORMAT_CHWN, - {{FORMAT_NCHW, std::vector({3, 0, 1, 2})}, - {FORMAT_NHWC, std::vector({3, 1, 2, 0})}, - {FORMAT_HWCN, std::vector({1, 2, 0, 3})}}}, + {FORMAT_NCHW, + {{FORMAT_NHWC, std::vector({0, 2, 3, 1})}, + {FORMAT_HWCN, std::vector({2, 3, 1, 0})}, + {FORMAT_CHWN, std::vector({1, 2, 3, 0})}}}, + {FORMAT_NHWC, + {{FORMAT_NCHW, std::vector({0, 3, 1, 2})}, + {FORMAT_CHWN, std::vector({3, 1, 2, 0})}, + {FORMAT_HWCN, std::vector({1, 2, 3, 0})}}}, + {FORMAT_HWCN, + {{FORMAT_NCHW, std::vector({3, 2, 0, 1})}, + {FORMAT_NHWC, std::vector({3, 0, 1, 2})}, + {FORMAT_CHWN, std::vector({2, 0, 1, 3})}}}, + {FORMAT_CHWN, + {{FORMAT_NCHW, std::vector({3, 0, 1, 2})}, + {FORMAT_NHWC, std::vector({3, 1, 2, 0})}, + {FORMAT_HWCN, std::vector({1, 2, 0, 3})}}}, }; bool IsShapeArgValid(const std::vector &src_shape, const std::vector &perm_arg) { @@ -163,8 +163,8 @@ Status Transpose(const uint8_t *src, const std::vector &src_shape, Data auto src_offset = GenOffset(src_heads, dst_indexes) * data_size; auto dst_offset_bytes = dst_index * data_size; auto protected_size = dst_size - dst_offset_bytes < static_cast(SECUREC_MEM_MAX_LEN) - ? dst_size - dst_offset_bytes - : static_cast(SECUREC_MEM_MAX_LEN); + ? 
dst_size - dst_offset_bytes + : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(dst.get() + dst_offset_bytes, static_cast(protected_size), src + src_offset, static_cast(data_size)); if (ret != EOK) { diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.h b/ge/common/formats/format_transfers/format_transfer_transpose.h index 7fa19ff0..0e84ef8c 100755 --- a/ge/common/formats/format_transfers/format_transfer_transpose.h +++ b/ge/common/formats/format_transfers/format_transfer_transpose.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,6 @@ Status TransposeWithShapeCheck(const uint8_t *src, const std::vector &s Status GetPermByForamt(Format src_format, Format dst_format, std::vector &perm); - class FormatTransferTranspose : public FormatTransfer { public: Status TransFormat(const TransArgs &args, TransResult &result) override; diff --git a/ge/common/formats/formats.cc b/ge/common/formats/formats.cc index 697e16ad..d01d055b 100755 --- a/ge/common/formats/formats.cc +++ b/ge/common/formats/formats.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -51,8 +51,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransFormat(const TransArg GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransShape(Format src_format, const std::vector &src_shape, - DataType data_type, - Format dst_format, + DataType data_type, Format dst_format, std::vector &dst_shape) { formats::TransArgs args; args.src_format = src_format; diff --git a/ge/common/formats/formats.h b/ge/common/formats/formats.h index 52ae84ad..b58c67aa 100644 --- a/ge/common/formats/formats.h +++ b/ge/common/formats/formats.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,8 +36,8 @@ namespace formats { */ Status TransFormat(const TransArgs &args, TransResult &result); -Status TransShape(Format src_format, const std::vector &src_shape, DataType data_type, - Format dst_format, std::vector &dst_shape); +Status TransShape(Format src_format, const std::vector &src_shape, DataType data_type, Format dst_format, + std::vector &dst_shape); Status TransDataType(const CastArgs &args, TransResult &result); diff --git a/ge/common/formats/utils/formats_definitions.h b/ge/common/formats/utils/formats_definitions.h index 7f873f1b..2faa60e1 100755 --- a/ge/common/formats/utils/formats_definitions.h +++ b/ge/common/formats/utils/formats_definitions.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,38 +23,13 @@ static const int kCubeSize = 16; static const int kNiSize = 16; static const int64_t kShapeItemNumMAX = 1024UL * 1024UL * 1024UL * 1024UL; -enum NchwDimIndex { - kNchwN, - kNchwC, - kNchwH, - kNchwW, - kNchwDimsNum -}; +enum NchwDimIndex { kNchwN, kNchwC, kNchwH, kNchwW, kNchwDimsNum }; -enum NhwcDimIndex { - kNhwcN, - kNhwcH, - kNhwcW, - kNhwcC, - kNhwcDimsNum -}; +enum NhwcDimIndex { kNhwcN, kNhwcH, kNhwcW, kNhwcC, kNhwcDimsNum }; -enum HwcnDimIndex { - kHwcnH, - kHwcnW, - kHwcnC, - kHwcnN, - kHwcnDimsNum -}; +enum HwcnDimIndex { kHwcnH, kHwcnW, kHwcnC, kHwcnN, kHwcnDimsNum }; -enum Nc1hwc0DimIndex { - kNc1hwc0N, - kNc1hwc0C1, - kNc1hwc0H, - kNc1hwc0W, - kNc1hwc0C0, - kNc1hwc0DimsNum -}; +enum Nc1hwc0DimIndex { kNc1hwc0N, kNc1hwc0C1, kNc1hwc0H, kNc1hwc0W, kNc1hwc0C0, kNc1hwc0DimsNum }; enum C1hwncoc0DimIndex { kC1hwncoc0C1, @@ -66,31 +41,11 @@ enum C1hwncoc0DimIndex { kC1hwncoc0DimsNum }; -enum FracZDimIndex { - kFracZHWC1, - kFracZN0, - kFracZNi, - kFracZC0, - kFracZDimsNum -}; +enum FracZDimIndex { kFracZHWC1, kFracZN0, kFracZNi, kFracZC0, kFracZDimsNum }; -enum DhwcnDimIndex { - kDhwcnD, - kDhwcnH, - kDhwcnW, - kDhwcnC, - kDhwcnN, - kDhwcnDimsNum -}; +enum DhwcnDimIndex { kDhwcnD, kDhwcnH, kDhwcnW, kDhwcnC, kDhwcnN, kDhwcnDimsNum }; -enum DhwncDimIndex { - kDhwncD, - kDhwncH, - kDhwncW, - kDhwncN, - kDhwncC, - kDhwncDimsNum -}; +enum DhwncDimIndex { kDhwncD, kDhwncH, kDhwncW, kDhwncN, kDhwncC, kDhwncDimsNum }; } // namespace formats } // namespace ge #endif // GE_COMMON_FORMATS_UTILS_FORMATS_DEFINITIONS_H_ diff --git a/ge/common/formats/utils/formats_trans_utils.cc b/ge/common/formats/utils/formats_trans_utils.cc index e184a866..23da0f74 100755 --- a/ge/common/formats/utils/formats_trans_utils.cc +++ b/ge/common/formats/utils/formats_trans_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you 
may not use this file except in compliance with the License. diff --git a/ge/common/formats/utils/formats_trans_utils.h b/ge/common/formats/utils/formats_trans_utils.h index a480b814..8b6f0604 100755 --- a/ge/common/formats/utils/formats_trans_utils.h +++ b/ge/common/formats/utils/formats_trans_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/fp16_t.cc b/ge/common/fp16_t.cc index 2f94323d..7b111e63 100755 --- a/ge/common/fp16_t.cc +++ b/ge/common/fp16_t.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/fp16_t.h b/ge/common/fp16_t.h index 0c5cd17b..0fda2cd2 100755 --- a/ge/common/fp16_t.h +++ b/ge/common/fp16_t.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/datatype_util.cc b/ge/common/ge/datatype_util.cc index 15234768..79a473fe 100755 --- a/ge/common/ge/datatype_util.cc +++ b/ge/common/ge/datatype_util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,46 +22,46 @@ namespace { const std::vector kEmptyDatatypeVector; std::map> g_translatable_data_type = { - // key:src datatype, value:dst datatype - {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}, - {ge::DT_BOOL, {ge::DT_INT32}}, - {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, - {ge::DT_INT64, {ge::DT_INT32}}}; + // key:src datatype, value:dst datatype + {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}, + {ge::DT_BOOL, {ge::DT_INT32}}, + {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, + {ge::DT_INT64, {ge::DT_INT32}}}; std::map> g_reverse_translatable_data_type = { - // key:dst datatype,value:src datatype - {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, - {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, - {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; + // key:dst datatype,value:src datatype + {ge::DT_FLOAT16, {ge::DT_FLOAT, ge::DT_FLOAT16}}, + {ge::DT_INT32, {ge::DT_BOOL, ge::DT_INT64}}, + {ge::DT_FLOAT, {ge::DT_FLOAT16, ge::DT_FLOAT}}}; std::map g_dump_data_type_map = { - // key:ge datatype,value:proto datatype - {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, - {ge::DT_FLOAT, ge::proto::DT_FLOAT}, - {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, - {ge::DT_INT8, ge::proto::DT_INT8}, - {ge::DT_UINT8, ge::proto::DT_UINT8}, - {ge::DT_INT16, ge::proto::DT_INT16}, - {ge::DT_UINT16, ge::proto::DT_UINT16}, - {ge::DT_INT32, ge::proto::DT_INT32}, - {ge::DT_INT64, ge::proto::DT_INT64}, - {ge::DT_UINT32, ge::proto::DT_UINT32}, - {ge::DT_UINT64, ge::proto::DT_UINT64}, - {ge::DT_BOOL, ge::proto::DT_BOOL}, - {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, - {ge::DT_DUAL, ge::proto::DT_DUAL}, - {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, - {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, - {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, - {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, - {ge::DT_QINT8, ge::proto::DT_QINT8}, - {ge::DT_QINT16, ge::proto::DT_QINT16}, - {ge::DT_QINT32, ge::proto::DT_QINT32}, - {ge::DT_QUINT8, ge::proto::DT_QUINT8}, - {ge::DT_QUINT16, 
ge::proto::DT_QUINT16}, - {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, - {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, - {ge::DT_STRING, ge::proto::DT_STRING}, + // key:ge datatype,value:proto datatype + {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, + {ge::DT_FLOAT, ge::proto::DT_FLOAT}, + {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, + {ge::DT_INT8, ge::proto::DT_INT8}, + {ge::DT_UINT8, ge::proto::DT_UINT8}, + {ge::DT_INT16, ge::proto::DT_INT16}, + {ge::DT_UINT16, ge::proto::DT_UINT16}, + {ge::DT_INT32, ge::proto::DT_INT32}, + {ge::DT_INT64, ge::proto::DT_INT64}, + {ge::DT_UINT32, ge::proto::DT_UINT32}, + {ge::DT_UINT64, ge::proto::DT_UINT64}, + {ge::DT_BOOL, ge::proto::DT_BOOL}, + {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, + {ge::DT_DUAL, ge::proto::DT_DUAL}, + {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, + {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, + {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, + {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, + {ge::DT_QINT8, ge::proto::DT_QINT8}, + {ge::DT_QINT16, ge::proto::DT_QINT16}, + {ge::DT_QINT32, ge::proto::DT_QINT32}, + {ge::DT_QUINT8, ge::proto::DT_QUINT8}, + {ge::DT_QUINT16, ge::proto::DT_QUINT16}, + {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, + {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, + {ge::DT_STRING, ge::proto::DT_STRING}, }; } // namespace diff --git a/ge/common/ge/datatype_util.h b/ge/common/ge/datatype_util.h index e42b25a7..480b35e7 100644 --- a/ge/common/ge/datatype_util.h +++ b/ge/common/ge/datatype_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,10 +37,10 @@ static const int32_t kGeSizeUint16 = sizeof(uint16_t); static const int32_t kGeSizeUint32 = sizeof(uint32_t); static std::map CONST_OPDATA_TYPE_SIZE_MAP = { - {ge::DT_FLOAT, kGeSizeFloat}, {ge::DT_FLOAT16, kGeSizeHalfFloat}, {ge::DT_INT8, kGeSizeInt8}, - {ge::DT_INT16, kGeSizeInt16}, {ge::DT_INT32, kGeSizeInt32}, {ge::DT_INT64, kGeSizeInt64}, - {ge::DT_UINT8, kGeSizeUint8}, {ge::DT_UINT16, kGeSizeUint16}, {ge::DT_UINT32, kGeSizeUint32}, - {ge::DT_UINT64, kGeSizeUint64}, {ge::DT_DOUBLE, kGeSizeDouble}, {ge::DT_BOOL, kGeSizeBool}}; + {ge::DT_FLOAT, kGeSizeFloat}, {ge::DT_FLOAT16, kGeSizeHalfFloat}, {ge::DT_INT8, kGeSizeInt8}, + {ge::DT_INT16, kGeSizeInt16}, {ge::DT_INT32, kGeSizeInt32}, {ge::DT_INT64, kGeSizeInt64}, + {ge::DT_UINT8, kGeSizeUint8}, {ge::DT_UINT16, kGeSizeUint16}, {ge::DT_UINT32, kGeSizeUint32}, + {ge::DT_UINT64, kGeSizeUint64}, {ge::DT_DOUBLE, kGeSizeDouble}, {ge::DT_BOOL, kGeSizeBool}}; class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY DataTypeUtil { public: diff --git a/ge/common/ge/ge_util.h b/ge/common/ge/ge_util.h index 52e7c370..c6319bd3 100644 --- a/ge/common/ge/ge_util.h +++ b/ge/common/ge/ge_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/op_tiling_manager.cc b/ge/common/ge/op_tiling_manager.cc index 251634e2..ec43ab2e 100644 --- a/ge/common/ge/op_tiling_manager.cc +++ b/ge/common/ge/op_tiling_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -45,8 +45,8 @@ std::string OpTilingManager::GetPath() { if (opp_path_env != nullptr) { char resolved_path[PATH_MAX]; if (realpath(opp_path_env, resolved_path) == NULL) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E19024", {"env", "value", "situation"}, {"ASCEND_OPP_PATH", opp_path_env, "loading the tiling lib"}); + ErrorManager::GetInstance().ATCReportErrMessage("E19024", {"env", "value", "situation"}, + {"ASCEND_OPP_PATH", opp_path_env, "loading the tiling lib"}); GELOGE(PARAM_INVALID, "Failed load tiling lib as env 'ASCEND_OPP_PATH'[%s] is invalid path.", opp_path_env); return std::string(); } diff --git a/ge/common/ge/op_tiling_manager.h b/ge/common/ge/op_tiling_manager.h index d4e7f34e..320e1411 100644 --- a/ge/common/ge/op_tiling_manager.h +++ b/ge/common/ge/op_tiling_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,4 +36,3 @@ class OpTilingManager { } // namespace ge #endif // GE_COMMON_GE_OP_TILING_MANAGER_H_ - diff --git a/ge/common/ge/plugin_manager.cc b/ge/common/ge/plugin_manager.cc index 57d51223..c56b2a2a 100644 --- a/ge/common/ge/plugin_manager.cc +++ b/ge/common/ge/plugin_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -93,13 +93,15 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec std::vector path_vec; SplitPath(path, path_vec); for (const auto &single_path : path_vec) { - GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, GELOGE(GE_PLGMGR_PATH_INVALID, - "The shared library file path is too long!"); + GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, + GELOGE(GE_PLGMGR_PATH_INVALID, "The shared library file path is too long!"); continue); // load break when number of loaded so reach maximum if (num_of_loaded_so >= kMaxNumOfSo) { - GELOGW("The number of dynamic libraries loaded exceeds the kMaxNumOfSo," - " and only the first %d shared libraries will be loaded.", kMaxNumOfSo); + GELOGW( + "The number of dynamic libraries loaded exceeds the kMaxNumOfSo," + " and only the first %d shared libraries will be loaded.", + kMaxNumOfSo); break; } @@ -180,9 +182,9 @@ Status PluginManager::ValidateSo(const string &file_path, int64_t size_of_loaded // load continue if the total size of so reaches maximum when it is loaded if (size_of_loaded_so + file_size > kMaxSizeOfLoadedSo) { GELOGW( - "%s is skipped because the size of loaded share library reaches maximum if it is loaded! " - "(size: %ldB, size of loaded share library: %ldB, maximum: %dB)", - file_path.c_str(), file_size, size_of_loaded_so, kMaxSizeOfLoadedSo); + "%s is skipped because the size of loaded share library reaches maximum if it is loaded! 
" + "(size: %ldB, size of loaded share library: %ldB, maximum: %dB)", + file_path.c_str(), file_size, size_of_loaded_so, kMaxSizeOfLoadedSo); return FAILED; } @@ -229,8 +231,10 @@ Status PluginManager::Load(const string &path, const vector &func_check_ // load break when number of loaded so reach maximum if (num_of_loaded_so >= kMaxNumOfSo) { - GELOGW("The number of dynamic libraries loaded exceeds the kMaxNumOfSo," - " and only the first %d shared libraries will be loaded.", kMaxNumOfSo); + GELOGW( + "The number of dynamic libraries loaded exceeds the kMaxNumOfSo," + " and only the first %d shared libraries will be loaded.", + kMaxNumOfSo); break; } diff --git a/ge/common/ge/plugin_manager.h b/ge/common/ge/plugin_manager.h index 903367a3..b35a631a 100755 --- a/ge/common/ge/plugin_manager.h +++ b/ge/common/ge/plugin_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc index 92da8e14..8a594cb9 100755 --- a/ge/common/ge/tbe_plugin_manager.cc +++ b/ge/common/ge/tbe_plugin_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -94,6 +94,13 @@ void TBEPluginManager::ProcessSoFullName(vector &file_list, string &caff full_name.compare(full_name.size() - caffe_parser_so_suff.size(), caffe_parser_so_suff.size(), caffe_parser_so_suff) == 0) { caffe_parser_path = full_name; + } else if ((full_name.size() >= aicpu_so_suff.size() && + full_name.compare(full_name.size() - aicpu_so_suff.size(), aicpu_so_suff.size(), aicpu_so_suff) == 0) || + (full_name.size() >= aicpu_host_so_suff.size() && + full_name.compare(full_name.size() - aicpu_host_so_suff.size(), aicpu_host_so_suff.size(), + aicpu_host_so_suff) == 0)) { + // aicpu so, Put the file path into the omgcontext and save into the model in the builder stage. + domi::GetContext().aicpu_op_run_paths.push_back(full_name); } else { // Save parser so path into file_list vector file_list.push_back(full_name); @@ -186,8 +193,8 @@ void TBEPluginManager::LoadCustomOpLib() { } } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY -void TBEPluginManager::LoadPluginSo(const std::map &options) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo( + const std::map &options) { vector file_list; string caffe_parser_path; std::string plugin_path; @@ -223,10 +230,39 @@ void TBEPluginManager::LoadPluginSo(const std::map &options) { } } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY -void TBEPluginManager::InitPreparation(const std::map &options) { +Status TBEPluginManager::CheckCustomAiCpuOpLib() { + std::vector vec_op_type; + + domi::OpRegistry::Instance()->GetOpTypeByImplyType(vec_op_type, domi::ImplyType::CUSTOM); + for (size_t i = 0; i < vec_op_type.size(); i++) { + bool aicpu_so_exist = false; + std::string ai_cpu_so_name = "lib" + vec_op_type[i] + "_aicpu.so"; + for (size_t j = 0; j < domi::GetContext().aicpu_op_run_paths.size(); j++) { + string bin_file_path = domi::GetContext().aicpu_op_run_paths[j]; + if (bin_file_path.size() >= ai_cpu_so_name.size() && + bin_file_path.compare(bin_file_path.size() - 
ai_cpu_so_name.size(), ai_cpu_so_name.size(), ai_cpu_so_name) == + 0) { + aicpu_so_exist = true; + break; + } + } + if (!aicpu_so_exist) { + GELOGE(FAILED, "Can't find aicpu run so(%s), please check the plugin path!", ai_cpu_so_name.c_str()); + return FAILED; + } + } + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::InitPreparation( + const std::map &options) { options_.insert(options.begin(), options.end()); // Load TBE plugin TBEPluginManager::Instance().LoadCustomOpLib(); + Status ret = CheckCustomAiCpuOpLib(); + if (ret != SUCCESS) { + GELOGE(ret, "Check custom aicpu run so failed!"); + return; + } } } // namespace ge diff --git a/ge/common/ge/tbe_plugin_manager.h b/ge/common/ge/tbe_plugin_manager.h index 41db8ef9..2a55e450 100755 --- a/ge/common/ge/tbe_plugin_manager.h +++ b/ge/common/ge/tbe_plugin_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,23 +32,23 @@ namespace ge { using SoHandlesVec = std::vector; -using std::vector; -using std::string; -using std::map; using std::function; +using std::map; +using std::string; +using std::vector; class TBEPluginManager { public: Status Finalize(); // Get TBEPluginManager singleton instance - static TBEPluginManager& Instance(); + static TBEPluginManager &Instance(); static string GetPath(); static void InitPreparation(const std::map &options); - void LoadPluginSo(const std::map< string, string> &options); + void LoadPluginSo(const std::map &options); private: TBEPluginManager() = default; @@ -62,6 +62,7 @@ class TBEPluginManager { static void GetPluginSoFileList(const string &path, vector &file_list, string &caffe_parser_path); static void GetCustomOpPath(std::string &customop_path); void LoadCustomOpLib(); + static Status CheckCustomAiCpuOpLib(); SoHandlesVec handles_vec_; static std::map options_; diff --git a/ge/common/ge_common.mk b/ge/common/ge_common.mk index efddc788..e40ef3c1 100755 --- a/ge/common/ge_common.mk +++ b/ge/common/ge_common.mk @@ -71,10 +71,7 @@ GE_COMMON_LOCAL_C_INCLUDES := \ $(TOPDIR)third_party/openssl/include/x86/include \ $(TOPDIR)framework/domi \ $(TOPDIR)framework/domi/common \ - $(TOPDIR)framework/domi/common/op \ - $(TOPDIR)graphengine/ge \ - $(TOPDIR)graphengine/ge/common \ - $(TOPDIR)graphengine/ge/common/op \ + $(TOPDIR)framework/domi/common/op #compile host libge_common include $(CLEAR_VARS) diff --git a/ge/common/helper/model_cache_helper.cc b/ge/common/helper/model_cache_helper.cc index e84e0077..b1a71b0a 100755 --- a/ge/common/helper/model_cache_helper.cc +++ b/ge/common/helper/model_cache_helper.cc @@ -1007,9 +1007,10 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const { return PARAM_INVALID; } // Offset is needed by SaveVarVddr instead of logic address - ret = VarManager::Instance(session_id_)->SaveVarAddr(iter.first, tensor_addr_mgr.tensor_desc, - reinterpret_cast(reinterpret_cast(offset)), 
- tensor_addr_mgr.memory_type); + ret = + VarManager::Instance(session_id_) + ->SaveVarAddr(iter.first, tensor_addr_mgr.tensor_desc, + reinterpret_cast(reinterpret_cast(offset)), tensor_addr_mgr.memory_type); if (ret != SUCCESS) { GELOGW("Fail to recover VarAddr or TensorDesc of var[%s].", iter.first.c_str()); return ret; @@ -1496,6 +1497,7 @@ Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map(); uint64_t var_mem_size = mem_resource_json[kVarMemSize].get(); diff --git a/ge/common/helper/model_cache_helper.h b/ge/common/helper/model_cache_helper.h index 68381e96..7524b224 100755 --- a/ge/common/helper/model_cache_helper.h +++ b/ge/common/helper/model_cache_helper.h @@ -42,7 +42,7 @@ class ModelCacheHelper { ModelCacheHelper(uint64_t session_id, uint32_t graph_id, ComputeGraphPtr &compute_graph); ~ModelCacheHelper(); - Status SaveCacheInfoToCache () const; + Status SaveCacheInfoToCache() const; Status SaveVarManagerToCache(bool before_build) const; Status SaveOmModelToCache(const GeModelPtr &ge_model) const; bool IsModelCacheHit() const; @@ -97,7 +97,7 @@ class ModelCacheHelper { std::vector> &var_addr_mgr_vector, std::unordered_set &var_offset_set); static Status ParseCurVarTensorDescMapFromJson( - const Json &json, std::unordered_map &cur_var_tensor_desc_map); + const Json &json, std::unordered_map &cur_var_tensor_desc_map); static Status ParseTransRoadsFromJson(const Json &json, std::unordered_map> &trans_roads); static Status ParseChangedGraphIdFromJson(const Json &json, diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index bb4502c7..15683257 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,14 +27,13 @@ #include "graph/utils/attr_utils.h" #include "graph/utils/graph_utils.h" -using std::string; using domi::ModelTaskDef; +using std::string; namespace { const int64_t kOriginalOmPartitionNum = 1; } - namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelHelper::~ModelHelper() { (void)ReleaseLocalModelData(); } @@ -56,7 +55,7 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr &om_fil item = "aicpu kernels"; } ErrorManager::GetInstance().ATCReportErrMessage("E19023", {"size", "item", "maxsize"}, - {std::to_string(size), item, std::to_string(UINT32_MAX)}); + {std::to_string(size), item, std::to_string(UINT32_MAX)}); } return PARAM_INVALID; } @@ -78,7 +77,7 @@ Status ModelHelper::SaveModelPartition(std::shared_ptr &om_fil FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, - ModelBufferData& model) { + ModelBufferData &model) { if (output_file.empty()) { GELOGE(FAILED, "GraphBuilder SaveModel received invalid file name prefix"); return FAILED; @@ -110,19 +109,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod GELOGI("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); // weight is not necessary if (ge_model_weight.GetSize() > 0) { - GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, - ModelPartitionType::WEIGHTS_DATA, - ge_model_weight.GetData(), - ge_model_weight.GetSize()), "Add weight partition failed"); + GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::WEIGHTS_DATA, + ge_model_weight.GetData(), ge_model_weight.GetSize()), + "Add weight partition failed"); } TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); GELOGI("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize()); if (tbe_kernel_store.DataSize() > 0) { - GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, - 
ModelPartitionType::TBE_KERNELS, - tbe_kernel_store.Data(), - tbe_kernel_store.DataSize()), "Add tbe kernel partition failed"); + GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::TBE_KERNELS, tbe_kernel_store.Data(), + tbe_kernel_store.DataSize()), + "Add tbe kernel partition failed"); } // no need to check value, DATA->NetOutput @@ -131,10 +128,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore(); GELOGI("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize()); if (cust_aicpu_kernel_store.DataSize() > 0) { - GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, - ModelPartitionType::CUST_AICPU_KERNELS, - cust_aicpu_kernel_store.Data(), - cust_aicpu_kernel_store.DataSize()), + GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, ModelPartitionType::CUST_AICPU_KERNELS, + cust_aicpu_kernel_store.Data(), cust_aicpu_kernel_store.DataSize()), "Add cust aicpu kernel partition failed"); } @@ -459,8 +454,8 @@ Status ModelHelper::ReleaseLocalModelData() noexcept { return result; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseNameFromFileName( - const string &file_name, string &base_name) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseNameFromFileName(const string &file_name, + string &base_name) { GELOGD("Get base_name from file, file_name:%s", file_name.c_str()); GE_CHK_BOOL_EXEC_WARN(!file_name.empty(), return FAILED, "File path may not valid, check params --output"); size_t start_position = 0; @@ -475,8 +470,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseName return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetModelNameFromMergedGraphName( - const string &graph_name, string &model_name) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status 
+ModelHelper::GetModelNameFromMergedGraphName(const string &graph_name, string &model_name) { GELOGD("Get model_name from graph_name, graph_name:%s", graph_name.c_str()); // this can only be used after merged graph(graph name will be append with "_x", x is index); GE_CHK_BOOL_EXEC_WARN(!graph_name.empty(), return FAILED, "File path may not valid, check params --output"); diff --git a/ge/common/helper/om_file_helper.cc b/ge/common/helper/om_file_helper.cc index 39cd7ad7..ca506731 100644 --- a/ge/common/helper/om_file_helper.cc +++ b/ge/common/helper/om_file_helper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,8 +89,8 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { // Model length too small if (model.model_len < (sizeof(ModelFileHeader) + sizeof(ModelPartitionTable))) { GELOGE(PARAM_INVALID, - "Invalid model. length[%u] < sizeof(ModelFileHeader)[%zu] + sizeof(ModelPartitionTable)[%zu].", - model.model_len, sizeof(ModelFileHeader), sizeof(ModelPartitionTable)); + "Invalid model. length[%u] < sizeof(ModelFileHeader)[%zu] + sizeof(ModelPartitionTable)[%zu].", + model.model_len, sizeof(ModelFileHeader), sizeof(ModelPartitionTable)); return PARAM_INVALID; } @@ -101,7 +101,7 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { (MODEL_FILE_MAGIC_NUM != model_header->magic)) { GELOGE(PARAM_INVALID, "Invalid model. 
file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || " - "MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", + "MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", model_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, model_header->magic); return PARAM_INVALID; } diff --git a/ge/common/kernel_store.cc b/ge/common/kernel_store.cc index e339b30c..e465d184 100755 --- a/ge/common/kernel_store.cc +++ b/ge/common/kernel_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,8 +51,8 @@ bool KernelStore::Build() { kernel_head.name_len = static_cast(kernel->GetName().length()); kernel_head.bin_len = static_cast(kernel->GetBinDataSize()); - GELOGI("get kernel bin name %s, addr %p, size %u", - kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize()); + GELOGI("get kernel bin name %s, addr %p, size %u", kernel->GetName().c_str(), kernel->GetBinData(), + kernel->GetBinDataSize()); mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head)); GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false); next_buffer += sizeof(kernel_head); diff --git a/ge/common/kernel_store.h b/ge/common/kernel_store.h index b3f4a62e..d73f26c5 100755 --- a/ge/common/kernel_store.h +++ b/ge/common/kernel_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/math/fp16_math.cc b/ge/common/math/fp16_math.cc index e465c953..56183ced 100755 --- a/ge/common/math/fp16_math.cc +++ b/ge/common/math/fp16_math.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/math/fp16_math.h b/ge/common/math/fp16_math.h index 48559eb3..c3a4eb28 100755 --- a/ge/common/math/fp16_math.h +++ b/ge/common/math/fp16_math.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/math/math_util.h b/ge/common/math/math_util.h index 3255e3c1..e5a53d16 100755 --- a/ge/common/math/math_util.h +++ b/ge/common/math/math_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -612,295 +612,268 @@ inline Status CheckInt32DivOverflow(int32_t a, int32_t b) { return SUCCESS; } -#define FMK_INT_ADDCHECK(a, b) \ - if (ge::CheckIntAddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int %d and %d addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT_ADDCHECK(a, b) \ + if (ge::CheckIntAddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int %d and %d addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT8_ADDCHECK(a, b) \ - if (ge::CheckInt8AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int8 %d and %d addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT8_ADDCHECK(a, b) \ + if (ge::CheckInt8AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int8 %d and %d addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT16_ADDCHECK(a, b) \ - if (ge::CheckInt16AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int16 %d and %d addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT16_ADDCHECK(a, b) \ + if (ge::CheckInt16AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int16 %d and %d addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT32_ADDCHECK(a, b) \ - if (ge::CheckInt32AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int32 %d and %d addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT32_ADDCHECK(a, b) \ + if (ge::CheckInt32AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int32 %d and %d addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT64_ADDCHECK(a, b) \ - if (ge::CheckInt64AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int64 %ld and %ld addition can result in 
overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT64_ADDCHECK(a, b) \ + if (ge::CheckInt64AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int64 %ld and %ld addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT8_ADDCHECK(a, b) \ - if (ge::CheckUint8AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Uint8 %u and %u addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT8_ADDCHECK(a, b) \ + if (ge::CheckUint8AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Uint8 %u and %u addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT16_ADDCHECK(a, b) \ - if (ge::CheckUint16AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("UINT16 %u and %u addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT16_ADDCHECK(a, b) \ + if (ge::CheckUint16AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("UINT16 %u and %u addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT32_ADDCHECK(a, b) \ - if (ge::CheckUint32AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Uint32 %u and %u addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT32_ADDCHECK(a, b) \ + if (ge::CheckUint32AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Uint32 %u and %u addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT64_ADDCHECK(a, b) \ - if (ge::CheckUint64AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Uint64 %lu and %lu addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT64_ADDCHECK(a, b) \ + if (ge::CheckUint64AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Uint64 %lu and %lu addition can 
result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FP16_ADDCHECK(a, b) \ - if (ge::CheckFp16AddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Fp16 %f and %f addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FP16_ADDCHECK(a, b) \ + if (ge::CheckFp16AddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Fp16 %f and %f addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FLOAT_ADDCHECK(a, b) \ - if (ge::CheckFloatAddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Float %f and %f addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FLOAT_ADDCHECK(a, b) \ + if (ge::CheckFloatAddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Float %f and %f addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_DOUBLE_ADDCHECK(a, b) \ - if (ge::CheckDoubleAddOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Double %lf and %lf addition can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_DOUBLE_ADDCHECK(a, b) \ + if (ge::CheckDoubleAddOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Double %lf and %lf addition can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT_SUBCHECK(a, b) \ - if (ge::CheckIntSubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int %d and %d subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT_SUBCHECK(a, b) \ + if (ge::CheckIntSubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int %d and %d subtraction can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT8_SUBCHECK(a, b) \ - if (ge::CheckInt8SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int8 %d and %d subtraction can result in overflow!", 
static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT8_SUBCHECK(a, b) \ + if (ge::CheckInt8SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int8 %d and %d subtraction can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT16_SUBCHECK(a, b) \ - if (ge::CheckInt16SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int16 %d and %d subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT16_SUBCHECK(a, b) \ + if (ge::CheckInt16SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int16 %d and %d subtraction can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT32_SUBCHECK(a, b) \ - if (ge::CheckInt32SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int32 %d and %d subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT32_SUBCHECK(a, b) \ + if (ge::CheckInt32SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int32 %d and %d subtraction can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT64_SUBCHECK(a, b) \ - if (ge::CheckInt64SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int64 %ld and %ld subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT64_SUBCHECK(a, b) \ + if (ge::CheckInt64SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int64 %ld and %ld subtraction can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT8_SUBCHECK(a, b) \ - if (ge::CheckUint8SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Uint8 %u and %u subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT8_SUBCHECK(a, b) \ + if (ge::CheckUint8SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Uint8 %u and %u subtraction can result in 
overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT16_SUBCHECK(a, b) \ - if (ge::CheckUint16SubOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT16_SUBCHECK(a, b) \ + if (ge::CheckUint16SubOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint16 %u and %u subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT32_SUBCHECK(a, b) \ - if (ge::CheckUint32SubOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT32_SUBCHECK(a, b) \ + if (ge::CheckUint32SubOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint32 %u and %u subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT64_SUBCHECK(a, b) \ - if (ge::CheckUint64SubOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT64_SUBCHECK(a, b) \ + if (ge::CheckUint64SubOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint64 %lu and %lu subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FP16_SUBCHECK(a, b) \ - if (ge::CheckFp16SubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Fp16 %f and %f subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FP16_SUBCHECK(a, b) \ + if (ge::CheckFp16SubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Fp16 %f and %f subtraction can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FLOAT_SUBCHECK(a, b) \ - if (ge::CheckFloatSubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Float %f and %f subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FLOAT_SUBCHECK(a, b) \ + if (ge::CheckFloatSubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Float %f and %f subtraction can result 
in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_DOUBLE_SUBCHECK(a, b) \ - if (ge::CheckDoubleSubOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Double %lf and %lf subtraction can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_DOUBLE_SUBCHECK(a, b) \ + if (ge::CheckDoubleSubOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Double %lf and %lf subtraction can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT_MULCHECK(a, b) \ - if (ge::CheckIntMulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int %d and %d multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT_MULCHECK(a, b) \ + if (ge::CheckIntMulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int %d and %d multiplication can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT8_MULCHECK(a, b) \ - if (ge::CheckInt8MulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int8 %d and %d multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT8_MULCHECK(a, b) \ + if (ge::CheckInt8MulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int8 %d and %d multiplication can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT16_MULCHECK(a, b) \ - if (ge::CheckInt16MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_INT16_MULCHECK(a, b) \ + if (ge::CheckInt16MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Int16 %d and %d multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT32_MULCHECK(a, b) \ - if (ge::CheckInt32MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_INT32_MULCHECK(a, b) \ + if (ge::CheckInt32MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Int32 %d and 
%d multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT64_MULCHECK(a, b) \ - if (ge::Int64MulCheckOverflow((a), (b)) != SUCCESS) { \ +#define FMK_INT64_MULCHECK(a, b) \ + if (ge::Int64MulCheckOverflow((a), (b)) != SUCCESS) { \ GELOGW("Int64 %ld and %ld multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT8_MULCHECK(a, b) \ - if (ge::CheckUint8MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT8_MULCHECK(a, b) \ + if (ge::CheckUint8MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint8 %u and %u multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT16_MULCHECK(a, b) \ - if (ge::CheckUint16MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT16_MULCHECK(a, b) \ + if (ge::CheckUint16MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint16 %u and %u multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT32_MULCHECK(a, b) \ - if (ge::CheckUint32MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_UINT32_MULCHECK(a, b) \ + if (ge::CheckUint32MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Uint32 %u and %u multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_UINT64_MULCHECK(a, b) \ - if (ge::CheckUint64MulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Uint64 %lu and %lu multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_UINT64_MULCHECK(a, b) \ + if (ge::CheckUint64MulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Uint64 %lu 
and %lu multiplication can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FP16_MULCHECK(a, b) \ - if (ge::CheckFp16MulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Fp16 %f and %f multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FP16_MULCHECK(a, b) \ + if (ge::CheckFp16MulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Fp16 %f and %f multiplication can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FLOAT_MULCHECK(a, b) \ - if (ge::CheckFloatMulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Float %f and %f multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_FLOAT_MULCHECK(a, b) \ + if (ge::CheckFloatMulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Float %f and %f multiplication can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_DOUBLE_MULCHECK(a, b) \ - if (ge::CheckDoubleMulOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Double %lf and %lf multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_DOUBLE_MULCHECK(a, b) \ + if (ge::CheckDoubleMulOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Double %lf and %lf multiplication can result in overflow!", static_cast(a), \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT_DIVCHECK(a, b) \ - if (CheckIntDivOverflow((a), (b)) != SUCCESS) { \ - GELOGW("Int %d and %d division can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT_DIVCHECK(a, b) \ + if (CheckIntDivOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int %d and %d division can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT32_DIVCHECK(a, b) \ - if (CheckInt32DivOverflow((a), (b)) != SUCCESS) { \ - 
GELOGW("Int32 %d and %d division can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ +#define FMK_INT32_DIVCHECK(a, b) \ + if (CheckInt32DivOverflow((a), (b)) != SUCCESS) { \ + GELOGW("Int32 %d and %d division can result in overflow!", static_cast(a), static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_INT64_UINT32_MULCHECK(a, b) \ - if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) { \ +#define FMK_INT64_UINT32_MULCHECK(a, b) \ + if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) { \ GELOGW("Int64 %ld and UINT32 %u multiplication can result in overflow!", static_cast(a), \ - static_cast(b)); \ - return INTERNAL_ERROR; \ + static_cast(b)); \ + return INTERNAL_ERROR; \ } -#define FMK_FP16_ZEROCHECK(a) \ - if (fabs(a) < DBL_EPSILON || a < 0) { \ - GELOGW("Fp16 %f can not less than or equal to zero! ", a); \ - return INTERNAL_ERROR; \ +#define FMK_FP16_ZEROCHECK(a) \ + if (fabs(a) < DBL_EPSILON || a < 0) { \ + GELOGW("Fp16 %f can not less than or equal to zero! ", a); \ + return INTERNAL_ERROR; \ } -#define FMK_FLOAT_ZEROCHECK(a) \ - if (fabs(a) < FLT_EPSILON || a < 0) { \ - GELOGW("Float %f can not less than or equal to zero! ", a); \ - return INTERNAL_ERROR; \ +#define FMK_FLOAT_ZEROCHECK(a) \ + if (fabs(a) < FLT_EPSILON || a < 0) { \ + GELOGW("Float %f can not less than or equal to zero! ", a); \ + return INTERNAL_ERROR; \ } -#define FMK_DOUBLE_ZEROCHECK(a) \ - if (fabs(a) < DBL_EPSILON || a < 0) { \ - GELOGW("Double %lf can not less than or equal to zero! ", a); \ - return INTERNAL_ERROR; \ +#define FMK_DOUBLE_ZEROCHECK(a) \ + if (fabs(a) < DBL_EPSILON || a < 0) { \ + GELOGW("Double %lf can not less than or equal to zero! 
", a); \ + return INTERNAL_ERROR; \ } } // namespace ge #endif // GE_COMMON_MATH_MATH_UTIL_H_ diff --git a/ge/common/math_util.h b/ge/common/math_util.h index 913a1572..a12be9e0 100755 --- a/ge/common/math_util.h +++ b/ge/common/math_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,13 +28,13 @@ namespace ge { /** -* @ingroup domi_calibration -* @brief Initializes an input array to a specified value -* @param [in] n array initialization length -* @param [in] alpha initialization value -* @param [out] output array to be initialized -* @return Status -*/ + * @ingroup domi_calibration + * @brief Initializes an input array to a specified value + * @param [in] n array initialization length + * @param [in] alpha initialization value + * @param [out] output array to be initialized + * @return Status + */ template Status NnSet(const int32_t n, const Dtype alpha, Dtype *output) { GE_CHECK_NOTNULL(output); diff --git a/ge/common/model_parser/base.cc b/ge/common/model_parser/base.cc index bc38cea8..3b6b9407 100644 --- a/ge/common/model_parser/base.cc +++ b/ge/common/model_parser/base.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/model_parser/base.h b/ge/common/model_parser/base.h index 75db8b11..22d58ace 100755 --- a/ge/common/model_parser/base.h +++ b/ge/common/model_parser/base.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/model_saver.cc b/ge/common/model_saver.cc index fb1cd0a7..821fde60 100755 --- a/ge/common/model_saver.cc +++ b/ge/common/model_saver.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,8 +69,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi // Write data to file mmSsize_t mmpa_ret = mmWrite(fd, const_cast((const void *)model_char), len); if (mmpa_ret == EN_ERROR || mmpa_ret == EN_INVALID_PARAM) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E19004", {"file", "errmsg"}, {file_path, strerror(errno)}); + ErrorManager::GetInstance().ATCReportErrMessage("E19004", {"file", "errmsg"}, {file_path, strerror(errno)}); // Need to both print the error info of mmWrite and mmClose, so return ret after mmClose GELOGE(FAILED, "Write to file failed. errno = %d, %s", mmpa_ret, strerror(errno)); ret = FAILED; diff --git a/ge/common/model_saver.h b/ge/common/model_saver.h index 6da0a78c..411d5e35 100644 --- a/ge/common/model_saver.h +++ b/ge/common/model_saver.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/op/attr_value_util.cc b/ge/common/op/attr_value_util.cc index 4315a25d..5d74aa1d 100644 --- a/ge/common/op/attr_value_util.cc +++ b/ge/common/op/attr_value_util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include "framework/common/op/attr_value_util.h" #include "framework/common/debug/log.h" #include "framework/common/util.h" -#include "register/register_types.h" namespace ge { #define DEFINE_SET_ATTR_VALUE_ONE(ARG_TYPE, FIELD) \ @@ -84,30 +83,27 @@ DEFINE_SET_ATTR_VALUE_LIST(const std::string &, s); ADD_TO_ATTR_MAP(map_key, value, attr) \ } \ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void AddOpAttr(KEY_TYPE map_key, VALUE_TYPE value, \ - AttrDefMap *attr_map) { \ - ADD_TO_ATTR_MAP(map_key, value, attr_map) \ - } \ - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void AddModelAttr(KEY_TYPE map_key, VALUE_TYPE value, \ - ModelDef *model_def) { \ + AttrDefMap *attr_map){ \ + ADD_TO_ATTR_MAP(map_key, value, attr_map)} FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void \ + AddModelAttr(KEY_TYPE map_key, VALUE_TYPE value, ModelDef *model_def) { \ GE_CHECK_NOTNULL_JUST_RETURN(model_def); \ auto attr = model_def->mutable_attr(); \ ADD_TO_ATTR_MAP(map_key, value, attr) \ } -#define DEFINE_ADD_ATTR_VALUE_LIST(KEY_TYPE, VALUE_TYPE) \ - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void AddOpAttrList(KEY_TYPE map_key, VALUE_TYPE value, \ - OpDef *op_def) { \ - GE_CHECK_NOTNULL_JUST_RETURN(op_def); \ - auto attr = op_def->mutable_attr(); \ - ADD_TO_ATTR_MAP_LIST(map_key, value, attr) \ - } \ - FMK_FUNC_DEV_VISIBILITY void AddOpAttrList(KEY_TYPE map_key, VALUE_TYPE value, AttrDefMap *attr_map) { \ - ADD_TO_ATTR_MAP_LIST(map_key, value, attr_map) \ - } \ - FMK_FUNC_DEV_VISIBILITY void AddModelAttrList(KEY_TYPE 
map_key, VALUE_TYPE value, ModelDef *model_def) { \ - GE_CHECK_NOTNULL_JUST_RETURN(model_def); \ - auto attr = model_def->mutable_attr(); \ - ADD_TO_ATTR_MAP_LIST(map_key, value, attr) \ +#define DEFINE_ADD_ATTR_VALUE_LIST(KEY_TYPE, VALUE_TYPE) \ + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void AddOpAttrList(KEY_TYPE map_key, VALUE_TYPE value, \ + OpDef *op_def) { \ + GE_CHECK_NOTNULL_JUST_RETURN(op_def); \ + auto attr = op_def->mutable_attr(); \ + ADD_TO_ATTR_MAP_LIST(map_key, value, attr) \ + } \ + FMK_FUNC_DEV_VISIBILITY void AddOpAttrList(KEY_TYPE map_key, VALUE_TYPE value, AttrDefMap *attr_map){ \ + ADD_TO_ATTR_MAP_LIST(map_key, value, attr_map)} FMK_FUNC_DEV_VISIBILITY void \ + AddModelAttrList(KEY_TYPE map_key, VALUE_TYPE value, ModelDef *model_def) { \ + GE_CHECK_NOTNULL_JUST_RETURN(model_def); \ + auto attr = model_def->mutable_attr(); \ + ADD_TO_ATTR_MAP_LIST(map_key, value, attr) \ } DEFINE_ADD_ATTR_VALUE(const std::string &, const std::string &); @@ -157,16 +153,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void AddOpAttr(const std::strin return false; \ } -#define DEFINE_GET_ATTR_CONST_POINT_REF(ARG_TYPE_KEY, ARG_TYPE_VALUE, FIELD) \ - FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool GetAttrDefValue( \ - ARG_TYPE_KEY map_key, const ARG_TYPE_VALUE *&value, const AttrDefMap &attr) { \ - auto it = attr.find(map_key); \ - if (it == attr.end()) { \ - return false; \ - } \ - \ - value = &(it->second.FIELD()); \ - return true; \ +#define DEFINE_GET_ATTR_CONST_POINT_REF(ARG_TYPE_KEY, ARG_TYPE_VALUE, FIELD) \ + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool GetAttrDefValue( \ + ARG_TYPE_KEY map_key, const ARG_TYPE_VALUE *&value, const AttrDefMap &attr) { \ + auto it = attr.find(map_key); \ + if (it == attr.end()) { \ + return false; \ + } \ + \ + value = &(it->second.FIELD()); \ + return true; \ } #define DEFINE_GET_BYTES_ATTR_VALUE(ARG_TYPE_KEY, ARG_TYPE_VALUE) \ diff --git a/ge/common/op/ge_op_utils.cc b/ge/common/op/ge_op_utils.cc 
index 579190d6..1dc268b2 100644 --- a/ge/common/op/ge_op_utils.cc +++ b/ge/common/op/ge_op_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,7 +27,6 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/op/attr_value_util.h" #include "framework/common/util.h" -#include "framework/common/types.h" #include "graph/anchor.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/op_desc_utils.h" diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 5ed95562..9492045c 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -98,7 +98,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromAclCfg( - const std::string &config) { + const std::string &config) { #ifdef DAVINCI_SUPPORT_PROFILING try { is_load_profiling_ = false; @@ -154,7 +154,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg( - const Json &features) { + const Json &features) { #ifdef DAVINCI_SUPPORT_PROFILING try { for (size_t i = 0; i < features.size(); ++i) { @@ -353,18 +353,20 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf } uint64_t module = GetProfilingModule(); int32_t device_num = static_cast(device_id_.size()); - auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); + uint32_t *device_id_ptr = new (std::nothrow) uint32_t[device_num]; if (device_id_ptr == nullptr) { - GELOGE(FAILED, "Stop profiling: device id ptr is null."); + GELOGE(FAILED, "Stop profiling device id ptr is null."); return; } for (int32_t i = 0; i < device_num; i++) { device_id_ptr[i] = static_cast(device_id_[i]); } - rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); + rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr); if (rt_ret != RT_ERROR_NONE) { GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret); } + delete[] device_id_ptr; + device_id_ptr = nullptr; for (size_t i = 0; i < prof_handle_vec_.size(); ++i) { int result = ProfMgrStop(prof_handle_vec_[i]); @@ -380,7 +382,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingTaskDescInfo( - const std::vector &task_desc_info, const int32_t &device_id) { + const std::vector &task_desc_info, const int32_t &device_id) { #ifdef 
DAVINCI_SUPPORT_PROFILING Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); if (reporter == nullptr) { @@ -395,11 +397,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin uint32_t block_dim = task.block_dim; uint32_t task_id = task.task_id; uint32_t stream_id = task.stream_id; - data = model_name.append(" ") - .append(op_name).append(" ") - .append(std::to_string(block_dim).append(" ") - .append(std::to_string(task_id)).append(" ") - .append(std::to_string(stream_id)).append("\n")); + data = model_name.append(" ").append(op_name).append(" ").append(std::to_string(block_dim) + .append(" ") + .append(std::to_string(task_id)) + .append(" ") + .append(std::to_string(stream_id)) + .append("\n")); Msprof::Engine::ReporterData reporter_data{}; reporter_data.deviceId = device_id; @@ -423,7 +426,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ProfilingGraphDescInfo( - const std::vector &compute_graph_desc_info, const int32_t &device_id) { + const std::vector &compute_graph_desc_info, const int32_t &device_id) { #ifdef DAVINCI_SUPPORT_PROFILING Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return;); @@ -431,19 +434,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin std::string data; for (const auto &graph : compute_graph_desc_info) { data.append("model_name:") - .append(graph.model_name) - .append(" op_name:") - .append(graph.op_name) - .append(" op_type:") - .append(graph.op_type); + .append(graph.model_name) + .append(" op_name:") + .append(graph.op_name) + .append(" op_type:") + .append(graph.op_type); for (size_t i = 0; i < graph.input_format.size(); ++i) { data.append(" input_id:") - .append(std::to_string(i)) - .append(" input_format:") - 
.append(std::to_string(graph.input_format.at(i))) - .append(" input_data_type:") - .append(std::to_string(graph.input_data_type.at(i))) - .append(" input_shape:\""); + .append(std::to_string(i)) + .append(" input_format:") + .append(std::to_string(graph.input_format.at(i))) + .append(" input_data_type:") + .append(std::to_string(graph.input_data_type.at(i))) + .append(" input_shape:\""); size_t input_shape_len = graph.input_shape.at(i).size(); if (input_shape_len == 0) { data.append(""); @@ -461,12 +464,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin for (size_t i = 0; i < graph.output_format.size(); ++i) { data.append(" output_id:") - .append(std::to_string(i)) - .append(" output_format:") - .append(std::to_string(graph.output_format.at(i))) - .append(" output_data_type:") - .append(std::to_string(graph.output_data_type.at(i))) - .append(" output_shape:\""); + .append(std::to_string(i)) + .append(" output_format:") + .append(std::to_string(graph.output_format.at(i))) + .append(" output_data_type:") + .append(std::to_string(graph.output_data_type.at(i))) + .append(" output_shape:\""); size_t output_shape_len = graph.output_shape.at(i).size(); if (output_shape_len == 0) { data.append(""); @@ -492,8 +495,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Report( - const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, - Msprof::Engine::ReporterData &reporter_data) { + const int32_t &device_id, const string &data, Msprof::Engine::Reporter &reporter, + Msprof::Engine::ReporterData &reporter_data) { #ifdef DAVINCI_SUPPORT_PROFILING size_t index = data.size() / kReportMaxLen; if (index >= 1) { @@ -535,7 +538,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( - const 
std::vector &task_desc_info, const std::vector &compute_graph_desc_info) { + const std::vector &task_desc_info, const std::vector &compute_graph_desc_info) { #ifdef DAVINCI_SUPPORT_PROFILING int32_t logic_device_id = 0; rtError_t rt_ret = rtGetDevice(&logic_device_id); @@ -560,22 +563,15 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::SetProfilingConfig( - const std::string &profiling_cfg) { + const std::string &profiling_cfg) { recv_profiling_config_ = profiling_cfg; } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t ProfilingManager::GetProfilingModule() { - uint64_t module = PROF_MODEL_EXECUTE_MASK | - PROF_RUNTIME_API_MASK | - PROF_RUNTIME_TRACE_MASK | - PROF_SCHEDULE_TIMELINE_MASK | - PROF_SCHEDULE_TRACE_MASK | - PROF_TASK_TIME_MASK | - PROF_SUBTASK_TIME_MASK | - PROF_AICPU_TRACE_MASK | - PROF_AICORE_METRICS_MASK | - PROF_AIVECTORCORE_METRICS_MASK | - PROF_MODEL_LOAD_MASK; + uint64_t module = PROF_MODEL_EXECUTE_MASK | PROF_RUNTIME_API_MASK | PROF_RUNTIME_TRACE_MASK | + PROF_SCHEDULE_TIMELINE_MASK | PROF_SCHEDULE_TRACE_MASK | PROF_TASK_TIME_MASK | + PROF_SUBTASK_TIME_MASK | PROF_AICPU_TRACE_MASK | PROF_AICORE_METRICS_MASK | + PROF_AIVECTORCORE_METRICS_MASK | PROF_MODEL_LOAD_MASK; return module; } @@ -691,8 +687,8 @@ Status ProfilingManager::ProfParseDeviceId(const std::map &config_para, - int32_t &device_num, vector &device_list) { +Status ProfilingManager::ProfParseParam(const std::map &config_para, int32_t &device_num, + vector &device_list) { #ifdef DAVINCI_SUPPORT_PROFILING // device num auto iter = config_para.find(kConfigNumsdev); @@ -726,8 +722,8 @@ Status ProfilingManager::ProfParseParam(const std::map return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfStartProfiling( - uint64_t module, const std::map &config_para) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status 
+ProfilingManager::ProfStartProfiling(uint64_t module, const std::map &config_para) { #ifdef DAVINCI_SUPPORT_PROFILING std::lock_guard lock(mutex_); int32_t device_num = 0; @@ -736,21 +732,23 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt GELOGE(FAILED, "Prof start parse param failed."); return FAILED; } - - auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); - if (device_id_ptr == nullptr) { - GELOGE(FAILED, "Prof start: device id ptr is null."); + auto *device_id = new (std::nothrow) uint32_t[device_num]; + if (device_id == nullptr) { + GELOGE(FAILED, "Prof start parse param failed."); return FAILED; } for (int32_t i = 0; i < device_num; i++) { - device_id_ptr[i] = static_cast(device_list[i]); + device_id[i] = static_cast(device_list[i]); } GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); - rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); + rtError_t rt_ret = rtProfilerStart(module, device_num, device_id); if (rt_ret != RT_ERROR_NONE) { + delete[] device_id; GELOGE(FAILED, "Runtime profiler config proc failed."); return FAILED; } + delete[] device_id; + device_id = nullptr; if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { for (int32_t i = 0; i < device_num; i++) { if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { @@ -768,8 +766,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfStopProfiling(uint64_t module, - const std::map &config_para) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status +ProfilingManager::ProfStopProfiling(uint64_t module, const std::map &config_para) { #ifdef DAVINCI_SUPPORT_PROFILING std::lock_guard lock(mutex_); int32_t device_num = 0; @@ -778,20 +776,23 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status 
ProfilingManager::ProfSt GELOGE(FAILED, "Prof stop parse param failed."); return FAILED; } - auto device_id_ptr = std::unique_ptr(new (std::nothrow) uint32_t[device_num]); - if (device_id_ptr == nullptr) { - GELOGE(FAILED, "Prof stop: device id ptr is null."); + auto *device_id = new (std::nothrow) uint32_t[device_num]; + if (device_id == nullptr) { + GELOGE(FAILED, "Prof stop parse param failed."); return FAILED; } for (int32_t i = 0; i < device_num; i++) { - device_id_ptr[i] = static_cast(device_list[i]); + device_id[i] = static_cast(device_list[i]); } GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); - rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); + rtError_t rt_ret = rtProfilerStop(module, device_num, device_id); if (rt_ret != RT_ERROR_NONE) { + delete[] device_id; GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); return FAILED; } + delete[] device_id; + device_id = nullptr; uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { for (int32_t i = 0; i < device_num; i++) { @@ -811,8 +812,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt return SUCCESS; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::UpdateDeviceIdModuleMap(string prof_type, - uint64_t module, const vector &device_list) { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::UpdateDeviceIdModuleMap( + string prof_type, uint64_t module, const vector &device_list) { #ifdef DAVINCI_SUPPORT_PROFILING if (prof_type == kProfStart) { for (uint32_t i = 0; i < device_list.size(); i++) { diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 8fb59216..a030efd3 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 
2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,7 +36,7 @@ using std::vector; using Json = nlohmann::json; namespace { - const std::string GE_PROFILING_MODULE = "Framework"; +const std::string GE_PROFILING_MODULE = "Framework"; } // namespace namespace ge { // register Plugin @@ -83,7 +83,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { bool ProfilingTrainingTraceOn() const { return is_training_trace_; } bool ProfilingModelLoadOn() const { return is_load_profiling_; } bool ProfilingModelExecuteOn() const; - bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern + bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern int32_t GetOpTraceIterNum() const { return op_trace_iter_num_; } void ReportProfilingData(const std::vector &task_desc_info, const std::vector &compute_graph_desc_info); @@ -93,14 +93,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { void ProfilingGraphDescInfo(const std::vector &compute_graph_desc_info, const int32_t &device_id); void SetProfilingConfig(const string &profiling_cfg); - vector GetProfilingDeviceId() const { return device_id_; } + vector GetProfilingDeviceId() const { return device_id_; } void PluginUnInit(const std::string &module) const; + private: ge::Status ParseFeaturesFromAclCfg(const Json &feature); ge::Status ProfParseParam(const std::map &config_para, int32_t &device_num, vector &device_list); - ge::Status ProfParseDeviceId(const std::map &config_para, - vector &device_list); + ge::Status ProfParseDeviceId(const std::map &config_para, vector &device_list); uint64_t GetProfilingModule(); void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector &device_list); bool is_load_profiling_ = false; @@ -121,7 +121,7 
@@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { string system_trace_conf_; string task_trace_conf_; const ProfilingEngineImpl engine_; - map device_id_module_map_; // key: device_id, value: profiling on module + map device_id_module_map_; // key: device_id, value: profiling on module std::mutex mutex_; }; } // namespace ge diff --git a/ge/common/properties_manager.cc b/ge/common/properties_manager.cc index 3ca5bd27..a4879460 100644 --- a/ge/common/properties_manager.cc +++ b/ge/common/properties_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -135,7 +135,7 @@ std::string PropertiesManager::Trim(const std::string &str) { // Get property value, if not found, return "" FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string PropertiesManager::GetPropertyValue( - const std::string &map_key) { + const std::string &map_key) { std::lock_guard lock(mutex_); auto iter = properties_map_.find(map_key); if (properties_map_.end() != iter) { @@ -166,14 +166,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void PropertiesManager::SetProp } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &PropertiesManager::GetDumpProperties( - uint64_t session_id) { + uint64_t session_id) { std::lock_guard lock(mutex_); // If session_id is not found in dump_properties_map_, operator[] will insert one. 
return dump_properties_map_[session_id]; } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void PropertiesManager::AddDumpProperties( - uint64_t session_id, const DumpProperties &dump_properties) { + uint64_t session_id, const DumpProperties &dump_properties) { std::lock_guard lock(mutex_); dump_properties_map_.emplace(session_id, dump_properties); } diff --git a/ge/common/properties_manager.h b/ge/common/properties_manager.h index 634113a8..9ba7f88e 100644 --- a/ge/common/properties_manager.h +++ b/ge/common/properties_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/proto/ge_ir.proto b/ge/common/proto/ge_ir.proto index e7bfe0cb..87886c84 100644 --- a/ge/common/proto/ge_ir.proto +++ b/ge/common/proto/ge_ir.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + syntax = "proto3"; package ge.proto; diff --git a/ge/common/proto/insert_op.proto b/ge/common/proto/insert_op.proto index bf918b20..a059e122 100644 --- a/ge/common/proto/insert_op.proto +++ b/ge/common/proto/insert_op.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package domi; @@ -45,9 +61,6 @@ message AippOpParams { // ±êʶ¶ÔÄ£Ð͵ĵڼ¸¸öÊäÈë×öAIPP´¦Àí£¬ÀýÈçÄ£ÐÍÓÐÁ½¸öÊäÈ룬ÐèÒª¶ÔµÚ2¸öÊäÈë×öAIPP£¬ÔòÅäÖÃrelated_input_rankΪ1¡£ uint32 related_input_rank = 2; - // related_input_name is optional and the top name of data node which inserts aipp - string related_input_name = 6; - // input_edge_idx²ÎÊýΪ¿ÉÑ¡£¬ÀàÐÍΪÕûÐÍ£¬ÅäÖ÷¶Î§Îª>=0¡£ // ÅäÖøòÎÊýµÄ×÷Óã¬ÔÚÓÚ¶ÔDataËã×Ó²»Í¬µÄÊä³ö×ö²»Í¬µÄAIPP´¦Àí£¬Èç¹û¸Ã²ÎÊýûÓÐÅäÖã¬Ä¬È϶Ôrelated_input_rankÖ¸¶¨µÄÄ£ÐÍÊäÈëµÄËùÓÐÊä³ö±ß×öAIPP¡£ // ÅäÖÃÖµ <= DataËã×ÓÊä³ö±ßµÄ¸öÊý¡£ diff --git a/ge/common/proto/om.proto b/ge/common/proto/om.proto index e15e5f80..dd992191 100644 --- a/ge/common/proto/om.proto +++ b/ge/common/proto/om.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + syntax = "proto3"; package domi; diff --git a/ge/common/proto/op_mapping_info.proto b/ge/common/proto/op_mapping_info.proto index e23b7ebe..7b84a115 100644 --- a/ge/common/proto/op_mapping_info.proto +++ b/ge/common/proto/op_mapping_info.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package aicpu.dump; diff --git a/ge/common/proto/task.proto b/ge/common/proto/task.proto index d0c09840..50ea061b 100644 --- a/ge/common/proto/task.proto +++ b/ge/common/proto/task.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. 
+/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + syntax = "proto3"; package domi; diff --git a/ge/common/singleton.h b/ge/common/singleton.h index 314e824e..1a347bfe 100755 --- a/ge/common/singleton.h +++ b/ge/common/singleton.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_COMMON_SINGLETON_H_ #define GE_COMMON_SINGLETON_H_ diff --git a/ge/common/tbe_kernel_store.cc b/ge/common/tbe_kernel_store.cc index 2fb9a04a..9acead2d 100755 --- a/ge/common/tbe_kernel_store.cc +++ b/ge/common/tbe_kernel_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,9 +20,7 @@ namespace ge { TBEKernelStore::TBEKernelStore() {} -void TBEKernelStore::AddTBEKernel(const TBEKernelPtr &kernel) { - AddKernel(kernel); -} +void TBEKernelStore::AddTBEKernel(const TBEKernelPtr &kernel) { AddKernel(kernel); } void TBEKernelStore::LoadTBEKernelBinToOpDesc(const std::shared_ptr &op_desc) const { if (op_desc != nullptr) { diff --git a/ge/common/tbe_kernel_store.h b/ge/common/tbe_kernel_store.h index 6304af50..ab1ab9b4 100755 --- a/ge/common/tbe_kernel_store.h +++ b/ge/common/tbe_kernel_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/thread_pool.cc b/ge/common/thread_pool.cc index dead0127..700892f2 100644 --- a/ge/common/thread_pool.cc +++ b/ge/common/thread_pool.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/common/thread_pool.h b/ge/common/thread_pool.h index e173618f..92157275 100755 --- a/ge/common/thread_pool.h +++ b/ge/common/thread_pool.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/common/types.cc b/ge/common/types.cc index 0d10f8b3..de293d34 100755 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,9 +49,9 @@ const std::string RTS_PROFILE_PATH = "RTS_PATH"; const std::string PROFILE_STOP_KEY = "stop"; const std::string PROFILE_STOP_VALUE = "enable"; const std::map PROFILE_COMPONENT_MAP{ - {"ome", OME_PROFILE}, - {"cce", CCE_PROFILE}, - {"runtime", RTS_PROFILE}, + {"ome", OME_PROFILE}, + {"cce", CCE_PROFILE}, + {"runtime", RTS_PROFILE}, }; const std::string PROFILE_CONFIG = "config"; @@ -384,7 +384,6 @@ REGISTER_OPTYPE_DEFINE(HCOMREDUCESCATTER, "HcomReduceScatter"); REGISTER_OPTYPE_DEFINE(HCOMSEND, "HcomSend"); REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead"); -REGISTER_OPTYPE_DEFINE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite"); REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign"); @@ -398,7 +397,6 @@ REGISTER_OPTYPE_DEFINE(MEMCPYASYNC, "MemcpyAsync"); REGISTER_OPTYPE_DEFINE(MEMCPYADDRASYNC, "MemcpyAddrAsync"); REGISTER_OPTYPE_DEFINE(STREAMMERGE, "StreamMerge"); REGISTER_OPTYPE_DEFINE(ENDGRAPH, "EndGraph"); -REGISTER_OPTYPE_DEFINE(MODELEXIT, "ModelExit"); REGISTER_OPTYPE_DEFINE(SEND, "Send"); REGISTER_OPTYPE_DEFINE(RECV, "Recv"); 
REGISTER_OPTYPE_DEFINE(ENDOFSEQUENCE, "EndOfSequence"); @@ -796,7 +794,7 @@ const uint32_t XRGB_CHN_NUM = 4; /// const bool DEFAULT_GLOBAL_POOLING = false; -const uint32_t MODEL_VERSION = 0x10000000; ///< Model version 1.0/// +const uint32_t MODEL_VERSION = 0x10000000; ///< Model version 1.0/// // Eltwise's input size const int ELTWISE_MIN_INPUT_SIZE = 2; diff --git a/ge/common/util.cc b/ge/common/util.cc index 2ddb4b2c..4adf3ebd 100644 --- a/ge/common/util.cc +++ b/ge/common/util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,7 +54,8 @@ const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. const int kWarningThreshold = 536870912 * 2; // 536870912 represent 512M /// The maximum length of the file. -const uint32_t kMaxFileSizeLimit = UINT32_MAX; // 4G for now +/// Based on the security coding specification and the current actual (protobuf) model size, it is determined as 2G-1 +const int kMaxFileSizeLimit = INT_MAX; const int kMaxBuffSize = 256; const char *const kPathValidReason = "The path can only contain 'a-z' 'A-Z' '0-9' '-' '.' '_' and chinese character"; constexpr uint32_t kMaxConfigFileByte = 10 * 1024 * 1024; @@ -117,20 +118,19 @@ long GetFileLength(const std::string &input_file) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return -1, "input_file path '%s' not valid", input_file.c_str()); unsigned long long file_length = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, - ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {input_file, strerror(errno)}); - return -1, "Open file[%s] failed. 
%s", input_file.c_str(), strerror(errno)); + mmGetFileSize(input_file.c_str(), &file_length) != EN_OK, + ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {input_file, strerror(errno)}); + return -1, "Open file[%s] failed. %s", input_file.c_str(), strerror(errno)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0), ErrorManager::GetInstance().ATCReportErrMessage("E19015", {"filepath"}, {input_file}); return -1, "File[%s] size is 0, not valid.", input_file.c_str()); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_length > kMaxFileSizeLimit, - ErrorManager::GetInstance().ATCReportErrMessage( - "E19016", {"filepath", "filesize", "maxlen"}, - {input_file, std::to_string(file_length), std::to_string(kMaxFileSizeLimit)}); - return -1, "File[%s] size %lld is out of limit: %d.", input_file.c_str(), file_length, - kMaxFileSizeLimit); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + file_length > kMaxFileSizeLimit, ErrorManager::GetInstance().ATCReportErrMessage( + "E19016", {"filepath", "filesize", "maxlen"}, + {input_file, std::to_string(file_length), std::to_string(kMaxFileSizeLimit)}); + return -1, "File[%s] size %lld is out of limit: %d.", input_file.c_str(), file_length, kMaxFileSizeLimit); return static_cast(file_length); } @@ -186,7 +186,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadBytesFromBinaryFile(co std::streamsize size = file.tellg(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((size <= 0), file.close(); return false, "file length <= 0, not valid."); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size > static_cast(kMaxFileSizeLimit), file.close(); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(size > kMaxFileSizeLimit, file.close(); return false, "file size %ld is out of limit: %d.", size, kMaxFileSizeLimit); file.seekg(0, std::ios::beg); // [no need to check value] @@ -263,7 +263,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromText(const ch std::string real_path = RealPath(file); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), 
ErrorManager::GetInstance().ATCReportErrMessage( - "E19000", {"path", "errmsg"}, {file, strerror(errno)}); + "E19000", {"path", "errmsg"}, {file, strerror(errno)}); return false, "Path[%s]'s realpath is empty, errmsg[%s]", file, strerror(errno)); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetFileLength(real_path) == -1, return false, "file size not valid."); @@ -299,12 +299,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromMem(const cha google::protobuf::io::IstreamInputStream input(&fs); bool ret = google::protobuf::TextFormat::Parse(&input, message); GE_IF_BOOL_EXEC( - !ret, GELOGE(ret, "Call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); + !ret, GELOGE(ret, "Call [google::protobuf::TextFormat::Parse] func ret fail, please check your text file.")); return ret; } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() { +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestap() { struct timeval tv {}; int ret = gettimeofday(&tv, nullptr); GE_LOGE_IF(ret != 0, "Func gettimeofday may failed: ret=%d", ret); @@ -348,9 +348,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int6 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char *path) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path == nullptr, return "", "path pointer is NULL."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - strlen(path) >= PATH_MAX, - ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); - return "", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX); + strlen(path) >= PATH_MAX, + ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); + return "", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX); // Nullptr is returned when the path does not exist or there is no permission // Return absolute path when path is 
accessible @@ -386,10 +386,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - !ValidateStr(real_path, mode), - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, real_path, kPathValidReason}); - return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); + !ValidateStr(real_path, mode), + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {atc_param, real_path, kPathValidReason}); + return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); // The absolute path points to a file that is not readable if (access(real_path.c_str(), R_OK) != 0) { @@ -411,9 +411,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const } GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - strlen(file_path.c_str()) >= PATH_MAX, ErrorManager::GetInstance().ATCReportErrMessage( - "E19002", {"filepath", "size"}, {file_path, std::to_string(PATH_MAX)}); - return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), PATH_MAX); + strlen(file_path.c_str()) >= PATH_MAX, ErrorManager::GetInstance().ATCReportErrMessage( + "E19002", {"filepath", "size"}, {file_path, std::to_string(PATH_MAX)}); + return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), PATH_MAX); // A regular matching expression to verify the validity of the input file path // Path section: Support upper and lower case letters, numbers dots(.) 
chinese and underscores @@ -421,10 +421,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - !ValidateStr(file_path, mode), - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, file_path, kPathValidReason}); - return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason); + !ValidateStr(file_path, mode), + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {atc_param, file_path, kPathValidReason}); + return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason); std::string real_path = RealPath(file_path.c_str()); // Can get absolute path (file exists) diff --git a/ge/engine_manager/dnnengine_manager.cc b/ge/engine_manager/dnnengine_manager.cc index f8d58208..3389e1b9 100644 --- a/ge/engine_manager/dnnengine_manager.cc +++ b/ge/engine_manager/dnnengine_manager.cc @@ -216,19 +216,19 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { if (kernel_info_store != kernel_map.end()) { std::string unsupported_reason; // It will be replaced by engine' checksupport - uint64_t start_time = GetCurrentTimestamp(); + uint64_t start_time = GetCurrentTimestap(); if (kernel_info_store->second->CheckSupported(op_desc, unsupported_reason)) { - checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; + checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time; op_desc->SetOpEngineName(it.engine); op_desc->SetOpKernelLibName(kernel_name); // set attrs for taking information when load txt to graph object - (void) AttrUtils::SetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, it.engine); - (void) AttrUtils::SetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name); + (void)AttrUtils::SetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, 
it.engine); + (void)AttrUtils::SetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name); GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s to op_desc %s", kernel_name.c_str(), it.engine.c_str(), op_desc->GetName().c_str()); return it.engine; } else { - checksupport_cost_[kernel_name] += GetCurrentTimestamp() - start_time; + checksupport_cost_[kernel_name] += GetCurrentTimestap() - start_time; bool is_custom_op = false; if ((ge::AttrUtils::GetBool(op_desc, kCustomOpFlag, is_custom_op)) && is_custom_op) { ErrorManager::GetInstance().ATCReportErrMessage("E13001", {"kernelname", "optype", "opname"}, @@ -237,8 +237,9 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { "The custom operator registered by the user does not support the logic function delivered by this " "network. Check support failed, kernel_name is %s, op type is %s, op name is %s", kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); - std::string error_info = "The custom operator registered by the user does not support the logic function" - "delivered by this network"; + std::string error_info = + "The custom operator registered by the user does not support the logic function" + "delivered by this network"; return ""; } unsupported_reasons.emplace(kernel_name, unsupported_reason); @@ -250,9 +251,9 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { } } else { GELOGW( - "DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s," - "op type is %s, op name is %s", - kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); + "DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s," + "op type is %s, op name is %s", + kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str()); } } @@ -260,19 +261,19 @@ std::string DNNEngineManager::GetDNNEngineName(const ge::NodePtr &node_ptr) { string reason; for (const auto 
&it : unsupported_reasons) { reason += it.first + ":" + it.second + ";"; - ErrorManager::GetInstance().ATCReportErrMessage( - "E13002", {"optype", "opskernel", "reason"}, {op_desc->GetType(), it.first, it.second}); + ErrorManager::GetInstance().ATCReportErrMessage("E13002", {"optype", "opskernel", "reason"}, + {op_desc->GetType(), it.first, it.second}); GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "GetDNNEngineName:Op type %s of ops kernel %s is unsupported, reason:%s", op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str()); } - analyzer::DataInfo analyze_info{root_graph->GetSessionID(), root_graph->GetGraphID(), - analyzer::CHECKSUPPORT, node_ptr, reason}; + analyzer::DataInfo analyze_info{root_graph->GetSessionID(), root_graph->GetGraphID(), analyzer::CHECKSUPPORT, + node_ptr, reason}; // do not change original process (void)Analyzer::GetInstance()->DoAnalyze(analyze_info); - ErrorManager::GetInstance().ATCReportErrMessage( - "E13003", {"opname", "optype"}, {op_desc->GetName(), op_desc->GetType()}); + ErrorManager::GetInstance().ATCReportErrMessage("E13003", {"opname", "optype"}, + {op_desc->GetName(), op_desc->GetType()}); GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "Can't find any supported ops kernel and engine of %s, type is %s", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return ""; @@ -284,13 +285,13 @@ std::string DNNEngineManager::GetHostCpuEngineName(const std::vector &op if ((it.engine == kHostCpuEngineName) && (it.opKernelLib == kHostCpuOpKernelLibName)) { op_desc->SetOpEngineName(kHostCpuEngineName); op_desc->SetOpKernelLibName(kHostCpuOpKernelLibName); - GELOGI("DNNEngineManager: Set OpKernelLibName %s and OpEngineName %s to %s", - kHostCpuOpKernelLibName, kHostCpuEngineName, op_desc->GetName().c_str()); + GELOGI("DNNEngineManager: Set OpKernelLibName %s and OpEngineName %s to %s", kHostCpuOpKernelLibName, + kHostCpuEngineName, op_desc->GetName().c_str()); return kHostCpuEngineName; } } - GELOGE(FAILED, "DNNEngineManager: HostCpuEngine not 
support [%s, %s].", - op_desc->GetName().c_str(), op_desc->GetType().c_str()); + GELOGE(FAILED, "DNNEngineManager: HostCpuEngine not support [%s, %s].", op_desc->GetName().c_str(), + op_desc->GetType().c_str()); return ""; } diff --git a/ge/engine_manager/engine_conf.json b/ge/engine_manager/engine_conf.json index ad43c9ab..82360562 100755 --- a/ge/engine_manager/engine_conf.json +++ b/ge/engine_manager/engine_conf.json @@ -42,13 +42,6 @@ "attach": true }, { - "id": "DNN_VM_AICPU_ASCEND", - "name": "AICPU_ASCEND", - "independent": false, - "skip_assign_stream": false, - "attach": true - }, - { "id": "DNN_HCCL", "name": "HCCL", "independent": true, diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 12b726ab..0a247142 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,7 +38,6 @@ #include "single_op/single_op_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/load/new_model_manager/davinci_model.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" using std::string; using std::vector; @@ -184,8 +183,7 @@ bool IsDynamicImageSizeMatchModel(uint64_t image_height, uint64_t image_width, return false; } -bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims, - const vector> &batch_info) { +bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims, const vector> &batch_info) { if (batch_info.empty()) { GELOGE(ge::FAILED, "Dynamic batch info is empty."); return false; @@ -194,8 +192,8 @@ bool IsDynmaicDimsSizeMatchModel(const vector cur_dynamic_dims, bool find_match = false; for (auto resolution : batch_info) { if (cur_dynamic_dims.size() != resolution.size()) { - GELOGE(ge::FAILED, "Cur dynamic dims param num is %zu, current resolution size is %zu.", - cur_dynamic_dims.size(), resolution.size()); + GELOGE(ge::FAILED, "Cur dynamic dims param num is %zu, current resolution size is %zu.", cur_dynamic_dims.size(), + resolution.size()); return false; } bool flag = true; @@ -243,16 +241,12 @@ Status GeExecutor::Initialize() { } std::vector mem_type(1, RT_MEMORY_HBM); - mem_type.push_back(RT_MEMORY_P2P_DDR); auto ret = MemManager::Instance().Initialize(mem_type); if (ret != SUCCESS) { GELOGE(ret, "Memory Manager init failed."); return ret; } - GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().Initialize({}, false), - "Failed to initialize OpsKernelBuilders"); - // Start profiling Options profiling_options; profiling_options.device_id = 0; @@ -271,8 +265,6 @@ Status GeExecutor::Finalize() { return ge::SUCCESS; } - (void) OpsKernelBuilderManager::Instance().Finalize(); - // Stop profiling if (ProfilingManager::Instance().ProfilingOn()) { ProfilingManager::Instance().StopProfiling(); @@ -290,14 +282,11 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad return 
PARAM_INVALID; } - uint64_t size = sizeof(uint32_t); + uint64_t size = sizeof(uint64_t); if (length < size) { GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, size); return PARAM_INVALID; } - if (length >= sizeof(uint64_t)) { - size = sizeof(uint64_t); - } // Verify whether the input dynamic batch matches the model gear std::vector> batch_info; @@ -335,15 +324,12 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad return PARAM_INVALID; } - uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint32_t); + uint64_t dynamic_input_size = kDynamicImageSizeInputSize * sizeof(uint64_t); if (length < dynamic_input_size) { GELOGE(PARAM_INVALID, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); return PARAM_INVALID; } - uint64_t size = sizeof(uint32_t); - if (length >= kDynamicImageSizeInputSize * sizeof(uint64_t)) { - size = sizeof(uint64_t); - } + // Verify whether the input dynamic resolution matches the model gear std::vector> batch_info; std::vector batch_num{image_height, image_width}; @@ -364,19 +350,18 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad GELOGE(ret, "Set dynamic size failed"); return ret; } - - // Memcpy dynamic resolution height from host to device + // Memcpy dynamic resolution height from host to device rtError_t rt_ret = - rtMemcpy(dynamic_input_addr, size, &image_height, size, RT_MEMCPY_HOST_TO_DEVICE); + rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "memcpy dynamic resolution input data failed! 
ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } - uint64_t remain_size = length - size; + uint64_t remain_size = length - sizeof(uint64_t); // Memcpy dynamic resolution width from host to device - if (rtMemcpy(reinterpret_cast(reinterpret_cast(dynamic_input_addr) + size), - remain_size, &image_width, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { + if (rtMemcpy(reinterpret_cast(reinterpret_cast(dynamic_input_addr) + sizeof(uint64_t)), + remain_size, &image_width, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { GELOGE(FAILED, "memcpy dynamic resolution input data failed!"); return FAILED; } @@ -416,19 +401,16 @@ Status GeExecutor::SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, u } size_t dynamic_dim_num = cur_dynamic_dims.size(); - uint64_t dynamic_input_size = static_cast(dynamic_dim_num * sizeof(uint32_t)); + uint64_t dynamic_input_size = static_cast(dynamic_dim_num * sizeof(uint64_t)); if (length < dynamic_input_size) { GELOGE(FAILED, "Dynamic input size [%lu] is less than [%lu]!", length, dynamic_input_size); return FAILED; } - uint64_t size = sizeof(uint32_t); - if (length >= dynamic_dim_num * sizeof(uint64_t)) { - size = sizeof(uint64_t); - } for (uint32_t i = 0; i < dynamic_dim_num; ++i) { // Memcpy dynamic dim[i] from host to device - if (rtMemcpy(reinterpret_cast(reinterpret_cast(dynamic_input_addr) + size * i), - length - size * i, &cur_dynamic_dims[i], size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { + if (rtMemcpy(reinterpret_cast(reinterpret_cast(dynamic_input_addr) + sizeof(uint64_t) * i), + length - sizeof(uint64_t) * i, &cur_dynamic_dims[i], sizeof(uint64_t), + RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { GELOGE(FAILED, "memcpy dynamic resolution input data failed!"); return FAILED; } @@ -463,17 +445,17 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector & } } } - if (dynamic_dims.size() != all_data_dims.size()){ - GELOGE(FAILED, "Dynamic input size [%lu] is not equal with all 
data dims size [%lu]!", - dynamic_dims.size(), all_data_dims.size()); + if (dynamic_dims.size() != all_data_dims.size()) { + GELOGE(FAILED, "Dynamic input size [%lu] is not equal with all data dims size [%lu]!", dynamic_dims.size(), + all_data_dims.size()); return FAILED; } for (std::size_t i = 0; i < all_data_dims.size(); ++i) { if (all_data_dims[i] < 0) { cur_dynamic_dims.push_back(dynamic_dims[i]); } else if (static_cast(all_data_dims[i]) != dynamic_dims[i]) { - GELOGE(PARAM_INVALID, "Static dims should be same, index: %zu value: %d should be %d", - i, dynamic_dims[i], all_data_dims[i]); + GELOGE(PARAM_INVALID, "Static dims should be same, index: %zu value: %d should be %d", i, dynamic_dims[i], + all_data_dims[i]); return PARAM_INVALID; } } @@ -510,9 +492,9 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add uint64_t real_aippParms_size = sizeof(kAippDynamicPara) - sizeof(kAippDynamicBatchPara); uint64_t struct_len = batch_num * sizeof(kAippDynamicBatchPara) + real_aippParms_size; GELOGI( - "Get acl input dynamic aipp data, model_id is %u, length is %lu," - "batch num is %lu, struct_len is %lu", - model_id, length, batch_num, struct_len); + "Get acl input dynamic aipp data, model_id is %u, length is %lu," + "batch num is %lu, struct_len is %lu", + model_id, length, batch_num, struct_len); if (struct_len > length) { GELOGE(PARAM_INVALID, "input dynamic aipp param len [%lu] is larger than aipp_data size [%lu]", struct_len, length); return PARAM_INVALID; @@ -1058,14 +1040,6 @@ ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector(rt_err)); - return RT_FAILED; - } - ModelManager::GetInstance()->ClearAICPUSo(rt_cur_ctx); return SingleOpManager::GetInstance().ReleaseResource(stream); } @@ -1139,7 +1113,7 @@ Status GeExecutor::SetDump(const DumpConfig &dump_config) { GELOGE(ret, "Set dump conf failed"); return ret; } - GELOGI("Set dump config successfully"); + GELOGI("Set dump config succ."); return SUCCESS; } } // 
namespace ge diff --git a/ge/executor/module.mk b/ge/executor/module.mk index a543f36b..1c3efe4c 100755 --- a/ge/executor/module.mk +++ b/ge/executor/module.mk @@ -48,10 +48,8 @@ local_ge_executor_src_files := \ ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - ../graph/load/new_model_manager/task_info/model_exit_task_info.cc \ ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ - ../opskernel_manager/ops_kernel_builder_manager.cc \ ../single_op/single_op_manager.cc \ ../single_op/single_op_model.cc \ ../single_op/single_op.cc \ @@ -76,7 +74,6 @@ local_ge_executor_c_include := \ $(TOPDIR)inc/framework \ $(TOPDIR)inc \ $(LOCAL_PATH)/../ \ - $(TOPDIR)graphengine/ge \ $(TOPDIR)libc_sec/include \ third_party/protobuf/include \ third_party/json/include \ @@ -108,12 +105,7 @@ LOCAL_SRC_FILES := $(local_ge_executor_src_files) LOCAL_C_INCLUDES := $(local_ge_executor_c_include) LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library) - -LOCAL_SHARED_LIBRARIES += libascend_hal - -LOCAL_STATIC_LIBRARIES := \ - libmsprofiler \ - +LOCAL_STATIC_LIBRARIES := libmsprofiler ifeq ($(device_os),android) LOCAL_LDFLAGS += -ldl LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog @@ -150,10 +142,9 @@ LOCAL_SHARED_LIBRARIES := \ libregister \ libmsprof \ liberror_manager \ - stub/libascend_hal \ + stub/libascend_hal -LOCAL_STATIC_LIBRARIES := \ - libmsprofiler \ +LOCAL_STATIC_LIBRARIES := libmsprofiler LOCAL_LDFLAGS += $(local_ge_executor_ldflags) diff --git a/ge/executor/proto/dump_task.proto b/ge/executor/proto/dump_task.proto index b1e346cd..ecdf4792 100644 --- a/ge/executor/proto/dump_task.proto +++ b/ge/executor/proto/dump_task.proto @@ -1,3 +1,19 @@ 
+/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package toolkit.dumpdata; diff --git a/ge/executor/proto/ge_ir.proto b/ge/executor/proto/ge_ir.proto index e7bfe0cb..87886c84 100644 --- a/ge/executor/proto/ge_ir.proto +++ b/ge/executor/proto/ge_ir.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package ge.proto; diff --git a/ge/executor/proto/insert_op.proto b/ge/executor/proto/insert_op.proto index bf918b20..a059e122 100644 --- a/ge/executor/proto/insert_op.proto +++ b/ge/executor/proto/insert_op.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package domi; @@ -45,9 +61,6 @@ message AippOpParams { // ±êʶ¶ÔÄ£Ð͵ĵڼ¸¸öÊäÈë×öAIPP´¦Àí£¬ÀýÈçÄ£ÐÍÓÐÁ½¸öÊäÈ룬ÐèÒª¶ÔµÚ2¸öÊäÈë×öAIPP£¬ÔòÅäÖÃrelated_input_rankΪ1¡£ uint32 related_input_rank = 2; - // related_input_name is optional and the top name of data node which inserts aipp - string related_input_name = 6; - // input_edge_idx²ÎÊýΪ¿ÉÑ¡£¬ÀàÐÍΪÕûÐÍ£¬ÅäÖ÷¶Î§Îª>=0¡£ // ÅäÖøòÎÊýµÄ×÷Óã¬ÔÚÓÚ¶ÔDataËã×Ó²»Í¬µÄÊä³ö×ö²»Í¬µÄAIPP´¦Àí£¬Èç¹û¸Ã²ÎÊýûÓÐÅäÖã¬Ä¬È϶Ôrelated_input_rankÖ¸¶¨µÄÄ£ÐÍÊäÈëµÄËùÓÐÊä³ö±ß×öAIPP¡£ // ÅäÖÃÖµ <= DataËã×ÓÊä³ö±ßµÄ¸öÊý¡£ diff --git a/ge/executor/proto/om.proto b/ge/executor/proto/om.proto index e15e5f80..dd992191 100644 --- a/ge/executor/proto/om.proto +++ b/ge/executor/proto/om.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + syntax = "proto3"; package domi; diff --git a/ge/executor/proto/op_mapping_info.proto b/ge/executor/proto/op_mapping_info.proto index e23b7ebe..7b84a115 100644 --- a/ge/executor/proto/op_mapping_info.proto +++ b/ge/executor/proto/op_mapping_info.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package aicpu.dump; diff --git a/ge/executor/proto/task.proto b/ge/executor/proto/task.proto index d0c09840..50ea061b 100644 --- a/ge/executor/proto/task.proto +++ b/ge/executor/proto/task.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + syntax = "proto3"; package domi; diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index 667b2898..621e42c5 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -42,7 +42,6 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \ session/session_manager.cc \ engine_manager/dnnengine_manager.cc \ opskernel_manager/ops_kernel_manager.cc \ - opskernel_manager/ops_kernel_builder_manager.cc \ graph/manager/graph_manager.cc \ graph/manager/graph_manager_utils.cc \ graph/manager/graph_context.cc \ @@ -58,11 +57,9 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \ graph/partition/engine_place.cc \ graph/partition/graph_partition.cc \ graph/partition/dynamic_shape_partition.cc \ - graph/partition/stage_partition.cc \ generator/ge_generator.cc \ generator/generator_api.cc \ graph/manager/graph_var_manager.cc \ - graph/manager/host_mem_manager.cc \ graph/manager/rdma_pool_allocator.cc \ graph/manager/graph_mem_allocator.cc \ graph/manager/graph_caching_allocator.cc \ @@ -109,7 +106,6 @@ OMG_HOST_SRC_FILES := \ graph/passes/atomic_addr_clean_pass.cc \ graph/passes/mark_same_addr_pass.cc \ graph/passes/mark_graph_unknown_status_pass.cc \ - graph/passes/mark_agnostic_pass.cc \ graph/common/omg_util.cc \ 
graph/common/bcast.cc \ graph/common/local_context.cc \ @@ -182,7 +178,6 @@ OMG_HOST_SRC_FILES := \ graph/passes/multi_batch_pass.cc \ graph/passes/multi_batch_clone_pass.cc \ graph/passes/subexpression_migration_pass.cc \ - graph/passes/subgraph_const_migration_pass.cc \ graph/passes/unused_args_clean_pass.cc \ graph/passes/next_iteration_pass.cc \ graph/passes/control_trigger_pass.cc \ @@ -248,7 +243,6 @@ OME_HOST_SRC_FILES := \ graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ single_op/task/op_task.cc \ @@ -349,7 +343,6 @@ DEVICE_LOCAL_C_INCLUDES := \ $(TOPDIR)inc/runtime \ $(TOPDIR)ops/built-in/op_proto/inc \ $(TOPDIR)framework/domi \ - $(TOPDIR)graphengine/ge \ $(TOPDIR)toolchain/ide/ide-daemon/external \ third_party/json/include \ third_party/protobuf/include \ diff --git a/ge/ge_local_engine/common/constant/constant.h b/ge/ge_local_engine/common/constant/constant.h index 42084f2a..c517d267 100644 --- a/ge/ge_local_engine/common/constant/constant.h +++ b/ge/ge_local_engine/common/constant/constant.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/ge_local_engine/engine/ge_local_engine.cc b/ge/ge_local_engine/engine/ge_local_engine.cc index 58f24d45..9525e81b 100755 --- a/ge/ge_local_engine/engine/ge_local_engine.cc +++ b/ge/ge_local_engine/engine/ge_local_engine.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/engine/ge_local_engine.h b/ge/ge_local_engine/engine/ge_local_engine.h index 65dfe65b..e5f9a24d 100644 --- a/ge/ge_local_engine/engine/ge_local_engine.h +++ b/ge/ge_local_engine/engine/ge_local_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index 36b0eca4..fc46385b 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "host_cpu_engine.h" #include #include "graph/common/omg_util.h" @@ -27,72 +28,70 @@ #include "common/math/math_util.h" namespace { -#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ - case (DTYPE): { \ - GeTensorPtr ge_tensor = nullptr; \ - if (need_create_flag) { \ - GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ - std::unique_ptr buf(new (std::nothrow) TYPE[data_num]()); \ - if (buf == nullptr) { \ - GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ - static_cast(sizeof(TYPE) * data_num)); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor = MakeShared(out_desc); \ - GE_CHECK_NOTNULL(ge_tensor); \ - GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\ - if (ge_tensor->SetData(reinterpret_cast(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ - GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ - ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ - outputs.emplace_back(ge_tensor); \ - } else { \ - ge_tensor = outputs[i]; \ - GE_CHECK_NOTNULL(ge_tensor); \ - GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \ - reinterpret_cast(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \ - } \ - auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ - auto tensor_name = op_desc->GetOutputNameByIndex(i); \ - GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ - op_desc->GetName().c_str(), i); \ - GELOGD("Successfully inserted output tensor. 
node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ - op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ - named_outputs.emplace(tensor_name, tensor); \ - break; \ +#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ + case (DTYPE): { \ + GeTensorPtr ge_tensor = nullptr; \ + if (need_create_flag) { \ + int64_t num_size = out_desc.GetShape().IsScalar() ? 1 : out_desc.GetShape().GetShapeSize(); \ + if (out_desc.GetShape().IsUnknownShape()) { \ + std::vector> range; \ + if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) { \ + GELOGE(INTERNAL_ERROR, "Get shape range failed, node:%s", op_desc->GetName().c_str()); \ + return INTERNAL_ERROR; \ + } \ + int64_t max_range_size = 1; \ + for (const auto &item : range) { \ + FMK_INT64_MULCHECK(max_range_size, item.second); \ + max_range_size *= item.second; \ + } \ + num_size = max_range_size; \ + } \ + if (num_size < 0) { \ + GELOGE(INTERNAL_ERROR, "node:%s, get size for output %zu failed, num=%lld", op_desc->GetName().c_str(), i, \ + num_size); \ + return INTERNAL_ERROR; \ + } \ + auto data_num = static_cast(num_size); \ + GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ + std::unique_ptr buf(new (std::nothrow) TYPE[data_num]()); \ + if (buf == nullptr) { \ + GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ + static_cast(sizeof(TYPE) * data_num)); \ + return MEMALLOC_FAILED; \ + } \ + ge_tensor = MakeShared(out_desc); \ + GE_CHECK_NOTNULL(ge_tensor); \ + GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, \ + data_num * sizeof(TYPE)); \ + if (ge_tensor->SetData(reinterpret_cast(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ + GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ + return MEMALLOC_FAILED; \ + } \ + ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ + 
ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ + outputs.emplace_back(ge_tensor); \ + } else { \ + ge_tensor = outputs[i]; \ + GE_CHECK_NOTNULL(ge_tensor); \ + GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \ + reinterpret_cast(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \ + } \ + auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ + auto tensor_name = op_desc->GetOutputNameByIndex(i); \ + GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ + op_desc->GetName().c_str(), i); \ + GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ + op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ + named_outputs.emplace(tensor_name, tensor); \ + break; \ } -} +} // namespace namespace ge { namespace { const char *kEnvKeyOppPath = "ASCEND_OPP_PATH"; const char *kHostCpuLibRelativePath = "/op_impl/built-in/host_cpu"; -} - -Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) { - int64_t num_size = out_desc.GetShape().IsScalar() ? 
1 : out_desc.GetShape().GetShapeSize(); - if (out_desc.GetShape().IsUnknownShape()) { - std::vector> range; - if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get shape range failed."); - return INTERNAL_ERROR; - } - int64_t max_range_size = 1; - for (const auto& item : range) { - FMK_INT64_MULCHECK(max_range_size, item.second); - max_range_size *= item.second; - } - num_size = max_range_size; - } - if (num_size < 0) { - GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%lld.", num_size); - return INTERNAL_ERROR; - } - data_num = static_cast(num_size); - return SUCCESS; -} +} // namespace void HostCpuEngine::CloseSo() { for (auto handle : lib_handles_) { @@ -106,24 +105,22 @@ void HostCpuEngine::CloseSo() { ge::Status HostCpuEngine::Initialize() { std::lock_guard lock(mu_); if (initialized_) { - GELOGI("HostCpuEngine is already initialized"); - return SUCCESS; + GELOGI("HostCpuEngine is already initialized"); + return SUCCESS; } std::string lib_dir; GE_CHK_STATUS_RET_NOLOG(GetLibPath(lib_dir)); std::vector so_paths; if (ListSoFiles(lib_dir, so_paths) == SUCCESS) { - (void) LoadLibs(so_paths); + (void)LoadLibs(so_paths); } initialized_ = true; return SUCCESS; } -void HostCpuEngine::Finalize() { - GELOGI("start HostCpuEngine::Finalize"); -} +void HostCpuEngine::Finalize() { GELOGI("start HostCpuEngine::Finalize"); } bool HostCpuEngine::CheckSupported(const string &op_type) { return OpKernelRegistry::GetInstance().IsRegistered(op_type); @@ -145,14 +142,11 @@ Status HostCpuEngine::FindOpKernel(const ge::NodePtr &node, std::unique_ptr &inputs, +Status HostCpuEngine::PrepareInputs(const ge::ConstOpDescPtr &op_desc, const vector &inputs, map &named_inputs) { auto num_inputs = op_desc->GetInputsSize(); if (num_inputs != inputs.size()) { - GELOGE(PARAM_INVALID, - "Mismatching input sizes. op_desc has %zu input(s), but given %zu", - num_inputs, + GELOGE(PARAM_INVALID, "Mismatching input sizes. 
op_desc has %zu input(s), but given %zu", num_inputs, inputs.size()); return PARAM_INVALID; } @@ -162,34 +156,26 @@ Status HostCpuEngine::PrepareInputs(const ge::ConstOpDescPtr &op_desc, GE_CHECK_NOTNULL(ge_tensor); auto tensor = TensorAdapter::AsTensor(*ge_tensor); auto tensor_name = op_desc->GetInputNameByIndex(i); - GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), - "Failed to get input name. node = %s, index = %zu", op_desc->GetName().c_str(), i); - GELOGD("Successfully inserted input tensor. node = %s, index = %zu, input name = %s", - op_desc->GetName().c_str(), i, tensor_name.c_str()); + GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get input name. node = %s, index = %zu", + op_desc->GetName().c_str(), i); + GELOGD("Successfully inserted input tensor. node = %s, index = %zu, input name = %s", op_desc->GetName().c_str(), i, + tensor_name.c_str()); named_inputs.emplace(tensor_name, tensor); } return SUCCESS; } -Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, - vector &outputs, +Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, vector &outputs, map &named_outputs) { if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { - GELOGW("size of outputs not match, size of outputs = %zu, exactly output_num=%zu.", - outputs.size(), op_desc->GetOutputsSize()); + GELOGW("size of ouputs not match, size of outputs = %zu, exactly output_num=%zu.", outputs.size(), + op_desc->GetOutputsSize()); outputs.clear(); } bool need_create_flag = (outputs.size() != op_desc->GetOutputsSize()); for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { const auto &out_desc = op_desc->GetOutputDesc(i); - uint64_t data_num = 0; - if (need_create_flag) { - if (GetDataNumber(out_desc, data_num) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "node:%s, get size for output %zu failed", op_desc->GetName().c_str(), i); - return INTERNAL_ERROR; - } - } switch (out_desc.GetDataType()) { CREATE_OUTPUT_CASE(DT_BOOL, bool) 
CREATE_OUTPUT_CASE(DT_INT8, int8_t) @@ -213,8 +199,7 @@ Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, return SUCCESS; } -Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, - HostCpuOp &op_kernel, +Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_kernel, map &named_inputs, map &named_outputs) { GELOGD("Run operation on host cpu, op name: %s", op_desc->GetName().c_str()); @@ -274,9 +259,7 @@ ge::Status HostCpuEngine::GetLibPath(std::string &lib_path) { return SUCCESS; } -static int RegularFileFilterFn(const mmDirent *entry) { - return entry->d_type == DT_REG; -} +static int RegularFileFilterFn(const mmDirent *entry) { return entry->d_type == DT_REG; } Status HostCpuEngine::ListSoFiles(const std::string &base_dir, std::vector &names) { std::string real_path = base_dir; @@ -329,7 +312,7 @@ Status HostCpuEngine::LoadLib(const std::string &lib_path) { return INTERNAL_ERROR; } - auto initialize = (Status (*)(const HostCpuContext &))dlsym(handle, "Initialize"); + auto initialize = (Status(*)(const HostCpuContext &))dlsym(handle, "Initialize"); if (initialize != nullptr) { GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str()); if (initialize(HostCpuContext()) != SUCCESS) { @@ -352,4 +335,4 @@ Status HostCpuEngine::GetRealPath(std::string &path) { path = real_path; return SUCCESS; } -} // namespace ge +} // namespace ge diff --git a/ge/ge_local_engine/engine/host_cpu_engine.h b/ge/ge_local_engine/engine/host_cpu_engine.h index cc6b578c..1987138d 100644 --- a/ge/ge_local_engine/engine/host_cpu_engine.h +++ b/ge/ge_local_engine/engine/host_cpu_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ @@ -20,7 +21,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "graph/node.h" #include "graph/operator.h" -#include "register/register.h" +#include "inc/register/register.h" namespace ge { class HostCpuEngine { diff --git a/ge/ge_local_engine/module.mk b/ge/ge_local_engine/module.mk index a0247da7..3307f780 100755 --- a/ge/ge_local_engine/module.mk +++ b/ge/ge_local_engine/module.mk @@ -8,12 +8,6 @@ local_lib_src_files := engine/ge_local_engine.cc \ ops_kernel_store/op/ge_deleted_op.cc \ ops_kernel_store/op/no_op.cc \ -ops_kernel_builder_src_files := ops_kernel_store/ge_local_ops_kernel_builder.cc \ - ops_kernel_store/op/op_factory.cc \ - ops_kernel_store/op/op.cc \ - ops_kernel_store/op/ge_deleted_op.cc \ - ops_kernel_store/op/no_op.cc \ - local_lib_inc_path := proto/task.proto \ ${LOCAL_PATH} \ ${TOPDIR}inc \ @@ -23,7 +17,6 @@ local_lib_inc_path := proto/task.proto \ ${TOPDIR}third_party/protobuf/include \ ${TOPDIR}inc/framework \ $(TOPDIR)framework/domi \ - $(TOPDIR)graphengine/ge \ #compiler for host include $(CLEAR_VARS) @@ -64,84 +57,3 @@ LOCAL_SRC_FILES := $(local_lib_src_files) LOCAL_C_INCLUDES := $(local_lib_inc_path) include ${BUILD_HOST_SHARED_LIBRARY} - -#compiler for libge_local_opskernel_builder.so -include $(CLEAR_VARS) -LOCAL_MODULE := libge_local_opskernel_builder -LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 -LOCAL_LDFLAGS := - -LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ - libc_sec \ - libslog \ - libregister \ - libgraph - -LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) - -LOCAL_C_INCLUDES := $(local_lib_inc_path) - -include ${BUILD_HOST_SHARED_LIBRARY} - - -#compiler for libge_local_opskernel_builder.so in atc -include $(CLEAR_VARS) -LOCAL_MODULE := 
atclib/libge_local_opskernel_builder -LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 -LOCAL_LDFLAGS := - -LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ - libc_sec \ - libslog \ - libregister \ - libgraph - -LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) - -LOCAL_C_INCLUDES := $(local_lib_inc_path) - -include ${BUILD_HOST_SHARED_LIBRARY} - -#compiler for libge_local_opskernel_builder.a -include $(CLEAR_VARS) -LOCAL_MODULE := libge_local_opskernel_builder -LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 -LOCAL_LDFLAGS := - -LOCAL_STATIC_LIBRARIES := libprotobuf \ - libregister \ - libgraph \ - -LOCAL_SHARED_LIBRARIES := libc_sec \ - libslog \ - -LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) - -LOCAL_C_INCLUDES := $(local_lib_inc_path) - -include ${BUILD_HOST_STATIC_LIBRARY} - -#compiler for device libge_local_opskernel_builder.a -include $(CLEAR_VARS) -LOCAL_MODULE := libge_local_opskernel_builder -LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 -LOCAL_LDFLAGS := - -LOCAL_STATIC_LIBRARIES := libprotobuf \ - libregister \ - libgraph \ - -LOCAL_SHARED_LIBRARIES := libc_sec \ - libslog \ - -LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) - -LOCAL_C_INCLUDES := $(local_lib_inc_path) - -include ${BUILD_STATIC_LIBRARY} diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc deleted file mode 100644 index 9496d0fc..00000000 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc +++ /dev/null @@ -1,181 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ge_local_ops_kernel_builder.h" -#include -#include "common/ge_inner_error_codes.h" -#include "common/ge/ge_util.h" -#include "framework/common/debug/ge_log.h" -#include "graph/utils/node_utils.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/type_utils.h" -#include "ge_local_engine/ops_kernel_store/op/op_factory.h" -#include "ge_local_engine/common/constant/constant.h" -#include "register/ops_kernel_builder_registry.h" - -namespace ge { -namespace ge_local { -REGISTER_OPS_KERNEL_BUILDER(kGeLocalOpKernelLibName, GeLocalOpsKernelBuilder); - -namespace { -const char *const kConstantOpType = "Constant"; -const char *const kConstantOpAttrName = "value"; -const char *const kDataOpType = "Data"; -} // namespace - -GeLocalOpsKernelBuilder::~GeLocalOpsKernelBuilder() { - GELOGI("GeLocalOpsKernelBuilder destroyed"); -} - -Status GeLocalOpsKernelBuilder::Initialize(const map &options) { - return SUCCESS; -} - -Status GeLocalOpsKernelBuilder::Finalize() { - return SUCCESS; -} - -Status GeLocalOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { - GELOGD("[%s] CalcOpRunningParam In.", ge_node.GetName().c_str()); - OpDescPtr op_desc = ge_node.GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(FAILED, "CalcOpRunningParam failed, as op desc is null"); - return FAILED; - } - - bool is_shape_unknown = false; - if (NodeUtils::GetNodeUnknownShapeStatus(ge_node, is_shape_unknown) == GRAPH_SUCCESS) { - if (is_shape_unknown) { - GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str()); - return 
SUCCESS; - } - } - - const string node_name = ge_node.GetName(); - const string node_type = ge_node.GetType(); - size_t output_size = op_desc->GetOutputsSize(); - GELOGD("Calc op[%s:%s] running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size); - - for (size_t i = 0; i < output_size; ++i) { - GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast(i)); - Format format = output_tensor.GetFormat(); - DataType data_type = output_tensor.GetDataType(); - - int64_t mem_size = 0; - graphStatus graph_status = TensorUtils::GetSize(output_tensor, mem_size); - // If mem size has been set, no need reset. - if ((graph_status == GRAPH_SUCCESS) && (mem_size > 0) && (data_type != DT_STRING)) { - GELOGD("Op[%s:%s] out[%zu] mem size has been set, no need calc again, format=%s, data_type=%s, mem_size=%ld.", - node_name.c_str(), node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str(), mem_size); - continue; - } - - int64_t output_mem_size = 0; - GeShape output_shape = output_tensor.GetShape(); - if ((node_type == kConstantOpType) && (data_type == DT_STRING)) { - graph_status = CalcConstantStrMemSize(op_desc, output_mem_size); - } else if (node_type == kDataOpType) { - int64_t o_size = 0; - graph_status = TensorUtils::GetTensorMemorySizeInBytes(output_tensor, o_size); - output_mem_size = o_size; - } else { - graph_status = TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size); - } - - if (graph_status != GRAPH_SUCCESS) { - GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, format=%s, data_type=%s, error=%u.", node_name.c_str(), - node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str(), graph_status); - return FAILED; - } - - if (output_mem_size < 0) { - GELOGE(FAILED, - "Calc op[%s:%s] out[%zu] mem size is negative(not support)," - " format=%s, data_type=%s, mem_size=%ld.", - 
node_name.c_str(), node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str(), output_mem_size); - return FAILED; - } - GELOGI( - "Calc op[%s:%s] out[%zu] mem size is %ld," - " format=%s, data_type=%s.", - node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); - - TensorUtils::SetSize(output_tensor, output_mem_size); - - graph_status = op_desc->UpdateOutputDesc(static_cast(i), output_tensor); - if (graph_status != GRAPH_SUCCESS) { - GELOGE(FAILED, "Update op[%s:%s] out[%zu] desc failed, format=%s, data_type=%s, error=%u.", node_name.c_str(), - node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str(), graph_status); - return FAILED; - } - } - GELOGD("Calc op[%s:%s] running param success.", node_name.c_str(), node_type.c_str()); - return SUCCESS; -} - -Status GeLocalOpsKernelBuilder::CalcConstantStrMemSize(const OpDescPtr &op_desc, int64_t &mem_size) { - if (op_desc == nullptr) { - GELOGE(FAILED, "CalcConstantStrMemSize failed, as op desc is null"); - return FAILED; - } - ConstGeTensorPtr value = MakeShared(); - if (value == nullptr) { - GELOGE(FAILED, "make shared ConstGeTensor exception."); - return FAILED; - } - // Constant op attr name is "value" - if (!AttrUtils::GetTensor(op_desc, kConstantOpAttrName, value)) { - GELOGE(FAILED, "Get Constant op attr value failed"); - return FAILED; - } - mem_size = static_cast(value->GetData().size()); - return SUCCESS; -} - -Status GeLocalOpsKernelBuilder::GenerateTask(const Node &node, RunContext &context, std::vector &tasks) { - bool is_shape_unknown = false; - if (NodeUtils::GetNodeUnknownShapeStatus(node, is_shape_unknown) == GRAPH_SUCCESS) { - if (is_shape_unknown) { - GELOGI("op:%s is unknown shape, does not need to generate task", - node.GetName().c_str()); - return SUCCESS; - } 
- } - string name = node.GetName(); - string type = node.GetType(); - GELOGD("Ge local generate task for node:%s(%s) begin, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); - - auto op = OpFactory::Instance().CreateOp(node, context); - if (op == nullptr) { - GELOGE(FAILED, "CreateOp for node:%s(%s) failed.", name.c_str(), type.c_str()); - return FAILED; - } - - Status ret = op->Run(); - if (ret != SUCCESS) { - GELOGE(ret, "Node:%s(%s) op run failed.", name.c_str(), type.c_str()); - return ret; - } - GELOGI("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); - return ret; -} -} // namespace ge_local -} // namespace ge diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h deleted file mode 100644 index 8a7dafe2..00000000 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.h +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_UTILS_GE_LOCAL_OPS_KERNEL_UTILS_H_ -#define GE_GE_LOCAL_ENGINE_OPS_KERNEL_UTILS_GE_LOCAL_OPS_KERNEL_UTILS_H_ - -#include "external/ge/ge_api_error_codes.h" -#include "common/opskernel/ops_kernel_builder.h" - -namespace ge { -namespace ge_local { -class GeLocalOpsKernelBuilder : public OpsKernelBuilder { - public: - ~GeLocalOpsKernelBuilder() override; - Status Initialize(const map &options) override; - - Status Finalize() override; - - Status CalcOpRunningParam(Node &node) override; - - Status GenerateTask(const Node &node, RunContext &context, std::vector &tasks) override; - - private: - /** - * Calc memSize for constant which type is DT_STRING. - * @param op_desc OpDesc information - * @param mem_size output size - * @return whether this operation success - */ - Status CalcConstantStrMemSize(const OpDescPtr &op_desc, int64_t &mem_size); -}; -} // namespace ge_local -} // namespace ge - -#endif // GE_GE_LOCAL_ENGINE_OPS_KERNEL_UTILS_GE_LOCAL_OPS_KERNEL_UTILS_H_ diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc index 504c3f2f..adf936c0 100755 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,6 +26,11 @@ #include "op/op_factory.h" #include "proto/task.pb.h" +namespace { +const char *const kConstantOpType = "Constant"; +const char *const kConstantOpAttrName = "value"; +const char *const kDataOpType = "Data"; +} // namespace namespace ge { namespace ge_local { using domi::TaskDef; @@ -58,8 +63,136 @@ Status GeLocalOpsKernelInfoStore::Finalize() { return SUCCESS; } +Status GeLocalOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) { + OpDescPtr op_desc = ge_node.GetOpDesc(); + if (op_desc == nullptr) { + GELOGE(FAILED, "CalcOpRunningParam failed, as op desc is null"); + return FAILED; + } + + bool is_shape_unknown = false; + if (NodeUtils::GetNodeUnknownShapeStatus(ge_node, is_shape_unknown) == GRAPH_SUCCESS) { + if (is_shape_unknown) { + GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str()); + return SUCCESS; + } + } + + const string node_name = ge_node.GetName(); + const string node_type = ge_node.GetType(); + size_t output_size = op_desc->GetOutputsSize(); + GELOGD("Calc op[%s:%s] running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size); + + for (size_t i = 0; i < output_size; ++i) { + GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast(i)); + Format format = output_tensor.GetFormat(); + DataType data_type = output_tensor.GetDataType(); + + int64_t mem_size = 0; + graphStatus graph_status = TensorUtils::GetSize(output_tensor, mem_size); + // If mem size has been set, no need reset. 
+ if ((graph_status == GRAPH_SUCCESS) && (mem_size > 0) && (data_type != DT_STRING)) { + GELOGD("Op[%s:%s] out[%zu] mem size has been set, no need calc again, format=%s, data_type=%s, mem_size=%ld.", + node_name.c_str(), node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), mem_size); + continue; + } + + int64_t output_mem_size = 0; + GeShape output_shape = output_tensor.GetShape(); + if ((node_type == kConstantOpType) && (data_type == DT_STRING)) { + graph_status = CalcConstantStrMemSize(op_desc, output_mem_size); + } else if (node_type == kDataOpType) { + int64_t output_size = 0; + graph_status = TensorUtils::GetTensorMemorySizeInBytes(output_tensor, output_size); + output_mem_size = output_size; + } else { + graph_status = TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size); + } + + if (graph_status != GRAPH_SUCCESS) { + GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, format=%s, data_type=%s, error=%u.", node_name.c_str(), + node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), graph_status); + return FAILED; + } + + if (output_mem_size < 0) { + GELOGE(FAILED, + "Calc op[%s:%s] out[%zu] mem size is negative(not support)," + " format=%s, data_type=%s, mem_size=%ld.", + node_name.c_str(), node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), output_mem_size); + return FAILED; + } + GELOGI( + "Calc op[%s:%s] out[%zu] mem size is %ld," + " format=%s, data_type=%s.", + node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); + + TensorUtils::SetSize(output_tensor, output_mem_size); + + graph_status = op_desc->UpdateOutputDesc(static_cast(i), output_tensor); + if (graph_status != GRAPH_SUCCESS) { + 
GELOGE(FAILED, "Update op[%s:%s] out[%zu] desc failed, format=%s, data_type=%s, error=%u.", node_name.c_str(), + node_type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), graph_status); + return FAILED; + } + } + GELOGD("Calc op[%s:%s] running param success.", node_name.c_str(), node_type.c_str()); + return SUCCESS; +} + +Status GeLocalOpsKernelInfoStore::CalcConstantStrMemSize(const OpDescPtr &op_desc, int64_t &mem_size) { + if (op_desc == nullptr) { + GELOGE(FAILED, "CalcConstantStrMemSize failed, as op desc is null"); + return FAILED; + } + ConstGeTensorPtr value = MakeShared(); + if (value == nullptr) { + GELOGE(FAILED, "make shared ConstGeTensor exception."); + return FAILED; + } + // Constant op attr name is "value" + if (!AttrUtils::GetTensor(op_desc, kConstantOpAttrName, value)) { + GELOGE(FAILED, "Get Constant op attr value failed"); + return FAILED; + } + mem_size = static_cast(value->GetData().size()); + return GRAPH_SUCCESS; +} + void GeLocalOpsKernelInfoStore::GetAllOpsKernelInfo(map &infos) const { infos = op_info_map_; } +Status GeLocalOpsKernelInfoStore::GenerateTask(const Node &node, RunContext &context, vector &tasks) { + bool is_shape_unknown = false; + if (NodeUtils::GetNodeUnknownShapeStatus(node, is_shape_unknown) == GRAPH_SUCCESS) { + if (is_shape_unknown) { + GELOGI("op:%s is unknown shape, does not need to generate task", node.GetName().c_str()); + return SUCCESS; + } + } + string name = node.GetName(); + string type = node.GetType(); + GELOGD("Ge local generate task for node:%s(%s) begin, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); + + auto op = OpFactory::Instance().CreateOp(node, context); + if (op == nullptr) { + GELOGE(FAILED, "CreateOp for node:%s(%s) failed.", name.c_str(), type.c_str()); + return FAILED; + } + + Status ret = op->Run(); + if (ret != SUCCESS) { + GELOGE(ret, "Node:%s(%s) op run failed.", name.c_str(), type.c_str()); + return ret; + 
} + GELOGI("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); + return ret; +} + bool GeLocalOpsKernelInfoStore::CheckSupported(const OpDescPtr &op_desc, std::string &) const { if (op_desc == nullptr) { return false; diff --git a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h index cdfbeffa..ce123751 100755 --- a/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h +++ b/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,17 +59,33 @@ class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { void GetAllOpsKernelInfo(std::map &infos) const override; /** + * Calc the running size of Operator, + * then GE will alloc the mem size from runtime + * @param ge_node Node information + * @return status whether this operation success + */ + Status CalcOpRunningParam(ge::Node &ge_node) override; + + /** + * call the runtime's interface to generate the task + * @param node Node information + * @param context run context info + * @return status whether this operation success + */ + Status GenerateTask(const ge::Node &ge_node, ge::RunContext &context, std::vector &tasks) override; + + /** * Create session * @param session_options Session Options * @return status whether this operation success - */ + */ Status CreateSession(const std::map &session_options) override; /** * Destroy session * @param session_options Session Options * @return status whether this operation success - */ + */ Status DestroySession(const std::map &session_options) override; // Copy prohibited @@ -85,6 +101,13 @@ class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { 
GeLocalOpsKernelInfoStore &operator=(GeLocalOpsKernelInfoStore &&ops_kernel_store) = delete; private: + /** + * Calc memSize for constant which type is DT_STRING. + * @param op_desc OpDesc information + * @param mem_size output size + * @return whether this operation success + */ + Status CalcConstantStrMemSize(const OpDescPtr &op_desc, int64_t &mem_size); // store op name and OpInfo key-value pair std::map op_info_map_; diff --git a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc index b2f3d095..badca5a3 100755 --- a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h index 55587b2e..ebaeef2d 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/ge_deleted_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/ge_local_engine/ops_kernel_store/op/no_op.cc b/ge/ge_local_engine/ops_kernel_store/op/no_op.cc index 51c65ce0..62fe1b5d 100755 --- a/ge/ge_local_engine/ops_kernel_store/op/no_op.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/no_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/no_op.h b/ge/ge_local_engine/ops_kernel_store/op/no_op.h index 40e5766b..31199b25 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/no_op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/no_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/op.cc b/ge/ge_local_engine/ops_kernel_store/op/op.cc index 11229b2c..0a5625de 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/op.h b/ge/ge_local_engine/ops_kernel_store/op/op.h index c5a3df7a..1b184dad 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op.h +++ b/ge/ge_local_engine/ops_kernel_store/op/op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,10 +21,10 @@ #include #include #include "common/ge_inner_error_codes.h" +#include "common/opskernel/ops_kernel_info_types.h" #include "graph/node.h" namespace ge { -struct RunContext; namespace ge_local { /** * The base class for all op. diff --git a/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc b/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc index c57b4f4d..49fc1084 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc +++ b/ge/ge_local_engine/ops_kernel_store/op/op_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h index 0faab508..6d0c16f4 100644 --- a/ge/ge_local_engine/ops_kernel_store/op/op_factory.h +++ b/ge/ge_local_engine/ops_kernel_store/op/op_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/ge_local_engine/proto/task.proto b/ge/ge_local_engine/proto/task.proto index d0c09840..50ea061b 100644 --- a/ge/ge_local_engine/proto/task.proto +++ b/ge/ge_local_engine/proto/task.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + syntax = "proto3"; package domi; diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index fa795ced..956bab0b 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -61,7 +61,6 @@ LIBGE_LOCAL_SRC_FILES := \ graph/load/new_model_manager/model_utils.cc \ graph/load/new_model_manager/aipp_utils.cc \ graph/load/new_model_manager/task_info/end_graph_task_info.cc \ - graph/load/new_model_manager/task_info/model_exit_task_info.cc \ graph/load/new_model_manager/task_info/event_record_task_info.cc \ graph/load/new_model_manager/task_info/event_wait_task_info.cc \ graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ @@ -90,9 +89,7 @@ LIBGE_LOCAL_SRC_FILES := \ graph/manager/graph_mem_allocator.cc \ graph/manager/graph_caching_allocator.cc \ graph/manager/graph_var_manager.cc \ - graph/manager/host_mem_manager.cc \ graph/manager/rdma_pool_allocator.cc \ - graph/manager/memory_api.cc \ graph/manager/model_manager/event_manager.cc \ graph/manager/trans_var_data_utils.cc \ graph/manager/util/debug.cc \ @@ -111,9 +108,7 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/atomic_addr_clean_pass.cc \ graph/passes/mark_same_addr_pass.cc \ graph/passes/mark_graph_unknown_status_pass.cc \ - graph/passes/mark_agnostic_pass.cc \ graph/partition/dynamic_shape_partition.cc \ - graph/partition/stage_partition.cc \ 
graph/passes/base_pass.cc \ graph/passes/bitcast_pass.cc \ graph/passes/cast_remove_pass.cc \ @@ -184,7 +179,6 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/multi_batch_pass.cc \ graph/passes/multi_batch_clone_pass.cc \ graph/passes/subexpression_migration_pass.cc \ - graph/passes/subgraph_const_migration_pass.cc \ graph/passes/unused_args_clean_pass.cc \ graph/passes/net_output_pass.cc \ graph/passes/next_iteration_pass.cc \ @@ -246,7 +240,6 @@ LIBGE_LOCAL_SRC_FILES := \ model/ge_root_model.cc \ omm/csa_interact.cc \ opskernel_manager/ops_kernel_manager.cc \ - opskernel_manager/ops_kernel_builder_manager.cc \ session/inner_session.cc \ session/session_manager.cc \ single_op/single_op.cc \ @@ -320,7 +313,6 @@ RUNNER_LOCAL_C_INCLUDES := \ $(TOPDIR)libc_sec/include \ $(TOPDIR)ops/built-in/op_proto/inc \ $(TOPDIR)framework/domi/analyzer \ - $(TOPDIR)graphengine/ge/analyzer \ $(TOPDIR)toolchain/ide/ide-daemon/external \ proto/fwk_adapter.proto \ proto/ge_ir.proto \ @@ -377,6 +369,7 @@ LOCAL_SHARED_LIBRARIES := \ libmsprof \ liberror_manager \ + LOCAL_LDFLAGS := -lrt -ldl LOCAL_SHARED_LIBRARIES += \ @@ -402,6 +395,8 @@ LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := ../../out/ge/lib64/stub/ge_api.cc \ ../../out/ge/lib64/stub/ge_prof.cc \ + + LOCAL_SHARED_LIBRARIES := LOCAL_LDFLAGS := -lrt -ldl diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt index f46e2cdc..b4c7fe9e 100644 --- a/ge/ge_runtime/CMakeLists.txt +++ b/ge/ge_runtime/CMakeLists.txt @@ -1,18 +1,23 @@ +# Copyright 2019-2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ ############ libge_runtime.so ############ set(GE_SRC_LIST "model_runner.cc" "runtime_model.cc" "output.cc" - "task/aicpu_task.cc" - "task/cce_task.cc" - "task/tbe_task.cc" - "task/event_record_task.cc" - "task/event_wait_task.cc" - "task/stream_active_task.cc" - "task/stream_switch_task.cc" - "task/hccl_task.cc" - "task/memcpy_async_task.cc" - "task/profiler_task.cc" + "task/*.cc" ) add_library(ge_runtime SHARED ${GE_SRC_LIST}) @@ -22,7 +27,7 @@ target_compile_options(ge_runtime PRIVATE -O2 ) -target_compile_definitions(ge_runtime PRIVATE +target_compile_definitions(ge_runtime PUBLIC PROTOBUF_INLINE_NOT_IN_HEADERS=0 ) @@ -42,6 +47,7 @@ target_include_directories(ge_runtime PRIVATE target_link_libraries(ge_runtime PRIVATE $ -Wl,--no-as-needed + graph slog runtime c_sec diff --git a/ge/ge_runtime/model_context.h b/ge/ge_runtime/model_context.h index 8860f0da..259ff91f 100755 --- a/ge/ge_runtime/model_context.h +++ b/ge/ge_runtime/model_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,8 +27,13 @@ class ModelContext { ModelContext(uint32_t device_id, uint64_t session_id, int32_t priority, rtModel_t rt_model_handle, rtStream_t rt_model_stream, const std::vector &stream_list, const std::vector &label_list, const std::vector &event_list) - : device_id_(device_id), session_id_(session_id), priority_(priority), rt_model_handle_(rt_model_handle), - rt_model_stream_(rt_model_stream), stream_list_(stream_list), label_list_(label_list), + : device_id_(device_id), + session_id_(session_id), + priority_(priority), + rt_model_handle_(rt_model_handle), + rt_model_stream_(rt_model_stream), + stream_list_(stream_list), + label_list_(label_list), event_list_(event_list) {} ~ModelContext() {} diff --git a/ge/ge_runtime/model_runner.cc b/ge/ge_runtime/model_runner.cc index 2c2efde4..9961ab4e 100644 --- a/ge/ge_runtime/model_runner.cc +++ b/ge/ge_runtime/model_runner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ namespace ge { namespace model_runner { + using RuntimeModelPtr = std::shared_ptr; using DavinciModelPtr = std::shared_ptr; diff --git a/ge/ge_runtime/output.cc b/ge/ge_runtime/output.cc index eec8d170..5153f688 100644 --- a/ge/ge_runtime/output.cc +++ b/ge/ge_runtime/output.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ bool Output::CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_inde DataBuffer data_buf = rslt->blobs[data_begin + data_count]; bool ret = SetDataBuf(data_buf, data_begin, data_count, i, support_mem_share); if (!ret) { - GELOGE(FAILED, "Copy data to host failed. 
index: %lu, addr: %p", i, v_input_data_addr_[i]); + GELOGE(FAILED, "Copy data to host error. index: %lu, addr: %p", i, v_input_data_addr_[i]); return ret; } data_index = data_begin + data_count; @@ -89,5 +89,6 @@ bool Output::SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &dat bool support_mem_share) { return true; } + } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/output.h b/ge/ge_runtime/output.h index 13ea956d..1f7f91ee 100755 --- a/ge/ge_runtime/output.h +++ b/ge/ge_runtime/output.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ namespace ge { namespace model_runner { + class Output { public: Output(const OpInfoPtr &op_info, const std::shared_ptr &model); @@ -32,8 +33,7 @@ class Output { bool CopyRslt(OutputData *rslt, uint32_t data_begin, uint32_t &data_index, bool support_mem_share); - bool SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &data_count, size_t i, - bool support_mem_share); + bool SetDataBuf(DataBuffer &data_buf, uint32_t data_begin, uint32_t &data_count, size_t i, bool support_mem_share); // Copy assignment operator and copy constructor are deleted Output &operator=(const Output &output) = delete; diff --git a/ge/ge_runtime/proto/task.pb.h b/ge/ge_runtime/proto/task.pb.h deleted file mode 100644 index 490289ac..00000000 --- a/ge/ge_runtime/proto/task.pb.h +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Generated by the protocol buffer compiler. DO NOT EDIT! -// source: task.proto - -#ifndef STUB_TASK_PROTO_H -#define STUB_TASK_PROTO_H - -namespace domi { -class TaskDef; -} - -#endif // STUB_TASK_PROTO_H diff --git a/ge/ge_runtime/runtime_model.cc b/ge/ge_runtime/runtime_model.cc index 9fc708c7..9f549313 100644 --- a/ge/ge_runtime/runtime_model.cc +++ b/ge/ge_runtime/runtime_model.cc @@ -74,8 +74,8 @@ bool RuntimeModel::InitStream(std::shared_ptr &davinci_model) { for (uint32_t i = 0; i < davinci_model->GetStreamNum(); ++i) { rtStream_t stream = nullptr; uint32_t flag = (force_copy_streams.find(i) != force_copy_streams.end()) - ? (RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY) - : (RT_STREAM_PERSISTENT); + ? 
(RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY) + : (RT_STREAM_PERSISTENT); rtError_t rt_ret = rtStreamCreateWithFlags(&stream, davinci_model->GetPriority(), flag); if (rt_ret != RT_ERROR_NONE) { @@ -115,23 +115,34 @@ bool RuntimeModel::InitEvent(uint32_t event_num) { return true; } -bool RuntimeModel::InitLabel(uint32_t batch_num) { - GELOGI("batch number:%u.", batch_num); - for (uint32_t i = 0; (batch_num != 0 && i <= batch_num); ++i) { - rtLabel_t rt_lLabel = nullptr; - rtError_t rt_ret = rtLabelCreate(&rt_lLabel); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api rtLabelCreate failed, i; %u; ret: 0x%X", i, rt_ret); - return false; +bool RuntimeModel::InitLabel(std::shared_ptr &davinci_model) { + GELOGI("batch number:%u.", davinci_model->GetBatchNum()); + label_list_.resize(davinci_model->GetBatchNum()); + for (auto &task_info : davinci_model->GetTaskInfoList()) { + if (task_info == nullptr) { + GELOGE(PARAM_INVALID, "task_info is null."); + continue; } - if (rt_lLabel == nullptr) { - GELOGE(RT_FAILED, "rtLabel is nullptr!"); + if (task_info->type() != TaskInfoType::LABEL_SET) { + continue; + } + auto label_set_task_info = std::static_pointer_cast(task_info); + + if (label_set_task_info->stream_id() >= stream_list_.size()) { + GELOGE(PARAM_INVALID, "Invalid stream id."); return false; } - label_list_.emplace_back(rt_lLabel); + rtLabel_t rt_label = nullptr; + rtError_t rt_ret = rtLabelCreateEx(&rt_label, stream_list_[label_set_task_info->stream_id()]); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api rtLabelCreate failed, ret: 0x%X", rt_ret); + return false; + } + label_list_[label_set_task_info->label_id()] = rt_label; } + return true; } @@ -163,7 +174,7 @@ bool RuntimeModel::InitResource(std::shared_ptr &davinci_model) { return false; } - if (!InitLabel(davinci_model->GetBatchNum())) { + if (!InitLabel(davinci_model)) { return false; } @@ -281,7 +292,6 @@ bool RuntimeModel::DistributeTask() { GELOGE(FAILED, "DistributeTask 
failed"); return false; } - return true; } @@ -293,10 +303,14 @@ bool RuntimeModel::Run() { return false; } - GELOGI("Run rtModelExecute success"); + GELOGI("Run rtModelExecute success, ret = 0x%X", ret); ret = rtStreamSynchronize(rt_model_stream_); if (ret != RT_ERROR_NONE) { + if (ret == RT_ERROR_END_OF_SEQUENCE) { + GELOGI("Model stream RT_ERROR_END_OF_SEQUENCE signal received, ret = 0x%X", ret); + return true; + } GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret); return false; } @@ -459,7 +473,7 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model } if (constant->output_tensors[0].size < constant->weight_data.size()) { - GELOGE(PARAM_INVALID, "Output size:%u is less than weight data size:%zu", constant->output_tensors[0].size, + GELOGE(PARAM_INVALID, "Output size:%u less than weight data size:%zu", constant->output_tensors[0].size, constant->weight_data.size()); return false; } @@ -474,11 +488,8 @@ bool RuntimeModel::InitConstantInfo(std::shared_ptr &davinci_model /// The logic of GetShapeSize is wrong, the scaler tensor's GetShapeSize is zero /// and that of unknown shape is zero too. /// Unknown shape will not appear here, so we can use zero judge a tensor is scaler or not. - int64_t elem_num = constant->weight_tensors[0].GetShapeSize(); - if (elem_num == 0 && constant->weight_tensors[0].size == 0) { - elem_num = 1; - } - + int64_t elem_num = + (constant->weight_tensors[0].GetShapeSize() == 0) ? 
1 : constant->weight_tensors[0].GetShapeSize(); if (constant->weight_data.size() < sizeof(uint64_t)) { GELOGE(FAILED, "weight_data size is smaller than sizeof(uint64_t)"); return false; diff --git a/ge/ge_runtime/runtime_model.h b/ge/ge_runtime/runtime_model.h index 6109915f..d0c466d4 100644 --- a/ge/ge_runtime/runtime_model.h +++ b/ge/ge_runtime/runtime_model.h @@ -40,13 +40,11 @@ class RuntimeModel { const std::vector &GetTaskIdList() const; const std::vector &GetStreamIdList() const; const std::map> &GetRuntimeInfoMap() const { return runtime_info_map_; } - const rtModel_t GetModelHandle() const { return rt_model_handle_; } + rtModel_t GetModelHandle() const { return rt_model_handle_; } bool Run(); bool CopyInputData(const InputData &input_data); - bool GetInputOutputDescInfo(bool zero_copy, - std::vector *input_desc, - std::vector *output_desc, - std::vector *input_format, + bool GetInputOutputDescInfo(bool zero_copy, std::vector *input_desc, + std::vector *output_desc, std::vector *input_format, std::vector *output_format); private: @@ -55,7 +53,7 @@ class RuntimeModel { bool LoadTask(); bool InitStream(std::shared_ptr &davinci_model); bool InitEvent(uint32_t event_num); - bool InitLabel(uint32_t batch_num); + bool InitLabel(std::shared_ptr &davinci_model); bool InitDataInfo(std::shared_ptr &davinci_model); bool InitOutputInfo(std::shared_ptr &davinci_model); bool InitConstantInfo(std::shared_ptr &davinci_model); @@ -87,6 +85,7 @@ class RuntimeModel { std::vector stream_id_list_{}; std::map> runtime_info_map_; }; + } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/aicpu_task.cc b/ge/ge_runtime/task/aicpu_task.cc index 61ef7a3c..5b3d8e82 100755 --- a/ge/ge_runtime/task/aicpu_task.cc +++ b/ge/ge_runtime/task/aicpu_task.cc @@ -26,6 +26,7 @@ AicpuTask::AicpuTask(const ModelContext &model_context, const std::shared_ptr(io_addrs.size()); auto io_addrs_size = static_cast(io_addrs_num * sizeof(void *)); constexpr uint32_t io_addr_offset = 
sizeof(aicpu::AicpuParamHead); - uint32_t node_def_addr_offset = io_addr_offset + io_addrs_size; - uint32_t args_size = - sizeof(aicpu::AicpuParamHead) + io_addrs_size + static_cast(task_info_->node_def().size()); - aicpu::AicpuParamHead aicpu_param_head = {args_size, io_addrs_num}; + uint32_t node_def_len_offset = io_addr_offset + io_addrs_size; + uint32_t node_def_addr_offset = node_def_len_offset + sizeof(uint32_t); + uint32_t args_size = sizeof(aicpu::AicpuParamHead) + io_addrs_size + + static_cast(task_info_->node_def().size()) + sizeof(uint32_t); + + aicpu::AicpuParamHead aicpu_param_head; + aicpu_param_head.length = args_size; + aicpu_param_head.ioAddrNum = io_addrs_num; + auto ext_info = task_info_->ext_info(); + uint32_t ext_size = ext_info.size(); + if (ext_info.empty()) { + aicpu_param_head.extInfoLength = 0; + aicpu_param_head.extInfoAddr = 0; + } else { + rtError_t flag = rtMalloc(&ext_info_, ext_size, RT_MEMORY_HBM); + if (flag != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api(rtMalloc) failed, ret: 0x%X.", flag); + return false; + } + + flag = rtMemcpy(ext_info_, ext_size, const_cast(reinterpret_cast(ext_info.data())), ext_size, + RT_MEMCPY_HOST_TO_DEVICE); + if (flag != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api(rtMemCpy) failed, ret: 0x%X.", flag); + return false; + } + + GELOGI("ext info size:", ext_size); + aicpu_param_head.extInfoLength = ext_size; + aicpu_param_head.extInfoAddr = reinterpret_cast(ext_info_); + } // Malloc device memory for args rtError_t rt_ret = rtMalloc(&args_, args_size, RT_MEMORY_HBM); @@ -80,6 +111,17 @@ bool AicpuTask::Distribute() { return false; } } + + // Memcpy node def + auto size = task_info_->node_def().size(); + rt_ret = + rtMemcpy(reinterpret_cast(reinterpret_cast(args_) + node_def_len_offset), sizeof(uint32_t), + reinterpret_cast(&size), sizeof(uint32_t), RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X.", rt_ret); + return false; 
+ } + // Memcpy node def rt_ret = rtMemcpy(reinterpret_cast(reinterpret_cast(args_) + node_def_addr_offset), task_info_->node_def().size(), reinterpret_cast(task_info_->node_def().data()), diff --git a/ge/ge_runtime/task/aicpu_task.h b/ge/ge_runtime/task/aicpu_task.h index cc21af8a..2d3c5040 100755 --- a/ge/ge_runtime/task/aicpu_task.h +++ b/ge/ge_runtime/task/aicpu_task.h @@ -41,6 +41,7 @@ class AicpuTask : public TaskRepeater { std::shared_ptr task_info_; void *stream_; void *args_; + void *ext_info_; void *input_output_addr_; }; } // namespace model_runner diff --git a/ge/ge_runtime/task/cce_task.cc b/ge/ge_runtime/task/cce_task.cc index 1c1807b5..04fd5610 100755 --- a/ge/ge_runtime/task/cce_task.cc +++ b/ge/ge_runtime/task/cce_task.cc @@ -103,9 +103,9 @@ bool CceTask::Distribute() { // Modify flowtable addr in args auto args = const_cast(task_info_->args().data()); auto task_offset = reinterpret_cast(const_cast(task_info_->args_offset().data())); + if (task_info_->args().size() < (task_offset[0] + sizeof(uint64_t))) { - GELOGE(FAILED, - "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu", + GELOGE(FAILED, "(context.args_offset().data()))[0]:%u + sizeof(uint64_t):%zu > kernelDef.args().size():%zu", static_cast(task_offset[0]), sizeof(uint64_t), task_info_->args().size()); return false; } @@ -136,8 +136,7 @@ bool CceTask::Distribute() { return false; } - rt_ret = rtMemcpy(sm_desc_, task_info_->sm_desc().size(), - task_info_->sm_desc().data(), + rt_ret = rtMemcpy(sm_desc_, task_info_->sm_desc().size(), task_info_->sm_desc().data(), task_info_->sm_desc().size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); @@ -146,12 +145,8 @@ bool CceTask::Distribute() { } // Kernel launch - rt_ret = rtKernelLaunch(stub_func_, - task_info_->block_dim(), - args_, - task_info_->args_size(), - static_cast(sm_desc_), - stream_); + rt_ret = rtKernelLaunch(stub_func_, 
task_info_->block_dim(), args_, task_info_->args_size(), + static_cast(sm_desc_), stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return false; diff --git a/ge/ge_runtime/task/event_record_task.h b/ge/ge_runtime/task/event_record_task.h index b9ae5dba..7c1d4f80 100755 --- a/ge/ge_runtime/task/event_record_task.h +++ b/ge/ge_runtime/task/event_record_task.h @@ -33,7 +33,7 @@ class EventRecordTask : public TaskRepeater { private: std::shared_ptr task_info_; rtStream_t stream_; - rtEvent_t event_; + rtEvent_t event_; }; } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/event_wait_task.cc b/ge/ge_runtime/task/event_wait_task.cc index 5f1ffaad..558c2a59 100644 --- a/ge/ge_runtime/task/event_wait_task.cc +++ b/ge/ge_runtime/task/event_wait_task.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/ge_runtime/task/event_wait_task.h b/ge/ge_runtime/task/event_wait_task.h index 685be897..9104bbf8 100755 --- a/ge/ge_runtime/task/event_wait_task.h +++ b/ge/ge_runtime/task/event_wait_task.h @@ -33,7 +33,7 @@ class EventWaitTask : public TaskRepeater { private: std::shared_ptr task_info_; rtStream_t stream_; - rtEvent_t event_; + rtEvent_t event_; }; } // namespace model_runner } // namespace ge diff --git a/ge/ge_runtime/task/hccl_task.cc b/ge/ge_runtime/task/hccl_task.cc index 771341c1..3d5f8504 100644 --- a/ge/ge_runtime/task/hccl_task.cc +++ b/ge/ge_runtime/task/hccl_task.cc @@ -115,7 +115,6 @@ bool HcclTask::Distribute() { rt_ret = rtModelBindStream(rt_model_handle_, stream, RT_HEAD_STREAM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); - (void)rtStreamDestroy(stream); return false; } @@ -129,8 +128,6 @@ bool HcclTask::Distribute() { ge_task.type = static_cast(RT_MODEL_TASK_HCCL); ge_task.stream = stream_; - GETaskKernelHcclInfo kernel_hccl_info; - ge_task.kernelHcclInfo.emplace_back(kernel_hccl_info); ge_task.kernelHcclInfo[0].hccl_type = task_info_->hccl_type(); ge_task.kernelHcclInfo[0].inputDataAddr = task_info_->input_data_addr(); ge_task.kernelHcclInfo[0].outputDataAddr = task_info_->output_data_addr(); diff --git a/ge/ge_runtime/task/label_goto_task.cc b/ge/ge_runtime/task/label_goto_task.cc new file mode 100644 index 00000000..d357accb --- /dev/null +++ b/ge/ge_runtime/task/label_goto_task.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ge_runtime/task/label_goto_task.h" +#include "ge_runtime/task/task_factory.h" + +namespace ge { +namespace model_runner { +LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr &task_info) + : TaskRepeater(model_context, task_info), + task_info_(task_info), + stream_(nullptr), + label_(nullptr) { + if (task_info_ == nullptr) { + GELOGW("task_info_ is null!"); + return; + } + auto stream_list = model_context.stream_list(); + auto label_list = model_context.label_list(); + uint32_t stream_id = task_info->stream_id(); + uint32_t label_id = task_info->label_id(); + GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); + GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); + if (stream_id >= stream_list.size() || label_id >= label_list.size()) { + GELOGW("Stream/Label id invalid."); + return; + } + stream_ = stream_list[stream_id]; + label_ = label_list[label_id]; +} + +LabelGotoTask::~LabelGotoTask() {} + +bool LabelGotoTask::Distribute() { + GELOGI("LabelGotoTask Distribute start."); + if (stream_ == nullptr) { + GELOGE(PARAM_INVALID, "stream is null!"); + return false; + } + if (label_ == nullptr) { + GELOGE(PARAM_INVALID, "label is null!"); + return false; + } + rtError_t rt_ret = rtLabelGotoEx(label_, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + GELOGI("DistributeTask end."); + return true; +} + +REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo); + 
+} // namespace model_runner +} // namespace ge diff --git a/ge/ge_runtime/task/label_goto_task.h b/ge/ge_runtime/task/label_goto_task.h new file mode 100644 index 00000000..4fd6d1bc --- /dev/null +++ b/ge/ge_runtime/task/label_goto_task.h @@ -0,0 +1,41 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ +#define GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ + +#include +#include "ge_runtime/task/task.h" + +namespace ge { +namespace model_runner { +class LabelGotoTask : public TaskRepeater { + public: + LabelGotoTask(const ModelContext &model_context, const std::shared_ptr &task_info); + + ~LabelGotoTask() override; + + bool Distribute() override; + + private: + std::shared_ptr task_info_; + void *stream_; + void *label_; +}; +} // namespace model_runner +} // namespace ge + +#endif // GE_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_ diff --git a/ge/ge_runtime/task/label_set_task.cc b/ge/ge_runtime/task/label_set_task.cc new file mode 100644 index 00000000..3ab5802c --- /dev/null +++ b/ge/ge_runtime/task/label_set_task.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ge_runtime/task/label_set_task.h" +#include "ge_runtime/task/task_factory.h" + +namespace ge { +namespace model_runner { +LabelSetTask::LabelSetTask(const ModelContext &model_context, const std::shared_ptr &task_info) + : TaskRepeater(model_context, task_info), + task_info_(task_info), + stream_(nullptr), + label_(nullptr) { + if (task_info_ == nullptr) { + GELOGW("task_info_ is null!"); + return; + } + auto stream_list = model_context.stream_list(); + auto label_list = model_context.label_list(); + uint32_t stream_id = task_info->stream_id(); + uint32_t label_id = task_info->label_id(); + GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); + GELOGI("Label list size:%zu, label id:%u.", label_list.size(), label_id); + if (stream_id >= stream_list.size() || label_id >= label_list.size()) { + GELOGW("Stream/Label id invalid."); + return; + } + stream_ = stream_list[stream_id]; + label_ = label_list[label_id]; +} + +LabelSetTask::~LabelSetTask() {} + +bool LabelSetTask::Distribute() { + GELOGI("LabelSetTask Distribute start."); + if (stream_ == nullptr) { + GELOGE(PARAM_INVALID, "stream is null!"); + return false; + } + if (label_ == nullptr) { + GELOGE(PARAM_INVALID, "label is null!"); + return false; + } + rtError_t rt_ret = rtLabelSet(label_, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + GELOGI("DistributeTask end."); + return true; +} + +REGISTER_TASK(TaskInfoType::LABEL_SET, LabelSetTask, LabelSetTaskInfo); + +} // 
namespace model_runner +} // namespace ge diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h b/ge/ge_runtime/task/label_set_task.h similarity index 54% rename from ge/graph/load/new_model_manager/task_info/model_exit_task_info.h rename to ge/ge_runtime/task/label_set_task.h index c219fcc8..70bf1584 100644 --- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.h +++ b/ge/ge_runtime/task/label_set_task.h @@ -14,24 +14,28 @@ * limitations under the License. */ -#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ -#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ +#ifndef GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ +#define GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ -#include "graph/load/new_model_manager/task_info/task_info.h" +#include +#include "ge_runtime/task/task.h" namespace ge { -class ModelExitTaskInfo : public TaskInfo { +namespace model_runner { +class LabelSetTask : public TaskRepeater { public: - ModelExitTaskInfo() {} + LabelSetTask(const ModelContext &model_context, const std::shared_ptr &task_info); - ~ModelExitTaskInfo() override { model_ = nullptr; } + ~LabelSetTask() override; - Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; - - Status Distribute() override; + bool Distribute() override; private: - rtModel_t model_{nullptr}; + std::shared_ptr task_info_; + void *stream_; + void *label_; }; +} // namespace model_runner } // namespace ge -#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_ + +#endif // GE_GE_RUNTIME_TASK_LABEL_SET_TASK_H_ diff --git a/ge/ge_runtime/task/label_switch_task.cc b/ge/ge_runtime/task/label_switch_task.cc new file mode 100644 index 00000000..a3c2d41a --- /dev/null +++ b/ge/ge_runtime/task/label_switch_task.cc @@ -0,0 +1,131 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ge_runtime/task/label_switch_task.h" +#include "ge_runtime/task/task_factory.h" + +namespace ge { +namespace model_runner { +LabelSwitchTask::LabelSwitchTask(const ModelContext &model_context, + const std::shared_ptr &task_info) + : TaskRepeater(model_context, task_info), + task_info_(task_info), + stream_(nullptr), + all_label_resource_(), + label_info_(nullptr) { + if (task_info_ == nullptr) { + GELOGW("task_info_ is null!"); + return; + } + + all_label_resource_ = model_context.label_list(); + auto stream_list = model_context.stream_list(); + uint32_t stream_id = task_info->stream_id(); + GELOGI("Stream list size:%zu, stream id:%u.", stream_list.size(), stream_id); + if (stream_id >= stream_list.size()) { + GELOGW("Stream id invalid."); + return; + } + stream_ = stream_list[stream_id]; +} + +LabelSwitchTask::~LabelSwitchTask() { + if (label_info_ != nullptr) { + rtError_t rt_ret = rtFree(label_info_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtFree fwkOpBuf failed! 
ret: 0x%X.", rt_ret); + } + label_info_ = nullptr; + } +} + +bool LabelSwitchTask::Distribute() { + GELOGI("LabelSwitchTask Distribute start."); + if (!CheckParamValid()) { + return false; + } + + const std::vector &label_index_list = task_info_->label_list(); + std::vector label_list(task_info_->label_size(), nullptr); + + for (size_t i = 0; i < task_info_->label_size(); ++i) { + uint32_t label_index = label_index_list[i]; + if (label_index >= all_label_resource_.size()) { + GELOGE(PARAM_INVALID, "label %zu index is %u, but there are %zu labels in total.", i, label_index, + all_label_resource_.size()); + return false; + } + label_list[i] = all_label_resource_[label_index]; + GELOGI("Case %zu: label id %zu.", i, label_index); + } + + uint32_t label_info_size = sizeof(rtLabelDevInfo) * task_info_->label_size(); + rtError_t rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + rt_ret = rtLabelSwitchByIndex(task_info_->cond(), label_list.size(), label_info_, stream_); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + return false; + } + + GELOGI("DistributeTask end."); + return true; +} + +bool LabelSwitchTask::CheckParamValid() { + if (stream_ == nullptr) { + GELOGE(PARAM_INVALID, "stream is null!"); + return false; + } + + if (task_info_->label_list().empty()) { + GELOGE(PARAM_INVALID, "label_list is empty."); + return false; + } + + if (task_info_->label_size() != task_info_->label_list().size()) { + GELOGE(PARAM_INVALID, "label_list size %zu but label_size is %u.", task_info_->label_list().size(), + task_info_->label_size()); + return false; + } + + if (task_info_->label_size() >= 
UINT32_MAX / sizeof(rtLabelDevInfo)) { + GELOGE(PARAM_INVALID, "label_size %u will overflow.", task_info_->label_size()); + return false; + } + + if (label_info_ != nullptr) { + GELOGE(PARAM_INVALID, "label_info_ has dirty data."); + return false; + } + + return true; +} + +REGISTER_TASK(TaskInfoType::LABEL_SWITCH, LabelSwitchTask, LabelSwitchTaskInfo); + +} // namespace model_runner +} // namespace ge diff --git a/ge/ge_runtime/task/label_switch_task.h b/ge/ge_runtime/task/label_switch_task.h new file mode 100644 index 00000000..463faa31 --- /dev/null +++ b/ge/ge_runtime/task/label_switch_task.h @@ -0,0 +1,44 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ +#define GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ + +#include +#include "ge_runtime/task/task.h" + +namespace ge { +namespace model_runner { +class LabelSwitchTask : public TaskRepeater { + public: + LabelSwitchTask(const ModelContext &model_context, const std::shared_ptr &task_info); + + ~LabelSwitchTask() override; + + bool Distribute() override; + + private: + bool CheckParamValid(); + + std::shared_ptr task_info_; + void *stream_; + std::vector all_label_resource_; + void *label_info_; +}; +} // namespace model_runner +} // namespace ge + +#endif // GE_GE_RUNTIME_TASK_LABEL_SWITCH_TASK_H_ diff --git a/ge/ge_runtime/task/stream_switch_task.cc b/ge/ge_runtime/task/stream_switch_task.cc index 91141139..2adcb4bd 100644 --- a/ge/ge_runtime/task/stream_switch_task.cc +++ b/ge/ge_runtime/task/stream_switch_task.cc @@ -51,7 +51,7 @@ bool StreamSwitchTask::Distribute() { } if (static_cast(task_info_->true_stream_id()) >= stream_list_.size()) { - GELOGE(PARAM_INVALID, "true_stream_id %ld must be less than stream_list_ size %zu!", task_info_->true_stream_id(), + GELOGE(PARAM_INVALID, "true_stream_id %ld must less than stream_list_ size %zu!", task_info_->true_stream_id(), stream_list_.size()); return false; } diff --git a/ge/ge_runtime/task/stream_switch_task.h b/ge/ge_runtime/task/stream_switch_task.h index 2caad200..81c12507 100755 --- a/ge/ge_runtime/task/stream_switch_task.h +++ b/ge/ge_runtime/task/stream_switch_task.h @@ -37,6 +37,7 @@ class StreamSwitchTask : public TaskRepeater { void *stream_; std::vector stream_list_; }; + } // namespace model_runner } // namespace ge #endif // GE_GE_RUNTIME_TASK_STREAM_SWITCH_TASK_H_ diff --git a/ge/ge_runtime/task/task.h b/ge/ge_runtime/task/task.h index b8a937b7..6c4df248 100755 --- a/ge/ge_runtime/task/task.h +++ b/ge/ge_runtime/task/task.h @@ -42,7 +42,7 @@ class Task { template class TaskRepeater : public Task { - static_assert(std::is_base_of(), "Wrong TaskInfo 
Type!"); /*lint !e30*/ + static_assert(std::is_base_of(), "Wrong TaskInfo Type!"); public: TaskRepeater(const ModelContext &model_context, std::shared_ptr task_info) {} diff --git a/ge/ge_runtime/task/task_factory.h b/ge/ge_runtime/task/task_factory.h index 29da1388..670d1fef 100644 --- a/ge/ge_runtime/task/task_factory.h +++ b/ge/ge_runtime/task/task_factory.h @@ -81,6 +81,7 @@ class TaskFactory { std::shared_ptr concrete_task_info = std::static_pointer_cast(task_info); \ return std::make_shared(model_context, concrete_task_info); \ }); + } // namespace model_runner } // namespace ge #endif // GE_GE_RUNTIME_TASK_TASK_FACTORY_H_ diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index f60561c7..bef93333 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -48,7 +48,7 @@ const char *const kAIcoreEngine = "AIcoreEngine"; const char *const kFileNameSuffix = "online"; std::map engine_type_map{ - {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; + {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) { @@ -222,7 +222,7 @@ static void GetOpsProtoPath(string &opsproto_path) { class GeGenerator::Impl { public: - Impl(OmgContext &omg_context) : omg_context_(omg_context) {} + Impl(OmgContext &omg_context) : omg_context_(omg_context), graph_manager_(omg_context) {} ~Impl() = default; Status BuildModel(const Graph &graph, const vector &inputs, GeRootModelPtr &ge_models); @@ -251,9 +251,7 @@ class GeGenerator::Impl { bool SetOppVersionInfo(AttrHolder &obj); }; -Status GeGenerator::Initialize(const map &options) { - return Initialize(options, domi::GetContext()); -} +Status GeGenerator::Initialize(const map &options) { return Initialize(options, domi::GetContext()); } Status GeGenerator::Initialize(const map &options, OmgContext &omg_context) { impl_ = ge::MakeShared(omg_context); @@ -491,9 +489,7 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr if ((impl_->build_mode_ == BUILD_MODE_TUNING) && (impl_->build_step_ == BUILD_STEP_BEFORE_UB_MATCH || impl_->build_step_ == BUILD_STEP_AFTER_BUILDER || impl_->build_step_ == BUILD_STEP_AFTER_BUILDER_SUB)) { - GELOGI("Build mode:%s with step:%s no need SaveModel.", - impl_->build_mode_.c_str(), - impl_->build_step_.c_str()); + GELOGI("Build mode:%s with step:%s no need SaveModel.", impl_->build_mode_.c_str(), impl_->build_step_.c_str()); return SUCCESS; } @@ -528,19 +524,9 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr return SUCCESS; } -namespace { - bool 
IsNeedConnectInputOpForSingleOp(GeTensorDesc &tensor_desc) { - bool is_need = true; - // format and dtype is all reserved, stand for Optional input. When singleop scene - if (tensor_desc.GetFormat() == FORMAT_RESERVED && tensor_desc.GetDataType() == DT_UNDEFINED) { - is_need = false; - } - return is_need; - } -} - -Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector &inputs, - const vector &outputs) { +Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, + const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, + bool is_offline) { GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); if (!inputs.empty() && (inputs.size() != op_desc->GetAllInputsSize())) { GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetAllInputsSize()); @@ -550,17 +536,7 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector GELOGE(PARAM_INVALID, "Tensor size: %zu, Outputs size: %zu", outputs.size(), op_desc->GetOutputsSize()); return PARAM_INVALID; } - return SUCCESS; -} - -Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, - const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, - bool is_offline) { - if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) { - GELOGE(PARAM_INVALID, "input param is invalid when build single op!"); - return PARAM_INVALID; - } OmgContext &omg_context = (impl_ == nullptr) ? 
domi::GetContext() : impl_->omg_context_; omg_context.is_dynamic_input = ContainsDynamicInpus(*op_desc); @@ -595,18 +571,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in if (inputs.empty()) { for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) { GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR); - if (!IsNeedConnectInputOpForSingleOp(*input_desc)) { - continue; - } GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false)); arg_index++; } } else { for (const auto &in_desc : inputs) { GeTensorDesc input_desc = in_desc.GetTensorDesc(); - if (!IsNeedConnectInputOpForSingleOp(input_desc)) { - continue; - } GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true)); arg_index++; } @@ -709,7 +679,7 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector static std::atomic atomic_graph_id(0); auto graph_id = atomic_graph_id.fetch_add(1); const std::map options; - Status ret = graph_manager_.AddGraph(graph_id, graph, options, omg_context_); + Status ret = graph_manager_.AddGraph(graph_id, graph, options); if (ret != SUCCESS) { GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph fail, graph id: %u", graph_id); (void)graph_manager_.Finalize(); @@ -742,7 +712,7 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph) { static std::atomic atomic_graph_id(0); auto graph_id = atomic_graph_id.fetch_add(1); const std::map options; - Status ret = graph_manager_.AddGraph(graph_id, graph, options, omg_context_); + Status ret = graph_manager_.AddGraph(graph_id, graph, options); if (ret != SUCCESS) { GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph failed, graph id: %u", graph_id); (void)graph_manager_.Finalize(); diff --git a/ge/generator/generator_api.cc b/ge/generator/generator_api.cc index 675b8811..3f92f1a2 100644 --- a/ge/generator/generator_api.cc +++ b/ge/generator/generator_api.cc @@ -1,5 
+1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "generator/generator_api.h" #include "common/ge/ge_util.h" #include "common/util.h" @@ -115,7 +116,7 @@ Status_t OpTaskGernerator(const char *op_type, const OpTensor_t *in_tensor, int CHECK_PARAM_NOT_NULL(om_file); const std::string om_file_name(om_file); - std::string op_name = std::string(op_type) + "_" + std::to_string(ge::GetCurrentTimestamp()); + std::string op_name = std::string(op_type) + "_" + std::to_string(ge::GetCurrentTimestap()); ge::OpDescPtr op_desc = ge::MakeShared(op_name, op_type); if (op_desc == nullptr) { return ge::FAILED; diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 1da84991..27d0b13f 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,77 +17,25 @@ #include "graph/build/graph_builder.h" #include "common/ge/ge_util.h" #include "common/helper/model_helper.h" +#include "common/opskernel/ops_kernel_info_types.h" #include "graph/build/logical_stream_allocator.h" #include "graph/build/run_context.h" #include "graph/build/stream_graph_optimizer.h" -#include "graph/common/ge_call_wrapper.h" -#include "graph/ge_context.h" #include "graph/manager/graph_var_manager.h" #include "graph/passes/mark_same_addr_pass.h" #include "graph/utils/node_utils.h" #include "graph/utils/type_utils.h" +#include "graph/common/ge_call_wrapper.h" #include "init/gelib.h" #include "model/ge_model.h" #include "graph/ge_context.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" using domi::BuildMode; namespace { const int32_t kInvalidPerfLevel = -1; -enum NodeType { kSubgraphData, kSubgraphNode, kOthers }; } // namespace namespace ge { -NodeType TransferNodeType(const NodePtr &node) { - const std::string type = node->GetType(); - if (type == ge::DATA) { - if (node->GetOwnerComputeGraph()->GetParentNode() == nullptr) { - GELOGD("access src data node:%s", node->GetName().c_str()); - return kOthers; - } - GELOGD("access subgraph input node:%s", node->GetName().c_str()); - return kSubgraphData; - } else if (type == PARTITIONEDCALL) { - GELOGD("access subgraph node:%s", node->GetName().c_str()); - return kSubgraphNode; - } - GELOGD("access other node:%s", node->GetName().c_str()); - return kOthers; -} - -Status HandleSubgraphNode(NodePtr &src_node, OutDataAnchorPtr &src_out_anchor) { - auto subgraph = NodeUtils::GetSubgraph(*src_node, 0); - GE_CHECK_NOTNULL(subgraph); - const NodePtr &net_output_node = subgraph->FindFirstNodeMatchType(NETOUTPUT); - GE_CHECK_NOTNULL(net_output_node); - const InDataAnchorPtr &in_data_anchor = net_output_node->GetInDataAnchor(src_out_anchor->GetIdx()); - GE_CHECK_NOTNULL(in_data_anchor); - const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - 
GE_CHECK_NOTNULL(peer_out_anchor); - - src_node = peer_out_anchor->GetOwnerNode(); - src_out_anchor = peer_out_anchor; - return SUCCESS; -} - -Status HandleSubgraphDataNode(NodePtr &src_node, OutDataAnchorPtr &src_out_anchor) { - uint32_t index = 0; - if (!AttrUtils::GetInt(src_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, index)) { - GELOGE(FAILED, "Get attr ATTR_NAME_PARENT_NODE_INDEX failed, node:%s.", src_node->GetName().c_str()); - return FAILED; - } - const NodePtr &parent_node = src_node->GetOwnerComputeGraph()->GetParentNode(); - GE_CHECK_NOTNULL(parent_node); - const InDataAnchorPtr &in_data_anchor = parent_node->GetInDataAnchor(index); - GE_CHECK_NOTNULL(in_data_anchor); - const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_anchor); - - src_node = peer_out_anchor->GetOwnerNode(); - src_out_anchor = peer_out_anchor; - return SUCCESS; -} - GraphBuilder::GraphBuilder() : build_mode_(BuildMode::GEN_TASK_WITH_FUSION), hcom_parallel_(false) {} void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { @@ -124,18 +72,23 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { } } - auto ret = SetInputSize(node_ptr); - if (ret != SUCCESS) { - GELOGE(ret, "Set node inputDesc size failed, node name is %s", node_ptr->GetName().c_str()); - return ret; - } - - ret = OpsKernelBuilderManager::Instance().CalcOpRunningParam(*node_ptr); - if (ret != SUCCESS) { - GELOGE(ret, "Calculate op running param failed, node name is %s", node_ptr->GetName().c_str()); - return ret; + OpsKernelInfoStorePtr kernel_info = instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(kernel_lib_name); + if (kernel_info != nullptr) { + auto ret = SetInputSize(node_ptr); + if (ret != SUCCESS) { + GELOGE(ret, "Set node inputDesc size failed, node name is %s", node_ptr->GetName().c_str()); + return ret; + } + ret = kernel_info->CalcOpRunningParam(*node_ptr); + if (ret != SUCCESS) { + GELOGE(ret, "Calculate op 
running param failed, node name is %s", node_ptr->GetName().c_str()); + return ret; + } + GE_CHK_STATUS_RET(AddOutputMemTypeForNode(node_ptr)); + } else { + GELOGE(GE_GRAPH_PARAM_NULLPTR, "Get op %s ops kernel info store failed", node_ptr->GetName().c_str()); + return INTERNAL_ERROR; } - GE_CHK_STATUS_RET(AddOutputMemTypeForNode(node_ptr)); } auto parent_node = graph->GetParentNode(); @@ -204,8 +157,8 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector(reinterpret_cast(var_manager->GetVarMemMaxSize())); uint8_t *get_weight_mem_base = get_mem_base; if (weight_size > 0) { - get_weight_mem_base = get_mem_base + memory_size + p2p_memory_size; - } - std::map mem_type_to_data_mem_base; - mem_type_to_data_mem_base[RT_MEMORY_HBM] = get_mem_base; - if (p2p_memory_size == 0) { - mem_type_to_data_mem_base[RT_MEMORY_P2P_DDR] = nullptr; - } else { - mem_type_to_data_mem_base[RT_MEMORY_P2P_DDR] = get_mem_base + memory_size; + get_weight_mem_base = get_mem_base + memory_size; } - std::map mem_type_to_data_mem_size; - mem_type_to_data_mem_size[RT_MEMORY_HBM] = memory_size; - mem_type_to_data_mem_size[RT_MEMORY_P2P_DDR] = p2p_memory_size; + RunContextUtil run_context; - Status ret = run_context.InitMemInfo(get_mem_base, memory_size, mem_type_to_data_mem_base, mem_type_to_data_mem_size, - get_weight_mem_base, weight_size); + Status ret = run_context.InitMemInfo(get_mem_base, memory_size, get_weight_mem_base, weight_size); if (ret != SUCCESS) { GELOGE(ret, "task_generator init mem info fail."); return ret; @@ -562,50 +500,22 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vectorGetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - uint32_t mem_type; - if (!AttrUtils::GetInt(op_desc, ATTR_INPUT_MEMORY_TYPE, mem_type)) { - return SUCCESS; - } - GELOGD("[%s] has attr input_memory_type %ld", op_desc->GetName().c_str(), mem_type); - for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { - const auto &peer_out_anchor = 
in_data_anchor->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); - bool valid_flag = false; - auto src_node = peer_out_anchor->GetOwnerNode(); - auto src_out_anchor = peer_out_anchor; - while (true) { - const auto &src_desc = src_node->GetOpDesc(); - GE_IF_BOOL_EXEC(src_desc == nullptr, continue); - GELOGD("[%s:%u] set attr output_memory_type %ld", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), - mem_type); - if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE, - mem_type)) { - GELOGE(INTERNAL_ERROR, "Set out_memory_type attr for [%s:%d] failed.", src_desc->GetName().c_str(), - src_out_anchor->GetIdx()); + int64_t mem_type; + if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_INPUT_MEMORY_TYPE, mem_type)) { + GELOGD("[%s] has attr input_memory_type %ld", node->GetName().c_str(), mem_type); + for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { + const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + const auto &src_node = peer_out_anchor->GetOwnerNode(); + const auto &src_op = src_node->GetOpDesc(); + GE_IF_BOOL_EXEC(src_op == nullptr, continue); + if (!AttrUtils::SetInt(src_op, ATTR_OUTPUT_MEMORY_TYPE, mem_type)) { + GELOGE(INTERNAL_ERROR, "Set out_memory_type attr failed."); return INTERNAL_ERROR; } - switch (TransferNodeType(src_node)) { - case kSubgraphNode: - GE_CHK_STATUS_RET(HandleSubgraphNode(src_node, src_out_anchor), "Handle subgraph node %s failed", - src_node->GetName().c_str()); - break; - case kSubgraphData: - GE_CHK_STATUS_RET(HandleSubgraphDataNode(src_node, src_out_anchor), "Handle Data node %s in subgraph failed", - src_node->GetName().c_str()); - break; - case kOthers: - default: - valid_flag = true; - break; - } - if (valid_flag) { - break; - } + return SUCCESS; } } - return SUCCESS; } } // namespace ge diff --git a/ge/graph/build/graph_builder.h b/ge/graph/build/graph_builder.h index 
329f3ebc..a70a5464 100644 --- a/ge/graph/build/graph_builder.h +++ b/ge/graph/build/graph_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/label_allocator.cc b/ge/graph/build/label_allocator.cc index 0f3eff16..f8fbe28b 100644 --- a/ge/graph/build/label_allocator.cc +++ b/ge/graph/build/label_allocator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/label_allocator.h b/ge/graph/build/label_allocator.h index 7c7b2f00..01811e1d 100644 --- a/ge/graph/build/label_allocator.h +++ b/ge/graph/build/label_allocator.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/logical_stream_allocator.cc b/ge/graph/build/logical_stream_allocator.cc index 5b8ce824..d1866584 100644 --- a/ge/graph/build/logical_stream_allocator.cc +++ b/ge/graph/build/logical_stream_allocator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,10 +25,10 @@ #include "graph/common/ge_call_wrapper.h" using std::map; +using std::queue; using std::set; using std::string; using std::vector; -using std::queue; namespace ge { LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {} @@ -210,8 +210,8 @@ bool AssignByDependencyPass::CouldReuse(const SubgraphPtr &subgraph, const Subgr } LogicalStreamPass::SubgraphPtr AssignByDependencyPass::GetReusableSubgraph( - const SubgraphPtr &subgraph, const map &end_subgraph_map, - const map &pld_subgraph_map) { + const SubgraphPtr &subgraph, const map &end_subgraph_map, + const map &pld_subgraph_map) { const SubGraphInfo &subgraph_info = subgraph->subgraph_info; for (const auto &pld_2_end : subgraph_info.GetPld2EndMap()) { const NodePtr &peer_end = pld_2_end.second; @@ -481,7 +481,7 @@ Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vectorGetOpDesc(), ATTR_NAME_STREAM_LABEL, out_stream_label); // normally, Allreduce do not have streamLabel. when in horovod scenario Allreduce will have streamLabel bool isSuccessorParallel = - (out_stream_label == reduce_stream_label) || (!reduce_stream_label.empty() && out_stream_label.empty()); + (out_stream_label == reduce_stream_label) || (!reduce_stream_label.empty() && out_stream_label.empty()); if (isSuccessorParallel) { all_reduce_succs.emplace(out_node); all_out_data_nodes.emplace(out_node); @@ -671,7 +671,6 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra int64_t stream_num = context_.next_stream; vector stream_has_node(stream_num); - for (const NodePtr &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { if (node != nullptr) { auto op_desc = node->GetOpDesc(); diff --git a/ge/graph/build/logical_stream_allocator.h b/ge/graph/build/logical_stream_allocator.h index e09d7cd6..280a4104 100644 --- a/ge/graph/build/logical_stream_allocator.h +++ b/ge/graph/build/logical_stream_allocator.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * 
Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index 61dd3462..8668e81e 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "graph/build/memory/binary_block_mem_assigner.h" #include #include "framework/common/debug/ge_log.h" diff --git a/ge/graph/build/memory/binary_block_mem_assigner.h b/ge/graph/build/memory/binary_block_mem_assigner.h index 96a31aac..de6cae0d 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.h +++ b/ge/graph/build/memory/binary_block_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 2d30c57e..773eac6a 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,10 +37,10 @@ #include "omg/omg_inner_types.h" #include "runtime/mem.h" -using std::map; -using std::set; using std::list; +using std::map; using std::pair; +using std::set; using std::string; using std::stringstream; using std::unordered_map; @@ -168,10 +168,10 @@ void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLi auto it_block = std::max_element(std::begin(block->NoAlignSizeList()), std::end(block->NoAlignSizeList())); auto it_this = std::max_element(std::begin(NoAlignSizeList()), std::end(NoAlignSizeList())); if (it_block != std::end(block->NoAlignSizeList()) && it_this != std::end(NoAlignSizeList())) { - if ((continuous_block_ && block->continuous_block_) || - (continuous_block_ && (*it_this < *it_block)) || (block->continuous_block_ && (*it_this > *it_block))) { - GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", - *it_this, continuous_block_, *it_block, block->continuous_block_); + if ((continuous_block_ && block->continuous_block_) || (continuous_block_ && (*it_this < *it_block)) || + (block->continuous_block_ && (*it_this > *it_block))) { + GELOGD("Conflict current block size:%zu continuous:%d, reuse block max size:%zu continuous:%d", *it_this, + continuous_block_, *it_block, block->continuous_block_); return; } } @@ -189,10 +189,11 @@ void MemoryBlock::AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLi parent->child_blocks_.emplace_back(child); parent->child_offset_ += child->AlignSize(); child->deleted_block_ = true; - GELOGI("Add continuous block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" - " block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", child, child->block_size_, - child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, parent->block_size_, - parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); + GELOGI( + "Add continuous block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" + " 
block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", + child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, + parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); } } @@ -220,10 +221,11 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ parent->child_blocks_.emplace_back(child); parent->child_offset_ += child->AlignSize(); child->deleted_block_ = true; - GELOGI("Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" - " block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", child, child->block_size_, - child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, parent->block_size_, - parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); + GELOGI( + "Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" + " block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", + child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, + parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); } } @@ -260,9 +262,9 @@ size_t MemoryBlock::GetDependLifeBegin(int64_t stream_id, DependStreamLife &tota void AddDependLife(const ge::NodePtr &org_node, const ge::NodePtr &node, int64_t stream_id, std::map &depend_stream_life, DependStreamLife &total_node_depend_stream_life) { - GE_CHECK_NOTNULL_EXEC(node, return); + GE_CHECK_NOTNULL_EXEC(node, return ); auto node_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL_EXEC(node_desc, return); + GE_CHECK_NOTNULL_EXEC(node_desc, return ); auto node_id = node_desc->GetId(); auto stream_life = total_node_depend_stream_life.find(node_id); if (stream_life != total_node_depend_stream_life.end()) { @@ -292,8 +294,8 @@ void AddDependLife(const ge::NodePtr &org_node, const ge::NodePtr &node, int64_t depend_stream_life[peer_node_stream_id] = peer_node_life_time; if (peer_node_stream_id 
!= stream_id) { GELOGI("Node:%s stream id:%ld depend node:%s stream id:%ld index[%d] life time[%zu].", - org_node->GetName().c_str(), stream_id, peer_node_desc->GetName().c_str(), - peer_node_stream_id, peer_out_anchor->GetIdx(), peer_node_life_time); + org_node->GetName().c_str(), stream_id, peer_node_desc->GetName().c_str(), peer_node_stream_id, + peer_out_anchor->GetIdx(), peer_node_life_time); } AddDependLife(org_node, peer_node, stream_id, depend_stream_life, total_node_depend_stream_life); } @@ -358,9 +360,9 @@ Status GetNoAlignSize(const ge::OpDesc &desc, uint32_t index, size_t &size) { // calculate tensor real size auto output_op_desc = desc.GetOutputDescPtr(index); if (output_op_desc == nullptr) { - GELOGI("GetNoAlignSize failed. OpName: %s, OpType: %s, index: %d", - desc.GetName().c_str(), desc.GetType().c_str(), index); - return FAILED; + GELOGI("GetNoAlignSize failed. OpName: %s, OpType: %s, index: %d", desc.GetName().c_str(), desc.GetType().c_str(), + index); + return FAILED; } int64_t tensor_size = 0; GeShape shape = output_op_desc->GetShape(); @@ -396,17 +398,19 @@ string MemoryBlock::String() { for (auto x : NodeTypeIndexList()) { ss << "__node: " << ToString(x) << " "; } - for (const auto& symbol : SymbolList()) { + for (const auto &symbol : SymbolList()) { ss << "__symbol: " << symbol << " "; } - ss << "memory_type: " << memory_type_ << " "; return ss.str(); } BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map &anchor_to_symbol, const map> &symbol_to_anchors) - : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), - symbol_to_anchors_(symbol_to_anchors), anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} + : mem_offset_(0), + compute_graph_(std::move(compute_graph)), + symbol_to_anchors_(symbol_to_anchors), + anchor_to_symbol_(anchor_to_symbol), + life_time_(0) {} BlockMemAssigner::~BlockMemAssigner() { GELOGD("blocks_store_ size : %lu", blocks_store_.size()); @@ -504,7 +508,6 @@ bool 
IsDirectOutputNode(const NodePtr &node, int idx) { void AddReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { string key = std::to_string(mem_block.Size()); key += "_" + std::to_string(mem_block.stream_id_); - key += "_" + std::to_string(mem_block.memory_type_); auto it = reusable_block_counts.find(key); if (it != reusable_block_counts.end()) { it->second++; @@ -516,7 +519,6 @@ void AddReusableBlockCount(const MemoryBlock &mem_block, map & void ReduceReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { string key = std::to_string(mem_block.Size()); key += "_" + std::to_string(mem_block.stream_id_); - key += "_" + std::to_string(mem_block.memory_type_); auto it = reusable_block_counts.find(key); if (it != reusable_block_counts.end()) { if (it->second > 0) { @@ -533,20 +535,17 @@ bool CanReuseBySize(const map &reusable_block_counts, const Me } else { string key = std::to_string(reusable_block.Size()); key += "_" + std::to_string(reusable_block.stream_id_); - key += "_" + std::to_string(reusable_block.memory_type_); auto it = reusable_block_counts.find(key); - GE_IF_BOOL_EXEC((it != reusable_block_counts.end() && (it->second > kReuseMaxCount)) && - (reusable_block.Size() > block_size), - can_reuse = true; - GELOGD("Less size mem reuse, reuse block size:%zu, current block size:%zu", - reusable_block.Size(), block_size);); + GE_IF_BOOL_EXEC( + (it != reusable_block_counts.end() && (it->second > kReuseMaxCount)) && (reusable_block.Size() > block_size), + can_reuse = true; + GELOGD("Less size mem reuse, reuse block size:%zu, current block size:%zu", reusable_block.Size(), block_size);); } return can_reuse; } bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, - uint32_t &peer_input_index, - bool &no_need_assign_memory, bool &reset_zero_copy_flag) { + uint32_t &peer_input_index, bool &no_need_assign_memory) { if (n == nullptr || 
n->GetAllOutDataAnchors().size() <= 0) { return false; } @@ -572,19 +571,11 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false;); // If GetBool fail, is_input_continuous is false. - bool is_input_continuous_no_padding = false; - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, - is_input_continuous_no_padding); - if (is_input_continuous_no_padding) { - reset_zero_copy_flag = true; - return false; - } (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); - no_need_assign_memory = true; - return false;); + no_need_assign_memory = true; return false;); if (is_input_continuous) { if (n->GetOwnerComputeGraph() != nullptr) { @@ -613,27 +604,15 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou /// @return void /// void BlockMemAssigner::InitReuseFlag() { - static const std::set kPreReuseTypes = { ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ANN_DATA_TYPE, - ge::NETOUTPUT, ge::PROPOSAL, ge::ZEROSLIKE, - ge::CONSTANT, ge::CONSTANTOP }; - static const std::set kPostReuseTypes = { ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ENTER, ge::REFENTER, - ge::NEXTITERATION, ge::REFNEXTITERATION }; + static const std::set kPreReuseTypes = {ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ANN_DATA_TYPE, + ge::NETOUTPUT, ge::PROPOSAL, ge::ZEROSLIKE, + ge::CONSTANT, ge::CONSTANTOP}; + static const std::set kPostReuseTypes = {ge::DATA_TYPE, ge::AIPP_DATA_TYPE, ge::ENTER, + ge::REFENTER, ge::NEXTITERATION, ge::REFNEXTITERATION}; for (const auto &pair : symbol_to_anchors_) { std::string symbol = pair.first; bool pre_reuse_flag = true; bool post_reuse_flag = true; - // default memory type - int64_t mem_type = RT_MEMORY_HBM; - GetSymbolMemType(pair.second, mem_type); - GELOGD("The memory 
type of symbol[%s] is [%ld]].", symbol.c_str(), mem_type); - if (mem_type == RT_MEMORY_P2P_DDR) { - UpdateOpTensorMemType(pair.second, mem_type); - } - // Only the memory with special requirements is processed. The HBM uses the default processing mode. - if (mem_type == RT_MEMORY_P2P_DDR) { - symbol_to_mem_type_[symbol] = mem_type; - } - for (const auto &node_index_io : pair.second) { if (node_index_io.io_type_ == kIn) { continue; @@ -749,66 +728,6 @@ void BlockMemAssigner::PrintSymbolMap() { } } -void BlockMemAssigner::GetSymbolMemType(std::list node_index_io_list, int64_t &memory_type) { - memory_type = RT_MEMORY_HBM; - vector memory_types; - for (auto &node_index_io : node_index_io_list) { - auto op_desc = node_index_io.node_->GetOpDesc(); - if (op_desc == nullptr) { - GELOGW("Node[%s] op desc is null.", node_index_io.node_->GetName().c_str()); - return; - } - - if (node_index_io.io_type_ == kIn) { - vector input_memory_types; - (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types); - if (!input_memory_types.empty() && node_index_io.index_ < input_memory_types.size()) { - int64_t input_memory_type = input_memory_types[node_index_io.index_]; - GELOGD("Node[%s]: the memory type of input index [%u] is [%ld]].", op_desc->GetName().c_str(), - node_index_io.index_, input_memory_type); - memory_types.emplace_back(input_memory_type); - } - } - if (node_index_io.io_type_ == kOut) { - vector output_memory_types; - (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, output_memory_types); - if (!output_memory_types.empty() && node_index_io.index_ < output_memory_types.size()) { - int64_t output_memory_type = output_memory_types[node_index_io.index_]; - GELOGD("Node[%s]: the memory type of output index [%u] is [%ld]].", op_desc->GetName().c_str(), - node_index_io.index_, output_memory_type); - memory_types.emplace_back(output_memory_type); - } - } - } - - // memory priority - for (auto node_memory_type : 
memory_types) { - if (node_memory_type > memory_type) { - memory_type = node_memory_type; - } - } -} - -void BlockMemAssigner::UpdateOpTensorMemType(std::list node_index_io_list, int64_t memory_type) { - for (auto &node_index_io : node_index_io_list) { - auto op_desc = node_index_io.node_->GetOpDesc(); - if (op_desc == nullptr) { - GELOGW("Node[%s] op desc is null.", node_index_io.node_->GetName().c_str()); - return; - } - - if (node_index_io.io_type_ == kIn) { - auto input_desc = op_desc->MutableInputDesc(node_index_io.index_); - (void) AttrUtils::SetInt(input_desc, ATTR_NAME_TENSOR_MEM_TYPE, memory_type); - } - - if (node_index_io.io_type_ == kOut) { - auto output_desc = op_desc->MutableOutputDesc(node_index_io.index_); - (void) AttrUtils::SetInt(output_desc, ATTR_NAME_TENSOR_MEM_TYPE, memory_type); - } - } -} - bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { if (n == nullptr) { GELOGE(FAILED, "Node is null."); @@ -828,8 +747,8 @@ bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) { if (is_output_continuous) { if (n->GetOwnerComputeGraph() != nullptr) { string graph_name = n->GetOwnerComputeGraph()->GetName(); - GELOGI("%s name[%s] set continuous, output size[%u].", graph_name.c_str(), - n->GetName().c_str(), n->GetAllOutDataAnchorsSize()); + GELOGI("%s name[%s] set continuous, output size[%u].", graph_name.c_str(), n->GetName().c_str(), + n->GetAllOutDataAnchorsSize()); return true; } } @@ -855,9 +774,9 @@ bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, bool continuous) { } MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, - OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, + MemoryType mem_type, const NodePtr &n, uint32_t out_index, const vector &workspace_reuse_flag, const bool is_op_reuse_mem, - const bool continuous, int64_t memory_type) { + const bool continuous) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "Input parameter n is null."); auto 
node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); @@ -866,14 +785,12 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, string ge_disable_reuse_mem_env = "0"; (void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env); if (ge_disable_reuse_mem_env != "1") { - bool reuse_mem_flag = (mem_type == kOutput) ? IsPreReuse(n, out_index) : - !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); - is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && - !node_op_desc->HasAttr(kOpNoReuseMem) && reuse_mem_flag && is_op_reuse_mem; + bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]); + is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) && + reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index)); auto stream_id = node_op_desc->GetStreamId(); - if (is_reuse_memory && !continuous && !reusable_blocks_[memory_type].empty()) { - for (auto it = reusable_blocks_[memory_type][stream_id].begin(); - it != reusable_blocks_[memory_type][stream_id].end(); ++it) { + if (is_reuse_memory && !continuous) { + for (auto it = reusable_blocks_[stream_id].begin(); it != reusable_blocks_[stream_id].end(); ++it) { MemoryBlock *reusable_block = *it; if (!IsPostReuse(reusable_block)) { reusable_block->reuse_mem_ = false; @@ -893,14 +810,14 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, reusable_block->continuous_block_ = continuous; reusable_block->ref_count_++; ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); - reusable_blocks_[memory_type][stream_id].erase(it); + reusable_blocks_[stream_id].erase(it); return reusable_block; } } } } - auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type); + auto block = new (std::nothrow) 
MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); // Data and netoutput need zero copy block @@ -917,8 +834,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } } memory_blocks_.emplace_back(block); - // cause memory_blocks_ may reduce when swap after, - // create blocks_store_ to assure blocks deleted finally blocks_store_.emplace_back(block); return block; } @@ -930,13 +845,11 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); MemoryBlock *block = nullptr; int64_t total_size = 0; - int64_t memory_type = RT_MEMORY_HBM; for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { auto output_op_desc = node_op_desc->GetOutputDescPtr(index); if (output_op_desc == nullptr) { return nullptr; } - int64_t size = 0; if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { GELOGI("Get size failed"); @@ -950,27 +863,14 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec if (index != 0) { zero_memory_list_.emplace_back(n, kOutput, index); } - - if (index == 0) { - NodeIndexIO node_index_io(n, index, kOut); - auto iter = anchor_to_symbol_.find(node_index_io.ToString()); - if (iter != anchor_to_symbol_.end()) { - string symbol = iter->second; - if (symbol_to_mem_type_.find(symbol) != symbol_to_mem_type_.end()) { - memory_type = symbol_to_mem_type_[symbol]; - GELOGD("Continuous out memory symbol is [%s], memory type is [%ld]", symbol.c_str(), memory_type); - } - } - } } auto block_size = GetBlockSize(total_size, ranges); - GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", node_op_desc->GetName().c_str(), - total_size, block_size); + GELOGI("Node[%s] continuous out memory size[%ld] block size[%zu]", 
node_op_desc->GetName().c_str(), total_size, + block_size); vector workspace_reuse_flag; - block = ApplyMemory(block_size, total_size, total_size, kOutput, n, 0, workspace_reuse_flag, is_op_reuse_mem, true, - memory_type); + block = ApplyMemory(block_size, total_size, total_size, kOutput, n, 0, workspace_reuse_flag, is_op_reuse_mem, true); if (block != nullptr) { // hccl task need align header and tail block->first_continuous_block_ = true; @@ -992,8 +892,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); } size_t no_align_size = 0; - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, - return nullptr, "Get no align size failed"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, return nullptr, + "Get no align size failed"); std::string symbol; if (IsSymbolExist(node_index_io, symbol)) { @@ -1002,23 +902,17 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, block->ref_count_++; } else { int64_t max_size = size; - int64_t memory_type = RT_MEMORY_HBM; auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); if (iter1 != anchor_to_symbol_.end()) { auto iter2 = symbol_size_.find(iter1->second); if (iter2 != symbol_size_.end()) { max_size = iter2->second; } - auto iter3 = symbol_to_mem_type_.find(iter1->second); - if (iter3 != symbol_to_mem_type_.end()) { - memory_type = iter3->second; - } } - auto block_size = GetBlockSize(max_size, ranges); vector workspace_reuse_flag; - block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, - workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); + block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, workspace_reuse_flag, is_op_reuse_mem, + continuous); } GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block 
is nullptr."); int out_count_reuse_input = block->ref_count_; @@ -1233,8 +1127,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector (void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); // Allocate memory for the current node and release node memory of the same size in the workspace GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", - for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); - ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); + ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_[stream_id])); if (IsContinuousOutput(node)) { (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); return SUCCESS; @@ -1248,20 +1141,19 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector // fusion: other type's size not means malloc HBM memory bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; if (l1_flag) { - GELOGI("fusion: node[%s], output[%s], output memory type [%d]", - op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); + GELOGI("fusion: node[%s], output[%s], output memory type [%d]", op_desc->GetName().c_str(), + op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); size = 0; } std::string peer_name; uint32_t peer_input_index = 0; bool out_node_set_continuous_input = false; - bool reset_zero_copy_flag = false; bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); if (!no_need_assign_memory) { out_node_set_continuous_input = - IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, no_need_assign_memory, reset_zero_copy_flag); + IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index, no_need_assign_memory); GE_IF_BOOL_EXEC(!no_need_assign_memory, - no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);); + 
no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input);); } no_need_assign_memory = (no_need_assign_memory || IsKnownSubgraphData(node)); if (no_need_assign_memory) { @@ -1273,12 +1165,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector if (need_change) { is_op_reuse_mem_ = false; } - MemoryBlock *mem_block = ApplyOutMemory(node, i, ranges, is_op_reuse_mem_, out_node_set_continuous_input); if (mem_block != nullptr) { - GE_IF_BOOL_EXEC(reset_zero_copy_flag, - mem_block->is_zero_copy_ = false; - GELOGI("Node[%s] output[%u] need assign memory before reassign.", op_desc->GetName().c_str(), i);); node_out_blocks_[node->GetName()].emplace_back(mem_block); if (out_node_set_continuous_input) { node_continuous_input_blocks_[peer_name][peer_input_index] = mem_block; @@ -1316,54 +1204,45 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) { return; } - for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { - iter->second[stream_id].clear(); - } + + stream_workspace_blocks_[stream_id].clear(); vector temp; GetNodeWorkSpaceSize(n, temp); vector workspace_bytes; - vector tvm_workspace_memory_type; - bool has_tvm_workspace_mem_type_attr = - ge::AttrUtils::GetListInt(node_op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, tvm_workspace_memory_type); + vector workspace_memory_type; + bool has_workspace_mem_type_attr = + ge::AttrUtils::GetListInt(node_op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type); vector workspace_reuse_flag; GE_IF_BOOL_EXEC(!ge::AttrUtils::GetListBool(node_op_desc, kAttrNameWorkspaceReuseFlag, workspace_reuse_flag), GELOGD("OP %s get workspace_reuse_flag attr failed", node_op_desc->GetName().c_str())); GELOGI("Assign memory node[%s], size [temp:%zu, memory type size:%zu]", node_op_desc->GetName().c_str(), - temp.size(), tvm_workspace_memory_type.size()); + temp.size(), 
workspace_memory_type.size()); - if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { - GELOGE(INTERNAL_ERROR, "fusion: node[%s], tvm workspace memory size error![v_temp:%zu, workspace:%zu]", - n->GetName().c_str(), temp.size(), tvm_workspace_memory_type.size()); + if (has_workspace_mem_type_attr && (temp.size() != workspace_memory_type.size())) { + GELOGE(INTERNAL_ERROR, "fusion: node[%s], workspace_memory size err![v_temp:%zu, workspace:%zu]", + n->GetName().c_str(), temp.size(), workspace_memory_type.size()); return; } for (size_t i = 0; i < temp.size(); i++) { // fusion: other type's size not means malloc HBM memory bool workspace_skip_flag = false; - if (has_tvm_workspace_mem_type_attr && tvm_workspace_memory_type[i] == RT_MEMORY_L1) { + if (has_workspace_mem_type_attr && workspace_memory_type[i] == RT_MEMORY_L1) { GELOGI( - "fusion: node[%s]workspace index[%d] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", - node_op_desc->GetName().c_str(), i, tvm_workspace_memory_type[i]); + "fusion: node[%s]workspace index[%d] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", + node_op_desc->GetName().c_str(), i, workspace_memory_type[i]); workspace_skip_flag = true; } if (temp[i] == 0 || workspace_skip_flag) { zero_memory_list_.emplace_back(n, kWorkspace, static_cast(i), false); continue; } - int64_t memory_type = RT_MEMORY_HBM; - if (!GetWorkSpaceMemoryType(n, i, memory_type)) { - GELOGW("Get workspace memory type failed."); - return; - } MemoryBlock *mem_block = ApplyMemory(GetBlockSize(static_cast(temp[i]), ranges), - static_cast(temp[i]), static_cast(temp[i]), - kWorkspace, n, static_cast(i), workspace_reuse_flag, - is_op_reuse_mem_, false, memory_type); + static_cast(temp[i]), static_cast(temp[i]), kWorkspace, n, + static_cast(i), workspace_reuse_flag, is_op_reuse_mem_, false); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); - 
CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type); - } - for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) { - ReleaseInputNodeOutMemory(node_out_blocks_, it->second[stream_id], n); + CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block); } + ReleaseInputNodeOutMemory(node_out_blocks_, reusable_blocks_[stream_id], n); } GELOGD("Assigned memory blocks:"); @@ -1386,11 +1265,11 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { } void BlockMemAssigner::CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block, int64_t memory_type) { + MemoryBlock *mem_block) { bool reuse_mem_flag = - ((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true; + ((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true; if (reuse_mem_flag) { - stream_workspace_blocks_[memory_type][stream_id].emplace_back(mem_block); + stream_workspace_blocks_[stream_id].emplace_back(mem_block); } } @@ -1398,10 +1277,10 @@ void BlockMemAssigner::GetNodeWorkSpaceSize(const NodePtr &node, vector GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node->GetOpDesc() == nullptr, return, "Op desc is null."); vector workspace_byte_nums = node->GetOpDesc()->GetWorkspaceBytes(); - GELOGD("node[%s] size:%zu", node->GetOpDesc()->GetName().c_str(), workspace_byte_nums.size()); + GELOGD("GetNodeWorkSpaceSize: node[%s] size:%zu", node->GetOpDesc()->GetName().c_str(), workspace_byte_nums.size()); for (int64_t byte_size : workspace_byte_nums) { workspace_memory.emplace_back(byte_size); - GELOGD("push back size:%ld", byte_size); + GELOGD("GetNodeWorkSpaceSize: push back size:%ld", byte_size); } } @@ -1427,16 +1306,15 @@ void MergeBlocks(std::vector &dest, std::vector &s } if (dest[i] != nullptr && src[i] != nullptr) { if (!dest[i]->reuse_mem_ || !src[i]->reuse_mem_) { - GELOGD("Diff batch's workspace can't be reused, i: %zu, 
dest[i]: %s, stream: %ld, src[i]: %s, stream: %ld.", - i, dest[i]->String().c_str(), dest[i]->stream_id_, src[i]->String().c_str(), src[i]->stream_id_); + GELOGD("Diff batch's workspace can't be reused, i: %zu, dest[i]: %s, stream: %ld, src[i]: %s, stream: %ld.", i, + dest[i]->String().c_str(), dest[i]->stream_id_, src[i]->String().c_str(), src[i]->stream_id_); continue; } for (auto &symbol : src[i]->SymbolList()) { dest[i]->AddSymbol(symbol); } for (size_t j = 0; j < src[i]->NodeTypeIndexList().size(); ++j) { - dest[i]->AddNodeTypeIndex(src[i]->NodeTypeIndexList()[j], - src[i]->RealSizeList()[j], + dest[i]->AddNodeTypeIndex(src[i]->NodeTypeIndexList()[j], src[i]->RealSizeList()[j], src[i]->NoAlignSizeList()[j]); src[i]->deleted_block_ = true; } @@ -1599,28 +1477,16 @@ void BlockMemAssigner::ResizeMemoryBlocks() { if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) { continue; } - if (memory_block->memory_type_ == RT_MEMORY_HBM) { - if (memory_block->first_continuous_block_) { - mem_offset_ += MEM_ALIGN_SIZE; - } - - memory_block->Resize(); - memory_block->SetHeadOffset(mem_offset_); - mem_offset_ += memory_block->Size(); - memory_block->SetTailOffset(mem_offset_ - 1); - } else if (memory_block->memory_type_ == RT_MEMORY_P2P_DDR) { - if (memory_block->first_continuous_block_) { - p2p_mem_offset_ += MEM_ALIGN_SIZE; - } - - memory_block->Resize(); - memory_block->SetHeadOffset(p2p_mem_offset_); - p2p_mem_offset_ += memory_block->Size(); - memory_block->SetTailOffset(p2p_mem_offset_ - 1); + if (memory_block->first_continuous_block_) { + mem_offset_ += MEM_ALIGN_SIZE; } + + memory_block->Resize(); + memory_block->SetHeadOffset(mem_offset_); + mem_offset_ += memory_block->Size(); + memory_block->SetTailOffset(mem_offset_ - 1); } - GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.", - mem_offset_, p2p_mem_offset_); + GELOGI("mem_offset_ exclude zero_copy_memory is %zu.", 
mem_offset_); } /// @@ -1632,8 +1498,8 @@ void BlockMemAssigner::ResizeMemoryBlocks() { /// @param [in] real_size memory size in need /// @return Status result /// -void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, - size_t real_size, size_t no_align_size, bool child_block) { +void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, size_t real_size, size_t no_align_size, + bool child_block) { ge::OpDescPtr op_desc = node_type.node->GetOpDesc(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(op_desc == nullptr, return, "op_desc is null."); string graph_name = node_type.node->GetOwnerComputeGraph()->GetName(); @@ -1651,7 +1517,7 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, return; } - static const set kSetOffsetTypes = { DATA_TYPE, AIPP_DATA_TYPE, MULTISHAPE, NETOUTPUT }; + static const set kSetOffsetTypes = {DATA_TYPE, AIPP_DATA_TYPE, MULTISHAPE, NETOUTPUT}; if ((kSetOffsetTypes.count(op_desc->GetType()) > 0) && !IsKnownSubgraphData(node_type.node)) { if ((output_list[node_type.index] == kInvalidOffset) || (output_list[node_type.index] < offset)) { output_list.at(node_type.index) = offset; @@ -1659,7 +1525,7 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, } else { // fusion: keep the original other type offset value from op_desc bool set_out_offset = (!has_mem_type_attr) || - (memorys_type.size() > node_type.index && memorys_type[node_type.index] != RT_MEMORY_L1); + (memorys_type.size() > node_type.index && memorys_type[node_type.index] != RT_MEMORY_L1); if (set_out_offset) { output_list.at(node_type.index) = offset; } @@ -1674,18 +1540,19 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, vector workspace_mem_type; bool has_workspace_mem_type = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_mem_type); // fusion: keep the original other type offset value from op_desc - bool set_workspace_offset = 
(!has_workspace_mem_type) || - (workspace_mem_type.size() > node_type.index && workspace_mem_type[node_type.index] != RT_MEMORY_L1); + bool set_workspace_offset = (!has_workspace_mem_type) || (workspace_mem_type.size() > node_type.index && + workspace_mem_type[node_type.index] != RT_MEMORY_L1); if (set_workspace_offset) { workspace_list.at(node_type.index) = offset; } op_desc->SetWorkspace(workspace_list); } - GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" - " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", graph_name.c_str(), - op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), - block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, block->reuse_mem_, - block->continuous_block_, block->deleted_block_, node_type.ref_input); + GELOGI( + "[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" + " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", + graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, + op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, + block->reuse_mem_, block->continuous_block_, block->deleted_block_, node_type.ref_input); } void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { @@ -1746,23 +1613,8 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || - (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || + (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || + (node_type == ASSIGNSUB) || (node_type == 
ASSIGN) || (node_type == HVDWAIT) || (node_type == HVDCALLBACKBROADCAST); } - -bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { - memory_type = RT_MEMORY_HBM; - vector workspace_memory_type; - auto op_desc = node->GetOpDesc(); - bool has_workspace_mem_type_attr = - ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type); - if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) { - GELOGE(INTERNAL_ERROR, "node[%s], workspace_memory size error![index:%zu, workspace:%zu]", - node->GetName().c_str(), index, workspace_memory_type.size()); - return false; - } - memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; - return true; -} } // namespace ge diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index f3d26c1d..6137911c 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,14 +36,14 @@ const size_t kMaxLifeTime = 0xffffffff; using DependStreamLife = std::map>; -enum OpMemoryType { kOutput, kWorkspace }; +enum MemoryType { kOutput, kWorkspace }; struct NodeTypeIndex { - NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false) + NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index, bool ref_input = false) : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} ge::NodePtr node = nullptr; - OpMemoryType mem_type = kOutput; + MemoryType mem_type = kOutput; uint32_t index = 0; size_t life_time_end = kMaxLifeTime; bool ref_input = false; @@ -59,8 +59,7 @@ struct NodeTypeIndex { class MemoryBlock { public: - explicit MemoryBlock(size_t block_size, int64_t stream_id = 0, bool reuse_mem = true, - int64_t memory_type = RT_MEMORY_HBM) + explicit MemoryBlock(size_t block_size, int64_t stream_id = 0, bool reuse_mem = true) : ref_count_(0), stream_id_(stream_id), deleted_block_(false), @@ -70,7 +69,6 @@ class MemoryBlock { first_continuous_block_(false), last_continuous_block_(false), is_zero_copy_(false), - memory_type_(memory_type), block_size_(block_size), head_offset_(0), tail_offset_(0), @@ -85,7 +83,7 @@ class MemoryBlock { symbol_list_.clear(); } - void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) { + void Init(size_t real_size, MemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) { real_size_list_.emplace_back(real_size); no_align_size_list_.emplace_back(no_align_size); node_type_index_list_.emplace_back(node, type, out_index, false); @@ -108,9 +106,7 @@ class MemoryBlock { no_align_size_list_.emplace_back(no_align_size); } - void AddSymbol(const std::string &symbol) { - symbol_list_.emplace_back(symbol); - } + void AddSymbol(const std::string &symbol) { symbol_list_.emplace_back(symbol); } const std::vector &NodeTypeIndexList() const { return 
node_type_index_list_; } const std::vector &SymbolList() const { return symbol_list_; } @@ -148,7 +144,7 @@ class MemoryBlock { bool last_continuous_block_; bool is_zero_copy_; std::map depend_stream_life_; - int64_t memory_type_; + private: size_t block_size_; std::vector real_size_list_; @@ -174,13 +170,11 @@ class BlockMemAssigner : public MemAssigner { Status Assign() override; - size_t GetMemOffset() const { return mem_offset_; } + size_t GetMemOffset() const { return mem_offset_; }; - size_t GetP2PMemOffset() const { return p2p_mem_offset_; } + int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; }; - int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; } - - std::vector GetMemoryBlocks() const { return memory_blocks_; } + std::vector GetMemoryBlocks() const { return memory_blocks_; }; /// /// @ingroup domi @@ -260,26 +254,7 @@ class BlockMemAssigner : public MemAssigner { /// void PrintSymbolMap(); - /// - /// @ingroup GE - /// @brief Get the memory type corresponding to the current symbol. - /// @param [in] node_index_io_list - /// @param [out] memory_type - /// @return void - /// - void GetSymbolMemType(std::list node_index_io_list, int64_t &memory_type); - - /// - /// @ingroup GE - /// @brief Update input tensor or output tensor of op to new memory type attr. 
- /// @param [in] node_index_io_list - /// @param [in] memory_type - /// @return void - /// - void UpdateOpTensorMemType(std::list node_index_io_list, int64_t memory_type); - size_t mem_offset_; - size_t p2p_mem_offset_; ge::ComputeGraphPtr compute_graph_; @@ -294,17 +269,14 @@ class BlockMemAssigner : public MemAssigner { std::map pre_reuse_flag_; std::map post_reuse_flag_; std::map symbol_size_; - std::map symbol_to_mem_type_; private: /// /// @ingroup GE /// @brief Traversing the compute_graph_ to apply for output memory while considering reuse - /// @param [in] n: node in compute_graph_ - /// @param [in] index: output node index - /// @param [in] ranges: available memory specifications - /// @param [in] is_op_reuse_mem: Whether the op reuses the memory, true: reuse; false: not reuse - /// @param [in] continuous: Whether the op uses continuous memory + /// @param [in] n node in compute_graph_ + /// @param [in] index output node index + /// @param [in] ranges available memory specifications /// @return MemoryBlock* /// @author /// @@ -321,15 +293,12 @@ class BlockMemAssigner : public MemAssigner { /// @param [in] n node in compute_graph_ /// @param [in] out_index output node index /// @param [in] workspace_reuse_flag reuse flag for workspace - /// @param [in] is_op_reuse_mem whether the op reuses memory - /// @param [in] continuous whether the memory of op is continuous - /// @param [in] memory_type device memory type /// @return MemoryBlock* /// @author /// - MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, + MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, MemoryType mem_type, const ge::NodePtr &n, uint32_t out_index, const std::vector &workspace_reuse_flag, - const bool is_op_reuse_mem, const bool continuous, int64_t memory_type); + const bool is_op_reuse_mem, const bool continuous); /// /// @ingroup GE @@ -338,12 +307,11 @@ class BlockMemAssigner : public MemAssigner 
{ /// @param [in] index out index /// @param [in] stream_id which stream op in /// @param [in] mem_block node workspace mem_block - /// @param [in] memory_type workspace memory type /// @return void /// @author /// void CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block, int64_t memory_type); + MemoryBlock *mem_block); /// /// @ingroup GE @@ -390,7 +358,7 @@ class BlockMemAssigner : public MemAssigner { bool IsZeroCopyBlock(const NodePtr &node, bool continuous); bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, - uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag); + uint32_t &peer_input_index, bool &no_need_assign_memory); /// /// @ingroup GE @@ -405,15 +373,13 @@ class BlockMemAssigner : public MemAssigner { bool IsContinuousOutput(const NodePtr &n); - bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); - MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem); - std::unordered_map>> reusable_blocks_; + std::unordered_map> reusable_blocks_; std::map reusable_block_counts_; - std::unordered_map>> stream_workspace_blocks_; + std::unordered_map> stream_workspace_blocks_; std::unordered_map> node_out_blocks_; diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index f4674a07..1cdb2efa 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -90,23 +90,18 @@ Status VariableMemoryAssigner::AssignVarAttr2Nodes() { } Status GraphMemoryAssigner::AssignMemory() { - ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_)); + ge::HybridMemAssignerPtr mem_assigner(new (std::nothrow) HybridMemAssigner(compute_graph_)); if (mem_assigner->Assign() != ge::SUCCESS) { GELOGE(ge::FAILED, "Memory assigner failed"); return ge::FAILED; } MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); - memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); - - if (mem_assigner->GetP2PMemOffset() > 0) { - MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); - memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); - } + memory_offset_.push_back(memory_offset); auto session_id = compute_graph_->GetSessionID(); int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM); auto variable_assigner = - std::unique_ptr(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); + std::unique_ptr(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); if (variable_assigner == nullptr) { GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); return ge::FAILED; @@ -125,7 +120,7 @@ Status GraphMemoryAssigner::AssignMemory() { ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { auto variable_assigner = - std::unique_ptr(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); + std::unique_ptr(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); if (variable_assigner == nullptr) { GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); return ge::FAILED; @@ -203,7 +198,7 @@ Status GraphMemoryAssigner::GetMaxBatchLabel(const map> if (i == 0) { // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. 
- (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); + (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); max_shape_dims = input_output_desc->GetShape().GetDims(); } else { vector current_shape_dims = input_output_desc->GetShape().GetDims(); @@ -224,7 +219,7 @@ Status GraphMemoryAssigner::GetMaxBatchLabel(const map> if (current_shape_dims[j] > max_shape_dims[j]) { max_shape_dims[j] = current_shape_dims[j]; // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); + (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); } // Only compare the first different dim in shape. break; @@ -237,7 +232,7 @@ Status GraphMemoryAssigner::GetMaxBatchLabel(const map> return SUCCESS; } -Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { +Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offset) { if (memory_offset_.empty()) { GELOGE(FAILED, "memory_offset_ is empty."); return ge::FAILED; @@ -253,31 +248,26 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, mapGetSessionID(); - if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { - GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", total_mem_offset, + if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) { + GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize()); - for (auto iter : mem_type_to_offset) { - ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"}, - {std::to_string(iter.first), std::to_string(iter.second), "featuremap", - std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())}); - } + 
ErrorManager::GetInstance().ATCReportErrMessage( + "E19022", {"size", "item", "maxsize"}, + {std::to_string(mem_offset), "featuremap", + std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())}); return ge::FAILED; } return SUCCESS; } -Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size) { +Status GraphMemoryAssigner::AssignZeroCopyMemory(size_t &mem_offset, size_t &zero_mem_copy_size) { BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;); - size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM]; + size_t mem_offset_tmp = mem_offset; // set offset for zero copy block for (auto &memory_block : priority_assigner->GetMemoryBlocks()) { @@ -285,24 +275,18 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offse continue; } memory_block->Resize(); - memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]); - mem_offset[RT_MEMORY_HBM] += memory_block->Size(); - memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1); + memory_block->SetHeadOffset(mem_offset); + mem_offset += memory_block->Size(); + memory_block->SetTailOffset(mem_offset - 1); } - GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset[RT_MEMORY_HBM]); + GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset); // set offset for zero copy nodes priority_assigner->SetOpMemOffset(true); - zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp; - auto iter = memory_offset_.find(RT_MEMORY_HBM); - if (iter == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset don't have memory type[hbm]."); - return FAILED; - } - iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM]; + zero_mem_copy_size = mem_offset - mem_offset_tmp; + memory_offset_[0].mem_offset_ = mem_offset; - GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], 
mem_offset_tmp, - zero_mem_copy_size); + GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset, mem_offset_tmp, zero_mem_copy_size); return SUCCESS; } @@ -315,15 +299,13 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { bool is_input_continuous = false; GE_CHECK_NOTNULL(node->GetOpDesc()); // If GetBool fail, is_input_continuous is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); // Assign continuous input memory if (is_input_continuous) { - int64_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; - ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type); + ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); return ret; @@ -332,7 +314,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { // Clean up atomic address, eg, hcom node vector input_indexes; // If GetListInt fail, input_indexes is empty. - (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); + (void)ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { // check whether there is an atomic conflict between the current node and the peer out node @@ -360,12 +342,12 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { // Get the reference type of the node, default is false bool is_ref = false; // If GetBool fail, is_ref is false. 
- (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); + (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); // Get the continuous output type of the node, default is false bool is_output_continuous = false; // If GetBool fail, is_output_continuous is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); + (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); // If the output is ref type and refers to the ref of an input, the name of the output // and the input are the same. Ge encounters ref type, finds matching relationship according @@ -378,23 +360,17 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } } } - for (auto pair : memory_offset_) { - GELOGI("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first, - pair.second.mem_offset_); - } + + GELOGI("After reassign continuous memory, memoffset = %zu.", memory_offset_[0].mem_offset_); return ge::SUCCESS; } Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type) { + int64_t &continuous_mem_size) { GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); + continuous_mem_start = memory_offset_[0].mem_offset_; bool continuous_input_alloc = false; - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type); - return FAILED; - } + (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc); for (auto &in_data_anchor : node->GetAllInDataAnchors()) { auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); 
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); @@ -403,7 +379,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); bool is_peer_output_continuous = false; // If GetBool fail, is_peer_output_continuous is false. - (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); + (void)ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and // continuous output of the previous node is the same, we can support it. If size != 1, there may be @@ -414,22 +390,22 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, "Current node %s requires continuous input, while the previous node %s requires " "continuous output. There may be conflict between the two. This node is not supported now.", node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str()); - return PARAM_INVALID;); + return PARAM_INVALID;); bool is_peer_reference = false; // If GetBool fail, is_peer_reference is false. - (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); + (void)AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); GE_IF_BOOL_EXEC(is_peer_reference, GELOGE(PARAM_INVALID, "Current node %s requires continuous input, while the previous node %s requires " "reference. There may be conflict between the two. 
This node is not supported now.", node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str()); - return PARAM_INVALID;); + return PARAM_INVALID;); vector output_list = peer_op_desc->GetOutputOffset(); std::vector offsets_for_fusion = {}; bool has_offset_attr = - AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); + AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); if (peer_out_data_anchor->GetIdx() < static_cast(output_list.size())) { if (continuous_input_alloc && !has_offset_attr) { if (in_data_anchor->GetIdx() == 0) { @@ -444,30 +420,29 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; continuous_mem_size = - output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; + output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; } GELOGI( - "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " - "real_size[%u].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), - 0, 0); + "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " + "real_size[%u].", + node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), + 0, 0); continue; } - - output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_; + output_list.at(peer_out_data_anchor->GetIdx()) = memory_offset_[0].mem_offset_; } else { GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx()); 
return FAILED; } peer_op_desc->SetOutputOffset(output_list); - size_t pre_mem_offset = iter->second.mem_offset_; + size_t pre_mem_offset = memory_offset_[0].mem_offset_; int64_t tensor_desc_size = 0; if (has_offset_attr) { if (peer_out_data_anchor->GetIdx() < static_cast(offsets_for_fusion.size())) { auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()]; - iter->second.mem_offset_ += offset_for_fusion; + memory_offset_[0].mem_offset_ += offset_for_fusion; } else { GELOGE(FAILED, "fusion: peer node %s index : %d is out of range.", peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx()); @@ -475,28 +450,28 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, } } else { Status ret = - TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size); + TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size); GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); - iter->second.mem_offset_ += tensor_desc_size; + memory_offset_[0].mem_offset_ += tensor_desc_size; } // If set tensor_actual_size, Memory alignment is not required. 
int32_t is_tensor_actual_size = 0; ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size); if (is_tensor_actual_size == 0) { - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + AlignMemOffset(MEM_ALIGN_SIZE); } GELOGI( - "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " - "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), - (iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size); + "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " + "real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx(), + pre_mem_offset, peer_op_desc->GetStreamId(), (memory_offset_[0].mem_offset_ - pre_mem_offset), tensor_desc_size); } - iter->second.mem_offset_ += MEM_ALIGN_SIZE; + memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE; if (!continuous_input_alloc) { - continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start; + continuous_mem_size = memory_offset_[0].mem_offset_ - continuous_mem_start; } return SUCCESS; } @@ -528,10 +503,10 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node } mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; GELOGI( - "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); + "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), 
out_data_anchor->GetIdx(), + output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); } out_op_desc->SetOutputOffset(output_list); return ge::SUCCESS; @@ -586,11 +561,11 @@ Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t mem_offset_reuse += output_mem_size; extra_memory_size = extra_memory_size + out_size - output_mem_size; - GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size, - output_mem_size); + GELOGI( + "[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld].", + node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx(), + pre_mem_offset, peer_op_desc->GetStreamId(), out_size, output_mem_size); } mem_offset_reuse += extra_memory_size; size_t after_mem_offset = mem_offset_reuse; @@ -601,7 +576,6 @@ Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { map> mem_reuse_virtual_input_nodes_map; - int64_t memory_type = RT_MEMORY_HBM; for (const auto &n : compute_graph_->GetAllNodes()) { OpDescPtr op_desc = n->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -611,6 +585,7 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { bool attr_reuse = false; bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); GE_IF_BOOL_EXEC(!get_reuse_flag, continue); + if (attr_reuse && attr_continuous) { if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) { // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse. 
@@ -618,19 +593,13 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { op_desc->GetOutputsSize()); return FAILED; } - GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type); - return FAILED; - } - GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); + + GELOGD("Start to reassign memory for virtual input node, memory offset = %zu.", memory_offset_[0].mem_offset_); string batch_label_string; // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); if (batch_label_string.empty()) { - size_t node_mem_offset = iter->second.mem_offset_; + size_t node_mem_offset = memory_offset_[0].mem_offset_; // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset); if (status != SUCCESS) { @@ -638,10 +607,9 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { return FAILED; } - iter->second.mem_offset_ = node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); + memory_offset_[0].mem_offset_ = node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE); + GELOGD("After reassign memory for virtual input node, align memory = %zu.", memory_offset_[0].mem_offset_); } else { // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. 
string current_node_full_name = op_desc->GetName(); @@ -719,8 +687,8 @@ Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t int64_t out_size; if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != SUCCESS) { - GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", - op_desc->GetName().c_str(), out_data_anchor->GetIdx()); + GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", op_desc->GetName().c_str(), + out_data_anchor->GetIdx()); return FAILED; } @@ -741,7 +709,6 @@ Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { map> mem_reuse_virtual_output_nodes_map; - int64_t memory_type = RT_MEMORY_HBM; for (const auto &n : compute_graph_->GetAllNodes()) { OpDescPtr op_desc = n->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -760,29 +727,22 @@ Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { in_data_anchor_list.size()); return FAILED; } - GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type); - return FAILED; - } - GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); + + GELOGD("Start to reassign memory for virtual output node, memory offset = %zu.", memory_offset_[0].mem_offset_); string batch_label_string; // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); if (batch_label_string.empty()) { - 
size_t node_mem_offset = iter->second.mem_offset_; + size_t node_mem_offset = memory_offset_[0].mem_offset_; // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset); if (status != SUCCESS) { GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str()); return FAILED; } - iter->second.mem_offset_ = node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); + memory_offset_[0].mem_offset_ = node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE); + GELOGD("After reassign memory for virtual output node, align memory = %zu.", memory_offset_[0].mem_offset_); } else { // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. string current_node_full_name = op_desc->GetName(); @@ -815,29 +775,26 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map nodes_mem_offset_list; for (auto &i_map : mem_reuse_nodes_map) { - vector virtual_nodes_list = i_map.second; - int64_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type), - "Get node list memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset don't have memory type[%ld].", memory_type); - return FAILED; - } - size_t max_batch_node_mem_offset = iter->second.mem_offset_; + size_t max_batch_node_mem_offset = memory_offset_[0].mem_offset_; nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset); + + vector virtual_nodes_list = i_map.second; for (auto &i_node : virtual_nodes_list) { // Op_desc is not nullptr, it has been checked. OpDescPtr op_desc = i_node->GetOpDesc(); string batch_label_string; // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. 
- (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); + (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); if (batch_label_string == max_batch_label) { Status status = SUCCESS; if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { @@ -853,16 +810,18 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(mapGetName().c_str()); return FAILED; } - iter->second.mem_offset_ = max_batch_node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); + memory_offset_[0].mem_offset_ = max_batch_node_mem_offset; + AlignMemOffset(MEM_ALIGN_SIZE); + GELOGD("After reassign memory for virtual node, align memory = %zu.", memory_offset_[0].mem_offset_); // Only assign memory of max batch nodes. break; } } } - PrintMemoryOffset(); + + // Assign memory of remaining nodes that have the same fixed_name. + GELOGD("Start to reassign memory for remaining batch virtual nodes, memory offset = %zu.", + memory_offset_[0].mem_offset_); size_t memory_reuse_index = 0; for (auto &i_map : mem_reuse_nodes_map) { vector virtual_nodes_list = i_map.second; @@ -897,16 +856,11 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { return status; } - auto mem_iter = memory_offset_.find(RT_MEMORY_HBM); - if (mem_iter == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM); - return FAILED; - } - for (auto &iter : normal_atomic_and_clean_nodes_map) { - int64_t atomic_mem_start = static_cast(mem_iter->second.mem_offset_); + int64_t atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start); + for (auto &atomic_node : iter.second) { vector mem_offset_end; status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end); if (status != 
SUCCESS) { @@ -916,10 +870,11 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { } } - int64_t atomic_mem_size = static_cast(mem_iter->second.mem_offset_) - atomic_mem_start; - if (atomic_mem_size != 0) { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}), - "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); + int64_t atomic_mem_size = static_cast(memory_offset_[0].mem_offset_) - atomic_mem_start; + status = SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}); + if (status != SUCCESS) { + GELOGE(status, "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); + return status; } } @@ -946,11 +901,11 @@ Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(mapGetName().c_str()); @@ -959,7 +914,7 @@ Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map is_connecting_output; // If GetBool fail, attr is_connecting_output is an empty vector. - (void) ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output); + (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output); if (is_connecting_output.empty()) { tmp_normal_atomic_nodes.emplace_back(peer_in_node); continue; @@ -996,7 +951,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP if (!atomic_workspace_info.empty()) { bool is_fusion_node = false; // If GetBool fail, is_fusion_node is false. 
- (void) ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); + (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node); if (is_fusion_node) { // Assign fusion atomic node workspace memory @@ -1017,11 +972,6 @@ Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodeP } Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector &connect_netoutput_nodes) { - auto iter = memory_offset_.find(RT_MEMORY_HBM); - if (iter == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM); - return FAILED; - } for (auto &node : connect_netoutput_nodes) { GE_CHECK_NOTNULL(node); if (node->GetOpDesc() == nullptr) { @@ -1030,7 +980,7 @@ Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector & } // Atomic memory start addr - int64_t original_atomic_mem_start = static_cast(iter->second.mem_offset_); + int64_t original_atomic_mem_start = static_cast(memory_offset_[0].mem_offset_); GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.", node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start); vector mem_offset_end; @@ -1053,7 +1003,7 @@ Status GraphMemoryAssigner::AssignReferenceMemory() { // Get the reference type of the node, default is false bool is_ref = false; // If GetBool fail, is_ref is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); + (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); if (!is_ref) { continue; } @@ -1137,7 +1087,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve vector atomic_output_index; // If GetListInt fail, atomic_output_index is empty. 
- (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); + (void)ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index); // Check atomic output vector output_list = op_desc->GetOutputOffset(); @@ -1146,11 +1096,6 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve return ge::FAILED; } auto output_list_size = static_cast(output_list.size()); - auto iter = memory_offset_.find(RT_MEMORY_HBM); - if (iter == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM); - return FAILED; - } for (auto &output_index : atomic_output_index) { if (output_index >= output_list_size) { GELOGE(ge::PARAM_INVALID, "The output index %ld is more than the size %ld of output_list.", output_index, @@ -1168,9 +1113,9 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve // If you have already assigned an atomic address, skip it, and you don't need to reassign it. 
if (is_assigned_mem) { GELOGI( - "Node %s atomic output : we have assigned atomic memory as the input of next node in " - "ReAssignContinuousMemory function.", - op_desc->GetName().c_str()); + "Node %s atomic output : we have assigned atomic memory as the input of next node in " + "ReAssignContinuousMemory function.", + op_desc->GetName().c_str()); continue; } @@ -1180,14 +1125,14 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve GELOGI("Get size failed"); } - output_list[output_index] = iter->second.mem_offset_; + output_list[output_index] = memory_offset_[0].mem_offset_; GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld].", - compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, - iter->second.mem_offset_, op_desc->GetStreamId(), size, size); + compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, memory_offset_[0].mem_offset_, + op_desc->GetStreamId(), size, size); - iter->second.mem_offset_ += size; - AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM); - mem_offset_end.emplace_back(iter->second.mem_offset_); + memory_offset_[0].mem_offset_ += size; + AlignMemOffset(MEM_ALIGN_SIZE); + mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); } op_desc->SetOutputOffset(output_list); @@ -1210,7 +1155,7 @@ Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, i /// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address /// has been assigned vector atomic_input_index; - (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index); + (void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index); if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) { is_mem_assigned = true; break; @@ -1223,11 +1168,6 @@ Status 
GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc map> &workspace_info, vector &mem_offset_end) { GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str()); - auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM); - if (mem_type_iter == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM); - return FAILED; - } vector workspace_vector = op_desc->GetWorkspace(); for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) { @@ -1250,15 +1190,15 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc return ge::PARAM_INVALID; } - workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_; + workspace_vector[workspace_index] = memory_offset_[0].mem_offset_; GELOGI( - "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " - "size[%ld] real_size[%ld].", - compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, - mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size); + "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " + "size[%ld] real_size[%ld].", + compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, memory_offset_[0].mem_offset_, + op_desc->GetStreamId(), workspace_size, workspace_size); - mem_type_iter->second.mem_offset_ += workspace_size; - mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_); + memory_offset_[0].mem_offset_ += workspace_size; + mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); } } op_desc->SetWorkspace(workspace_vector); @@ -1270,11 +1210,6 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt map> &workspace_info, vector &mem_offset_end) { GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str()); - auto mem_type_iter = 
memory_offset_.find(RT_MEMORY_HBM); - if (mem_type_iter == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset don't have memory type[%ld].", RT_MEMORY_HBM); - return FAILED; - } map> sub_node_workspace_offset; for (auto &iter : workspace_info) { @@ -1287,14 +1222,15 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt auto workspace_index = static_cast(info_iter.first); auto workspace_size = info_iter.second; - size_t workspace_offset = mem_type_iter->second.mem_offset_; + size_t workspace_offset = memory_offset_[0].mem_offset_; GELOGI( - "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, - mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size); + "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] " + "real_size[%ld].", + compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, memory_offset_[0].mem_offset_, + op_desc->GetStreamId(), workspace_size, workspace_size); - mem_type_iter->second.mem_offset_ += workspace_size; - mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_); + memory_offset_[0].mem_offset_ += workspace_size; + mem_offset_end.emplace_back(memory_offset_[0].mem_offset_); index_offset.insert(std::make_pair(workspace_index, workspace_offset)); } sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset)); @@ -1359,11 +1295,8 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { GELOGE(FAILED, "memory_offset_ is empty."); return FAILED; } - for (auto pair : memory_offset_) { - GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memory type[%ld]", compute_graph_->GetName().c_str(), - pair.second.mem_offset_, pair.first); - } - + GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu]", compute_graph_->GetName().c_str(), + 
memory_offset_[0].mem_offset_); for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) { if (UpdateOpInputOffset(node) != ge::SUCCESS) { GELOGE(ge::FAILED, "Update op input offset failed"); @@ -1396,8 +1329,8 @@ ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vecto const auto &in_node = NodeUtils::GetParentInput(node); if (NodeUtils::GetConstOpType(in_node, op_type)) { input_list = in_node->GetOpDesc()->GetOutputOffset(); - node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as const output. - return SUCCESS; // Constant input. + node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as const output. + return SUCCESS; // Constant input. } // Memory allocated for dynamic shape subgraph Data. @@ -1415,7 +1348,7 @@ ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vecto } input_list = {parent_inputs[parent_index]}; - node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input. + node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input. return SUCCESS; } @@ -1441,7 +1374,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< auto out_index = static_cast(peer_out_anchor->GetIdx()); if (output_list.size() > static_cast(out_index)) { int64_t input_offset = output_list.at(out_index); - if (has_mem_type_attr && !origin_input_list.empty()) { + if (has_mem_type_attr) { auto input_size = tmp_op_desc->GetInputsSize(); auto ori_input_offset_list_size = origin_input_list.size(); auto mem_type_size = memory_type.size(); @@ -1454,8 +1387,9 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< } // not hbm keep orignal inputoffest // hbm inputoffset = original inputoffset + outputoffset - input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 ? 
origin_input_list[valid_input_index] - : origin_input_list[valid_input_index] + output_list.at(out_index)); + input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 + ? origin_input_list[valid_input_index] + : origin_input_list[valid_input_index] + output_list.at(out_index)); } const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode()); if (in_node->GetType() == CONSTANT) { @@ -1464,12 +1398,8 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< } GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", - has_mem_type_attr == true ? "Fusion" : "", - tmp_op_desc->GetName().c_str(), - valid_input_index, - peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), - out_index, - input_offset); + has_mem_type_attr == true ? "Fusion" : "", tmp_op_desc->GetName().c_str(), valid_input_index, + peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index, input_offset); input_list.emplace_back(input_offset); valid_input_index++; } @@ -1540,6 +1470,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in memory_offset_size.emplace_back(size); } memory_offset_start.pop_back(); + const auto &in_control_anchor = node->GetInControlAnchor(); if (!memory_offset_size.empty() && in_control_anchor != nullptr) { for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { @@ -1579,7 +1510,7 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve std::vector mem_start_vector; // If GetListInt fail, mem_start_vector is empty. 
- (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); + (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector); mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end()); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector), GELOGE(FAILED, "SetListInt failed."); @@ -1587,7 +1518,7 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve std::vector mem_size_vector; // If GetListInt fail, mem_size_vector is empty. - (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); + (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector); mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end()); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector), GELOGE(FAILED, "SetListInt failed."); @@ -1612,93 +1543,11 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve return SUCCESS; } -void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) { +void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size) { if (mem_align_size <= 0) { return; } - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - GELOGW("Memory offset don't have memory type[%ld].", memory_type); - return; - } - iter->second.mem_offset_ = - (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size; -} - -ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector &nodes, int32_t mem_reuse_model, - int64_t &memory_type) { - memory_type = RT_MEMORY_HBM; - // In the dynamic batch scenario, the memory attributes of nodes are the same. 
- for (auto &n : nodes) { - if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { - GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.") - break; - } - - if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { - GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); - break; - } - } - return SUCCESS; -} - -ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) { - memory_type = RT_MEMORY_HBM; - vector mem_type_list; - if (input_or_output == "input") { - (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list); - } - if (input_or_output == "output") { - (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list); - } - if (mem_type_list.empty()) { - if (memory_offset_.find(memory_type) == memory_offset_.end()) { - GELOGE(FAILED, "Memory offset map does not have memory type[%ld].", memory_type); - return FAILED; - } - return SUCCESS; - } - - if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) { - GELOGE(FAILED, "The size[%zu] of mem type list is not equal to the size of in data anchor[%u].", - mem_type_list.size(), node->GetAllInDataAnchorsSize()); - return FAILED; - } - - if (!CheckContinuousMemType(mem_type_list)) { - GELOGE(FAILED, "Check continuous memory type failed."); - return FAILED; - } - // It is continuous memory and memory type is the same, so use the first memory. - memory_type = mem_type_list[0]; - return SUCCESS; -} - -bool GraphMemoryAssigner::CheckContinuousMemType(vector mem_type_list) { - if (mem_type_list.size() == 0) { - return true; - } - int64_t mem_type_tmp = mem_type_list[0]; - for (auto mem_type : mem_type_list) { - if (mem_type != mem_type_tmp) { - GELOGW("The memory is continuous, but the type of the input memory is inconsistent. 
They are [%ld] and [%ld].", - mem_type_tmp, mem_type); - return false; - } - } - if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) { - GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp); - return false; - } - return true; -} - -void GraphMemoryAssigner::PrintMemoryOffset() { - for (auto pair : memory_offset_) { - // Assign memory of max batch nodes that have the same batch label. - GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.", - pair.first, pair.second.mem_offset_); - } + memory_offset_[0].mem_offset_ = + (memory_offset_[0].mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size; } } // namespace ge diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index ab677417..201e6d01 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,7 +37,7 @@ struct MemoryOffset { size_t mem_offset_; }; -using MemoryOffsetMap = std::map; +using MemoryOffsetList = vector; class VariableMemoryAssigner { public: @@ -71,12 +71,10 @@ using VariableMemoryAssignerPtr = std::shared_ptr; using BlockMemAssignerPtr = std::shared_ptr; using HybridMemAssignerPtr = std::shared_ptr; - class GraphMemoryAssigner { public: explicit GraphMemoryAssigner(ge::ComputeGraphPtr compute_graph) - : compute_graph_(std::move(compute_graph)), - mem_assigner_(nullptr) {} + : compute_graph_(std::move(compute_graph)), mem_assigner_(nullptr) {} GraphMemoryAssigner(const GraphMemoryAssigner &) = delete; @@ -99,9 +97,9 @@ class GraphMemoryAssigner { /// ge::Status AssignVarAttr2Nodes(); - ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); + ge::Status ReAssignMemory(bool is_loop_graph, size_t &mem_offset); - ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); + ge::Status AssignZeroCopyMemory(size_t &mem_offset, size_t &zero_mem_copy_size); ge::Status SetInputOffset(); @@ -129,25 +127,19 @@ class GraphMemoryAssigner { ge::Status ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, int32_t mem_reuse_model); - ge::Status GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, - int32_t mem_reuse_model, string &max_batch_label); + ge::Status GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, int32_t mem_reuse_model, + string &max_batch_label); ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size); ge::Status ReAssignAtomicMemory(bool is_loop_graph); - - ge::Status FilterAtomicNodesForMemoryAssign(std::map> &normal_atomic_nodes_map, - std::vector &connecting_output_atomic_nodes); - - ge::Status FilterAtomicNodesForMemoryAssign(std::map> &normal_atomic_nodes_map, - std::vector &connecting_output_atomic_nodes); ge::Status FilterAtomicNodesForMemoryAssign(std::map> 
&normal_atomic_nodes_map, std::vector &connecting_output_atomic_nodes); ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type); + int64_t &continuous_mem_size); ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node); @@ -182,7 +174,7 @@ class GraphMemoryAssigner { ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); - void AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type); + void AlignMemOffset(const int64_t &mem_align_size); ge::Status UpdateOpInputOffset(const NodePtr &node, vector &input_list) const; @@ -190,14 +182,7 @@ class GraphMemoryAssigner { NodePtr GetKnownInputNode(const NodePtr &node) const; - ge::Status GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output); - ge::Status GetNodeListMemoryType(const vector &nodes, int32_t mem_reuse_model, int64_t &memory_type); - - bool CheckContinuousMemType(vector mem_type_list); - - void PrintMemoryOffset(); - - MemoryOffsetMap memory_offset_; + MemoryOffsetList memory_offset_; ge::ComputeGraphPtr compute_graph_; HybridMemAssignerPtr mem_assigner_; }; diff --git a/ge/graph/build/memory/hybrid_mem_assigner.cc b/ge/graph/build/memory/hybrid_mem_assigner.cc index 8a36ad62..a75487de 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.cc +++ b/ge/graph/build/memory/hybrid_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,7 +23,7 @@ namespace ge { HybridMemAssigner::HybridMemAssigner(ge::ComputeGraphPtr compute_graph) - : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} + : mem_offset_(0), compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} Status HybridMemAssigner::AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size) { vector ranges; @@ -46,12 +46,12 @@ Status HybridMemAssigner::Assign() { return FAILED; } - std::unique_ptr binary_assigner(new (std::nothrow) BinaryBlockMemAssigner( - compute_graph_, anchor_to_symbol_, symbol_to_anchors_)); + std::unique_ptr binary_assigner( + new (std::nothrow) BinaryBlockMemAssigner(compute_graph_, anchor_to_symbol_, symbol_to_anchors_)); GE_CHECK_NOTNULL(binary_assigner); - std::unique_ptr max_assigner(new (std::nothrow) MaxBlockMemAssigner( - compute_graph_, anchor_to_symbol_, symbol_to_anchors_)); + std::unique_ptr max_assigner( + new (std::nothrow) MaxBlockMemAssigner(compute_graph_, anchor_to_symbol_, symbol_to_anchors_)); GE_CHECK_NOTNULL(max_assigner); size_t bin_mem_size = 0; @@ -73,7 +73,6 @@ Status HybridMemAssigner::Assign() { priority_assigner->SetOpMemOffset(false); mem_offset_ = priority_assigner->GetMemOffset(); - p2p_mem_offset_ = priority_assigner->GetP2PMemOffset(); priority_assigner_ = std::move(priority_assigner); return SUCCESS; diff --git a/ge/graph/build/memory/hybrid_mem_assigner.h b/ge/graph/build/memory/hybrid_mem_assigner.h index 7baece44..fba70a59 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.h +++ b/ge/graph/build/memory/hybrid_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -43,7 +43,6 @@ class HybridMemAssigner : public MemAssigner { Status Assign() override; size_t GetMemOffset() const { return mem_offset_; } - size_t GetP2PMemOffset() const { return p2p_mem_offset_; } BlockMemAssignerPtr GetPriorityAssinger() const { return priority_assigner_; } @@ -51,7 +50,6 @@ class HybridMemAssigner : public MemAssigner { Status AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size); size_t mem_offset_; - size_t p2p_mem_offset_; ge::ComputeGraphPtr compute_graph_; diff --git a/ge/graph/build/memory/max_block_mem_assigner.cc b/ge/graph/build/memory/max_block_mem_assigner.cc index 15edae3d..db6befeb 100644 --- a/ge/graph/build/memory/max_block_mem_assigner.cc +++ b/ge/graph/build/memory/max_block_mem_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/max_block_mem_assigner.h b/ge/graph/build/memory/max_block_mem_assigner.h index c4d67953..f5626ebf 100644 --- a/ge/graph/build/memory/max_block_mem_assigner.h +++ b/ge/graph/build/memory/max_block_mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/memory/mem_assigner.h b/ge/graph/build/memory/mem_assigner.h index 7d0252d9..b1cb4627 100755 --- a/ge/graph/build/memory/mem_assigner.h +++ b/ge/graph/build/memory/mem_assigner.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc index 055103a9..e36f082e 100755 --- a/ge/graph/build/memory/memory_assigner.cc +++ b/ge/graph/build/memory/memory_assigner.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ #include "graph/build/memory/graph_mem_assigner.h" namespace ge { -Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size) { +Status MemoryAssigner::AssignMemory(bool is_loop_graph, size_t &mem_offset, size_t &zero_copy_mem_size) { GraphMemoryAssigner graph_mem_assigner(compute_graph_); if (graph_mem_assigner.AssignMemory() != ge::SUCCESS) { diff --git a/ge/graph/build/memory/module.mk b/ge/graph/build/memory/module.mk index 47c9e5cc..2b77e40e 100755 --- a/ge/graph/build/memory/module.mk +++ b/ge/graph/build/memory/module.mk @@ -17,7 +17,6 @@ local_lib_inc_path := ${LOCAL_PATH} \ ${TOPDIR}third_party/protobuf/include \ ${TOPDIR}inc/framework \ $(TOPDIR)framework/domi \ - $(TOPDIR)graphengine/ge \ #compiler for host include $(CLEAR_VARS) diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc index 639bfaa0..a352cf65 100755 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -62,17 +62,17 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr GE_CHECK_NOTNULL(tensor_desc); if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { GE_CHK_STATUS_RET( - VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); + VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) - ->SetAllocatedGraphId(node_name, compute_graph->GetGraphID())); + ->SetAllocatedGraphId(node_name, compute_graph->GetGraphID())); } uint8_t *dev_ptr = nullptr; rtMemType_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) - ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); + GE_CHK_STATUS_RET( + VarManager::Instance(compute_graph->GetSessionID())->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); vector output_list = n->GetOpDesc()->GetOutputOffset(); GE_IF_BOOL_EXEC(output_list.empty(), return FAILED); output_list[0] = static_cast(reinterpret_cast(dev_ptr)); @@ -90,9 +90,9 @@ Status VarMemAssignUtil::AssignData2Fp32Var(const ge::NodePtr &node, uint64_t se rtMemType_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetCurVarDesc(src_var_name, cur_tensor_desc)); GE_CHK_STATUS_RET( - VarManager::Instance(session_id)->GetVarAddr(src_var_name, cur_tensor_desc, &dev_ptr, memory_type)); + VarManager::Instance(session_id)->GetVarAddr(src_var_name, cur_tensor_desc, &dev_ptr, memory_type)); GE_CHK_STATUS_RET( - VarManager::Instance(session_id)->SetVarAddr(node->GetName(), cur_tensor_desc, dev_ptr, memory_type)); + VarManager::Instance(session_id)->SetVarAddr(node->GetName(), cur_tensor_desc, dev_ptr, memory_type)); } return 
SUCCESS; } @@ -122,7 +122,7 @@ Status VarMemAssignUtil::SetOutVariableAttr(const ge::NodePtr &node, const ge::N GeTensorDesc var_tensor_desc = var_node->GetOpDesc()->GetOutputDesc(0); rtMemType_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET( - VarManager::Instance(session_id)->GetVarAddr(var_node->GetName(), var_tensor_desc, &dev_ptr, memory_type)); + VarManager::Instance(session_id)->GetVarAddr(var_node->GetName(), var_tensor_desc, &dev_ptr, memory_type)); int out_list_size = static_cast(output_list.size()); GE_CHK_BOOL_RET_STATUS(index < out_list_size, FAILED, "index %d >= output_list.size() %d", index, out_list_size); @@ -171,7 +171,7 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr "Get broadcast op %s input tensor desc size [%zu] < idx [%d]", node->GetName().c_str(), input_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); const ge::GeTensorDescPtr input_tensor_desc = - input_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); + input_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); int64_t input_size = 0; GE_CHK_STATUS(TensorUtils::GetSize(*input_tensor_desc, input_size), "get input size failed."); broad_cast_info.input_size = input_size; @@ -190,7 +190,7 @@ Status VarMemAssignUtil::DealBroadCastNode(uint32_t graph_id, const ge::NodePtr "Get broadcast op %s output tensor desc size [%zu] < idx [%d]", node->GetName().c_str(), output_tensor_desc_ptr_vistor.size(), broad_cast_info.idx); const ge::GeTensorDescPtr output_tensor_desc = - output_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); + output_tensor_desc_ptr_vistor.at(static_cast(broad_cast_info.idx)); int64_t output_size = 0; GE_CHK_STATUS(TensorUtils::GetSize(*output_tensor_desc, output_size), "get input size failed."); broad_cast_info.output_size = output_size; @@ -220,7 +220,7 @@ Status VarMemAssignUtil::DealVariableNode(uint32_t graph_id, const ge::NodePtr & } auto dst_type = dst_node->GetType(); bool is_trans_node = - (dst_type == 
TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || (dst_type == PERMUTE); + (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || (dst_type == PERMUTE); if (is_trans_node) { NodePtr final_trans_node = GetFinalTransNode(dst_node); GE_CHK_STATUS_RET(DealTransNode(final_trans_node)); @@ -238,7 +238,7 @@ ge::NodePtr VarMemAssignUtil::GetFinalTransNode(const ge::NodePtr &trans_node) { NodePtr dst_node = dst_in_anchor->GetOwnerNode(); auto dst_type = dst_node->GetType(); bool is_trans_node = - (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || (dst_type == PERMUTE); + (dst_type == TRANSDATA) || (dst_type == CAST) || (dst_type == TRANSPOSE) || (dst_type == PERMUTE); if (is_trans_node && (dst_in_anchor->GetIdx() == 0)) { final_ref_node = GetFinalTransNode(dst_node); } @@ -319,11 +319,11 @@ Status VarMemAssignUtil::AssignData2VarRef(const ge::NodePtr &has_ref_attr_node, ge::NodePtr var_ref_src_var = root_graph->FindNode(src_var_name); if (var_ref_src_var == nullptr) { for (auto sub_graph : root_graph->GetAllSubgraphs()) { - auto node_ptr = sub_graph->FindNode(src_var_name); - if (node_ptr != nullptr) { - var_ref_src_var = node_ptr; - break; - } + auto node_ptr = sub_graph->FindNode(src_var_name); + if (node_ptr != nullptr) { + var_ref_src_var = node_ptr; + break; + } } } GE_IF_BOOL_EXEC(var_ref_src_var == nullptr || var_ref_src_var->GetOpDesc() == nullptr, return FAILED); diff --git a/ge/graph/build/memory/var_mem_assign_util.h b/ge/graph/build/memory/var_mem_assign_util.h index f0e6270d..cb38af29 100644 --- a/ge/graph/build/memory/var_mem_assign_util.h +++ b/ge/graph/build/memory/var_mem_assign_util.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,7 +29,6 @@ class VarMemAssignUtil { static Status AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_graph); static Status AssignVarAttr2Nodes(ge::ComputeGraphPtr &compute_graph); static Status AssignMemory2HasRefAttrNode(ge::ComputeGraphPtr &compute_graph); - static Status AssignData2Fp32Var(const ge::NodePtr &node, uint64_t session_id); private: static Status AssignMemory2VariableNode(ge::ComputeGraphPtr &compute_graph); @@ -41,6 +40,7 @@ class VarMemAssignUtil { static Status DealBroadCastNode(uint32_t graph_id, const ge::NodePtr &node, const ge::InDataAnchorPtr &in_data_anchor, const ge::NodePtr &var_node, uint64_t session_id); + static Status AssignData2Fp32Var(const ge::NodePtr &node, uint64_t session_id); static ge::NodePtr GetFinalTransNode(const ge::NodePtr &ref_node); diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index a78bbdcc..9a37478d 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/build/model_builder.h" #include #include @@ -92,6 +93,7 @@ ModelBuilder::ModelBuilder(uint64_t session_id, ge::ComputeGraphPtr compute_grap const Graph2SubGraphInfoList &subgraphs, const map &stream_max_parallel_num, bool hcom_parallel, int mode) : session_id_(session_id), + mem_offset_(0), weight_offset_(kWeightsStartOffset), compute_graph_(std::move(compute_graph)), subgraphs_(subgraphs), @@ -102,7 +104,6 @@ ModelBuilder::ModelBuilder(uint64_t session_id, ge::ComputeGraphPtr compute_grap hcom_parallel_(hcom_parallel), build_mode_(mode), max_mem_offset_(0), - p2p_mem_offset_(0), zero_copy_mem_size_(0), platform_type_(0), is_loop_graph_(false), @@ -144,7 +145,7 @@ Status ModelBuilder::CalcOutputSize(const ge::NodePtr &n) { bool ModelBuilder::SetInputConst(const OpDescPtr &op_desc, const NodePtr &src_node, size_t index, vector &is_input_const) { - GELOGI("SetIsInputConst const: %s, source node: %s", op_desc->GetName().c_str(), src_node->GetName().c_str()); + GELOGI("SetIsInputConst const: %s", op_desc->GetName().c_str()); for (size_t i = is_input_const.size(); i <= index; ++i) { is_input_const.push_back(false); } @@ -152,7 +153,7 @@ bool ModelBuilder::SetInputConst(const OpDescPtr &op_desc, const NodePtr &src_no vector weights = OpDescUtils::MutableWeights(src_node); if (weights.empty()) { - GELOGW("SetInputIsConst weights is empty, node: %s", src_node->GetName().c_str()); + GELOGW("SetInputIsConst weights is empty"); return false; } GeTensorPtr weight = weights[0]; @@ -191,7 +192,6 @@ void ModelBuilder::SetInputIsConst(const ge::NodePtr &n) { GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); const auto &src_node = peer_out_anchor->GetOwnerNode(); if (!NodeUtils::GetConstOpType(src_node, const_type)) { - GELOGI("Node %s:%zu, sorce node: %s Not Const", n->GetName().c_str(), index, src_node->GetName().c_str()); continue; } @@ -385,16 +385,10 @@ void ModelBuilder::InitL1FusionOption() { Status ModelBuilder::BuildModelDef(ge::Model &model) { 
ClearOriginalFormat(); - max_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_HBM]; + max_mem_offset_ = mem_offset_; GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_MEMORY_SIZE, max_mem_offset_), GELOGE(FAILED, "SetInt of ATTR_MODEL_MEMORY_SIZE failed."); return FAILED); - if (mem_type_to_mem_offset_.find(RT_MEMORY_P2P_DDR) != mem_type_to_mem_offset_.end()) { - p2p_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_P2P_DDR]; - } - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_), - GELOGE(FAILED, "SetInt of ATTR_MODEL_P2P_MEMORY_SIZE failed."); - return FAILED); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_WEIGHT_SIZE, weight_offset_), GELOGE(FAILED, "SetInt of ATTR_MODEL_WEIGHT_SIZE failed."); return FAILED); @@ -416,8 +410,7 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, ATTR_MODEL_OUT_NODES_NAME, GetLocalOmgContext().net_out_nodes), GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed."); return FAILED); - GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, - p2p_mem_offset_, zero_copy_mem_size_); + GELOGI("For model, max_mem_offset_: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, zero_copy_mem_size_); string ge_core_type; Status ret = ge::GetContext().GetOption(kCoreType, ge_core_type); @@ -539,8 +532,8 @@ Status ModelBuilder::MergeWeights() { if (weight_data.data() != nullptr) { GE_IF_BOOL_EXEC(base_addr == nullptr, GELOGE(FAILED, "Base addr is nullptr."); return FAILED); if (weight_offset_ - offset < weight_data.size()) { - GELOGE(FAILED, "left weight size not enough. left_size:%lu, weight_size:%lu", - weight_offset_ - offset, weight_data.size()); + GELOGE(FAILED, "left weight size not enough. 
left_size:%lu, weight_size:%lu", weight_offset_ - offset, + weight_data.size()); return FAILED; } uintptr_t dst_ptr = reinterpret_cast(base_addr) + offset; @@ -550,7 +543,8 @@ Status ModelBuilder::MergeWeights() { auto err = memcpy_s(reinterpret_cast(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast(src_ptr), SECUREC_MEM_MAX_LEN); if (err != EOK) { - GELOGE(FAILED, "mem copy failed. errret:%u, " + GELOGE(FAILED, + "mem copy failed. errret:%u, " "dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu", err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN); return FAILED; @@ -561,7 +555,8 @@ Status ModelBuilder::MergeWeights() { } auto err = memcpy_s(reinterpret_cast(dst_ptr), left_size, reinterpret_cast(src_ptr), left_size); if (err != EOK) { - GELOGE(FAILED, "mem copy failed. errret:%u, " + GELOGE(FAILED, + "mem copy failed. errret:%u, " "dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu", err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN); return FAILED; @@ -587,8 +582,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { if (tbe_kernel == nullptr) { std::string kernel_name; GeAttrValue::BYTES kernel_buffer; - (void) AttrUtils::GetStr(node_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name); - (void) AttrUtils::GetBytes(node_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer); + (void)AttrUtils::GetStr(node_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name); + (void)AttrUtils::GetBytes(node_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer); if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) { GE_CHECK_NOTNULL(kernel_buffer.GetData()); std::vector data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize()); @@ -609,7 +604,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); CustAICPUKernelPtr cust_aicpu_kernel = - 
node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); + node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); GE_IF_BOOL_EXEC(cust_aicpu_kernel == nullptr, continue); if (aicpu_name_set.count(cust_aicpu_kernel->GetName()) > 0) { GELOGE(FAILED, "aicpu_kernel name %s can't be the same", cust_aicpu_kernel->GetName().c_str()); @@ -718,7 +713,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { GE_TIMESTAMP_START(AssignMemory); MemoryAssigner mem_assigner(compute_graph_); - GE_CHK_STATUS_RET(mem_assigner.AssignMemory(is_loop_graph_, mem_type_to_mem_offset_, zero_copy_mem_size_), + GE_CHK_STATUS_RET(mem_assigner.AssignMemory(is_loop_graph_, mem_offset_, zero_copy_mem_size_), "Assign Memory Failed!"); GE_TIMESTAMP_END(AssignMemory, "GraphBuilder::AssignMemory"); diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index e75521c7..e54d6695 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -85,7 +85,7 @@ class ModelBuilder { uint64_t session_id_; - map mem_type_to_mem_offset_; + size_t mem_offset_; size_t weight_offset_; @@ -106,7 +106,6 @@ class ModelBuilder { int build_mode_; size_t max_mem_offset_; - size_t p2p_mem_offset_; size_t zero_copy_mem_size_; TBEKernelStore tbe_kernel_store_; diff --git a/ge/graph/build/run_context.cc b/ge/graph/build/run_context.cc index 3aa4d3de..cece31ea 100644 --- a/ge/graph/build/run_context.cc +++ b/ge/graph/build/run_context.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "graph/build/run_context.h" #include "common/util.h" @@ -22,9 +23,7 @@ namespace ge { RunContextUtil::~RunContextUtil() { DestroyRtModelResources(); } -Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size, - std::map mem_type_to_data_mem_base, - std::map mem_type_to_data_mem_size, uint8_t *weight_mem_base, +Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size, uint8_t *weight_mem_base, uint64_t weight_mem_size) { if ((data_mem_size > 0) && (data_mem_base == nullptr)) { GELOGE(PARAM_INVALID, "InitMemInfo param data_mem_base is null but data_mem_size = %lu.", data_mem_size); @@ -34,20 +33,10 @@ Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_siz GELOGE(PARAM_INVALID, "InitMemInfo param weight_mem_base is null but weight_mem_size = %lu.", weight_mem_size); return PARAM_INVALID; } - if (mem_type_to_data_mem_base.empty() || mem_type_to_data_mem_size.empty() || - mem_type_to_data_mem_base.size() != mem_type_to_data_mem_size.size()) { - GELOGE(PARAM_INVALID, - "InitMemInfo param mem_type_to_data_mem_base size[%zu] is not equal 
to the size of " - "mem_type_to_data_mem_size[%zu].", - mem_type_to_data_mem_base.size(), mem_type_to_data_mem_size.size()); - return PARAM_INVALID; - } data_mem_base_ = data_mem_base; data_mem_size_ = data_mem_size; weight_mem_base_ = weight_mem_base; weight_mem_size_ = weight_mem_size; - mem_type_to_data_mem_base_ = mem_type_to_data_mem_base; - mem_type_to_data_mem_size_ = mem_type_to_data_mem_size; return SUCCESS; } @@ -178,33 +167,10 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra GELOGI("CreateRunContext: data_mem_base_ = %p, weight_mem_base_ = %p, memory_size = %lu, weight_size = %lu", data_mem_base_, weight_mem_base_, data_mem_size_, weight_mem_size_); - PrintMemInfo(); - - run_context_ = {rt_model_, - nullptr, - session_id, - data_mem_size_, - data_mem_base_, - mem_type_to_data_mem_size_, - mem_type_to_data_mem_base_, - weight_mem_size_, - weight_mem_base_, - buffer, - stream_list_, - event_list_, - label_list_}; + run_context_ = {rt_model_, nullptr, session_id, data_mem_size_, data_mem_base_, weight_mem_size_, + weight_mem_base_, buffer, stream_list_, event_list_, label_list_}; return SUCCESS; } -void RunContextUtil::PrintMemInfo() { - for (auto iter : mem_type_to_data_mem_base_) { - GELOGI("CreateRunContext: memory type = %ld, data memory base = %p", iter.first, iter.second); - } - - for (auto iter : mem_type_to_data_mem_size_) { - GELOGI("CreateRunContext: memory type = %ld, data memory size = %lu", iter.first, iter.second); - } -} - RunContext &RunContextUtil::GetRunContext() { return run_context_; } } // namespace ge diff --git a/ge/graph/build/run_context.h b/ge/graph/build/run_context.h index 82f799aa..5b24f343 100755 --- a/ge/graph/build/run_context.h +++ b/ge/graph/build/run_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in 
compliance with the License. @@ -26,7 +26,6 @@ #include "runtime/rt.h" namespace ge { -/*lint -e148*/ class RunContextUtil { public: RunContextUtil() = default; @@ -34,18 +33,14 @@ class RunContextUtil { virtual ~RunContextUtil(); // Init mem info. - ge::Status InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size, - std::map mem_type_to_data_mem_base, - std::map mem_type_to_data_mem_size, - uint8_t *weight_mem_base, uint64_t weight_mem_size); + ge::Status InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size, uint8_t *weight_mem_base, + uint64_t weight_mem_size); ge::Status CreateRunContext(Model &model_def, const ComputeGraphPtr &graph, Buffer &buffer, const uint64_t session_id); RunContext &GetRunContext(); - void PrintMemInfo(); - RunContext run_context_; private: @@ -66,8 +61,6 @@ class RunContextUtil { uint64_t data_mem_size_ = 0; uint8_t *weight_mem_base_ = nullptr; uint64_t weight_mem_size_ = 0; - std::map mem_type_to_data_mem_base_; - std::map mem_type_to_data_mem_size_; }; } // namespace ge #endif // GE_GRAPH_BUILD_RUN_CONTEXT_H_ diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 3aba8fd1..bcfea1d8 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -221,9 +221,9 @@ Status StreamAllocator::AssignSingleStream() { huge_streams_.emplace_back(huge_stream); } else { GELOGW( - "The estimated task count %ld is greater than the max count of normal stream," - " but the huge stream is not supported.", - task_count); + "The estimated task count %ld is greater than the max count of normal stream," + " but the huge stream is not supported.", + task_count); } } @@ -742,9 +742,9 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { if (NeedSpiltNewStream(stream_node_num_vec[stream_id], max_node_num_one_stream, op_desc, is_stream_first_node)) { last_stream_id++; GELOGI( - "stream_node_num_vec[%ld]= %ld > max_node_num_one_stream : %ld, " - "It's time to split the stream, split newly-added stream id is %ld", - stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id); + "stream_node_num_vec[%ld]= %ld > max_node_num_one_stream : %ld, " + "It's time to split the stream, split newly-added stream id is %ld", + stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id); NodePtr pre_node = pre_node_vec[stream_id]; stream_node_num_vec[stream_id] = 0; AddNodeNum(cur_node, stream_node_num_vec[stream_id]); @@ -770,8 +770,8 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { cur_continuous_stream_label.c_str()); auto iter = std::find(stream_2_nodes_map[stream_id].begin(), stream_2_nodes_map[stream_id].end(), not_cur); GE_RETURN_WITH_LOG_IF_FALSE( - (iter != stream_2_nodes_map[stream_id].end()) && (iter != stream_2_nodes_map[stream_id].begin()), - "split stream with continuous stream label %s failed", cur_continuous_stream_label.c_str()); + (iter != stream_2_nodes_map[stream_id].end()) && (iter != stream_2_nodes_map[stream_id].begin()), + "split stream with continuous stream label %s failed", cur_continuous_stream_label.c_str()); iter--; pre_node = *iter; } diff --git a/ge/graph/build/stream_allocator.h b/ge/graph/build/stream_allocator.h index 
a21b2f77..0158e6b0 100644 --- a/ge/graph/build/stream_allocator.h +++ b/ge/graph/build/stream_allocator.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/stream_graph_optimizer.cc b/ge/graph/build/stream_graph_optimizer.cc index 21625a1e..49ecc674 100644 --- a/ge/graph/build/stream_graph_optimizer.cc +++ b/ge/graph/build/stream_graph_optimizer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "stream_graph_optimizer.h" #include "common/util.h" #include "framework/common/debug/ge_log.h" diff --git a/ge/graph/build/stream_graph_optimizer.h b/ge/graph/build/stream_graph_optimizer.h index b0eea135..3133d32d 100644 --- a/ge/graph/build/stream_graph_optimizer.h +++ b/ge/graph/build/stream_graph_optimizer.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 9246fa6e..58a8bf7b 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,7 +33,6 @@ #include "init/gelib.h" #include "graph/ge_local_context.h" #include "ge/ge_api_types.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" using domi::LogTimeStampDef; using domi::ModelTaskDef; @@ -127,13 +126,13 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t Status TaskGenerator::AddModelTaskToModel(const ModelTaskDef &model_task_def, uint64_t session_id, ge::Model &model, RunContext &run_context) { GE_CHK_BOOL_EXEC( - AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, reinterpret_cast(run_context.dataMemBase)), - GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_BASE_ADDR failed."); - return FAILED); + AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, reinterpret_cast(run_context.dataMemBase)), + GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_BASE_ADDR failed."); + return FAILED); GE_CHK_BOOL_EXEC( - AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, reinterpret_cast(run_context.weightMemBase)), - GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_WEIGHT_ADDR failed."); - return FAILED); + AttrUtils::SetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, reinterpret_cast(run_context.weightMemBase)), + GELOGE(FAILED, "SetInt MODEL_ATTR_TASK_GEN_WEIGHT_ADDR failed."); + return FAILED); GE_CHK_BOOL_EXEC(AttrUtils::SetInt(model, ATTR_MODEL_TASK_GEN_VAR_ADDR, reinterpret_cast(var_mem_base_)), GELOGE(FAILED, "SetInt ATTR_MODEL_TASK_GEN_VAR_ADDR failed."); return FAILED); @@ -293,8 +292,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra // For fusion ddb pass, task def must be continuous. 
// Part2: Call auto fusion_task_info = - FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, - ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; + FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, + ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); // continue directly @@ -306,11 +305,9 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra GELOGI("Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); continue; } - auto kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); + OpsKernelInfoStorePtr kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); if (kernel_info_store == nullptr) { - GELOGE(INTERNAL_ERROR, - "No ops kernel store or ops kernel builder found. node:%s(%s), op_kernel_lib_name=%s.", - name.c_str(), + GELOGE(INTERNAL_ERROR, "No ops kernel store found. 
node:%s(%s), op_kernel_lib_name=%s.", name.c_str(), type.c_str(), op_kernel_lib_name.c_str()); return INTERNAL_ERROR; } @@ -330,7 +327,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id); GE_TIMESTAMP_RESTART(GenerateTask); - auto ret = OpsKernelBuilderManager::Instance().GenerateTask(*node, run_context, task_def_list); + auto ret = kernel_info_store->GenerateTask(*node, run_context, task_def_list); GE_TIMESTAMP_ADD(GenerateTask); if (ret != SUCCESS) { GELOGE(ret, "Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task failed.", @@ -407,8 +404,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info size_t task_list_size_before = task_def_list.size(); OpsKernelInfoStorePtr kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); if (kernel_info_store == nullptr) { - GELOGE(INTERNAL_ERROR, - "Fusion: No ops kernel store or ops kernel builder found. fusion_node:%s(%s), op_kernel_lib_name=%s.", + GELOGE(INTERNAL_ERROR, "Fusion: No ops kernel store found. 
fusion_node:%s(%s), op_kernel_lib_name=%s.", fusion_node_name.c_str(), fusion_node_type.c_str(), op_kernel_lib_name.c_str()); return INTERNAL_ERROR; } @@ -432,7 +428,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info run_context.stream = run_context.graphStreamList[stream_id]; GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); - ret = OpsKernelBuilderManager::Instance().GenerateTask(*fusion_node, run_context, task_def_list); + ret = kernel_info_store->GenerateTask(*fusion_node, run_context, task_def_list); if (ret != SUCCESS) { GELOGE(ret, "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), " @@ -464,10 +460,10 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info } GELOGI( - "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" - " task finished, generate %u task(s).", - op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, - task_list_size_after - task_list_size_before); + "Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" + " task finished, generate %u task(s).", + op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, + task_list_size_after - task_list_size_before); // record nodes which have call generate task successfully fusion_nodes_seen.insert(fusion_node.get()); @@ -562,7 +558,7 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { Status TaskGenerator::MarkFirstAndLastOps(const vector &ops, bool is_single_stream) const { vector> continuous_op_lists(1); const set separator_types( - {LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX, STREAMSWITCH, STREAMSWITCHN}); + {LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX, 
STREAMSWITCH, STREAMSWITCHN}); for (auto &op_desc : ops) { bool attr_notask = false; if (ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask) && attr_notask) { @@ -685,8 +681,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP } if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGN) { profiling_point.end_index.insert(current_idx); - GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", - op_desc->GetName().c_str(), current_idx); + GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", op_desc->GetName().c_str(), current_idx); } } else { if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { @@ -782,8 +777,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin } if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGN) { profiling_point.end_index.insert(current_idx); - GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", - op_desc->GetName().c_str(), current_idx); + GELOGI("Iter end name %s, idx %u, from FlowCtrl_LoopCond_ASSIGN", op_desc->GetName().c_str(), current_idx); } } else { if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { @@ -809,11 +803,10 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, vector &all_reduce_nodes, std::string &fp_point_str, std::string &bp_point_str) const { - if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_FPPONIT_OPTIONS, fp_point_str) == SUCCESS && ge::GetContext().GetOption(OPTION_EXEC_PROFILING_BPPONIT_OPTIONS, bp_point_str) == SUCCESS && !fp_point_str.empty() && !bp_point_str.empty()) { - return SUCCESS; + return SUCCESS; } Status ret = SUCCESS; @@ -893,7 +886,6 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi return SUCCESS; } - Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector 
&all_reduce_nodes, uint32_t node_index, vector &task_def_list) { diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h index c93b2007..0d482afe 100755 --- a/ge/graph/build/task_generator.h +++ b/ge/graph/build/task_generator.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -120,7 +120,7 @@ class TaskGenerator { vector &all_reduce_nodes) const; Status GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, vector &all_reduce_nodes, - std::string& fp_point_str, std::string& bp_point_str) const; + std::string &fp_point_str, std::string &bp_point_str) const; Status FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, std::vector &all_reduce_nodes) const; diff --git a/ge/graph/common/ge_call_wrapper.h b/ge/graph/common/ge_call_wrapper.h index 55a93951..305c6c15 100644 --- a/ge/graph/common/ge_call_wrapper.h +++ b/ge/graph/common/ge_call_wrapper.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,42 +13,43 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_GE_CALL_WRAPPER_H_ #define GE_GE_CALL_WRAPPER_H_ #include "framework/common/debug/ge_log.h" /*lint --emacro((773),GE_TIMESTAMP_START)*/ /*lint -esym(773,GE_TIMESTAMP_START)*/ -#define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestamp() +#define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestap() -#define GE_TIMESTAMP_END(stage, stage_name) \ - do { \ - uint64_t endUsec_##stage = ge::GetCurrentTimestamp(); \ - GELOGI("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \ - (endUsec_##stage - startUsec_##stage)); \ +#define GE_TIMESTAMP_END(stage, stage_name) \ + do { \ + uint64_t endUsec_##stage = ge::GetCurrentTimestap(); \ + GELOGI("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \ + (endUsec_##stage - startUsec_##stage)); \ } while (0); #define GE_TIMESTAMP_EVENT_END(stage, stage_name) \ do { \ - uint64_t endUsec_##stage = ge::GetCurrentTimestamp(); \ + uint64_t endUsec_##stage = ge::GetCurrentTimestap(); \ GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second.", (stage_name), \ (endUsec_##stage - startUsec_##stage)); \ } while (0); #define GE_TIMESTAMP_CALLNUM_START(stage) \ - uint64_t startUsec_##stage = ge::GetCurrentTimestamp(); \ + uint64_t startUsec_##stage = ge::GetCurrentTimestap(); \ uint64_t call_num_of##stage = 0; \ uint64_t time_of##stage = 0 -#define GE_TIMESTAMP_RESTART(stage) (startUsec_##stage = ge::GetCurrentTimestamp()) +#define GE_TIMESTAMP_RESTART(stage) (startUsec_##stage = ge::GetCurrentTimestap()) #define GE_TIMESTAMP_ADD(stage) \ - time_of##stage += ge::GetCurrentTimestamp() - startUsec_##stage; \ + time_of##stage += ge::GetCurrentTimestap() - startUsec_##stage; \ call_num_of##stage++ #define GE_TIMESTAMP_CALLNUM_END(stage, stage_name) \ GELOGI("[GEPERFTRACE] The time cost of %s is [%lu] micro second, call num is %lu", (stage_name), time_of##stage, \ - call_num_of##stage) + call_num_of##stage) #define 
GE_TIMESTAMP_CALLNUM_EVENT_END(stage, stage_name) \ GEEVENT("[GEPERFTRACE] The time cost of %s is [%lu] micro second, call num is %lu", (stage_name), time_of##stage, \ @@ -65,11 +66,11 @@ } \ } while (0) -#define RUN_WITH_PERF_TIMESTAMP_NAME(var_name, prefix, func, ...) \ +#define RUN_WITH_PERF_TIMESTAMP_NAME(var_name, prefix, func, ...) \ do { \ GE_TIMESTAMP_START(var_name); \ auto ret_inner_macro = func(__VA_ARGS__); \ - GE_TIMESTAMP_EVENT_END(var_name, #prefix "::" #func) \ + GE_TIMESTAMP_EVENT_END(var_name, #prefix "::" #func) \ if (ret_inner_macro != ge::SUCCESS) { \ GELOGE(ret_inner_macro, "Failed to process " #prefix "_" #func); \ return ret_inner_macro; \ diff --git a/ge/graph/common/local_context.cc b/ge/graph/common/local_context.cc index d3e66861..43d3bc7c 100644 --- a/ge/graph/common/local_context.cc +++ b/ge/graph/common/local_context.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,9 +25,7 @@ namespace { thread_local OmgContext *omg_context = nullptr; } -void SetLocalOmgContext(OmgContext &context) { - omg_context = &context; -} +void SetLocalOmgContext(OmgContext &context) { omg_context = &context; } OmgContext &GetLocalOmgContext() { if (omg_context != nullptr) { @@ -37,4 +35,4 @@ OmgContext &GetLocalOmgContext() { return domi::GetContext(); } } -} +} // namespace ge diff --git a/ge/graph/common/local_context.h b/ge/graph/common/local_context.h index 83367766..1cdd2ca1 100644 --- a/ge/graph/common/local_context.h +++ b/ge/graph/common/local_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/common/transop_util.cc b/ge/graph/common/transop_util.cc index 684ef3dc..eb80fb69 100644 --- a/ge/graph/common/transop_util.cc +++ b/ge/graph/common/transop_util.cc @@ -28,9 +28,8 @@ std::map precision_loss_transfer_map = {{ge::DT_FLOA namespace ge { TransOpUtil::TransOpUtil() { - transop_index_map_ = {{TRANSDATA, 0}, {TRANSPOSE, 0}, {TRANSPOSED, 0}, - {RESHAPE, 0}, {REFORMAT, 0}, {CAST, 0}, - {SQUEEZE, 0}, {EXPANDDIMS, 0}}; + transop_index_map_ = {{TRANSDATA, 0}, {TRANSPOSE, 0}, {TRANSPOSED, 0}, {RESHAPE, 0}, + {REFORMAT, 0}, {CAST, 0}, {SQUEEZE, 0}, {EXPANDDIMS, 0}}; } TransOpUtil::~TransOpUtil() {} @@ -75,8 +74,7 @@ bool TransOpUtil::CheckPrecisionLoss(const ge::NodePtr &src_node) { auto iter = precision_loss_transfer_map.find(src_dtype); if (iter != precision_loss_transfer_map.end() && iter->second == dst_dtype) { GELOGW("Node %s transfer data type from %s to %s ,it will cause precision loss. ignore pass.", - src_node->GetName().c_str(), - TypeUtils::DataTypeToSerialString(src_dtype).c_str(), + src_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(src_dtype).c_str(), TypeUtils::DataTypeToSerialString(dst_dtype).c_str()); return false; } diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 052d20a0..e1322180 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -293,8 +293,8 @@ Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vectorGetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats); + model_manager->GetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats); if (ret != SUCCESS) { GELOGE(ret, "GetInputOutputDescInfoForZeroCopy failed."); return ret; diff --git a/ge/graph/execute/graph_execute.h b/ge/graph/execute/graph_execute.h index efc30743..242103f8 100755 --- a/ge/graph/execute/graph_execute.h +++ b/ge/graph/execute/graph_execute.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/case_label_maker.cc b/ge/graph/label/case_label_maker.cc index ea4b2a03..88b7ee8b 100644 --- a/ge/graph/label/case_label_maker.cc +++ b/ge/graph/label/case_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "case_label_maker.h" #include "common/util.h" diff --git a/ge/graph/label/case_label_maker.h b/ge/graph/label/case_label_maker.h index 1078a906..2e3b584b 100644 --- a/ge/graph/label/case_label_maker.h +++ b/ge/graph/label/case_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/label/if_label_maker.cc b/ge/graph/label/if_label_maker.cc index d07f7984..62722e7c 100644 --- a/ge/graph/label/if_label_maker.cc +++ b/ge/graph/label/if_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "if_label_maker.h" #include "common/util.h" diff --git a/ge/graph/label/if_label_maker.h b/ge/graph/label/if_label_maker.h index 0807f549..9ffe8fca 100644 --- a/ge/graph/label/if_label_maker.h +++ b/ge/graph/label/if_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/label_maker.cc b/ge/graph/label/label_maker.cc index 3f643fb2..88b90199 100644 --- a/ge/graph/label/label_maker.cc +++ b/ge/graph/label/label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/label_maker.h b/ge/graph/label/label_maker.h index 847c7904..759bf5cf 100644 --- a/ge/graph/label/label_maker.h +++ b/ge/graph/label/label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/label/label_maker_factory.h b/ge/graph/label/label_maker_factory.h index e0a48c6b..0a87ec66 100644 --- a/ge/graph/label/label_maker_factory.h +++ b/ge/graph/label/label_maker_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,12 +56,13 @@ class LabelMakerFactory { LabelMakerFactory::Instance().RegisterCreator(node_type, func); } - ~Registerar() = default; + ~Registerar() {} }; private: - LabelMakerFactory() = default; - ~LabelMakerFactory() = default; + LabelMakerFactory() {} + + ~LabelMakerFactory() {} // register creator, this function will call in the constructor void RegisterCreator(const std::string &node_type, const LabelCreatorFun func) { diff --git a/ge/graph/label/partitioned_call_label_maker.cc b/ge/graph/label/partitioned_call_label_maker.cc index 0be738f0..64db223b 100644 --- a/ge/graph/label/partitioned_call_label_maker.cc +++ b/ge/graph/label/partitioned_call_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "partitioned_call_label_maker.h" #include "common/util.h" @@ -49,7 +50,7 @@ Status PartitionedCallLabelMaker::Run(uint32_t &label_index) { return FAILED; } - const std::string stream_active_name = parent_node_->GetName() + "/StreamActive"; // rtStreamActive + const std::string stream_active_name = parent_node_->GetName() + "/StreamActive"; // rtStreamActive NodePtr stream_active = AddStreamActive(sub_graph, stream_active_name); if (stream_active == nullptr) { GELOGE(INTERNAL_ERROR, "Subgraph: %s add stream active node failed.", sub_graph->GetName().c_str()); @@ -70,4 +71,3 @@ Status PartitionedCallLabelMaker::Run(uint32_t &label_index) { REGISTER_LABEL_MAKER(PARTITIONEDCALL, PartitionedCallLabelMaker); REGISTER_LABEL_MAKER(STATEFULPARTITIONEDCALL, PartitionedCallLabelMaker); } // namespace ge - diff --git a/ge/graph/label/partitioned_call_label_maker.h b/ge/graph/label/partitioned_call_label_maker.h index b89cb94c..1c0f0890 100644 --- a/ge/graph/label/partitioned_call_label_maker.h +++ b/ge/graph/label/partitioned_call_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/label/while_label_maker.cc b/ge/graph/label/while_label_maker.cc index 83aad7c9..c5e0abb7 100644 --- a/ge/graph/label/while_label_maker.cc +++ b/ge/graph/label/while_label_maker.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "while_label_maker.h" #include "common/util.h" @@ -116,8 +117,8 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { // link Data input. const auto &all_in_data = cond_out_node->GetAllInDataAnchors(); if (all_in_data.size() != kCondOutputNum) { - GELOGE(FAILED, "Node: %s Cond sbugraph output size:%zu should equal size:%u.", - switch_node->GetName().c_str(), all_in_data.size(), kCondOutputNum); + GELOGE(FAILED, "Node: %s Cond sbugraph output size:%zu should equal size:%u.", switch_node->GetName().c_str(), + all_in_data.size(), kCondOutputNum); return FAILED; } diff --git a/ge/graph/label/while_label_maker.h b/ge/graph/label/while_label_maker.h index 0eb0deee..42e6a490 100644 --- a/ge/graph/label/while_label_maker.h +++ b/ge/graph/label/while_label_maker.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index cffd07e5..c173d67a 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -141,7 +141,7 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string } return ret; } - return SUCCESS; + return SUCCESS; } Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority, @@ -224,14 +224,13 @@ Status GraphLoader::CommandHandle(const Command &command) { return SUCCESS; } -Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, - size_t memsize, void *weight_ptr, size_t weightsize) { +Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t memsize, + void *weight_ptr, size_t weightsize) { GELOGI("Load model begin, model_id:%u.", model_id); // For ACL, Open Device from App. auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->LoadModelOffline( - model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); + Status ret = model_manager->LoadModelOffline(model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); if (ret != SUCCESS) { GELOGE(ret, "Load model failed, model_id:%u.", model_id); return ret; @@ -310,8 +309,8 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) { return RT_FAILED; } // Add small page memory size - free = static_cast(free_mem + VarManager::Instance(GetContext().SessionId())->GetUseMaxMemorySize() - - total_mem); + free = + static_cast(free_mem + VarManager::Instance(GetContext().SessionId())->GetUseMaxMemorySize() - total_mem); GELOGI("GetMemoryInfo free[%zu], total[%zu], return free[%ld]", free_mem, total_mem, free); return SUCCESS; } diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h index b581f2fa..c887c06b 100755 --- a/ge/graph/load/graph_loader.h +++ b/ge/graph/load/graph_loader.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 
2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/aipp_utils.cc b/ge/graph/load/new_model_manager/aipp_utils.cc index e0e60d2b..0a348109 100755 --- a/ge/graph/load/new_model_manager/aipp_utils.cc +++ b/ge/graph/load/new_model_manager/aipp_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,11 +29,11 @@ namespace ge { #define AIPP_CONVERT_TO_AIPP_INFO(KEY) aipp_info.KEY = aipp_params->KEY() -#define AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(KEY, INDEX) \ - do { \ - if (aipp_params->KEY##_size() > 0) { \ - aipp_info.KEY = aipp_params->KEY(INDEX); \ - } \ +#define AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(KEY, INDEX) \ + do { \ + if (aipp_params->KEY##_size() > 0) { \ + aipp_info.KEY = aipp_params->KEY(INDEX); \ + } \ } while (0) Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info) { diff --git a/ge/graph/load/new_model_manager/aipp_utils.h b/ge/graph/load/new_model_manager/aipp_utils.h index 78107f3e..2534b9fb 100755 --- a/ge/graph/load/new_model_manager/aipp_utils.h +++ b/ge/graph/load/new_model_manager/aipp_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc b/ge/graph/load/new_model_manager/cpu_queue_schedule.cc index 7f406985..01e1cfa8 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/new_model_manager/cpu_queue_schedule.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/cpu_queue_schedule.h b/ge/graph/load/new_model_manager/cpu_queue_schedule.h index 8999e975..cea00613 100644 --- a/ge/graph/load/new_model_manager/cpu_queue_schedule.h +++ b/ge/graph/load/new_model_manager/cpu_queue_schedule.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_CPU_QUEUE_SCHEDULE_H_ @@ -27,24 +28,24 @@ namespace ge { // For AICPU task "modelDequeue" / "modelEnqueue" struct MbufQueueInfo { - uint32_t queue_id; // Op queue id - uintptr_t in_mbuf; // addr for input mbuf + uint32_t queue_id; // Op queue id + uintptr_t in_mbuf; // addr for input mbuf }; // For AICPU task "modelPrepareInput" struct PrepareInputInfo { - uintptr_t in_mbuf; // input mbuf from dequeue - uint32_t mbuf_offset; // offset of mbuf(current is 0) - uint32_t data_size; // input Tensor size - uintptr_t data_addr; // input Tensor addr + uintptr_t in_mbuf; // input mbuf from dequeue + uint32_t mbuf_offset; // offset of mbuf(current is 0) + uint32_t data_size; // input Tensor size + uintptr_t data_addr; // input Tensor addr }; // For AICPU task "modelPrepareOutput" struct PrepareOutputInfo { - uint32_t data_size; // output Tensor size - uintptr_t data_addr; // output Tensor addr - uintptr_t in_mbuf; // input mbuf, for fill output mbuf header - uintptr_t out_mbuf; // output mbuf addr + uint32_t data_size; // output Tensor size + uintptr_t data_addr; // output Tensor addr + uintptr_t in_mbuf; // input mbuf, for fill output mbuf header + uintptr_t out_mbuf; // output mbuf addr }; // For AICPU task "modelZeroCopy" @@ -96,7 +97,8 @@ class CpuTaskZeroCopy : public CpuTaskInfo { Status Init(std::vector &mbuf_list, std::map &outside_addrs); Status Distribute() override; -private: + + private: void *src_addr_ = nullptr; void *dst_addr_ = nullptr; }; diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc index cbd27c7d..c6283d92 100644 --- a/ge/graph/load/new_model_manager/data_dumper.cc +++ b/ge/graph/load/new_model_manager/data_dumper.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 
(the "License"); * you may not use this file except in compliance with the License. @@ -83,44 +83,36 @@ static uint64_t GetNowTime() { return ret; } - -static void ReplaceStringElem(std::string &str) { - for_each(str.begin(), str.end(), [](char &ch) { - if ((ch == ' ') || (ch == '.') || (ch == '/') || (ch == '\\')) { - ch = '_'; - } - }); -} } // namespace static int32_t GetIrDataType(ge::DataType data_type) { static const std::map data_type_map = { - {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, - {ge::DT_FLOAT, ge::proto::DT_FLOAT}, - {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, - {ge::DT_INT8, ge::proto::DT_INT8}, - {ge::DT_UINT8, ge::proto::DT_UINT8}, - {ge::DT_INT16, ge::proto::DT_INT16}, - {ge::DT_UINT16, ge::proto::DT_UINT16}, - {ge::DT_INT32, ge::proto::DT_INT32}, - {ge::DT_INT64, ge::proto::DT_INT64}, - {ge::DT_UINT32, ge::proto::DT_UINT32}, - {ge::DT_UINT64, ge::proto::DT_UINT64}, - {ge::DT_BOOL, ge::proto::DT_BOOL}, - {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, - {ge::DT_DUAL, ge::proto::DT_DUAL}, - {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, - {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, - {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, - {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, - {ge::DT_QINT8, ge::proto::DT_QINT8}, - {ge::DT_QINT16, ge::proto::DT_QINT16}, - {ge::DT_QINT32, ge::proto::DT_QINT32}, - {ge::DT_QUINT8, ge::proto::DT_QUINT8}, - {ge::DT_QUINT16, ge::proto::DT_QUINT16}, - {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, - {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, - {ge::DT_STRING, ge::proto::DT_STRING}, + {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED}, + {ge::DT_FLOAT, ge::proto::DT_FLOAT}, + {ge::DT_FLOAT16, ge::proto::DT_FLOAT16}, + {ge::DT_INT8, ge::proto::DT_INT8}, + {ge::DT_UINT8, ge::proto::DT_UINT8}, + {ge::DT_INT16, ge::proto::DT_INT16}, + {ge::DT_UINT16, ge::proto::DT_UINT16}, + {ge::DT_INT32, ge::proto::DT_INT32}, + {ge::DT_INT64, ge::proto::DT_INT64}, + {ge::DT_UINT32, ge::proto::DT_UINT32}, + {ge::DT_UINT64, 
ge::proto::DT_UINT64}, + {ge::DT_BOOL, ge::proto::DT_BOOL}, + {ge::DT_DOUBLE, ge::proto::DT_DOUBLE}, + {ge::DT_DUAL, ge::proto::DT_DUAL}, + {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8}, + {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8}, + {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64}, + {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128}, + {ge::DT_QINT8, ge::proto::DT_QINT8}, + {ge::DT_QINT16, ge::proto::DT_QINT16}, + {ge::DT_QINT32, ge::proto::DT_QINT32}, + {ge::DT_QUINT8, ge::proto::DT_QUINT8}, + {ge::DT_QUINT16, ge::proto::DT_QUINT16}, + {ge::DT_RESOURCE, ge::proto::DT_RESOURCE}, + {ge::DT_STRING_REF, ge::proto::DT_STRING_REF}, + {ge::DT_STRING, ge::proto::DT_STRING}, }; auto iter = data_type_map.find(data_type); @@ -177,7 +169,7 @@ void DataDumper::SaveDumpInput(const std::shared_ptr &node) { } input_map_.insert( - {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); + {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); } } GELOGI("Save data message successfully"); @@ -204,17 +196,14 @@ void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr op_desc_info.op_type = op->GetType(); op_desc_info.task_id = task_id; op_desc_info.stream_id = stream_id; - for (size_t i = 0; i < op->GetAllInputsSize(); ++i) { - GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i); - if (input_tensor_desc == nullptr) { - continue; - } - op_desc_info.input_format.emplace_back(input_tensor_desc->GetFormat()); - op_desc_info.input_shape.emplace_back(input_tensor_desc->GetShape().GetDims()); - op_desc_info.input_data_type.emplace_back(input_tensor_desc->GetDataType()); + for (size_t i = 0; i < op->GetInputsSize(); ++i) { + GeTensorDesc input_desc = op->GetInputDesc(i); + op_desc_info.input_format.emplace_back(input_desc.GetFormat()); + op_desc_info.input_shape.emplace_back(input_desc.GetShape().GetDims()); + 
op_desc_info.input_data_type.emplace_back(input_desc.GetDataType()); int64_t input_size = 0; - - if (TensorUtils::GetTensorSizeInBytes(*input_tensor_desc, input_size) != SUCCESS) { + auto tensor_descs = op->GetAllInputsDesc(); + if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(i), input_size) != SUCCESS) { GELOGW("Get input size failed"); return; } @@ -222,15 +211,13 @@ void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr op_desc_info.input_size.emplace_back(input_size); } for (size_t j = 0; j < op->GetOutputsSize(); ++j) { - GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j); - if (output_tensor_desc == nullptr) { - continue; - } - op_desc_info.output_format.emplace_back(output_tensor_desc->GetFormat()); - op_desc_info.output_shape.emplace_back(output_tensor_desc->GetShape().GetDims()); - op_desc_info.output_data_type.emplace_back(output_tensor_desc->GetDataType()); + GeTensorDesc output_desc = op->GetOutputDesc(j); + op_desc_info.output_format.emplace_back(output_desc.GetFormat()); + op_desc_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); + op_desc_info.output_data_type.emplace_back(output_desc.GetDataType()); int64_t output_size = 0; - if (TensorUtils::GetTensorSizeInBytes(*output_tensor_desc, output_size) != SUCCESS) { + auto tensor_descs = op->GetAllOutputsDesc(); + if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(j), output_size) != SUCCESS) { GELOGW("Get input size failed"); return; } @@ -684,32 +671,12 @@ Status DataDumper::LoadDumpInfo() { op_mapping_info.set_flag(kAicpuLoadFlag); op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); - GELOGI("Dump step is %s and dump path is %s dump model is %s in load dump info", - dump_properties_.GetDumpStep().c_str(), dump_path.c_str(), dump_list_key.c_str()); - auto ret = BuildTaskInfo(op_mapping_info); - if (ret != SUCCESS) { - GELOGE(ret, "Build task info 
failed"); - return ret; - } - - SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info); - - SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); - - if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { - auto ret = ExecuteLoadDumpInfo(op_mapping_info); - if (ret != SUCCESS) { - GELOGE(ret, "Execute load dump info failed"); - return ret; - } - } - return SUCCESS; -} + GELOGI("Dump step is %s and dump path is %s in load dump info", dump_properties_.GetDumpStep().c_str(), + dump_path.c_str()); -Status DataDumper::BuildTaskInfo(aicpu::dump::OpMappingInfo &op_mapping_info) { for (const auto &op_iter : op_list_) { auto op_desc = op_iter.op; - GELOGD("Op %s in model begin to add task in op_mapping_info", op_desc->GetName().c_str()); + GELOGD("Op %s in model %s begin to add task in op_mapping_info", op_desc->GetName().c_str(), dump_list_key.c_str()); aicpu::dump::Task task; task.set_end_graph(false); task.set_task_id(op_iter.task_id); @@ -733,7 +700,7 @@ Status DataDumper::BuildTaskInfo(aicpu::dump::OpMappingInfo &op_mapping_info) { op_mapping_info.mutable_task()->Add(std::move(task)); continue; } - if (dump_properties_.GetDumpMode() == kDumpAll || is_op_debug_) { + if (dump_properties_.GetDumpMode() == kDumpAll) { auto ret = DumpOutput(op_iter, task); if (ret != SUCCESS) { GELOGE(ret, "Dump output failed when in dumping all"); @@ -750,6 +717,18 @@ Status DataDumper::BuildTaskInfo(aicpu::dump::OpMappingInfo &op_mapping_info) { continue; } } + + SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info); + + SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info); + + if (!op_list_.empty() || is_op_debug_ || is_end_graph_) { + auto ret = ExecuteLoadDumpInfo(op_mapping_info); + if (ret != SUCCESS) { + GELOGE(ret, "Execute load dump info failed"); + return ret; + } + } return SUCCESS; } @@ -768,7 +747,7 @@ void 
DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, is_end_graph_ = true; if (op_mapping_info.model_name_param_case() == aicpu::dump::OpMappingInfo::kModelName) { GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u", - op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_); + op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_); return; } GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_); @@ -923,14 +902,8 @@ Status DataDumper::DumpExceptionInfo(const std::vector exceptio dump_data.mutable_output()->Add(std::move(output)); } uint64_t now_time = GetNowTime(); - std::string op_name = op_desc_info.op_name; - std::string op_type = op_desc_info.op_type; - ReplaceStringElem(op_name); - ReplaceStringElem(op_type); - string dump_file_path = - "./" + op_type + "." + op_name + "." + to_string(op_desc_info.task_id) + "." + to_string(now_time); - GELOGI("The exception dump file path is %s", dump_file_path.c_str()); - + string dump_file_path = "./" + op_desc_info.op_type + "." + op_desc_info.op_name + "." + + to_string(op_desc_info.task_id) + "." + to_string(now_time); uint64_t proto_size = dump_data.ByteSizeLong(); unique_ptr proto_msg(new (std::nothrow) char[proto_size]); bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size); diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/new_model_manager/data_dumper.h index 2acb963b..30218416 100755 --- a/ge/graph/load/new_model_manager/data_dumper.h +++ b/ge/graph/load/new_model_manager/data_dumper.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -36,10 +36,10 @@ namespace ge { class DataDumper { public: - explicit DataDumper(const RuntimeParam &rsh) + DataDumper() : model_name_(), model_id_(0), - runtime_param_(rsh), + runtime_param_(), dev_mem_load_(nullptr), dev_mem_unload_(nullptr), op_list_(), @@ -58,6 +58,8 @@ class DataDumper { void SetModelId(uint32_t model_id) { model_id_ = model_id; } + void SetMemory(const RuntimeParam &runtime_param) { runtime_param_ = runtime_param; } + void SetDeviceId(uint32_t device_id) { device_id_ = device_id; } void SetComputeGraph(const ComputeGraphPtr &compute_graph) { compute_graph_ = compute_graph; }; @@ -103,7 +105,7 @@ class DataDumper { std::string om_name_; uint32_t model_id_; - const RuntimeParam &runtime_param_; + RuntimeParam runtime_param_; void *dev_mem_load_; void *dev_mem_unload_; @@ -125,7 +127,6 @@ class DataDumper { std::map ref_info_; void *l1_fusion_addr_ = nullptr; - uint32_t op_debug_task_id_ = 0; uint32_t op_debug_stream_id_ = 0; void *op_debug_addr_ = nullptr; @@ -133,28 +134,22 @@ class DataDumper { DumpProperties dump_properties_; - // Build task info of op mapping info - Status BuildTaskInfo(aicpu::dump::OpMappingInfo &op_mapping_info); Status DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); Status DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Output &output, size_t i, const std::string &node_name_index); Status DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); Status DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task); Status DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i, - const std::string &node_name_index); + const std::string &node_name_index); Status ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); void SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id, aicpu::dump::OpMappingInfo &op_mapping_info); void SetOpDebugIdToAicpu(uint32_t task_id, uint32_t 
stream_id, void *op_debug_addr, aicpu::dump::OpMappingInfo &op_mapping_info); Status ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info); - Status GenerateInput(aicpu::dump::Input &input, - const OpDesc::Vistor &tensor_descs, - const uintptr_t &addr, - size_t index); - Status GenerateOutput(aicpu::dump::Output &output, - const OpDesc::Vistor &tensor_descs, - const uintptr_t &addr, - size_t index); + Status GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor &tensor_descs, + const uintptr_t &addr, size_t index); + Status GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor &tensor_descs, + const uintptr_t &addr, size_t index); void GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task); }; struct DataDumper::InnerDumpInfo { diff --git a/ge/graph/load/new_model_manager/data_inputer.cc b/ge/graph/load/new_model_manager/data_inputer.cc index 5efc710e..594a7bcd 100755 --- a/ge/graph/load/new_model_manager/data_inputer.cc +++ b/ge/graph/load/new_model_manager/data_inputer.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/data_inputer.h b/ge/graph/load/new_model_manager/data_inputer.h index 14ebcea5..cc511c36 100755 --- a/ge/graph/load/new_model_manager/data_inputer.h +++ b/ge/graph/load/new_model_manager/data_inputer.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 6d255cd1..50867782 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -42,8 +42,8 @@ #include "graph/ge_context.h" #include "graph/graph.h" #include "graph/load/new_model_manager/cpu_queue_schedule.h" -#include "graph/load/new_model_manager/model_manager.h" #include "graph/load/new_model_manager/tbe_handle_store.h" +#include "graph/load/new_model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" @@ -107,7 +107,6 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptrGetWeight(); std::size_t weights_size = weights.GetSize(); GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE); @@ -285,7 +281,6 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p } mem_base_ = static_cast(dev_ptr); - p2p_mem_base_ = static_cast(dev_ptr); weights_mem_base_ = static_cast(dev_ptr); is_inner_mem_base_ = false; is_inner_weight_base_ = false; @@ -298,23 +293,13 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p } GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, mem_base_, data_size); + weights_mem_base_ = mem_base_; is_inner_mem_base_ = true; is_inner_weight_base_ = true; } - if (p2p_data_size != 0) { - p2p_mem_base_ = MallocP2PMem(p2p_data_size); - if (p2p_mem_base_ == nullptr) { - GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); - return GE_EXEC_ALLOC_P2P_MEM_FAILED; - } - GELOGI("InitModelMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, - p2p_mem_base_, p2p_data_size); - is_inner_p2p_mem_base_ = true; - } - if (weights_size != 0) { weights_mem_base_ = static_cast(weight_ptr); 
is_inner_weight_base_ = false; @@ -335,7 +320,6 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; - runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; return SUCCESS; } @@ -359,7 +343,6 @@ Status DavinciModel::InitVariableMem() { void DavinciModel::InitRuntimeParams() { int64_t value = 0; bool ret; - MemInfo p2p_mem_info; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_MEMORY_SIZE, value); runtime_param_.mem_size = ret ? (uint64_t)value : 0; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_WEIGHT_SIZE, value); @@ -383,17 +366,14 @@ void DavinciModel::InitRuntimeParams() { ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_VAR_SIZE, value); runtime_param_.var_size = ret ? (uint64_t)value : 0; session_id_ = runtime_param_.session_id; - ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_P2P_MEMORY_SIZE, value); - p2p_mem_info.memory_size = ret ? 
(uint64_t)value : 0; - runtime_param_.memory_infos[RT_MEMORY_P2P_DDR] = std::move(p2p_mem_info); GELOGI( - "InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, " - "logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, " - "memory_size:%lu, weight_size:%lu, var_size:%lu", - runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num, - runtime_param_.logic_mem_base, runtime_param_.logic_weight_base, runtime_param_.logic_var_base, - runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); + "InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, " + "logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, " + "memory_size:%lu, weight_size:%lu, var_size:%lu", + runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num, + runtime_param_.logic_mem_base, runtime_param_.logic_weight_base, runtime_param_.logic_var_base, + runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); } void DavinciModel::CheckHasHcomOp() { @@ -538,7 +518,6 @@ void DavinciModel::OpDebugUnRegister() { debug_reg_mutex_.unlock(); rtError_t rt_ret = RT_ERROR_NONE; if (rt_model_handle_ != nullptr) { - GELOGD("start call debug_unregister."); rt_ret = rtDebugUnRegister(rt_model_handle_); if (rt_ret != RT_ERROR_NONE) { GELOGW("rtDebugUnRegister failed, ret: 0x%X", rt_ret); @@ -623,6 +602,11 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size // create model_handle to load model GE_CHK_RT_RET(rtModelCreate(&rt_model_handle_, 0)); GE_CHK_RT_RET(rtModelGetId(rt_model_handle_, &runtime_model_id_)); + // malloc 2M for dump l1fusion op + GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); + + // send l1fusion dump addr to rts + GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, 
kDumpFlagOfL1Fusion)); // inference will use default graph_id 0; runtime_param_.graph_id = compute_graph->GetGraphID(); @@ -672,17 +656,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size auto ret = DoTaskSink(); GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); - auto all_dump_model = GetDumpProperties().GetAllDumpModel(); - bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end(); - bool findByModelName = all_dump_model.find(name_) != all_dump_model.end(); - if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) { - // malloc 2M for dump l1fusion op - GE_CHK_RT_RET(rtMalloc(&l1_fusion_addr_, kDumpL1FusionOpMByteSize, RT_MEMORY_DDR)); - - // send l1fusion dump addr to rts - GE_CHK_RT_RET(rtDumpAddrSet(rt_model_handle_, l1_fusion_addr_, kDumpL1FusionOpMByteSize, kDumpFlagOfL1Fusion)); - } - /// In zero copy model, if a aicpu operator is connected to the first or last layer, before model execution, /// the aicpu opertor needs to destroy history record, and update operator memory address. /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). 
@@ -783,19 +756,20 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { typedef Status (DavinciModel::*OpDescCall)(const OpDescPtr &); static std::map op_desc_handle = { - {VARIABLE, &DavinciModel::InitVariable}, - {CONSTANTOP, &DavinciModel::InitConstant}, - {STREAMACTIVE, &DavinciModel::InitStreamActive}, - {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, - {STREAMSWITCHN, &DavinciModel::InitStreamSwitchN}, - {LABELSET, &DavinciModel::InitLabelSet}, - {CASE, &DavinciModel::InitCase}, + {VARIABLE, &DavinciModel::InitVariable}, + {CONSTANTOP, &DavinciModel::InitConstant}, + {STREAMACTIVE, &DavinciModel::InitStreamActive}, + {STREAMSWITCH, &DavinciModel::InitStreamSwitch}, + {STREAMSWITCHN, &DavinciModel::InitStreamSwitchN}, + {LABELSET, &DavinciModel::InitLabelSet}, + {CASE, &DavinciModel::InitCase}, }; GE_CHK_STATUS_RET(InitInputOutputForDynamic(compute_graph), "InitInputOutputForDynamic failed."); map data_by_index; auto nodes = compute_graph->GetAllNodes(); + const TBEKernelStore &tbekernel_store = ge_model_->GetTBEKernelStore(); const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); for (size_t i = 0; i < nodes.size(); i++) { auto node = nodes.at(i); @@ -808,6 +782,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { op_list_[op_desc->GetId()] = op_desc; GE_TIMESTAMP_RESTART(LoadTBEKernelBinToOpDesc); + tbekernel_store.LoadTBEKernelBinToOpDesc(op_desc); aicpu_kernel_store.LoadCustAICPUKernelBinToOpDesc(op_desc); GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); @@ -923,8 +898,8 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma } uint32_t parent_index = 0; // Ignore subgraph Data Node. 
if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); - return InitInputBatchLabel(node); + GELOGI("Skip subgraph Data node: %s.", op_desc->GetName().c_str()); + return SUCCESS; } data_op_list_.push_back(op_desc); @@ -1035,9 +1010,9 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); GE_CHECK_NOTNULL(owner_graph); if (owner_graph->GetParentGraph() != nullptr) { - GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); + GELOGI("Skip subgraph NetOutput node: %s.", op_desc->GetName().c_str()); op_list_.erase(op_desc->GetId()); - return InitOutputBatchLabel(node); + return SUCCESS; } output_op_list_.push_back(op_desc); @@ -1083,7 +1058,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { for (size_t i = 0; i < tensor_addrs.size(); ++i) { void *real_addr = tensor_addrs.at(i); DisableZeroCopy(real_addr); - real_virtual_addrs_.insert(real_addr); + real_virtual_addrs_.emplace_back(real_addr); } GELOGI("SetOutputOutsideAddr success."); } @@ -1149,68 +1124,6 @@ Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) { return SUCCESS; } -/// -/// @ingroup ge -/// @brief input zero copy node Initialize. -/// @param [in] NodePtr: Data Op. -/// @return Status -/// -Status DavinciModel::InitInputBatchLabel(const NodePtr &node) { - string batch_label; - if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { - return SUCCESS; // Not Multi-batch. 
- } - - const auto &out_data_anchor = node->GetOutDataAnchor(kDataIndex); - GE_CHECK_NOTNULL(out_data_anchor); - - for (const auto &peer_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) { - const auto &node = peer_in_data_anchor->GetOwnerNode(); - const auto &op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { - zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; - GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s", op_desc->GetName().c_str(), - op_desc->GetId(), batch_label.c_str()); - } - } - - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief output zero copy node Initialize for Case. -/// @param [in] NodePtr: netoutput Op. -/// @return Status -/// -Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) { - string batch_label; - if (!AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { - return SUCCESS; // Not Multi-batch. - } - - for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { - const auto &peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - if (peer_out_data_anchor == nullptr) { - continue; - } - - const auto &peer_node = peer_out_data_anchor->GetOwnerNode(); - const auto &op_desc = peer_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - if (zero_copy_op_id_batch_label_.find(op_desc->GetId()) == zero_copy_op_id_batch_label_.end()) { - zero_copy_op_id_batch_label_[op_desc->GetId()] = batch_label; - GELOGD("Init Output zero copy nodes success, op name: %s, op id: %ld, batch label: %s", - op_desc->GetName().c_str(), op_desc->GetId(), batch_label.c_str()); - } - } - - return SUCCESS; -} - /// @ingroup ge /// @brief LabelSet Op Initialize. /// @param [in] op_desc: LabelSet Op descriptor. @@ -1906,7 +1819,7 @@ Status DavinciModel::GetOutputDescInfo(vector &output_desc, // neweast plan, the index will add to name during generate model. 
bool contains_colon = out_node_name_[index].find(":") != std::string::npos; output_name = - contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); + contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); } else { output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]); @@ -1978,7 +1891,7 @@ Status DavinciModel::SyncVarData() { for (auto op_desc : variable_op_list_) { ret = - VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, op_desc->GetName().c_str()); } @@ -2143,7 +2056,7 @@ Status DavinciModel::SinkModelProfile() { memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc)); memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc)); memory_info.total_size = - memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; + memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; reporter_data.data = (unsigned char *)&memory_info; reporter_data.dataLen = sizeof(struct memoryInfo); GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", @@ -2177,9 +2090,9 @@ Status DavinciModel::SinkTimeProfile(const InputData ¤t_data) { // report model data tag name std::string tag_name; tag_name.append("model_time_info_") - .append(std::to_string(this->Id())) - .append("_") - .append(std::to_string(current_data.index)); + .append(std::to_string(this->Id())) + .append("_") + .append(std::to_string(current_data.index)); GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, 
MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, return FAILED, "Sink model tag memcpy error."); @@ -2445,7 +2358,7 @@ Status DavinciModel::ReturnNoOutput(uint32_t data_id) { GELOGI("ReturnNoOutput model id:%u", model_id_); for (auto op_desc : variable_op_list_) { Status ret = VarManager::Instance(session_id_) - ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, op_desc->GetName().c_str()); } @@ -2497,9 +2410,9 @@ void *DavinciModel::Run(DavinciModel *model) { GE_TIMESTAMP_START(Model_SyncVarData); ret = model->SyncVarData(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); - CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); - continue, "Copy input data to model failed."); // [No need to check value] + ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); + CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); + continue, "Copy input data to model failed."); // [No need to check value] GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(Model_SyncVarData, "Model Run SyncVarData")); GELOGI("Copy input data, model id:%u", model_id); @@ -2507,9 +2420,9 @@ void *DavinciModel::Run(DavinciModel *model) { model->SetProfileTime(MODEL_PRE_PROC_START)); ret = model->CopyInputData(current_data, false); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); - CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); - continue, "Copy input data to model failed."); 
// [No need to check value] + ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); + CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); + continue, "Copy input data to model failed."); // [No need to check value] GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); if (ProfilingManager::Instance().ProfilingOpTraceOn()) { @@ -2556,11 +2469,11 @@ void *DavinciModel::Run(DavinciModel *model) { seq_end_flag = true; } GE_IF_BOOL_EXEC( - rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); - (void)model->ReturnResult(current_data.index, false, seq_end_flag, - data_wrapper->GetOutput()); // [No need to check value] - CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); - continue); + rt_ret != RT_ERROR_NONE, rslt_flg = false; GELOGI("seq_end_flg: %d", seq_end_flag); + (void)model->ReturnResult(current_data.index, false, seq_end_flag, + data_wrapper->GetOutput()); // [No need to check value] + CsaInteract::GetInstance().StoreInternalErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC); + continue); GELOGI("rtStreamSynchronize end."); GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(rtStreamSynchronize, "GraphExcute::Wait for rtStreamSynchronize")); @@ -2764,17 +2677,12 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec } GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); - if (total_args_size_ == 0) { - GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); - } else { - uint32_t total_addr_size = total_io_addrs_.size() * sizeof(uint64_t); - 
GELOGI("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, src size %u", args_, total_args_size_, - total_addr_size); + uint32_t total_addr_size = total_io_addrs_.size() * sizeof(uint64_t); + GELOGI("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, src size %u", args_, total_args_size_, + total_addr_size); - Status rt_ret = - rtMemcpy(args_, total_args_size_, total_io_addrs_.data(), total_addr_size, RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) - } + Status rt_ret = rtMemcpy(args_, total_args_size_, total_io_addrs_.data(), total_addr_size, RT_MEMCPY_HOST_TO_DEVICE); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy error, ret: Ox%X", rt_ret); return FAILED;) GELOGI("DavinciModel::UpdateKnownNodeArgs success"); return SUCCESS; @@ -2819,11 +2727,6 @@ Status DavinciModel::MallocKnownArgs() { } } // malloc args memory - if (total_args_size_ == 0) { - GELOGW("DavinciModel::MallocKnownArgs total_args_size_ equals to zero."); - return SUCCESS; - } - rtError_t rt_ret = rtMalloc(&args_, total_args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); @@ -2872,15 +2775,19 @@ Status DavinciModel::DistributeTask() { auto op_index = std::max(model_task_def->task(task_index).kernel().context().op_index(), model_task_def->task(task_index).kernel_ex().op_index()); OpDescPtr op = GetOpByIndex(op_index); - GE_CHECK_NOTNULL(op); + if (op == nullptr) { + GELOGE(PARAM_INVALID, "Op index %u is null, op list size %zu.", op_index, op_list_.size()); + return PARAM_INVALID; + } SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); if (reinterpret_cast(task->GetDumpArgs()) != nullptr) { bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo(); - if (call_dump || is_op_debug_reg_) { + if (call_dump) { 
SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs()); } } + // get op_name by task_index if (task->GetCtx() != nullptr) { auto iter = op_name_map_.find(task_index); @@ -2956,7 +2863,7 @@ void DavinciModel::SetCopyOnlyOutput() { /// @return None. /// void DavinciModel::DisableZeroCopy(const void *addr) { - if (real_virtual_addrs_.find(addr) == real_virtual_addrs_.end()) { + if (find(real_virtual_addrs_.begin(), real_virtual_addrs_.end(), addr) == real_virtual_addrs_.end()) { return; } @@ -3055,9 +2962,9 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 if (input_size > op_size) { GELOGW( - "Input size [%u] is bigger than om size need [%u], " - "MAY cause inference result ERROR, please check model input", - input_size, op_size); + "Input size [%u] is bigger than om size need [%u], " + "MAY cause inference result ERROR, please check model input", + input_size, op_size); } bool is_dynamic_aipp = false; for (const auto &op_desc : data_op_list_) { @@ -3170,8 +3077,8 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map & for (size_t count = 0; count < data.second.GetDataCount(); ++count) { int64_t size = data.second.GetDataInfo().at(count).first; void *addr = data.second.GetDataInfo().at(count).second; - void *buffer_addr = reinterpret_cast(reinterpret_cast(buffer.data) + - data.second.GetRelativeOffset().at(count)); + void *buffer_addr = + reinterpret_cast(reinterpret_cast(buffer.data) + data.second.GetRelativeOffset().at(count)); GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p", input_or_output.c_str(), data.first, addr, size, buffer_addr); // For input data, just copy for rts task. 
@@ -3245,10 +3152,10 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { return PARAM_INVALID;); GeTensor *tensor = const_cast(v_weights[0].get()); - GE_IF_BOOL_EXEC(static_cast(v_output_size[0]) < tensor->GetData().size(), - GELOGE(PARAM_INVALID, "output size:%u less than weight data size:%zu", v_output_size[0], - tensor->GetData().size()); - return PARAM_INVALID;); + GE_IF_BOOL_EXEC( + static_cast(v_output_size[0]) < tensor->GetData().size(), + GELOGE(PARAM_INVALID, "output size:%u less than weight data size:%zu", v_output_size[0], tensor->GetData().size()); + return PARAM_INVALID;); GE_IF_BOOL_EXEC(tensor->GetData().size() == 0, GELOGW("const op:%s has no weight data.", op_desc->GetName().c_str()); return SUCCESS;); @@ -3270,7 +3177,7 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { uint64_t offset = static_cast(elem_num * kBytes); uint64_t hbm_raw_data_base_addr = - reinterpret_cast(reinterpret_cast(v_output_addr[0])) + offset; + reinterpret_cast(reinterpret_cast(v_output_addr[0])) + offset; for (int64_t i = elem_num - 1; i >= 0; --i) { buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); } @@ -3290,8 +3197,7 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { /// @return Status /// Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { - auto kernel = ge_model_->GetTBEKernelStore().FindKernel(op_desc->GetName()); - auto tbe_kernel = (kernel != nullptr) ? 
kernel : op_desc->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); + TBEKernelPtr tbe_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); if (tbe_kernel == nullptr) { GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc->GetName().c_str()); return INTERNAL_ERROR; @@ -3316,8 +3222,6 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { binary.magic = RT_DEV_BINARY_MAGIC_ELF; - } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { - binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; } else { GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); return PARAM_INVALID; @@ -3644,26 +3548,13 @@ uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { return mem_base; } -uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) { - uint8_t *p2p_mem_base = nullptr; - const string purpose("p2p memory, used for some op related to hcom"); - if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { - string p2p_memory_key = std::to_string(0) + "_p"; - p2p_mem_base = - MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_memory_key, p2p_data_size, GetDeviceId()); - } else { - p2p_mem_base = MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_data_size, GetDeviceId()); - } - return p2p_mem_base; -} - uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { uint8_t *weights_mem_base = nullptr; const string purpose("weights memory in inference network."); if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { string weight_memory_key = std::to_string(0) + "_w"; weights_mem_base = - MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); + MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); } else { weights_mem_base = 
MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weights_size, GetDeviceId()); } @@ -3686,22 +3577,6 @@ void DavinciModel::FreeFeatureMapMem() { } } -void DavinciModel::FreeP2PMem() { - if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { - std::string p2p_memory_key = std::to_string(0) + "_p"; - if (MemManager::Instance(RT_MEMORY_P2P_DDR)->GetMemoryAddr(p2p_memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_memory_key, GetDeviceId()), - "failed to free p2p memory"); - } - p2p_mem_base_ = nullptr; - } else { - GE_IF_BOOL_EXEC(p2p_mem_base_ != nullptr && is_inner_mem_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_mem_base_, GetDeviceId()), - "failed to free p2p memory"); - p2p_mem_base_ = nullptr); - } -} - void DavinciModel::FreeWeightsMem() { if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { string memory_key = std::to_string(0) + "_w"; @@ -3739,7 +3614,7 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) } GE_CHK_STATUS_RET_NOLOG( - TransVarDataUtils::TransAllVarData(variable_node_list, session_id_, ctx, graph_id, kThreadNum)); + TransVarDataUtils::TransAllVarData(variable_node_list, session_id_, ctx, graph_id, kThreadNum)); GELOGI("TransAllVarData success."); return SUCCESS; @@ -3749,6 +3624,7 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { GELOGI("set data dumper args, name: %s, id: %u.", name_.c_str(), model_id_); data_dumper_.SetModelName(name_); data_dumper_.SetModelId(model_id_); + data_dumper_.SetMemory(runtime_param_); data_dumper_.SetOmName(om_name_); data_dumper_.SetComputeGraph(compute_graph); data_dumper_.SetRefInfo(saved_task_addrs_); @@ -3819,14 +3695,11 @@ Status DavinciModel::GetComputeGraphInfo(const ComputeGraphPtr &graph, vectorGetName(); compute_graph_info.op_type = op_desc->GetType(); - for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { - GeTensorDescPtr input_desc = 
op_desc->MutableInputDesc(i); - if (input_desc == nullptr) { - continue; - } - compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); - compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); - compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); + for (size_t i = 0; i < op_desc->GetInputsSize(); ++i) { + GeTensorDesc input_desc = op_desc->GetInputDesc(i); + compute_graph_info.input_format.emplace_back(input_desc.GetFormat()); + compute_graph_info.input_shape.emplace_back(input_desc.GetShape().GetDims()); + compute_graph_info.input_data_type.emplace_back(input_desc.GetDataType()); } for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { @@ -3914,12 +3787,12 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vectorGetInputDescPtr(kDataIndex)), data_input_size); GELOGD( - "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: " - "%s, data_type: %s, shape: %s .", - index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, - TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), - TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), - formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); + "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: " + "%s, data_type: %s, shape: %s .", + index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, + TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), + TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), + formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); } } diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index f41817bb..438fe639 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ 
b/ge/graph/load/new_model_manager/davinci_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -189,8 +189,6 @@ class DavinciModel { // get total mem size size_t TotalMemSize() const { return runtime_param_.mem_size; } - const std::map &P2PMemInfos() const {return runtime_param_.memory_infos;} - // model name string Name() const { return name_; } @@ -412,8 +410,6 @@ class DavinciModel { void DisableZeroCopy(const void *addr); - bool GetOpDugReg() const { return is_op_debug_reg_; } - /// /// @ingroup ge /// @brief Save outside address of Data or NetOutput used info for ZeroCopy. @@ -452,9 +448,7 @@ class DavinciModel { DavinciModel(const DavinciModel &model) = delete; - const map> &GetHcclFolowStream() { - return main_follow_stream_mapping_; - } + const map> &GetHcclFolowStream() { return main_follow_stream_mapping_; } void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream); void InitRuntimeParams(); @@ -504,6 +498,11 @@ class DavinciModel { void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); } const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); } + void SetMemcpyOffsetAndAddr(map &memcpy_4g_offset_addr) { + memcpy_4g_offset_addr_.insert(memcpy_4g_offset_addr.begin(), memcpy_4g_offset_addr.end()); + } + const map &GetMemcpyOffsetAndAddr() const { return memcpy_4g_offset_addr_; } + bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info); } @@ -515,10 +514,8 @@ class DavinciModel { uint8_t *var_mem_base_; // memory address of model uint8_t *mem_base_; - uint8_t *p2p_mem_base_; bool is_inner_mem_base_; bool is_inner_weight_base_; - bool 
is_inner_p2p_mem_base_; // input data manager DataInputer *data_inputer_; @@ -600,14 +597,10 @@ class DavinciModel { uint8_t *MallocWeightsMem(size_t weights_size); - uint8_t* MallocP2PMem(size_t p2p_data_size); - void FreeFeatureMapMem(); void FreeWeightsMem(); - void FreeP2PMem(); - void ReleaseTask(); void UnbindTaskSinkStream(); @@ -672,22 +665,6 @@ class DavinciModel { /// /// @ingroup ge - /// @brief input zero copy node Initialize for Case. - /// @param [in] NodePtr: Data Op. - /// @return Status - /// - Status InitInputBatchLabel(const NodePtr &node); - - /// - /// @ingroup ge - /// @brief output zero copy node Initialize for Case. - /// @param [in] NodePtr: netoutput Op. - /// @return Status - /// - Status InitOutputBatchLabel(const NodePtr &node); - - /// - /// @ingroup ge /// @brief Constant Op Init. /// @return Status /// @@ -868,7 +845,7 @@ class DavinciModel { std::map new_input_outside_addrs_; std::map new_output_outside_addrs_; - std::set real_virtual_addrs_; + std::vector real_virtual_addrs_; // output op: save cce op actual needed memory size vector output_memory_size_list_; @@ -993,6 +970,8 @@ class DavinciModel { void *op_debug_addr_ = nullptr; void *p2p_debug_addr_ = nullptr; bool is_new_model_desc_{false}; + + std::map memcpy_4g_offset_addr_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/new_model_manager/davinci_model_parser.cc index 34180d08..b744f907 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/new_model_manager/davinci_model_parser.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.h b/ge/graph/load/new_model_manager/davinci_model_parser.h index 83eb4cc3..8907c97d 100755 --- a/ge/graph/load/new_model_manager/davinci_model_parser.h +++ b/ge/graph/load/new_model_manager/davinci_model_parser.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index ac906c67..4a596738 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,7 +57,7 @@ std::mutex ModelManager::exeception_infos_mutex_; std::shared_ptr ModelManager::GetInstance() { static const std::shared_ptr instance_ptr = - shared_ptr(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr); + shared_ptr(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr); return instance_ptr; } @@ -107,7 +107,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u } rt_ret = - rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); + rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "memory copy to device failed. 
ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); @@ -557,10 +557,10 @@ Status ModelManager::Stop(uint32_t model_id) { /// Status ModelManager::HandleCommand(const Command &command) { static const std::map> cmds = { - {kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, - {kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, - {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, - {kCmdTypeProfStop, HandleProfStopCommand}}; + {kCmdTypeProfile, HandleProfileCommand}, {kCmdTypeDump, HandleDumpCommand}, + {kCmdTypeProfiling, HandleAclProfilingCommand}, {kCmdTypeProfInit, HandleProfInitCommand}, + {kCmdTypeProfFinalize, HandleProfFinalizeCommand}, {kCmdTypeProfStart, HandleProfStartCommand}, + {kCmdTypeProfStop, HandleProfStopCommand}}; auto iter = cmds.find(command.cmd_type); if (iter == cmds.end()) { @@ -1096,51 +1096,19 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { return SUCCESS; } -Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name) { - GELOGI("LoadCustAicpuSo in, op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str()); +Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, string so_name) { std::lock_guard lock(cust_aicpu_mutex_); - // get current context - rtContext_t rt_cur_ctx = nullptr; - auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); - if (rt_err != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast(rt_err)); - return RT_FAILED; - } - // use current context as resource key instead - uintptr_t resource_id = reinterpret_cast(rt_cur_ctx); - auto it = cust_aicpu_so_.find(resource_id); + auto it = cust_aicpu_so_.find(so_name); if (it == cust_aicpu_so_.end()) { GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. 
op name %s, so_name %s", op_desc->GetName().c_str(), so_name.c_str()); - std::set so_name_set; - so_name_set.insert(so_name); - cust_aicpu_so_[resource_id] = so_name_set; - GELOGI("LoadCustAicpuSo new aicpu so resource_id %lu.", resource_id); - return SUCCESS; - } - auto it_so_name = it->second.find(so_name); - if (it_so_name == it->second.end()) { - GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s", - op_desc->GetName().c_str(), so_name.c_str()); - it->second.insert(so_name); - GELOGI("LoadCustAicpuSo add aicpu so resource_id %lu.", resource_id); - } - return SUCCESS; -} - -Status ModelManager::ClearAICPUSo(void *ctx) { - auto ctx_id = reinterpret_cast(ctx); - GELOGI("ClearAICPUSo in. resource id = 0x%lx", static_cast(ctx_id)); - std::lock_guard lock(cust_aicpu_mutex_); - auto it = cust_aicpu_so_.find(ctx_id); - if (it == cust_aicpu_so_.end()) { - return SUCCESS; + (void)cust_aicpu_so_.insert(so_name); + GELOGI("LaunchCustAicpuSo op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str()); } - (void)cust_aicpu_so_.erase(it); return SUCCESS; } -Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name) { +Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name) { CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); if (aicpu_kernel == nullptr) { GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str()); @@ -1157,8 +1125,8 @@ Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so GE_CHK_RT(rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM)); GE_CHK_RT(rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT(rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM)); - GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast(so_name.c_str()), - so_name.size(), 
RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast(so_name.c_str()), so_name.size(), + RT_MEMCPY_HOST_TO_DEVICE)); CustAicpuSoBuf cust_aicpu_so_buf; cust_aicpu_so_buf.kernelSoBuf = reinterpret_cast(reinterpret_cast(d_aicpu_data)); @@ -1172,7 +1140,6 @@ Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so GE_CHK_RT(rtStreamCreate(&stream, 0)); GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream)); - GELOGI("LaunchCustAicpuSo so buf len %u, so name len %u.", aicpu_data_length, so_name.size()); status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 9e8f61db..3dce3807 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -270,9 +270,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); - ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name); - ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name); - ge::Status ClearAICPUSo(void *ctx); + ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, string so_name); + + ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name); ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); @@ -335,12 +335,12 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::mutex map_mutex_; std::mutex sess_ids_mutex_; std::mutex session_id_create_mutex_; - static::std::mutex exeception_infos_mutex_; + static ::std::mutex exeception_infos_mutex_; uint64_t session_id_bias_; std::set sess_ids_; std::vector exception_infos_; std::mutex cust_aicpu_mutex_; - std::map> cust_aicpu_so_; + std::set cust_aicpu_so_; static DumpProperties dump_properties_; }; diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc index 0884ba8b..2bb111f3 100755 --- a/ge/graph/load/new_model_manager/model_utils.cc +++ b/ge/graph/load/new_model_manager/model_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -46,8 +46,10 @@ namespace ge { vector ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { vector v_input_size; GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_size); - const size_t inputs_size = op_desc->GetAllInputsSize(); + const string op_type = op_desc->GetType(); + + const vector v_is_input_const = op_desc->GetIsInputConst(); for (size_t i = 0; i < inputs_size; ++i) { const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(i); if (tensor_desc == nullptr) { @@ -56,12 +58,23 @@ vector ModelUtils::GetInputSize(ConstOpDescPtr op_desc) { } int64_t tensor_size = 0; + if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { + // TBE: add weights size to input + GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); + if (tensor_size) { + v_input_size.push_back(tensor_size); + } + GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); + continue; + } + GE_IF_BOOL_EXEC( - TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, - GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); - continue); + TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, + GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); + continue); + + GELOGI("[IMAS]GetInputSize op: %s, index: %lu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); - GELOGI("[IMAS]GetInputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); v_input_size.push_back(tensor_size); } @@ -92,11 +105,10 @@ vector ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) { int64_t tensor_size = 0; GE_IF_BOOL_EXEC( - TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, - GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i); - continue); + TensorUtils::GetSize(*tensor_desc, tensor_size) != GRAPH_SUCCESS, + GELOGI("Get size from 
TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i); + continue); - GELOGI("[IMAS]GetOutputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size); v_output_size.push_back(tensor_size); } @@ -302,7 +314,7 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co GE_CHECK_NOTNULL_EXEC(op_desc, return v_input_data_addr); uint64_t session_id = model_param.session_id; - const size_t inputs_size = op_desc->GetInputsSize(); + const size_t inputs_size = op_desc->GetAllInputsSize(); const vector v_input_offset = op_desc->GetInputOffset(); const string op_type = op_desc->GetType(); @@ -318,8 +330,10 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co } for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast(i)); - GE_IF_BOOL_EXEC(tensor_desc == nullptr, GELOGD("Op: %s, Index: %zu, has no input", op_desc->GetName().c_str(), i); - continue;) + if (tensor_desc == nullptr) { + GELOGD("Op: %s, Index: %zu, has no input", op_desc->GetName().c_str(), i); + continue; + } if ((i < v_is_input_const.size()) && v_is_input_const[i] && (op_type != NETOUTPUT)) { // TBE: add weights address to input int64_t tensor_size = 0; @@ -337,16 +351,6 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co continue; } - int64_t mem_type; - bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); - if (tensor_has_mem_type && v_memory_type[i] != RT_MEMORY_L1) { - uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_input_offset[i]; - v_input_data_addr.push_back(p2p_mem_addr); - GELOGI("[IMAS]GetInputDataAddrs graph_%u type[P] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, - op_desc->GetName().c_str(), i, p2p_mem_addr); - continue; - } - GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), GELOGW("offsets=%zu, inputs=%zu, 
index=%zu.", v_input_offset.size(), inputs_size, non_const_index); break); @@ -362,16 +366,11 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co continue); // feature maps - void *mem_addr = nullptr; - if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion + uint8_t *mem_addr = nullptr; + // fusion + if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { mem_addr = reinterpret_cast(reinterpret_cast(input_offset)); v_input_data_addr.push_back(mem_addr); - } else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { - int64_t tensor_size = 0; - GE_CHK_STATUS_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size), return {}); - VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset); - mem_addr = model_param.ts_mem_mall->Acquire(input_offset, static_cast(tensor_size)); - v_input_data_addr.push_back(mem_addr); } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset); mem_addr = model_param.mem_base + input_offset; @@ -415,33 +414,12 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); continue); - const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); - if (tensor_desc == nullptr) { - GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); - continue; - } - int64_t mem_type; - bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); - if (tensor_has_mem_type && v_memory_type[i] != RT_MEMORY_L1) { - uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_output_offset[i]; - v_output_data_addr.push_back(p2p_mem_addr); - GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[P] name[%s] output[%zu] memaddr[%p]", model_param.graph_id, - op_desc->GetName().c_str(), i, p2p_mem_addr); - continue; - } // feature maps - void 
*mem_addr = nullptr; - if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion + uint8_t *mem_addr = nullptr; + // fusion + if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { mem_addr = reinterpret_cast(reinterpret_cast(v_output_offset[i])); v_output_data_addr.push_back(mem_addr); - } else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { - const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); - GE_CHECK_NOTNULL_EXEC(tensor_desc, return {}); - int64_t tensor_size = 0; - GE_CHK_STATUS_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size), return {}); - VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]); - mem_addr = model_param.ts_mem_mall->Acquire(v_output_offset[i], static_cast(tensor_size)); - v_output_data_addr.push_back(mem_addr); } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]); mem_addr = static_cast(model_param.mem_base + v_output_offset[i]); @@ -469,38 +447,9 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param v_workspace_bytes.size()); return v_workspace_data_addr; } - - vector workspace_reuse_flag; - bool has_workspace_reuse = ge::AttrUtils::GetListBool(op_desc, "workspace_reuse_flag", workspace_reuse_flag); vector v_memory_type; - vector workspace_memory_type; bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type); - bool has_mem_type_workspace = - ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_memory_type); for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { - // Temporary solution, the aicpu workspace of multiple images cannot be shared. 
- if (has_workspace_reuse && i < workspace_reuse_flag.size() - && !workspace_reuse_flag[i] && !model_param.is_single_op) { - void *mem_addr = model_param.aicpu_mem_mall->Acquire(v_workspace_offset[i], v_workspace_bytes[i]); - v_workspace_data_addr.push_back(mem_addr); - GELOGI( - "[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] aicpu workspace[%zu] offset[%ld] bytes[%ld] " - "memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], mem_addr); - continue; - } else if (has_mem_type_workspace && workspace_memory_type[i] == RT_MEMORY_P2P_DDR) { - int64_t p2p_workspace_offset = v_workspace_offset[i]; - int64_t p2p_workspace_bytes = v_workspace_bytes[i]; - uint8_t *p2p_mem_addr = p2p_workspace_bytes == 0 - ? nullptr - : model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + p2p_workspace_offset; - v_workspace_data_addr.push_back(p2p_mem_addr); - GELOGI( - "[IMAS]GetWorkspaceDataAddrs graph_%u type[P] name[%s] p2p workspace[%zu] offset[%ld] bytes[%ld] " - "memaddr[%p]", - model_param.graph_id, op_desc->GetName().c_str(), i, p2p_workspace_offset, p2p_workspace_bytes, p2p_mem_addr); - continue; - } if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { v_workspace_data_addr.push_back(reinterpret_cast(reinterpret_cast(v_workspace_offset[i]))); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[L1] name[%s], mem_addr[workspace index %zu]:0x%lx", diff --git a/ge/graph/load/new_model_manager/model_utils.h b/ge/graph/load/new_model_manager/model_utils.h index 4b3d7ae7..8474a987 100755 --- a/ge/graph/load/new_model_manager/model_utils.h +++ b/ge/graph/load/new_model_manager/model_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc index b8b02f59..39f0591d 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h index 614544f9..82e228e6 100644 --- a/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/end_graph_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc b/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc index 772078c6..f742118c 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/event_record_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h b/ge/graph/load/new_model_manager/task_info/event_record_task_info.h index d3f5961e..04ee1779 100755 --- a/ge/graph/load/new_model_manager/task_info/event_record_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/event_record_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc index b6d8f04c..e8f96b35 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h index a92252d7..f9da30b8 100755 --- a/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/event_wait_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc index 32c79647..9b1ea04a 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h index b1897533..7f575639 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/fusion_start_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc index dd4edfd0..7acbb5b3 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h index 880ca487..66248e9f 100755 --- a/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/fusion_stop_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index 6679c980..11eaaca9 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -168,7 +168,7 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode for (int64_t i = 0; i < stream_num; ++i) { rtStream_t stream = nullptr; rtError_t rt_ret = - rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); + rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -319,8 +319,8 @@ void HcclTaskInfo::GetPrivateDefByTaskDef(const domi::TaskDef &task) { return; } - ret = rtMemcpy(private_def_, private_def_len_, task.private_def().c_str(), private_def_len_, - RT_MEMCPY_HOST_TO_HOST); + ret = + rtMemcpy(private_def_, private_def_len_, task.private_def().c_str(), private_def_len_, RT_MEMCPY_HOST_TO_HOST); if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rtMemcpy Fail, ret = 0x%X.", ret); return; diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h b/ge/graph/load/new_model_manager/task_info/hccl_task_info.h index f7ce3468..d8456834 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc index 2a295915..4f72ec36 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,7 +61,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return FAILED; } errno_t sec_ret = - memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), kernel_ex_def.args_size()); + memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), kernel_ex_def.args_size()); if (sec_ret != EOK) { GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); return FAILED; @@ -110,7 +110,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin if (davinci_model_->IsKnownNode()) { void *input_output_addr = davinci_model_->GetCurrentArgsAddr(args_offset_); fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = - static_cast(reinterpret_cast(input_output_addr)); + static_cast(reinterpret_cast(input_output_addr)); void *workspace_base_addr = nullptr; rtError_t rt_ret = rtMalloc(&workspace_base_addr, kernel_ex_def.task_info_size(), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: Ox%X", rt_ret); @@ -118,7 +118,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin rt_ret = rtMemcpy(workspace_base_addr, kernel_ex_def.task_info_size(), kernel_ex_def.task_info().data(), kernel_ex_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE); fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = - 
static_cast(reinterpret_cast(workspace_base_addr)); + static_cast(reinterpret_cast(workspace_base_addr)); fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr; fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size(); fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(ext_info_addr_); @@ -171,10 +171,6 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin dump_flag_ = RT_KERNEL_DUMPFLAG; dump_args_ = input_output_addr_; } - if (davinci_model_->GetOpDugReg()) { - GELOGI("Op debug is open in kernel ex task info"); - dump_args_ = input_output_addr_; - } } uint64_t input_output_addr = static_cast(reinterpret_cast(input_output_addr_)); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h index e4d3e6fd..b26a95ac 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -54,9 +54,8 @@ class KernelExTaskInfo : public TaskInfo { auto ret = reinterpret_cast(dump_args_); return ret; } - bool CallSaveDumpInfo() override { - return true; - }; + bool CallSaveDumpInfo() override { return true; }; + private: Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 9154edf9..3964e0d5 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,7 +46,7 @@ const uint32_t kAddrLen = sizeof(void *); namespace ge { KernelTaskInfo::SuperKernelTaskInfo KernelTaskInfo::skt_info_ = { - 0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, kInvalidGroupKey, 0, nullptr}; + 0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, kInvalidGroupKey, 0, nullptr}; Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { GE_CHECK_NOTNULL(davinci_model); @@ -380,9 +380,9 @@ Status KernelTaskInfo::Distribute() { } else { /* default: not skt launch */ GELOGI( - "KernelTaskInfo Distribute Start, sktenable:%d taskid:%u sktid:%u last_sktid:%u stubfunc_name:%s " - "stubfunc:%p blockdim:%u stream:%p", - call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); + "KernelTaskInfo Distribute Start, sktenable:%d taskid:%u sktid:%u last_sktid:%u stubfunc_name:%s " + "stubfunc:%p blockdim:%u stream:%p", + call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); // 
l1 fusion enable and env flag open (kCloseSkt for skt debug) bool open_dump = false; auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel(); @@ -407,9 +407,9 @@ Status KernelTaskInfo::Distribute() { // set for task_id_ UpdateTaskId(); GELOGI( - "KernelTaskInfo Distribute Success. sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p " - "blockdim:%d stream:%p", - call_skt, task_id_, skt_id_, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); + "KernelTaskInfo Distribute Success. sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p " + "blockdim:%d stream:%p", + call_skt, task_id_, skt_id_, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); return SUCCESS; } @@ -638,9 +638,6 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne dump_args_ = static_cast(args_) + offset; } - GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast(args_) + offset, - "Op debug is open in TVM task info"); - Status ge_ret = UpdateL2Data(kernel_def); // update origin l2 data if (ge_ret != SUCCESS) { @@ -747,15 +744,15 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel } } *(reinterpret_cast(args + ctx_.argsOffset[0])) = - reinterpret_cast(reinterpret_cast(custom_info_.input_descs)); // arg 0 + reinterpret_cast(reinterpret_cast(custom_info_.input_descs)); // arg 0 *(reinterpret_cast(args + ctx_.argsOffset[1])) = - reinterpret_cast(reinterpret_cast(custom_info_.input_addrs)); // arg 1 + reinterpret_cast(reinterpret_cast(custom_info_.input_addrs)); // arg 1 *(reinterpret_cast(args + ctx_.argsOffset[2])) = - reinterpret_cast(reinterpret_cast(custom_info_.output_descs)); // arg 2 + reinterpret_cast(reinterpret_cast(custom_info_.output_descs)); // arg 2 *(reinterpret_cast(args + ctx_.argsOffset[3])) = - reinterpret_cast(reinterpret_cast(custom_info_.output_addrs)); // arg 3 + reinterpret_cast(reinterpret_cast(custom_info_.output_addrs)); // arg 3 
*(reinterpret_cast(args + ctx_.argsOffset[4])) = - reinterpret_cast(reinterpret_cast(custom_info_.attr_handle)); // arg 4 + reinterpret_cast(reinterpret_cast(custom_info_.attr_handle)); // arg 4 rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { @@ -763,8 +760,8 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel return RT_ERROR_TO_GE_STATUS(rt_ret); } - rt_ret = rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), - RT_MEMCPY_HOST_TO_DEVICE); + rt_ret = + rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -834,8 +831,8 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) { } GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "cce task physical memory.", kernel_def.args_size()) - rt_ret = rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), - RT_MEMCPY_HOST_TO_DEVICE); + rt_ret = + rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -939,10 +936,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } dump_args_ = static_cast(args_) + sizeof(aicpu::AicpuParamHead); } - if (davinci_model_->GetOpDugReg()) { - GELOGI("Op debug is open in aicpu task info"); - dump_args_ = static_cast(args_) + sizeof(aicpu::AicpuParamHead); - } if (kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; } @@ -1092,11 +1085,11 @@ Status KernelTaskInfo::UpdateCceArgs(std::string &sm_desc, std::string &flowtabl uint64_t data_base_addr = reinterpret_cast(davinci_model_->MemBase()) - 
davinci_model_->GetRtBaseAddr(); uint64_t weight_base_addr = - reinterpret_cast(davinci_model_->WeightsMemBase()) - davinci_model_->GetRtWeightAddr(); + reinterpret_cast(davinci_model_->WeightsMemBase()) - davinci_model_->GetRtWeightAddr(); uint64_t var_base_addr = reinterpret_cast(davinci_model_->VarMemBase()) - davinci_model_->GetRtVarAddr(); Status status = - CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def); + CceUpdateKernelArgs(context, data_base_addr, weight_base_addr, var_base_addr, sm_desc, flowtable, kernel_def); if (status != SUCCESS) { GELOGE(status, "Call cce api failed"); return status; @@ -1187,8 +1180,8 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe } *(reinterpret_cast( - args + (reinterpret_cast(const_cast(context.args_offset().data())))[0])) = - reinterpret_cast(reinterpret_cast(flowtable_)); + args + (reinterpret_cast(const_cast(context.args_offset().data())))[0])) = + reinterpret_cast(reinterpret_cast(flowtable_)); } return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index f2945b0b..8ada2082 100644 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -90,7 +90,7 @@ class KernelTaskInfo : public TaskInfo { uint32_t GetSktTaskID() override { return skt_id_; } - bool CallSaveDumpInfo() override { return call_save_dump_; }; + bool CallSaveDumpInfo() override { return call_save_dump_; }; cce::ccOpContext ctx_; FusionOpInfo fusion_op_info_; @@ -114,9 +114,9 @@ class KernelTaskInfo : public TaskInfo { Status SetContext(const domi::KernelDef &kernel_def); Status UpdateCceArgs(std::string &sm_desc, std::string &flowtable, const domi::KernelDef &kernel_def); - Status CceUpdateKernelArgs(const domi::KernelContext &context, uint64_t &data_base_addr, - uint64_t &weight_base_addr, uint64_t &var_base_addr, std::string &sm_desc, - std::string &flowtable, const domi::KernelDef &kernel_def); + Status CceUpdateKernelArgs(const domi::KernelContext &context, uint64_t &data_base_addr, uint64_t &weight_base_addr, + uint64_t &var_base_addr, std::string &sm_desc, std::string &flowtable, + const domi::KernelDef &kernel_def); Status SetFlowtable(std::string &flowtable, const domi::KernelDef &kernel_def); diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc index 393c0b31..75f6c121 100755 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,8 +38,8 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da uint32_t label_index = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, label_index)) { - GELOGE(INTERNAL_ERROR, "LabelGotoExTaskInfo: %s attr [%s] not exist.", - op_desc->GetName().c_str(), ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "LabelGotoExTaskInfo: %s attr [%s] not exist.", op_desc->GetName().c_str(), + ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); return INTERNAL_ERROR; } diff --git a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h index f83cd1d9..c8a695c9 100755 --- a/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/label_goto_ex_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc b/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc index 5fa96a96..de6a1d65 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/label_set_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,8 +38,8 @@ Status LabelSetTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin uint32_t label_index = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, label_index)) { - GELOGE(INTERNAL_ERROR, "LabelSetTaskInfo: %s attr [%s] not exist.", - op_desc->GetName().c_str(), ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "LabelSetTaskInfo: %s attr [%s] not exist.", op_desc->GetName().c_str(), + ATTR_NAME_LABEL_SWITCH_INDEX.c_str()); return INTERNAL_ERROR; } diff --git a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h b/ge/graph/load/new_model_manager/task_info/label_set_task_info.h index bb02ccf0..c68ffb98 100644 --- a/ge/graph/load/new_model_manager/task_info/label_set_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/label_set_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc index f26c19a6..efefd3e2 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -94,10 +94,8 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo label_list_[idx] = label_list[label_id]; } - rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? 
RT_MEMORY_TS_4G : RT_MEMORY_HBM; - GELOGI("memory_type: %u", memory_type); args_size_ = branch_max_ * sizeof(rtLabelDevInfo); - rtError_t rt_ret = rtMalloc(&args_, args_size_, memory_type); + rtError_t rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h index 538b2d68..4cb39c95 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc index b95705f0..1f542154 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,7 +26,10 @@ const uint32_t kAlignBytes = 64; namespace ge { Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { GELOGI("MemcpyAddrAsyncTaskInfo Init Start"); - GE_CHECK_NOTNULL(davinci_model); + if (davinci_model == nullptr) { + GELOGE(PARAM_INVALID, "davinci_model is null"); + return PARAM_INVALID; + } Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); if (ret != SUCCESS) { @@ -40,13 +43,12 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel return INTERNAL_ERROR; } - const RuntimeParam &rts_param = davinci_model->GetRuntimeParam(); - ret = ModelUtils::GetRtAddress(rts_param, memcpy_async.src(), src_); + ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.src(), src_); if (ret != SUCCESS) { return ret; } - ret = ModelUtils::GetRtAddress(rts_param, memcpy_async.dst(), dst_); + ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_); if (ret != SUCCESS) { return ret; } @@ -57,7 +59,10 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel // malloc args memory size_t args_size = sizeof(void *) * io_addrs.size(); - rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? 
RT_MEMORY_TS_4G : RT_MEMORY_HBM; + rtMemType_t memory_type = RT_MEMORY_HBM; + if (op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE)) { + memory_type = RT_MEMORY_TS_4G; + } GELOGI("memory_type: %u", memory_type); rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, memory_type); if (rt_ret != RT_ERROR_NONE) { diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h index c7645b9f..90aad9b7 100644 --- a/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc index 51e822e2..96247e7d 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,25 +22,27 @@ namespace ge { Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { GELOGI("MemcpyAsyncTaskInfo Init Start"); - GE_CHECK_NOTNULL(davinci_model); - davinci_model_ = davinci_model; + if (davinci_model == nullptr) { + GELOGE(PARAM_INVALID, "davinci_model is null"); + return PARAM_INVALID; + } - Status ret = SetStream(task_def.stream_id(), davinci_model_->GetStreamList()); + Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); if (ret != SUCCESS) { return ret; } - memcpy_async_ = task_def.memcpy_async(); - count_ = memcpy_async_.count(); - kind_ = memcpy_async_.kind(); - dst_max_ = memcpy_async_.dst_max(); - OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async_.op_index()); + memcpy_async = task_def.memcpy_async(); + count_ = memcpy_async.count(); + kind_ = memcpy_async.kind(); + dst_max_ = memcpy_async.dst_max(); + OpDescPtr op_desc = davinci_model->GetOpByIndex(memcpy_async.op_index()); if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async_.op_index()); + GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async.op_index()); return INTERNAL_ERROR; } - if (davinci_model_->IsKnownNode()) { + if (davinci_model->IsKnownNode()) { src_ = reinterpret_cast(davinci_model_->GetCurrentArgsAddr(args_offset_)); dst_ = reinterpret_cast(reinterpret_cast(src_) + sizeof(void *)); // for zero copy @@ -48,34 +50,29 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_); return SUCCESS; } - - const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); - ret = ModelUtils::GetRtAddress(rts_param, memcpy_async_.src(), src_); + ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.src(), src_); if (ret != SUCCESS) { return ret; } // dst_ needs different address for different chips - vector 
memory_type_list; - (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memory_type_list); - if (!memory_type_list.empty() && memory_type_list[0] == RT_MEMORY_TS_4G) { // TS Feature, Just one. - uint64_t mem_offset = memcpy_async_.dst() - rts_param.logic_mem_base; - dst_ = static_cast(rts_param.ts_mem_mall->Acquire(mem_offset, memcpy_async_.dst_max())); - if (dst_ == nullptr) { - return FAILED; + if (op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE)) { + ret = AllocTsMemoryForMemcpy(op_desc, davinci_model); + if (ret != SUCCESS) { + return ret; } } else { - ret = ModelUtils::GetRtAddress(rts_param, memcpy_async_.dst(), dst_); + ret = ModelUtils::GetRtAddress(davinci_model->GetRuntimeParam(), memcpy_async.dst(), dst_); if (ret != SUCCESS) { return ret; } } GELOGI("MemcpyAsyncTaskInfo Init Success, logic[0x%lx, 0x%lx], src:%p, dst:%p, max:%lu, count:%lu", - memcpy_async_.src(), memcpy_async_.dst(), src_, dst_, dst_max_, count_); + memcpy_async.src(), memcpy_async.dst(), src_, dst_, dst_max_, count_); - davinci_model_->DisableZeroCopy(src_); - davinci_model_->DisableZeroCopy(dst_); + davinci_model->DisableZeroCopy(src_); + davinci_model->DisableZeroCopy(dst_); return SUCCESS; } @@ -105,12 +102,12 @@ Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davinci Status MemcpyAsyncTaskInfo::UpdateArgs() { GELOGI("MemcpyAsyncTaskInfo::UpdateArgs in."); GE_CHECK_NOTNULL(davinci_model_); - Status ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async_.src(), src_); + Status ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async.src(), src_); if (ret != SUCCESS) { return ret; } - ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async_.dst(), dst_); + ret = ModelUtils::GetRtAddress(davinci_model_->GetRuntimeParam(), memcpy_async.dst(), dst_); if (ret != SUCCESS) { return ret; } @@ -125,5 +122,33 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { return SUCCESS; } +Status 
MemcpyAsyncTaskInfo::AllocTsMemoryForMemcpy(const OpDescPtr &op_desc, DavinciModel *davinci_model) { + int64_t size = 0; + auto tensor_desc = op_desc->GetOutputDescPtr(0); + if ((tensor_desc == nullptr) || (TensorUtils::GetTensorSizeInBytes(*tensor_desc, size) != GRAPH_SUCCESS)) { + GELOGE(FAILED, "GetTensorSizeInBytes failed!"); + return FAILED; + } + + rtError_t rt_ret = rtMalloc(&memory_4g_, size, RT_MEMORY_TS_4G); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtMalloc failed, ret: 0x%X", rt_ret); + return FAILED; + } + + // map save the opdesc's offset and special address, for update the streamSwitchN's input address + std::map memcpy_4g_offset_addr; + vector offsets = op_desc->GetOutputOffset(); + if (offsets.empty()) { + GELOGE(FAILED, "GetOutputOffset failed!"); + return FAILED; + } + memcpy_4g_offset_addr.insert(std::pair(offsets[0], memory_4g_)); + davinci_model->SetMemcpyOffsetAndAddr(memcpy_4g_offset_addr); + + dst_ = reinterpret_cast(memory_4g_); + return SUCCESS; +} + REGISTER_TASK_INFO(RT_MODEL_TASK_MEMCPY_ASYNC, MemcpyAsyncTaskInfo); } // namespace ge diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h index 320e7fbc..9436529d 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,11 +23,19 @@ namespace ge { class MemcpyAsyncTaskInfo : public TaskInfo { public: - MemcpyAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), count_(0), kind_(RT_MEMCPY_RESERVED) {} + MemcpyAsyncTaskInfo() : dst_(nullptr), dst_max_(0), src_(nullptr), count_(0), kind_(0), memory_4g_(nullptr) {} ~MemcpyAsyncTaskInfo() override { src_ = nullptr; dst_ = nullptr; + + if (memory_4g_ != nullptr) { + rtError_t ret = rtFree(memory_4g_); + if (ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret); + } + memory_4g_ = nullptr; + } } Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; @@ -39,6 +47,7 @@ class MemcpyAsyncTaskInfo : public TaskInfo { Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; private: + Status AllocTsMemoryForMemcpy(const OpDescPtr &op_desc, DavinciModel *davinci_model); uint8_t *dst_; uint64_t dst_max_; uint8_t *src_; @@ -46,7 +55,8 @@ class MemcpyAsyncTaskInfo : public TaskInfo { uint32_t kind_; DavinciModel *davinci_model_ = nullptr; uint32_t args_offset_ = 0; - domi::MemcpyAsyncDef memcpy_async_; + domi::MemcpyAsyncDef memcpy_async; + void *memory_4g_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_ diff --git a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc b/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc deleted file mode 100644 index ff8057aa..00000000 --- a/ge/graph/load/new_model_manager/task_info/model_exit_task_info.cc +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "graph/load/new_model_manager/task_info/model_exit_task_info.h" - -#include "common/properties_manager.h" -#include "framework/common/debug/ge_log.h" -#include "graph/load/new_model_manager/davinci_model.h" - -namespace ge { -Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { - GELOGI("InitModelExitTaskInfo Init Start."); - if (davinci_model == nullptr) { - GELOGE(PARAM_INVALID, "davinci_model is null!"); - return PARAM_INVALID; - } - - Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList()); - if (ret != SUCCESS) { - GELOGE(ret, "SetStream fail, stream_id:%u", task_def.stream_id()); - return ret; - } - - model_ = davinci_model->GetRtModelHandle(); - GELOGI("InitModelExitTaskInfo Init Success, model:%p, stream:%p", model_, stream_); - return SUCCESS; -} - -Status ModelExitTaskInfo::Distribute() { - GELOGI("ModelExitTaskInfo Distribute Start."); - rtError_t rt_ret = rtModelExit(model_, stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtModelExit failed, ret: 0x%x", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); - } - GELOGI("ModelExitTaskInfo Distribute Success."); - return SUCCESS; -} - -REGISTER_TASK_INFO(RT_MODEL_TASK_MODEL_EXIT, ModelExitTaskInfo); -} // namespace ge diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc index 533c459a..fd5f4f4c 100755 --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc +++ 
b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h index 8989096d..ab07eb22 100755 --- a/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/profiler_trace_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc index 33ebea3b..f48f64e3 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -63,8 +63,8 @@ Status StreamActiveTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d active_stream_ = davinci_model->GetStreamList()[active_stream_index_list[internal_index]]; active_stream_id_ = stream_active_def.active_stream_id(); - GELOGI("InitStreamActiveTaskInfo Init Success, index:%u, activeStream:%p, activeStreamID:%u.", - internal_index, active_stream_, active_stream_id_); + GELOGI("InitStreamActiveTaskInfo Init Success, index:%u, activeStream:%p, activeStreamID:%u.", internal_index, + active_stream_, active_stream_id_); return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h index c6b263b4..a75e616e 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_active_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc index 616ba85f..45db2be5 100644 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -56,8 +56,8 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d size_t input_size = op_desc->GetInputsSize(); if (input_data_addr.size() != STREAM_SWITCH_INPUT_NUM || input_size != STREAM_SWITCH_INPUT_NUM) { - GELOGE(INTERNAL_ERROR, "Input num should be %u. inputAddr size:%zu, inputDesc size:%zu.", - STREAM_SWITCH_INPUT_NUM, input_data_addr.size(), input_size); + GELOGE(INTERNAL_ERROR, "Input num should be %u. inputAddr size:%zu, inputDesc size:%zu.", STREAM_SWITCH_INPUT_NUM, + input_data_addr.size(), input_size); return INTERNAL_ERROR; } @@ -93,8 +93,8 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d data_type_ = static_cast(data_type); } - GELOGI("InitStreamSwitchTaskInfo Init Success, cond:%d, trueStream:%p, trueStreamID:%u, datatype:%d.", - cond_, true_stream_, true_stream_id_, data_type_); + GELOGI("InitStreamSwitchTaskInfo Init Success, cond:%d, trueStream:%p, trueStreamID:%u, datatype:%d.", cond_, + true_stream_, true_stream_id_, data_type_); return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h index 89642cf8..e6e8339a 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,6 +40,7 @@ class StreamSwitchTaskInfo : public TaskInfo { Status Distribute() override; Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; + private: void SetInputAndValuePtr(DavinciModel *davinci_model, const vector &input_data_addrs); void *input_ptr_; diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc index 27adbbe4..d95aefac 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h" #include #include "framework/common/debug/ge_log.h" @@ -82,7 +83,7 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel * Status StreamSwitchNTaskInfo::Distribute() { GELOGI("StreamSwitchNTaskInfo Distribute Start."); rtError_t rt_ret = - rtStreamSwitchN(input_ptr_, input_size_, value_ptr_, true_stream_ptr_, element_size_, stream_, data_type_); + rtStreamSwitchN(input_ptr_, input_size_, value_ptr_, true_stream_ptr_, element_size_, stream_, data_type_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -147,37 +148,38 @@ Status StreamSwitchNTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davin int64_t tensor_size = 0; GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); davinci_model->SetTotalFixedAddrsSize(input_tensor_name, tensor_size); - GELOGI("Calculate stream switchn task args, tensor_size %ld, args_offset %ld", tensor_size, args_offset_); + GELOGI("Calculate stream switchn task args , tensor_size %ld, args_offset %ld", tensor_size, args_offset_); return SUCCESS; } Status StreamSwitchNTaskInfo::InputPtrUpdate(const OpDescPtr &op_desc, DavinciModel *davinci_model) { - // dst_ needs different address for different chips - vector memory_type_list; - (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type_list); - if (!memory_type_list.empty() && memory_type_list[0] == RT_MEMORY_TS_4G) { // TS Feature, Just one. 
- const vector input_offset = op_desc->GetInputOffset(); - const vector input_legnth = ModelUtils::GetInputSize(op_desc); - if (input_offset.empty() || input_legnth.empty()) { - GELOGE(FAILED, "input offset size %zu, input legnth size: %zu", input_offset.size(), input_legnth.size()); - return FAILED; - } - const RuntimeParam &rts_param = davinci_model->GetRuntimeParam(); - input_ptr_ = rts_param.ts_mem_mall->Acquire(input_offset[0], input_legnth[0]); - } else { + bool is_4g_mem = false; + const map memcpy_4g_offset_addr = davinci_model->GetMemcpyOffsetAndAddr(); + vector input_offset = op_desc->GetInputOffset(); + if (input_offset.empty()) { + GELOGE(FAILED, "Get StreamSwitchN's input offset failed."); + return FAILED; + } + + auto iter = memcpy_4g_offset_addr.find(input_offset[0]); + if (iter != memcpy_4g_offset_addr.end()) { + input_ptr_ = iter->second; + is_4g_mem = true; + } + + if (is_4g_mem == false) { if (davinci_model->IsKnownNode()) { input_ptr_ = davinci_model->GetCurrentFixedAddr(args_offset_); } else { auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc); if (input_data_addr.empty()) { - GELOGE(FAILED, "input data addr is empty"); return FAILED; } input_ptr_ = input_data_addr[0]; } } - GELOGI("StreamSwitchN's input_ptr is %p", input_ptr_); + GELOGI("StreamSwitchN's input_ptr is %p, is_4g_mem: %d", input_ptr_, is_4g_mem); return SUCCESS; } REGISTER_TASK_INFO(RT_MODEL_TASK_STREAM_SWITCH_N, StreamSwitchNTaskInfo); diff --git a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h index 3d65a086..5a73eb1a 100755 --- a/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/stream_switchn_task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 
(the "License"); * you may not use this file except in compliance with the License. @@ -53,5 +53,5 @@ class StreamSwitchNTaskInfo : public TaskInfo { vector value_list_; int64_t args_offset_; }; -} +} // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_ diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index 09ed7458..100a4fea 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,8 +26,8 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { reinterpret_cast(reinterpret_cast(this->GetNavTableSize()))}; rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return - RT_ERROR_TO_GE_STATUS(rt_ret);) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. 
error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h index 9c94d1a9..b7e76af0 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc index d237d56c..ca42b4e2 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -112,8 +112,8 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. 
error: 0x%X", rt_ret); GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) // Create the necessary metadata for the super kernel - h = std::unique_ptr( - new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); + h = + std::unique_ptr(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim)); return SUCCESS; } } // namespace skt diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h index efd61ef7..7db44eec 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ class SuperKernelFactory { void *handle_ = nullptr; std::string sk_stub_name_ = "_Z21super_kernel_templatePmm"; bool is_init_ = false; - SuperKernelFactory() {}; + SuperKernelFactory(){}; ~SuperKernelFactory() { if (handle_ != nullptr) { GELOGI("SKT: SKT LIB PATH release."); diff --git a/ge/graph/load/new_model_manager/task_info/task_info.cc b/ge/graph/load/new_model_manager/task_info/task_info.cc index 674d477f..01bf0690 100755 --- a/ge/graph/load/new_model_manager/task_info/task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/task_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/new_model_manager/task_info/task_info.h index d296d29e..f69511e6 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info.h +++ b/ge/graph/load/new_model_manager/task_info/task_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,24 +22,10 @@ #include "cce/customize.h" #include "cce/taskdown_common.hpp" #include "framework/common/ge_inner_error_codes.h" -#include "graph/load/new_model_manager/ts_mem_mall.h" #include "graph/load/new_model_manager/task_info/task_info_factory.h" #include "proto/task.pb.h" - namespace ge { -struct MemInfo { - uint64_t memory_size = 0; - uint64_t logic_memory_base = 0; - uint8_t *memory_base = nullptr; -}; - struct RuntimeParam { - RuntimeParam() { - ts_mem_mall = std::unique_ptr(new (std::nothrow) TsMemMall()); - aicpu_mem_mall = std::unique_ptr(new (std::nothrow) TsMemMall(RT_MEMORY_HBM)); - } - ~RuntimeParam() = default; - uint64_t mem_size = 0; uint64_t logic_mem_base = 0; uint8_t *mem_base = nullptr; @@ -49,17 +35,12 @@ struct RuntimeParam { uint64_t var_size = 0; uint64_t logic_var_base = 0; uint8_t *var_base = nullptr; - std::map memory_infos; uint32_t batch_num = 0; uint32_t stream_num = 0; uint32_t event_num = 0; uint32_t label_num = 0; uint64_t session_id = 0; uint32_t graph_id = 0; - bool is_single_op = false; - - std::unique_ptr ts_mem_mall; - std::unique_ptr aicpu_mem_mall; }; typedef struct FusionOpInfo { diff --git a/ge/graph/load/new_model_manager/task_info/task_info_factory.h b/ge/graph/load/new_model_manager/task_info/task_info_factory.h index 8feef0ac..5b220960 100644 --- a/ge/graph/load/new_model_manager/task_info/task_info_factory.h +++ b/ge/graph/load/new_model_manager/task_info/task_info_factory.h @@ -1,5 +1,5 
@@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.cc b/ge/graph/load/new_model_manager/tbe_handle_store.cc index 591e88d0..5bdf4c81 100755 --- a/ge/graph/load/new_model_manager/tbe_handle_store.cc +++ b/ge/graph/load/new_model_manager/tbe_handle_store.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "tbe_handle_store.h" #include @@ -39,14 +40,9 @@ void TbeHandleInfo::used_dec(uint32_t num) { used_ -= num; } -uint32_t TbeHandleInfo::used_num() const { - return used_; -} - -void *TbeHandleInfo::handle() const { - return handle_; -} +uint32_t TbeHandleInfo::used_num() const { return used_; } +void *TbeHandleInfo::handle() const { return handle_; } TBEHandleStore &TBEHandleStore::GetInstance() { static TBEHandleStore instance; @@ -81,8 +77,7 @@ bool TBEHandleStore::FindTBEHandle(const std::string &name, void *&handle) { /// @param [in] kernel: TBE kernel bin to store. 
/// @return NA /// -void TBEHandleStore::StoreTBEHandle(const std::string &name, void *handle, - std::shared_ptr &kernel) { +void TBEHandleStore::StoreTBEHandle(const std::string &name, void *handle, std::shared_ptr &kernel) { std::lock_guard lock(mutex_); auto it = kernels_.find(name); if (it == kernels_.end()) { @@ -140,4 +135,4 @@ void TBEHandleStore::EraseTBEHandle(const std::map &names } } } -} // namespace ge +} // namespace ge diff --git a/ge/graph/load/new_model_manager/tbe_handle_store.h b/ge/graph/load/new_model_manager/tbe_handle_store.h index 6c3ad750..a8f68514 100644 --- a/ge/graph/load/new_model_manager/tbe_handle_store.h +++ b/ge/graph/load/new_model_manager/tbe_handle_store.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/load/new_model_manager/ts_mem_mall.h b/ge/graph/load/new_model_manager/ts_mem_mall.h deleted file mode 100644 index 42ad3957..00000000 --- a/ge/graph/load/new_model_manager/ts_mem_mall.h +++ /dev/null @@ -1,108 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GRAPH_LOAD_TS_MEM_MALL_H_ -#define GE_GRAPH_LOAD_TS_MEM_MALL_H_ - -#include -#include -#include - -#include "runtime/base.h" -#include "framework/common/debug/ge_log.h" - -namespace { -constexpr uint32_t kMaxTsMemBlock = 2 * 1024 * 1024; // Max block 2M -constexpr uint32_t kTsMemAligment = 64; // Malloc for 64 bits align -constexpr uint32_t kTsMemAlignMask = kTsMemAligment - 1; -} - -namespace ge { -class TsMemMall { - public: - TsMemMall() { - mem_type_ = RT_MEMORY_TS_4G; - } - TsMemMall(rtMemType_t type) { - mem_type_ = type; - } - ~TsMemMall() { - for (auto it : mem_store_size_) { - rtError_t ret = rtFree(it.second); - if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", ret); - } - } - mem_store_size_.clear(); - mem_store_addr_.clear(); - } - - void *Acquire(int64_t offset, uint64_t size) { - if (size == 0) { - GELOGE(RT_FAILED, "Acquire mem block failed, size: %lu", size); - return nullptr; - } - - uint64_t bytes = (size + kTsMemAlignMask) & ~kTsMemAlignMask; - if (bytes > kMaxTsMemBlock) { - GELOGW("Acquire TS memory may not physical continuity, size: %lu", bytes); - } - - std::lock_guard lock(mem_mutex_); - const auto it = mem_store_size_.find(offset); - if (it != mem_store_size_.end()) { - GELOGI("Acquire TS memory: %p, offset: %ld, size: %lu, align: %lu", it->second, offset, size, bytes); - return it->second; - } - - void *addr = nullptr; - rtError_t rt_ret = rtMalloc(&addr, bytes, mem_type_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); - return nullptr; - } - - GELOGI("Acquire TS memory: %p, offset: %ld, size: %lu, align: %lu", addr, offset, size, bytes); - mem_store_size_[offset] = addr; - mem_store_addr_[addr] = offset; - return addr; - } - - void Release(void *addr) { - std::lock_guard lock(mem_mutex_); - const auto it = mem_store_addr_.find(addr); - if (it == mem_store_addr_.end()) { - GELOGW("Not TS memory: %p.", addr); - return; - } - - 
GELOGI("Release TS memory: %p.", addr); - mem_store_size_.erase(it->second); - mem_store_addr_.erase(it); - rtError_t ret = rtFree(addr); - if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", ret); - } - } - - private: - std::mutex mem_mutex_; - std::unordered_map mem_store_size_; - std::unordered_map mem_store_addr_; - rtMemType_t mem_type_; -}; -} // namespace ge -#endif // GE_GRAPH_LOAD_TS_MEM_MALL_H_ diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.cc b/ge/graph/load/new_model_manager/zero_copy_offset.cc index e93a7250..18b958ef 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.cc +++ b/ge/graph/load/new_model_manager/zero_copy_offset.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,7 +67,7 @@ Status ZeroCopyOffset::InitInputDataInfo(const vector &output_size_list if (zero_copy_basic_offset_.at(index) == virtual_addr_offset) { out_count++; uint64_t out_offset = - reinterpret_cast(virtual_addr_list[kDataIndex]) + zero_copy_relative_offset_.at(index); + reinterpret_cast(virtual_addr_list[kDataIndex]) + zero_copy_relative_offset_.at(index); int64_t real_data_size = ModelUtils::GetOutputSize(op_desc).at(kDataIndex); data_info_.emplace_back(real_data_size, reinterpret_cast(reinterpret_cast(out_offset))); relative_offset_.emplace_back(zero_copy_relative_offset_.at(index)); @@ -141,7 +141,7 @@ void ZeroCopyOffset::IsL2Fusion(const vector &fusion_basic_addrs, const } void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_list, void *addr, const size_t &index, - bool fusion_flag, std::set &real_virtual_addrs) { + bool fusion_flag, std::vector &real_virtual_addrs) { GELOGI("[ZCPY] Start to SetInputOutsideAddrs for virtual_addr %p.", addr); uint32_t out_count = 0; if (!fusion_flag) { @@ 
-150,7 +150,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_l std::map> addr_mapping; addr_mapping[addr] = {}; outside_addrs_.emplace_back(addr_mapping); - real_virtual_addrs.insert(addr); + real_virtual_addrs.emplace_back(addr); } else { GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr); int64_t output_offset = output_offset_list.at(index); @@ -158,11 +158,11 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_l if (zero_copy_basic_offset_.at(i) == output_offset) { out_count++; void *virtual_addr = - reinterpret_cast(reinterpret_cast(addr) + zero_copy_relative_offset_.at(i)); + reinterpret_cast(reinterpret_cast(addr) + zero_copy_relative_offset_.at(i)); std::map> addr_mapping; addr_mapping[virtual_addr] = {}; outside_addrs_.emplace_back(addr_mapping); - real_virtual_addrs.insert(virtual_addr); + real_virtual_addrs.emplace_back(virtual_addr); GELOGI("[ZCPY] virtual_addr %p has been fusion to virtual_addr %p.", addr, virtual_addr); } } @@ -187,7 +187,7 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo if (zero_copy_basic_offset_.at(i) == input_offset) { out_count++; void *virtual_addr = - reinterpret_cast(reinterpret_cast(addr) + zero_copy_relative_offset_.at(i)); + reinterpret_cast(reinterpret_cast(addr) + zero_copy_relative_offset_.at(i)); std::map> addr_mapping; addr_mapping[virtual_addr] = {}; outside_addrs_.emplace_back(addr_mapping); diff --git a/ge/graph/load/new_model_manager/zero_copy_offset.h b/ge/graph/load/new_model_manager/zero_copy_offset.h index c662032b..eb2cdb4d 100644 --- a/ge/graph/load/new_model_manager/zero_copy_offset.h +++ b/ge/graph/load/new_model_manager/zero_copy_offset.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -45,7 +45,7 @@ class ZeroCopyOffset { Status InitInputDataInfo(const vector &output_size_list, const vector &virtual_addr_list, const OpDescPtr &op_desc, bool &fusion_flag); void SetInputOutsideAddrs(const vector &output_offset_list, void *addr, const size_t &index, - bool fusion_flag, std::set &real_virtual_addrs); + bool fusion_flag, std::vector &real_virtual_addrs); void IsL2Fusion(const vector &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); Status InitOutputDataInfo(const vector &input_size_list, const vector &virtual_addr_list, diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc index 35169726..7db9c459 100755 --- a/ge/graph/load/new_model_manager/zero_copy_task.cc +++ b/ge/graph/load/new_model_manager/zero_copy_task.cc @@ -154,8 +154,8 @@ Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) { GE_CHECK_NOTNULL(args_addr_); rtError_t rt_err = RT_ERROR_NONE; if (async_mode) { - rt_err = rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, - stream); + rt_err = + rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, stream); } else { __builtin_prefetch(args_addr_); rt_err = rtMemcpy(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE); diff --git a/ge/graph/load/new_model_manager/zero_copy_task.h b/ge/graph/load/new_model_manager/zero_copy_task.h index 57ccdbaf..c83387e8 100644 --- a/ge/graph/load/new_model_manager/zero_copy_task.h +++ b/ge/graph/load/new_model_manager/zero_copy_task.h @@ -28,8 +28,8 @@ using std::map; using std::set; -using std::vector; using std::string; +using std::vector; namespace ge { class ZeroCopyTask { @@ -83,13 +83,9 @@ class ZeroCopyTask { */ ge::Status DistributeParam(bool async_mode, rtStream_t stream); - void SetBatchLabel(const string &batch_label) { - batch_label_ = batch_label; - } + void 
SetBatchLabel(const string &batch_label) { batch_label_ = batch_label; } - const string& GetBatchLabel() const { - return batch_label_; - } + const string &GetBatchLabel() const { return batch_label_; } protected: bool CheckDynamicBatch(const map> &batch_addrs, const string &batch_label, uintptr_t addr); @@ -105,5 +101,5 @@ class ZeroCopyTask { //
map> task_addr_offset_; }; -} // namespace ge +} // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_TASK_H_ diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index dc4af753..850a73e8 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -33,8 +33,8 @@ #include "runtime/mem.h" namespace ge { -constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes -constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold +constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes +constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold constexpr size_t kKByteSize = 1024; constexpr size_t kMByteSize = 1024 * 1024; constexpr size_t kGByteSize = 1024 * 1024 * 1024; @@ -88,7 +88,6 @@ class CachingAllocator { Status Free(uint8_t *memory_addr, uint32_t device_id = 0); private: - /// /// @ingroup ge_graph /// @brief extend cache by size @@ -131,7 +130,7 @@ class CachingAllocator { /// @param [in] block ptr /// @return void /// - void FreeBlock(Block* block); + void FreeBlock(Block *block); /// /// @ingroup ge_graph diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 3f07b47e..39bdee36 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -45,7 +45,6 @@ #include "graph/manager/util/rt_context_util.h" #include "graph/partition/dynamic_shape_partition.h" #include "graph/passes/enter_pass.h" -#include "graph/partition/stage_partition.h" #include "graph/passes/addn_pass.h" #include "graph/passes/bitcast_pass.h" #include "graph/passes/atomic_addr_clean_pass.h" @@ -96,7 +95,6 @@ #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" #include "graph/passes/end_of_sequence_add_control_pass.h" #include 
"graph/passes/subexpression_migration_pass.h" -#include "graph/passes/subgraph_const_migration_pass.h" #include "graph/passes/unused_args_clean_pass.h" #include "graph/passes/global_step_insert_pass.h" #include "graph/utils/tensor_adapter.h" @@ -133,10 +131,9 @@ bool IsTailingOptimization() { } // namespace namespace ge { -GraphManager::GraphManager() - : thread_run_flag_(false), - graph_run_listener_(nullptr), - init_flag_(false) { +GraphManager::GraphManager(OmgContext &omg_context) + : thread_run_flag_(false), graph_run_listener_(nullptr), init_flag_(false), omg_context_(omg_context) { + SetLocalOmgContext(omg_context); } Status GraphManager::Initialize(const std::map &options) { @@ -165,6 +162,14 @@ Status GraphManager::Initialize(const std::map &options) { return ret; } + graph_builder_.SetOptions(options_); + ret = graph_optimize_.SetOptions(options_); + if (ret != SUCCESS) { + GELOGE(ret, "[Initialize] Graph optimize initialize failed."); + return ret; + } + graph_preparer_.SetOptions(options_); + ret = graph_context_->Initialize(options); if (ret != SUCCESS) { GELOGE(ret, "[Initialize] GraphContext initialize failed."); @@ -235,13 +240,6 @@ Status GraphManager::Finalize() { continue; } } - - // clear analyzer saved info(graph level) - auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); - GE_CHECK_NOTNULL(compute_graph); - auto session_id = compute_graph->GetSessionID(); - auto graph_id = compute_graph->GetGraphID(); - Analyzer::GetInstance()->DestroyGraphJsonObject(session_id, graph_id); } graph_map_.clear(); cache_helper_map_.clear(); @@ -260,9 +258,8 @@ Status GraphManager::Finalize() { } Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, - const std::map &options, - const OmgContext &omg_context) { - if (HasGraphNode(graph_id)) { + const std::map &options) { + if (graph_map_.find(graph_id) != graph_map_.end()) { GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u.", graph_id); 
return GE_GRAPH_GRAPH_ALREADY_EXIST; } @@ -271,10 +268,10 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, if (compute_graph != nullptr) { compute_graph->SetGraphID(graph_id); bool graph_has_been_added = false; - if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added) - && graph_has_been_added) { - GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, - "[GraphManager] same graph object can not be added again, graph_id = %u.", graph_id); + if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added) && + graph_has_been_added) { + GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] same graph object can not be added again, graph_id = %u.", + graph_id); return GE_GRAPH_GRAPH_ALREADY_EXIST; } (void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true); @@ -307,34 +304,19 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, graph_node->SetGraph(graph_ptr); graph_node->SetOptions(options); - AddGraphNode(graph_id, graph_node); - AddLocalOmgContext(graph_id, omg_context); - if (!options_.output_datatype.empty()) { - GetLocalOmgContext().output_type = options_.output_datatype; - } + graph_map_.insert(std::make_pair(graph_id, graph_node)); - CompilerStages &stages = GetCompilerStages(graph_id); - stages.preparer.SetOptions(options_); - Status status = stages.optimizer.SetOptions(options_); - if (status != SUCCESS) { - GELOGE(status, "Graph optimizer set options failed."); - return status; - } - stages.builder.SetOptions(options_); + GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id); var_acc_ctrl_.AddGraph(graph_id, compute_graph); - - GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id); return SUCCESS; } -Status GraphManager::MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::ComputeGraphPtr &original_compute_graph, - GraphId root_graph_id) { +Status GraphManager::MergeSubGraph(ComputeGraphPtr &compute_graph, 
const ge::ComputeGraphPtr &original_compute_graph) { std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - GraphPartitioner &partitioner = GetCompilerStages(root_graph_id).partitioner; if (instance_ptr != nullptr && instance_ptr->InitFlag()) { - Status ret = partitioner.MergeAfterSubGraphOptimization(compute_graph, original_compute_graph); + Status ret = graph_partitioner_.MergeAfterSubGraphOptimization(compute_graph, original_compute_graph); if (ret != SUCCESS) { GELOGE(ret, "merge end and placeholder after subGraph optimization failed."); return FAILED; @@ -346,7 +328,7 @@ Status GraphManager::MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::Com return ret_topo; } } else { - auto subgraph_list = partitioner.GetSubGraphMap(); + auto subgraph_list = graph_partitioner_.GetSubGraphMap(); if (subgraph_list.find(original_compute_graph) != subgraph_list.end() && !subgraph_list[original_compute_graph].empty() && subgraph_list[original_compute_graph][0] != nullptr) { compute_graph = subgraph_list[original_compute_graph][0]->GetSubGraph(); @@ -392,8 +374,7 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr } Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, - Graph2SubGraphInfoList &sub_graph_map, - uint64_t session_id) { + Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id) { GE_CHECK_NOTNULL(compute_graph); // use default 16 multi thread const uint32_t thread_num = 16; @@ -405,10 +386,10 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr GELOGI("OptimizeSubGraphWithMultiThreads Process op_compile_strategy:%s", op_compile_strategy.c_str()); for (const auto &subgraph : root_subgraph_list) { if (!op_compile_strategy.empty()) { - (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); + (void)AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } - std::future f = 
executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, - compute_graph->GetGraphID(), subgraph, session_id, GetThreadLocalContext()); + std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, subgraph, session_id, + GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); return FAILED; @@ -420,10 +401,9 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr auto subgraph_list = sub_graph_map[function_graph]; for (const auto &subgraph : subgraph_list) { if (!op_compile_strategy.empty()) { - (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); + (void)AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); } - std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, - compute_graph->GetGraphID(), subgraph, session_id, + std::future f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, subgraph, session_id, GetThreadLocalContext()); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); @@ -456,7 +436,7 @@ bool GraphManager::CheckAllFusionOptimizeSuccess(const ComputeGraphPtr &compute_ const auto &root_subgraph_list = sub_graph_map[compute_graph]; for (const auto &subgraph : root_subgraph_list) { bool optimize_group = true; - (void) AttrUtils::GetBool(subgraph->GetSubGraph(), ATTR_NAME_OPTIMIZE_GROUP, optimize_group); + (void)AttrUtils::GetBool(subgraph->GetSubGraph(), ATTR_NAME_OPTIMIZE_GROUP, optimize_group); if (!optimize_group) { GELOGW("Run lx optimize for subgraph:%s failed.", subgraph->GetSubGraph()->GetName().c_str()); return false; @@ -466,7 +446,7 @@ bool GraphManager::CheckAllFusionOptimizeSuccess(const ComputeGraphPtr &compute_ const auto &subgraph_list = sub_graph_map[function_graph]; for (const auto &subgraph : subgraph_list) { bool optimize_group = true; - (void) AttrUtils::GetBool(subgraph->GetSubGraph(), 
ATTR_NAME_OPTIMIZE_GROUP, optimize_group); + (void)AttrUtils::GetBool(subgraph->GetSubGraph(), ATTR_NAME_OPTIMIZE_GROUP, optimize_group); if (!optimize_group) { GELOGW("Run lx optimize for subgraph:%s failed.", subgraph->GetSubGraph()->GetName().c_str()); return false; @@ -506,9 +486,9 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_ return SUCCESS; } -Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner) { +Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph) { GE_CHECK_NOTNULL(compute_graph); - auto sub_graph_map = partitioner.GetSubGraphMap(); + auto sub_graph_map = graph_partitioner_.GetSubGraphMap(); std::string buffer_optimize; graphStatus graph_status = ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); bool need_lx_fusion = (graph_status == GRAPH_SUCCESS) && (buffer_optimize != kOffOptimize); @@ -557,9 +537,7 @@ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_gr /// 1. run lx buffer while build_mode is normal and buffer_optimize is empty or "off_optimize"; /// 2. run lx fusion or buffer according build_mode and build_step in fe. 
GELOGI("Directly optimize subgraph with build mode:%s, and step:%s, buffer_optimize:%s.", - options_.build_mode.c_str(), - options_.build_step.c_str(), - buffer_optimize.c_str()); + options_.build_mode.c_str(), options_.build_step.c_str(), buffer_optimize.c_str()); Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); if (ret != SUCCESS) { GELOGE(ret, "Multiply optimize subgraph with lx buffer"); @@ -580,35 +558,34 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, ge::ComputeGraphPtr &compute_graph, uint64_t session_id) { GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(compute_graph); - - CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); - GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", stages.optimizer.OptimizeOriginalGraphForQuantize, compute_graph); - GM_RUN_AND_DUMP_PERF("HandleSummaryOp", stages.optimizer.HandleSummaryOp, compute_graph); - GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, + GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", graph_optimize_.OptimizeOriginalGraphForQuantize, compute_graph); + GM_RUN_AND_DUMP_PERF("HandleSummaryOp", graph_optimize_.HandleSummaryOp, compute_graph); + GM_RUN_AND_DUMP_PERF("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, session_id); - GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", stages.optimizer.OptimizeOriginalGraph, compute_graph); + GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph); - GM_RUN_AND_DUMP_PERF("PrepareRunningFormatRefiner", stages.preparer.PrepareRunningFormatRefiner); - GM_RUN_AND_DUMP_PERF("RefineRunningFormat", stages.optimizer.OptimizeOriginalGraphJudgeInsert, compute_graph); + GM_RUN_AND_DUMP_PERF("PrepareRunningFormatRefiner", graph_preparer_.PrepareRunningFormatRefiner); + GM_RUN_AND_DUMP_PERF("RefineRunningFormat", 
graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph); GM_RUN_AND_DUMP_PERF("SubexpressionMigration", SubexpressionMigration, compute_graph); - GE_RUN(GraphManager, stages.preparer.RecordAIPPInfo, compute_graph); + GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph); if (IsTailingOptimization()) { - GM_RUN_AND_DUMP_PERF("OptimizeSwitchOp", stages.preparer.SwitchOpOptimize, compute_graph); + GM_RUN_AND_DUMP_PERF("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph); } GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph); GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed); + const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); + if (unknown_shape_skip != nullptr) { + PassManager graph_pass; + GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) + GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); + } - PassManager graph_pass; - GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) - GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); - - GE_CHK_STATUS_RET(stages.optimizer.IdentifyReference(compute_graph), "Identify reference failed."); + GE_CHK_STATUS_RET(graph_optimize_.IdentifyReference(compute_graph), "Identify reference failed."); GELOGI("PreRun:PreRunOptimizeOriginalGraph success."); return SUCCESS; } -Status GraphManager::PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, - ge::ComputeGraphPtr &compute_graph, +Status GraphManager::PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, ge::ComputeGraphPtr &compute_graph, uint64_t session_id) { GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(compute_graph); @@ -617,7 +594,7 @@ Status GraphManager::PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, // Dump graph to tuning path if (options_.build_mode == BUILD_MODE_TUNING && options_.build_step == BUILD_STEP_AFTER_UB_MATCH) { std::string tuning_path; 
- (void) GetContext().GetOption(TUNING_PATH, tuning_path); + (void)GetContext().GetOption(TUNING_PATH, tuning_path); GELOGI("Dump path:%s.", tuning_path.c_str()); GraphUtils::DumpGEGraph(compute_graph, "", true, tuning_path); } @@ -630,9 +607,7 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(compute_graph); GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); - GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", - GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, - compute_graph); + GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", graph_optimize_.OptimizeGraphBeforeBuildForRts, compute_graph); GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); GELOGI("PreRun:PreRunAfterOptimizeSubGraph success."); return SUCCESS; @@ -681,10 +656,10 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetName().c_str()); - return ret; + // BUILD_MODE_TUNING with BUILD_STEP_AFTER_MERGE no need PreRunOptimizeSubGraph. + bool run_optimize_subgraph = + !((options_.build_mode == BUILD_MODE_TUNING) && (options_.build_step == BUILD_STEP_AFTER_MERGE)); + if (run_optimize_subgraph) { + Status ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id); + if (ret != SUCCESS) { + GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); + return ret; + } } /// 1. BUILD_MODE_TUNING with BUILD_STEP_BEFORE_UB_MATCH no need PreRunAfterOptimizeSubGraph; /// 2. BUILD_MODE_TUNING with BUILD_STEP_AFTER_BUILDER no need PreRunAfterOptimizeSubGraph. /// 3. BUILD_MODE_TUNING with BUILD_STEP_AFTER_BUILDER_SUB no need PreRunAfterOptimizeSubGraph. 
- bool run_after_optimize_subgraph = !((options_.build_mode == BUILD_MODE_TUNING) && - (options_.build_step == BUILD_STEP_BEFORE_UB_MATCH || - options_.build_step == BUILD_STEP_AFTER_BUILDER || - options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)); + bool run_after_optimize_subgraph = + !((options_.build_mode == BUILD_MODE_TUNING) && + (options_.build_step == BUILD_STEP_BEFORE_UB_MATCH || options_.build_step == BUILD_STEP_AFTER_BUILDER || + options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)); if (run_after_optimize_subgraph) { Status ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id); if (ret != SUCCESS) { @@ -731,7 +711,7 @@ Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { GE_TIMESTAMP_START(SubexpressionMigrationPass); auto ret = pass_manager.Run(compute_graph); - GE_TIMESTAMP_END(SubexpressionMigrationPass, "GraphManager::SubexpressionMigration"); + GE_TIMESTAMP_END(SubexpressionMigrationPass, "GraphManager::OptimizeStage1_1"); if (ret != SUCCESS && ret != NOT_CHANGED) { GELOGE(ret, "Run SubexpressionMigrationPass failed, ret:%u.", ret); return ret; @@ -878,7 +858,6 @@ Status GraphManager::SaveCacheAfterBuild(uint32_t graph_id, ge::ComputeGraphPtr } if (instance_ptr->IsIncreBuild()) { - std::lock_guard lock(member_mutex_); auto iter = cache_helper_map_.find(graph_id); if (iter == cache_helper_map_.end()) { GELOGW("Can not find ModelCacheHelper of graph[%u]", graph_id); @@ -957,24 +936,21 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vectorSetRunFlag(true); ComputeGraphPtr compute_graph_tmp = GraphUtils::GetComputeGraph(*(graph_node->GetGraph())); - GE_IF_BOOL_EXEC(GetTrainFlag(), - GE_IF_BOOL_EXEC(compute_graph_tmp == nullptr, - GELOGE(GE_GRAPH_GRAPH_NODE_NULL, - "[RunGraph] compute_graph_tmp is NULL, graph id = %u.", graph_id); - return GE_GRAPH_GRAPH_NODE_NULL;)) + GE_IF_BOOL_EXEC( + GetTrainFlag(), + GE_IF_BOOL_EXEC(compute_graph_tmp == nullptr, + 
GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] compute_graph_tmp is NULL, graph id = %u.", graph_id); + return GE_GRAPH_GRAPH_NODE_NULL;)) // when set incre build, add cache helper map AddModelCacheHelperToMap(graph_id, session_id, compute_graph_tmp); if (options_.local_fmk_op_flag) { - GetCompilerStages(graph_id).optimizer.TranFrameOp(compute_graph_tmp); + graph_optimize_.TranFrameOp(compute_graph_tmp); } GeRootModelPtr ge_root_model = nullptr; @@ -1031,9 +1007,7 @@ Status GraphManager::GenerateInfershapeGraph(GraphId &graph_id) { return GE_GRAPH_GRAPH_NODE_NULL; } - UpdateLocalOmgContext(graph_id); - - ret = GetCompilerStages(graph_id).preparer.GenerateInfershapeGraph(graph_node->GetGraph()); + ret = graph_preparer_.GenerateInfershapeGraph(graph_node->GetGraph()); if (ret != SUCCESS) { GELOGE(ret, "ATC dump infershape json failed"); return ret; @@ -1057,14 +1031,11 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[BuildGraph] graph node is NULL, graphId = %u.", graph_id); return GE_GRAPH_GRAPH_NODE_NULL; } - - UpdateLocalOmgContext(graph_id); - auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); - GM_RUN_AND_DUMP_PERF("Prepare", GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node->GetGraph(), inputs, - compute_graph, session_id); + GM_RUN_AND_DUMP_PERF("Prepare", graph_preparer_.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, + session_id); for (auto &node : compute_graph->GetAllNodes()) { OpDescPtr op_desc = node->GetOpDesc(); @@ -1079,7 +1050,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const } OpsKernelInfoStorePtr kernel_info = - instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(op_desc->GetOpKernelLibName()); + instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(op_desc->GetOpKernelLibName()); if (kernel_info == nullptr) { GELOGE(FAILED, "Get op kernel info store 
failed"); return FAILED; @@ -1089,7 +1060,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const if (ret != SUCCESS) { GELOGE(ret, "Compile op failed, op = %s, graph_id = %u.", op_desc->GetName().c_str(), graph_id); return ret; - } + } } } @@ -1122,9 +1093,6 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vectorGetGraphId()); return GE_GRAPH_ALREADY_RUNNING; } - - UpdateLocalOmgContext(graph_id); - graph_node->SetAsync(async); // set graph's run flag graph_node->SetRunFlag(true); @@ -1169,7 +1137,6 @@ Status GraphManager::SaveParams(ge::GeModel &model, const std::string &type, con } void GraphManager::RemoveModelCacheHelper(const GraphId &graph_id) { - std::lock_guard lock(member_mutex_); auto iter = cache_helper_map_.find(graph_id); if (iter != cache_helper_map_.end()) { cache_helper_map_.erase(iter); @@ -1183,20 +1150,18 @@ bool GraphManager::CheckModelLoad(const GeRootModelPtr &ge_root_model, bool load } Status GraphManager::RemoveGraph(const GraphId &graph_id) { - GraphNodePtr graph_node = nullptr; - Status ret = GetGraphNode(graph_id, graph_node); - if (ret != SUCCESS) { + auto it = graph_map_.find(graph_id); + if (it == graph_map_.end()) { GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[GraphManager] Id %u does not exists.", graph_id); return GE_GRAPH_GRAPH_NOT_EXIST; } + GraphNodePtr graph_node = it->second; if ((graph_node == nullptr) || (graph_node->GetRunFlag())) { GELOGE(GE_GRAPH_GRAPH_IS_RUNNING, "[GraphManager] Id %u is running, can't be deleted.", graph_id); return GE_GRAPH_GRAPH_IS_RUNNING; } - - std::lock_guard lock(unload_model_mutex_); - + Status ret = SUCCESS; Status middle_ret; rtError_t rt_ret; const std::vector &all_sub_graph = graph_node->GetAllSubGraph(); @@ -1232,7 +1197,7 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) { } } var_acc_ctrl_.RemoveGraph(graph_id); - RemoveGraphNode(graph_id); + graph_map_.erase(it); RemoveModelCacheHelper(graph_id); @@ -1258,9 +1223,6 @@ Status 
GraphManager::RemoveGraph(const GraphId &graph_id) { ret = FAILED; } } - - RemoveCompilerStages(graph_id); - GE_CHK_STATUS_RET(ret, "[GraphManager:] Remove graph failed, graph_id=%u.", graph_id); GELOGI("[GraphManager] remove graph success, graph_id=%u.", graph_id); return SUCCESS; @@ -1384,6 +1346,9 @@ Status GraphManager::ParseOptions(const std::map &opti // net output node dataType ParseOption(options, OUTPUT_DATATYPE, options_.output_datatype); + if (!options_.output_datatype.empty()) { + omg_context_.output_type = options_.output_datatype; + } // Set save_original_model flag (ge.save_original_model) ParseOption(options, SAVE_ORIGINAL_MODEL, options_.save_original_model); @@ -1579,24 +1544,7 @@ Status GraphManager::ParseParallelNum(const std::string ¶llel_num, const std return SUCCESS; } - -void GraphManager::AddGraphNode(GraphId graph_id, const GraphNodePtr &graph_node) { - std::lock_guard lock(member_mutex_); - graph_map_.emplace(graph_id, graph_node); -} - -void GraphManager::RemoveGraphNode(GraphId graph_id) { - std::lock_guard lock(member_mutex_); - graph_map_.erase(graph_id); -} - -bool GraphManager::HasGraphNode(GraphId graph_id) { - std::lock_guard lock(member_mutex_); - return graph_map_.find(graph_id) != graph_map_.end(); -} - Status GraphManager::GetGraphNode(const GraphId &graph_id, GraphNodePtr &out) { - std::lock_guard lock(member_mutex_); auto iter = graph_map_.find(graph_id); if (iter == graph_map_.end()) { out = nullptr; @@ -1618,7 +1566,7 @@ Status GraphManager::SummaryHandle(const GraphId &graph_id, std::vector summary_output_index; GELOGI("[GraphManager] SummaryHandle, outputsSize=%zu.", outputs.size()); const std::map> &whole_summary_output_indexes = - GetCompilerStages(graph_id).optimizer.GetSummaryOutputIndexes(); + graph_optimize_.GetSummaryOutputIndexes(); if (whole_summary_output_indexes.find(graph_id) == whole_summary_output_indexes.end()) { GELOGE(FAILED, "No Summary graph found in map."); return FAILED; @@ -1712,9 +1660,7 @@ Status 
GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap } Status GraphManager::RegisterCallBackFunc( - const std::string &key, - const std::function &)> &callback) { - std::lock_guard lock(member_mutex_); + const std::string &key, const std::function &)> &callback) { GELOGI("[GraphManager] RegisterCallBackFunc, key=%s.", key.c_str()); me_callback_map_[key] = callback; return SUCCESS; @@ -1722,7 +1668,6 @@ Status GraphManager::RegisterCallBackFunc( Status GraphManager::PushSummaryData2ME(const GraphId &graph_id, const std::map &summary_data) { - std::lock_guard lock(member_mutex_); GELOGI("[GraphManager] PushSummaryData2ME, dataSize=%zu.", summary_data.size()); auto itr = me_callback_map_.find(kSummary); if (itr == me_callback_map_.end()) { @@ -1733,7 +1678,6 @@ Status GraphManager::PushSummaryData2ME(const GraphId &graph_id, } Status GraphManager::PushSaveData2ME(const GraphId &graph_id, const std::map &save_data) { - std::lock_guard lock(member_mutex_); GELOGI("[GraphManager] PushSaveData2ME, dataSize=%zu.", save_data.size()); auto itr = me_callback_map_.find(kSave); if (itr == me_callback_map_.end()) { @@ -1955,9 +1899,9 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { } PassManager after_merge_passes; GE_CHK_STATUS_RET( - after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); + after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); GE_CHK_STATUS_RET( - after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass)); + after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass)); GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass", new (std::nothrow) CommonSubexpressionEliminationPass)); 
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::PermutePass", new (std::nothrow) PermutePass)) @@ -1994,7 +1938,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { return ret; } - GE_DUMP(compute_graph, "OptimizeStage1_1"); + GraphUtils::DumpGEGraphToOnnx(*compute_graph, "OptimizeStage1_1"); NamesToPass names_to_passes; TransOpNearbyAllreduceFusionPass trans_op_nearby_allreduce_fusion_pass; @@ -2034,30 +1978,28 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { uint64_t op_constant_folding_cost = 0; for (auto &it : constant_folding_pass.GetOpConstantFoldingPerfStatistic()) { op_constant_folding_cost += it.second.second; - GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", - it.first.c_str(), it.second.second, it.second.first); + GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", it.first.c_str(), + it.second.second, it.second.first); } GEEVENT("[GEPERFTRACE] The time cost of extern constant folding is [%lu] micro second.", op_constant_folding_cost); for (auto &it : constant_folding_pass.GetGeConstantFoldingPerfStatistic()) { op_constant_folding_cost += it.second.second; - GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", - it.first.c_str(), it.second.second, it.second.first); + GELOGI("The time cost of %s constant folding is [%lu] micro second, calls is %lu.", it.first.c_str(), + it.second.second, it.second.first); } - GE_DUMP(compute_graph, "OptimizeStage1_2"); + GraphUtils::DumpGEGraphToOnnx(*compute_graph, "OptimizeStage1_2"); PassManager graph_pass; // the prune pass should between SwitchPass and SwitchToStreamSwitchPass - GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::Migration", new (std::nothrow) SubgraphConstMigrationPass)); - GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ArgsClean", new (std::nothrow) UnusedArgsCleanPass)); 
GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::PrunePass", new (std::nothrow) PrunePass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::NextIterationPass", new (std::nothrow) NextIterationPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ControlTriggerPass", new (std::nothrow) ControlTriggerPass)) GE_CHK_STATUS_RET( - graph_pass.AddPass("OptimizeStage1_3::MergeToStreamMergePass", new (std::nothrow) MergeToStreamMergePass)) + graph_pass.AddPass("OptimizeStage1_3::MergeToStreamMergePass", new (std::nothrow) MergeToStreamMergePass)) GE_CHK_STATUS_RET( - graph_pass.AddPass("OptimizeStage1_3::SwitchToStreamSwitchPass", new (std::nothrow) SwitchToStreamSwitchPass)) + graph_pass.AddPass("OptimizeStage1_3::SwitchToStreamSwitchPass", new (std::nothrow) SwitchToStreamSwitchPass)) GE_CHK_STATUS_RET( - graph_pass.AddPass("OptimizeStage1_3::AttachStreamLabelPass", new (std::nothrow) AttachStreamLabelPass)) + graph_pass.AddPass("OptimizeStage1_3::AttachStreamLabelPass", new (std::nothrow) AttachStreamLabelPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::MultiBatchPass", new (std::nothrow) MultiBatchPass(true))) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::IteratorOpPass", new (std::nothrow) IteratorOpPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::VariableRefUselessControlOutDeletePass", @@ -2067,7 +2009,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { // Priority: The GlobalStepInsertPass should work before graph partitioner. 
// Reason: Make sure that the var "global_step" can be partitioned to known sub graph and allocated memory GE_CHK_STATUS_RET( - graph_pass.AddPass("OptimizeStage1_3::GlobalStepInsertPass", new (std::nothrow) GlobalStepInsertPass)) + graph_pass.AddPass("OptimizeStage1_3::GlobalStepInsertPass", new (std::nothrow) GlobalStepInsertPass)) } GE_TIMESTAMP_START(graph_pass); ret = graph_pass.Run(compute_graph); @@ -2095,7 +2037,7 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { PassManager after_merge_passes; GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", new (std::nothrow) - LinkGenMaskNodesPass(options_.stream_max_parallel_num))); + LinkGenMaskNodesPass(options_.stream_max_parallel_num))); GE_TIMESTAMP_START(after_merge_passes); auto ret = after_merge_passes.Run(compute_graph); @@ -2149,18 +2091,19 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass", new (std::nothrow) CompileNodesPass)) GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( - "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new(std::nothrow) MarkGraphUnknownStatusPass)) + "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new (std::nothrow) MarkGraphUnknownStatusPass)) GE_CHK_STATUS_RET( - pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::InputOutputConnectionIdentifyPass", - new (std::nothrow) InputOutputConnectionIdentifyPass)) + pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::InputOutputConnectionIdentifyPass", + new (std::nothrow) InputOutputConnectionIdentifyPass)) // When the input node to be cleared is after a `Data` node, the atomic-clean-node should not be inserted. 
// So The ComputeGraph should not delete nodes after `AtomicAddrCleanPass` // to prevent unexpected deletion of nodes after a `Data` node GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::AtomicAddrCleanPass", new (std::nothrow) AtomicAddrCleanPass)) - GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::" - "EndOfSequenceAddControlPass", - new (std::nothrow) EndOfSequenceAddControlPass)) + GE_CHK_STATUS_RET( + pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::" + "EndOfSequenceAddControlPass", + new (std::nothrow) EndOfSequenceAddControlPass)) // SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and // graph-structure. So try not to add new pass after SubgraphPass. GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", @@ -2178,11 +2121,11 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { } // After while sub graph handle, mark all node rw type - auto result = GetCompilerStages(compute_graph->GetGraphID()).optimizer.HandleMemoryRWConflict(compute_graph); + auto result = graph_optimize_.HandleMemoryRWConflict(compute_graph); if (result != SUCCESS) { GELOGW( - "Mark node rw type failed. It will take some effect on memory_assign_conflicts handling." - "Please pay attention to it."); + "Mark node rw type failed. It will take some effect on memory_assign_conflicts handling." 
+ "Please pay attention to it."); } ChangeConstTypeWhenTraining(compute_graph); @@ -2228,7 +2171,7 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G GE_TIMESTAMP_START(LoadGraph); GE_CHECK_NOTNULL(graph_node->graph_run_async_listener_); Status ret = - GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_); + GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_); GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraphAsync"); if (ret != SUCCESS) { GELOGE(ret, "[LoadGraphAsync] LoadGraphAsync Failed"); @@ -2259,9 +2202,9 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra } GELOGI( - "CheckAndReleaseMemory Graph[%u] need memory_size[%ld], weight_size[%ld]," - " Device[%u] free_memory_size[%ld]", - graph_node->GetGraphId(), memory_size, weight_size, GetContext().DeviceId(), free_memory); + "CheckAndReleaseMemory Graph[%u] need memory_size[%ld], weight_size[%ld]," + " Device[%u] free_memory_size[%ld]", + graph_node->GetGraphId(), memory_size, weight_size, GetContext().DeviceId(), free_memory); if (ge::CheckInt64AddOverflow(memory_size, weight_size) != SUCCESS) { GELOGE(INTERNAL_ERROR, "The sum of Memory size and weight size exceeds INT64_MAX"); return INTERNAL_ERROR; @@ -2269,16 +2212,8 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra if (free_memory >= (memory_size + weight_size)) { return SUCCESS; } - - std::lock_guard lock(unload_model_mutex_); - - std::map graph_map; - { - std::lock_guard lock(member_mutex_); - graph_map = graph_map_; - } - - for (auto &it : graph_map) { + rtError_t rt_ret; + for (auto &it : graph_map_) { auto graph_id = it.second->GetGraphId(); auto model = it.second->GetGeRootModel(); if (model == nullptr) { @@ -2297,7 +2232,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra } GELOGI("CheckAndReleaseMemory 
try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id, max_memory_size); - rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); + rt_ret = rtSetDevice(GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", model_id, graph_id); continue; @@ -2319,18 +2254,16 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra it.second->SetLoadFlag(false); GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success and set LoadFlag to false.", graph_id, model_id); } - return SUCCESS; } -Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, +Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, const SubGraphInfoPtr &sub_graph_info_ptr, uint64_t session_id, const GEThreadLocalContext &ge_context) { + Status ret = SUCCESS; + GetThreadLocalContext() = ge_context; if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) { - GetContext().SetSessionId(session_id); - GetThreadLocalContext() = ge_context; - graph_manager->UpdateLocalOmgContext(root_graph_id); - + SetLocalOmgContext(graph_manager->omg_context_); ComputeGraphPtr compute_graph_tmp = sub_graph_info_ptr->GetSubGraph(); const std::string &engine_name = sub_graph_info_ptr->GetEngineName(); GELOGI("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu", @@ -2339,8 +2272,7 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore"); GE_CHECK_NOTNULL(compute_graph_tmp); compute_graph_tmp->SetSessionID(session_id); - Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp, - engine_name); + ret = graph_manager->graph_optimize_.OptimizeSubGraph(compute_graph_tmp, engine_name); if (ret != SUCCESS) { GELOGE(ret, "SubGraph optimize Failed %s", 
engine_name.c_str()); return ret; @@ -2353,10 +2285,9 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(), pthread_self()); } else { - GELOGE(FAILED, "graph_manager or sub_graph_info_ptr is nullptr"); + GELOGE(ret, "graph_manager or sub_graph_info_ptr is nullptr"); return FAILED; } - return SUCCESS; } @@ -2379,7 +2310,6 @@ void GraphManager::AddModelCacheHelperToMap(const GraphId &graph_id, uint64_t se ComputeGraphPtr &compute_graph) { std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr != nullptr && instance_ptr->IsIncreBuild()) { - std::lock_guard lock(member_mutex_); auto iter = cache_helper_map_.find(graph_id); if (iter == cache_helper_map_.end()) { ModelCacheHelperPtr cache_helper = MakeShared(session_id, graph_id, compute_graph); @@ -2392,27 +2322,18 @@ void GraphManager::AddModelCacheHelperToMap(const GraphId &graph_id, uint64_t se } } -ModelCacheHelperPtr GraphManager::FindModelCacheHelper(GraphId graph_id) { - std::lock_guard lock(member_mutex_); - auto iter = cache_helper_map_.find(graph_id); - if (iter != cache_helper_map_.end()) { - return iter->second; - } - - return nullptr; -} - Status GraphManager::IncreBuild(const GraphNodePtr &graph_node, GeModelPtr &ge_model) { std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->IsIncreBuild()) { return FAILED; } const uint32_t graph_id = graph_node->GetGraphId(); - ModelCacheHelperPtr cache_helper = FindModelCacheHelper(graph_id); - if (cache_helper == nullptr) { + auto iter = cache_helper_map_.find(graph_id); + if (iter == cache_helper_map_.end()) { GELOGW("Can not find ModelCacheHelper of graph[%u]", graph_id); return FAILED; } + ModelCacheHelperPtr cache_helper = iter->second; if (cache_helper->IsModelCacheHit()) { GEEVENT("Model cache hit."); Status ret = LoadFromCache(graph_node, cache_helper, ge_model); @@ 
-2447,6 +2368,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { if (prctl(PR_SET_NAME, ("GE_PreRun")) != 0) { GELOGW("Set thread name failed."); } + SetLocalOmgContext(graph_manager->omg_context_); PreRunArgs args; while (graph_manager->thread_run_flag_) { @@ -2454,13 +2376,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { if (!pop_status) { continue; } - - GELOGI("A new loop start."); - - GetContext().SetSessionId(args.session_id); GetThreadLocalContext() = args.context; - graph_manager->UpdateLocalOmgContext(args.graph_id); - + GELOGI("A new loop start."); std::vector ge_inputs; ConstructGeInput(ge_inputs, args); @@ -2481,7 +2398,6 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { graph_node->Unlock(); return; } - // set graph's run flag graph_node->SetRunFlag(true); @@ -2498,7 +2414,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { std::vector ge_models; if (graph_manager->options_.local_fmk_op_flag) { - graph_manager->GetCompilerStages(graph_node->GetGraphId()).optimizer.TranFrameOp(compute_graph_tmp); + graph_manager->graph_optimize_.TranFrameOp(compute_graph_tmp); } // it will not execute graph preprocess, optimize, parition, build if the graph has built successful. 
@@ -2510,8 +2426,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { if (graph_node->GetBuildFlag()) { ReturnError(graph_manager, args.callback, PARAM_INVALID, "The graph " + std::to_string(graph_node->GetGraphId()) + - " need to re-build, you should remove it" - " from GE first, then AddGraph again and rebuild it."); + " need to re-build, you should remove it" + " from GE first, then AddGraph again and rebuild it."); graph_node->Unlock(); return; } @@ -2541,8 +2457,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { ge_root_model = graph_node->GetGeRootModel(); } - graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.input_tensor, - ge_root_model, GetThreadLocalContext(), args.callback })); + graph_manager->run_args_q_.Push( + RunArgs({graph_node, args.graph_id, args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback})); GELOGI("Loop end."); } } @@ -2551,6 +2467,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) { if (prctl(PR_SET_NAME, ("GE_Run")) != 0) { GELOGW("Set thread name failed."); } + SetLocalOmgContext(graph_manager->omg_context_); RunArgs args; while (graph_manager->thread_run_flag_) { @@ -2558,13 +2475,8 @@ void GraphManager::RunThread(GraphManager *graph_manager) { if (!pop_status) { continue; } - GELOGI("A new loop start."); - - GetContext().SetSessionId(args.session_id); GetThreadLocalContext() = args.context; - graph_manager->UpdateLocalOmgContext(args.graph_id); - if (args.graph_node->graph_run_async_listener_ != nullptr) { args.graph_node->graph_run_async_listener_->SetCallback(args.callback); } @@ -2624,8 +2536,8 @@ void GraphManager::ReturnError(GraphManager *graph_manager, RunAsyncCallback cal callback(ret, outputs); } -void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, - RunAsyncCallback callback, Status ret, const string &log) { +void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr 
&graph_node, RunAsyncCallback callback, + Status ret, const string &log) { std::vector outputs; auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); if (graph_manager == nullptr || compute_graph == nullptr) { @@ -2668,10 +2580,10 @@ void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_ return; } tensor.length = len * size; - tensor.data.reset(new(std::nothrow) uint8_t[tensor.length]); + tensor.data.reset(new (std::nothrow) uint8_t[tensor.length]); // To avoid global step too small and can not stop, totally set a bigger value for (int64_t i = 0; i < tensor.length; i++) { - tensor.data[i] = 0x7F; // here stands for a positive max value + tensor.data[i] = 0x7F; // here stands for a positive max value } outputs.emplace_back(std::move(tensor)); } @@ -2720,19 +2632,10 @@ void GraphManager::SetOptionsRunGraphFlag(bool run_graph_flag) { options_.run_gr Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, uint64_t session_id) { // graph partition - // Stage partition, only for root graph - GE_TIMESTAMP_START(StagePartition); - StagePartitioner stage_partitioner(compute_graph); - auto ret = stage_partitioner.Partition(); - if (ret != SUCCESS) { - GELOGE(ret, "Graph partition by stage Failed"); - return ret; - } - GE_TIMESTAMP_EVENT_END(StagePartition, "OptimizeSubgraph::StagePartition"); // all sub graph list of root graph and sub graph GE_TIMESTAMP_START(GraphPartitionDynamicShape); DynamicShapePartitioner dynamic_shape_partitioner(compute_graph); - ret = dynamic_shape_partitioner.Partition(); + auto ret = dynamic_shape_partitioner.Partition(); if (ret != SUCCESS) { GELOGE(ret, "Graph partition by dynamic shape Failed"); return ret; @@ -2744,15 +2647,14 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra } GE_TIMESTAMP_EVENT_END(GraphPartitionDynamicShape, "OptimizeSubgraph::GraphPartitionDynamicShape"); GE_TIMESTAMP_START(GraphPartition); - 
GraphPartitioner &partitioner = GetCompilerStages(graph_node->GetGraphId()).partitioner; - ret = partitioner.Partition(compute_graph, GraphPartitioner::kPartitioning); + ret = graph_partitioner_.Partition(compute_graph, GraphPartitioner::kPartitioning); if (ret != SUCCESS) { GELOGE(ret, "Graph partition Failed"); return ret; } GE_TIMESTAMP_EVENT_END(GraphPartition, "OptimizeSubgraph::Partition1"); GE_TIMESTAMP_START(SetSubgraph); - ret = SetSubgraph(session_id, compute_graph, partitioner); + ret = SetSubgraph(session_id, compute_graph); if (ret != SUCCESS) { GELOGE(ret, "Graph set subgraph Failed"); return ret; @@ -2763,9 +2665,8 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)) { GE_TIMESTAMP_START(ConvertGraphToFile); std::string tuning_path; - (void) GetContext().GetOption(TUNING_PATH, tuning_path); - Status ret = ConvertGraphToFile(compute_graph, partitioner, tuning_path, - (options_.build_step == BUILD_STEP_AFTER_BUILDER)); + (void)GetContext().GetOption(TUNING_PATH, tuning_path); + Status ret = ConvertGraphToFile(compute_graph, tuning_path, (options_.build_step == BUILD_STEP_AFTER_BUILDER)); if (ret != SUCCESS) { GELOGE(ret, "Convert graph[%s] to file failed", compute_graph->GetName().c_str()); return ret; @@ -2778,7 +2679,7 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra std::vector merged_sub_graph_list; GE_TIMESTAMP_START(MergeSubgraph); - ret = MergeSubGraph(merged_compute_graph, compute_graph, graph_node->GetGraphId()); + ret = MergeSubGraph(merged_compute_graph, compute_graph); if (ret != SUCCESS) { GELOGE(ret, "Merge SubGraph Failed"); return ret; @@ -2801,20 +2702,19 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra return SUCCESS; } -Status GraphManager::ConvertGraphToFile(ComputeGraphPtr &compute_graph, GraphPartitioner &partitioner, std::string path, - bool exe_flag) { +Status 
GraphManager::ConvertGraphToFile(ComputeGraphPtr &compute_graph, std::string path, bool exe_flag) { GE_CHECK_NOTNULL(compute_graph); GELOGI("compute_graph [%s] path [%s] Enter ConvertGraphToFile.", compute_graph->GetName().c_str(), path.c_str()); std::vector non_tuning_subgraphs; - auto input_node_sub_graph_map = partitioner.graph_2_input_subgraph_; + auto input_node_sub_graph_map = graph_partitioner_.graph_2_input_subgraph_; const auto &input_subgraph_info = input_node_sub_graph_map[compute_graph]; GE_CHECK_NOTNULL(input_subgraph_info); ComputeGraphPtr input_graph_tmp = input_subgraph_info->GetSubGraph(); non_tuning_subgraphs.push_back(input_graph_tmp); - auto sub_graph_map = partitioner.GetSubGraphMap(); + auto sub_graph_map = graph_partitioner_.GetSubGraphMap(); const auto &subgraph_infos = sub_graph_map[compute_graph]; std::vector tuning_subgraphs; - for (const auto &sub_graph_info_ptr: subgraph_infos) { + for (const auto &sub_graph_info_ptr : subgraph_infos) { GE_CHECK_NOTNULL(sub_graph_info_ptr); ComputeGraphPtr sub_graph_tmp = sub_graph_info_ptr->GetSubGraph(); // need to tuning @@ -2837,8 +2737,7 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp compute_graph->SetName(graph_name); } std::vector sub_graph_list; - auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model, - session_id); + auto ret = graph_builder_.Build(compute_graph, sub_graph_list, ge_root_model, session_id); if (ret != SUCCESS) { GELOGE(ret, "SubGraph build Failed."); return ret; @@ -2957,30 +2856,4 @@ Status GraphManager::SaveCheckPointResult(const Graph &graph, const std::vector< } return SUCCESS; } - -void GraphManager::AddLocalOmgContext(GraphId graph_id, const OmgContext &omg_context) { - std::lock_guard lock(member_mutex_); - omg_contexts_.emplace(graph_id, omg_context); - SetLocalOmgContext(omg_contexts_[graph_id]); -} - -void GraphManager::UpdateLocalOmgContext(GraphId graph_id) { - 
std::lock_guard lock(member_mutex_); - auto iter = omg_contexts_.find(graph_id); - if (iter != omg_contexts_.end()) { - SetLocalOmgContext(iter->second); - } else { - GELOGW("OmgContext of graph %u not found.", graph_id); - } -} - -GraphManager::CompilerStages &GraphManager::GetCompilerStages(GraphId graph_id) { - std::lock_guard lock(member_mutex_); - return compiler_stages_[graph_id]; -} - -void GraphManager::RemoveCompilerStages(GraphId graph_id) { - std::lock_guard lock(member_mutex_); - compiler_stages_.erase(graph_id); -} } // namespace ge diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index fc3601af..9096f4a8 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -45,7 +45,8 @@ namespace ge { class GraphManager { public: - GraphManager(); + GraphManager(OmgContext &omg_context); + ~GraphManager() = default; /// @@ -70,8 +71,7 @@ class GraphManager { /// @param [out] Graph output graph /// @return Status result of function /// - Status AddGraph(const GraphId &graph_id, const Graph &graph, const std::map &options, - const OmgContext &omg_context); + Status AddGraph(const GraphId &graph_id, const Graph &graph, const std::map &options); /// /// @ingroup ge_graph @@ -103,7 +103,6 @@ class GraphManager { ge::Status BuildGraph(const GraphId &graph_id, const std::vector &inputs, GeRootModelPtr &models, uint64_t session_id = 0, bool async = false); - Status BuildGraphForUnregisteredOp(const GraphId &graph_id, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id); @@ -138,8 +137,8 @@ class GraphManager { /// @param [out] callback: callback while run graph async finish /// @return Status result of function /// - Status RunGraphAsync(const GraphId &graph_id, const std::vector &inputs, - uint64_t session_id, RunAsyncCallback callback); + Status RunGraphAsync(const GraphId &graph_id, const std::vector &inputs, uint64_t session_id, + RunAsyncCallback callback); /// /// @ingroup 
ge_graph @@ -149,8 +148,7 @@ class GraphManager { /// @return Status result of function /// Status RegisterCallBackFunc( - const std::string &key, - const std::function &)> &callback); + const std::string &key, const std::function &)> &callback); const bool GetTrainFlag() const { return options_.train_graph_flag; } @@ -170,13 +168,6 @@ class GraphManager { Status SaveCheckPointResult(const Graph &graph, const std::vector &outputs, map &var_results); private: - struct CompilerStages { - GraphPrepare preparer; - GraphOptimize optimizer; - GraphPartitioner partitioner; - GraphBuilder builder; - }; - struct PreRunArgs { GraphId graph_id; std::vector input_tensor; @@ -188,30 +179,25 @@ class GraphManager { struct RunArgs { GraphNodePtr graph_node; GraphId graph_id; - uint64_t session_id; std::vector input_tensor; GeRootModelPtr ge_root_model; GEThreadLocalContext context; RunAsyncCallback callback; }; - void AddGraphNode(GraphId graph_id, const GraphNodePtr &graph_node); - void RemoveGraphNode(GraphId graph_id); - bool HasGraphNode(GraphId graph_id); Status GetGraphNode(const GraphId &graph_id, GraphNodePtr &out); std::shared_ptr GetModelListener() const { return graph_run_listener_; } - static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, - const SubGraphInfoPtr &sub_graph_info_ptr, uint64_t session_id, - const GEThreadLocalContext &ge_context); + static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, const SubGraphInfoPtr &sub_graph_info_ptr, + uint64_t session_id, const GEThreadLocalContext &ge_context); Status PreRun(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id = INVALID_SESSION_ID); Status OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, uint64_t session_id); - Status Build(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, - GeRootModelPtr &ge_root_model, uint64_t session_id); + Status 
Build(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, GeRootModelPtr &ge_root_model, + uint64_t session_id); Status StartForRunGraph(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id = INVALID_SESSION_ID); @@ -261,13 +247,11 @@ class GraphManager { bool CheckTransOpForCheckpointGraph(NodePtr &node); - Status MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::ComputeGraphPtr &original_compute_graph, - GraphId root_graph_id); + Status MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::ComputeGraphPtr &original_compute_graph); - Status ConvertGraphToFile(ComputeGraphPtr &compute_graph, GraphPartitioner &partitioner, std::string file_path, - bool exe_flag = false); + Status ConvertGraphToFile(ComputeGraphPtr &compute_graph, std::string file_path, bool exe_flag = false); - Status SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph, GraphPartitioner &partitioner); + Status SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_graph); void SetAttrForHcomBroadCastOp(ge::ComputeGraphPtr &compute_graph); @@ -312,49 +296,36 @@ class GraphManager { void AddModelCacheHelperToMap(const GraphId &graph_id, uint64_t session_id, ComputeGraphPtr &compute_graph); Status IncreBuild(const GraphNodePtr &graph_node, GeModelPtr &ge_model); void RemoveModelCacheHelper(const GraphId &graph_id); - ModelCacheHelperPtr FindModelCacheHelper(GraphId graph_id); static void ConstructGeInput(std::vector &ge_inputs, PreRunArgs &args); static void PreRunThread(GraphManager *graph_manager); static void RunThread(GraphManager *graph_manager); static void StopQueue(GraphManager *graph_manager); static void ReturnError(GraphManager *graph_manager, RunAsyncCallback callback, Status ret, const string &log); - static void ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, - Status ret, const string &log); + static void ReturnError(GraphManager *graph_manager, 
GraphNodePtr &graph_node, RunAsyncCallback callback, Status ret, + const string &log); void ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph); Status PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, const std::vector &inputs, ge::ComputeGraphPtr &compute_graph, uint64_t session_id); - Status PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, - ge::ComputeGraphPtr &compute_graph, + Status PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, ge::ComputeGraphPtr &compute_graph, uint64_t session_id); - Status PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, - ComputeGraphPtr &compute_graph, - GeRootModelPtr &ge_root_model, - uint64_t session_id); + Status PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, ComputeGraphPtr &compute_graph, + GeRootModelPtr &ge_root_model, uint64_t session_id); - Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, - Graph2SubGraphInfoList &sub_graph_map, + Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, Graph2SubGraphInfoList &sub_graph_map, std::unordered_map ©_graphs); - Status OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, - Graph2SubGraphInfoList &sub_graph_map, + Status OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph, Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id); bool CheckAllFusionOptimizeSuccess(const ComputeGraphPtr &compute_graph, Graph2SubGraphInfoList &sub_graph_map); - Status ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_graph, - Graph2SubGraphInfoList &sub_graph_map, + Status ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_graph, Graph2SubGraphInfoList &sub_graph_map, std::unordered_map ©_graphs); Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id); - void AddLocalOmgContext(GraphId graph_id, const OmgContext &omg_context); - void UpdateLocalOmgContext(GraphId graph_id); - - CompilerStages 
&GetCompilerStages(GraphId graph_id); - void RemoveCompilerStages(GraphId graph_id); - std::atomic_bool thread_run_flag_; BlockingQueue prerun_args_q_{}; BlockingQueue run_args_q_{}; @@ -362,6 +333,7 @@ class GraphManager { std::thread run_thread_; std::map graph_map_; + std::map cache_helper_map_; // for run graph synchronous return @@ -376,18 +348,19 @@ class GraphManager { bool init_flag_; GraphManagerOptions options_; - GraphContextPtr graph_context_ = nullptr; - map omg_contexts_; + OmgContext &omg_context_; - map compiler_stages_; + GraphPrepare graph_preparer_; + GraphOptimize graph_optimize_; + GraphPartitioner graph_partitioner_; + GraphBuilder graph_builder_; + GraphLoader graph_loader_; GraphExecutor graph_executor_; + GraphContextPtr graph_context_ = nullptr; VarAccelerateCtrl var_acc_ctrl_; std::mutex run_mutex_; - - std::mutex member_mutex_; - std::mutex unload_model_mutex_; }; } // namespace ge diff --git a/ge/graph/manager/graph_manager_utils.cc b/ge/graph/manager/graph_manager_utils.cc index fe7e5b34..edacadb9 100644 --- a/ge/graph/manager/graph_manager_utils.cc +++ b/ge/graph/manager/graph_manager_utils.cc @@ -51,9 +51,7 @@ GraphNode::GraphNode(GraphId graph_id) GraphNode::~GraphNode() = default; -void GraphNode::Lock() { - sem_.Push(0); -} +void GraphNode::Lock() { sem_.Push(0); } void GraphNode::Unlock() { uint8_t unused; @@ -105,9 +103,9 @@ GraphModelListener::GraphModelListener(std::mutex &mutex, std::condition_variabl Status GraphModelListener::OnComputeDone(uint32_t model_id, uint32_t task_id, uint32_t result, std::vector &outputs) { GELOGI( - "[GraphManager] graph compute call back, model_id:%u, task_id:%u, " - "resultCode:%u.", - model_id, task_id, result); + "[GraphManager] graph compute call back, model_id:%u, task_id:%u, " + "resultCode:%u.", + model_id, task_id, result); std::lock_guard lock(mutex_); result_code_ = result; @@ -140,8 +138,7 @@ void RunAsyncListener::SetCallback(const RunAsyncCallback &callback) { Status 
RunAsyncListener::OnComputeDone(uint32_t model_id, uint32_t task_id, uint32_t result, std::vector &outputs) { - GELOGI("[GraphManager] run graph async call back, modelId:%u, taskId:%u, resultCode:%u.", - model_id, task_id, result); + GELOGI("[GraphManager] run graph async call back, modelId:%u, taskId:%u, resultCode:%u.", model_id, task_id, result); GE_CHECK_NOTNULL(callback_); callback_(result, outputs); uint8_t unused; diff --git a/ge/graph/manager/graph_manager_utils.h b/ge/graph/manager/graph_manager_utils.h index aee020f2..be39df21 100644 --- a/ge/graph/manager/graph_manager_utils.h +++ b/ge/graph/manager/graph_manager_utils.h @@ -167,7 +167,7 @@ class GraphNode { GeModelPtr GetGeModel() const { return ge_model_; } void SetGeRootModel(const GeRootModelPtr &ge_root_model) { ge_root_model_ = ge_root_model; } GeRootModelPtr GetGeRootModel() const { return ge_root_model_; } - const std::map& GetOptions() const { return options_; } + const std::map &GetOptions() const { return options_; } void SetOptions(const std::map &options) { options_ = options; } void Lock(); void Unlock(); diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index b832986b..20ca12ae 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -105,9 +105,9 @@ Status MemoryAllocator::FreeMemory(const string &memory_key, uint32_t device_id) if (it == memory_base_map_.end()) { if (mem_malloced_) { GELOGW( - "MemoryAllocator::FreeMemory failed," - " memory_key[%s] was not exist, device_id = %u.", - memory_key.c_str(), device_id); + "MemoryAllocator::FreeMemory failed," + " memory_key[%s] was not exist, device_id = %u.", + memory_key.c_str(), device_id); } return ge::INTERNAL_ERROR; } @@ -139,9 +139,9 @@ uint8_t *MemoryAllocator::GetMemoryAddr(const string &memory_key, uint32_t devic auto it = memory_base_map_.find(memory_key); if (it == memory_base_map_.end()) { GELOGW( - "MemoryAllocator::GetMemoryAddr failed," - 
" memory_key[%s] was not exist, device_id = %u.", - memory_key.c_str(), device_id); + "MemoryAllocator::GetMemoryAddr failed," + " memory_key[%s] was not exist, device_id = %u.", + memory_key.c_str(), device_id); return nullptr; } diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index 2723ae5c..bebdedb6 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -109,8 +109,7 @@ class MemoryAllocator { /// @param [in] device_id device id /// @return memory address /// - uint8_t *MallocMemory(const string &purpose, const string &memory_key, size_t memory_size, - uint32_t device_id = 0); + uint8_t *MallocMemory(const string &purpose, const string &memory_key, size_t memory_size, uint32_t device_id = 0); /// /// @ingroup ge_graph diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 0b9f1ff9..8633e361 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -685,7 +685,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty return nullptr; } uint8_t *mem_addr = - logic_addr + reinterpret_cast(mem_base) - VarManager::Instance(session_id_)->GetVarMemLogicBase(); + logic_addr + reinterpret_cast(mem_base) - VarManager::Instance(session_id_)->GetVarMemLogicBase(); return mem_addr; } diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h index b4f6aca3..4a038f13 100755 --- a/ge/graph/manager/graph_var_manager.h +++ b/ge/graph/manager/graph_var_manager.h @@ -219,7 +219,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { ge::Status SaveBroadCastInfo(uint32_t graph_id, const VarBroadCastInfo &broad_cast_info); - ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); + ge::Status GetBroadCastInfo(uint32_t graph_id, const string &var_name, VarBroadCastInfo &broad_cast_info); 
ge::Status SyncBroadCastData2Var(uint32_t graph_id, const std::string &var_name, ge::ConstOpDescPtr var_op_desc, uint8_t *base_ptr); diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index d4aceddd..1d35f7af 100644 --- a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -18,46 +18,20 @@ #include -#include "graph/ge_context.h" #include "graph/utils/tensor_utils.h" -#include "runtime/mem.h" -namespace { -const uint32_t kMallocHostMemFlag = 0; -} // namespace namespace ge { -Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { - auto device_id = GetContext().DeviceId(); - GELOGD("SharedMemAllocator::Malloc host mem size= %zu for devid:[%u].", mem_info.mem_size, device_id); - - auto dev_id = static_cast(device_id); - GE_CHK_RT_RET(rtSetDevice(dev_id)); - // DeviceReset before memory finished! - GE_MAKE_GUARD(not_used_var, [&] { GE_CHK_RT(rtDeviceReset(dev_id)); }); - - rtMallocHostSharedMemoryIn input_para = {mem_info.shm_name.c_str(), mem_info.mem_size, kMallocHostMemFlag}; - rtMallocHostSharedMemoryOut output_para; - rtError_t rt_ret = rtMallocHostSharedMemory(&input_para, &output_para); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api(rtMallocHostSharedMemory) failed, devid:[%u].", device_id); - return GE_GRAPH_MEMORY_ALLOC_FAILED; - } - mem_info.fd = output_para.fd; - mem_info.host_address = reinterpret_cast(output_para.ptr); - mem_info.device_address = reinterpret_cast(output_para.devPtr); +Status HostMemoryAllocator::Allocate(std::size_t memory_size, uint8_t *memory_addr) { + GELOGI("HostMemoryAllocator::MallocMemory size= %zu.", memory_size); return SUCCESS; } -Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { - GELOGD("SharedMemAllocator::DeAllocate"); - rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, - mem_info.host_address, mem_info.device_address}; - - rtError_t rt_ret = 
rtFreeHostSharedMemory(&free_para); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); - return RT_FAILED; +Status HostMemoryAllocator::DeAllocate(uint8_t *memory_addr) { + if (rtFreeHost(memory_addr) != RT_ERROR_NONE) { + GELOGE(GE_GRAPH_FREE_FAILED, "MemoryAllocator::Free memory failed."); + return GE_GRAPH_FREE_FAILED; } + memory_addr = nullptr; return ge::SUCCESS; } @@ -68,9 +42,9 @@ HostMemManager &HostMemManager::Instance() { Status HostMemManager::Initialize() { std::lock_guard lock(mutex_); - allocator_ = std::unique_ptr(new (std::nothrow) SharedMemAllocator()); + allocator_ = std::unique_ptr(new (std::nothrow) HostMemoryAllocator()); if (allocator_ == nullptr) { - GELOGE(GE_GRAPH_MALLOC_FAILED, "Shared memory allocator init failed!"); + GELOGE(GE_GRAPH_MALLOC_FAILED, "Host mem allocator init failed!"); return GE_GRAPH_MALLOC_FAILED; } return SUCCESS; @@ -78,43 +52,35 @@ Status HostMemManager::Initialize() { void HostMemManager::Finalize() noexcept { std::lock_guard lock(mutex_); - for (auto &it : var_memory_base_map_) { - if (allocator_->DeAllocate(it.second) != SUCCESS) { - GELOGW("Host %s mem release failed!", it.first.c_str()); + + for (const auto &it : var_memory_base_map_) { + if (allocator_->DeAllocate(it.second.address) != SUCCESS) { + GELOGW("Host %s mem deAllocator failed!", it.first.c_str()); } } var_memory_base_map_.clear(); } -Status HostMemManager::MallocSharedMemory(SharedMemInfo &mem_info) { +Status HostMemManager::MallocMemoryForHostVar(const string &op_name, uint64_t tensor_size, uint8_t *&var_addr) { std::lock_guard lock(mutex_); - auto iter = var_memory_base_map_.find(mem_info.op_name); - if (iter != var_memory_base_map_.end()) { - GELOGE(FAILED, "Host shared memory for op %s has been malloced", mem_info.op_name.c_str()); - return FAILED; + if (var_memory_base_map_.find(op_name) != var_memory_base_map_.end()) { + GELOGI("Host mem for variable %s has been malloced", 
op_name.c_str()); + return SUCCESS; } - mem_info.shm_name = OpNameToShmName(mem_info.op_name); GE_CHECK_NOTNULL(allocator_); - GE_CHK_STATUS_RET(allocator_->Allocate(mem_info)); - var_memory_base_map_[mem_info.op_name] = mem_info; + GE_CHK_STATUS(allocator_->Allocate(tensor_size, var_addr)); + HostMemInfo info(var_addr, tensor_size); + var_memory_base_map_[op_name] = info; return SUCCESS; } Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size) { - std::lock_guard lock(mutex_); if (var_memory_base_map_.find(op_name) == var_memory_base_map_.end()) { GELOGE(INTERNAL_ERROR, "Find host base base_addr failed,node name:%s!", op_name.c_str()); return INTERNAL_ERROR; } - base_addr = reinterpret_cast(reinterpret_cast(var_memory_base_map_[op_name].device_address)); - data_size = var_memory_base_map_[op_name].mem_size; + base_addr = reinterpret_cast(reinterpret_cast(var_memory_base_map_[op_name].address)); + data_size = var_memory_base_map_[op_name].data_size; return SUCCESS; } - -string HostMemManager::OpNameToShmName(const string &op_name) { - string sh_name("Ascend_"); - std::hash hash_str; - sh_name.append(std::to_string(hash_str(op_name))); - return sh_name; -} } // namespace ge diff --git a/ge/graph/manager/host_mem_manager.h b/ge/graph/manager/host_mem_manager.h index 66bd5826..3a5a0602 100644 --- a/ge/graph/manager/host_mem_manager.h +++ b/ge/graph/manager/host_mem_manager.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include "framework/common/ge_inner_error_codes.h" @@ -36,23 +35,19 @@ #include "runtime/mem.h" namespace ge { -struct SharedMemInfo { - string op_name; - string shm_name; - uint64_t mem_size = 0; - int fd = 0; - uint8_t *device_address = nullptr; - uint8_t *host_address = nullptr; - SharedMemInfo() = default; - SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} -}; -class SharedMemAllocator { +class HostMemoryAllocator { public: - 
SharedMemAllocator() = default; - ~SharedMemAllocator() = default; + ~HostMemoryAllocator() = default; + + Status Allocate(std::size_t size, uint8_t *memory_addr); + Status DeAllocate(uint8_t *memory_addr); +}; - Status Allocate(SharedMemInfo &mem_info); - Status DeAllocate(SharedMemInfo &mem_info); +struct HostMemInfo { + uint8_t *address; + uint64_t data_size; + HostMemInfo() : address(nullptr), data_size(0) {} + HostMemInfo(uint8_t *addr, uint64_t size) : address(addr), data_size(size) {} }; class HostMemManager { @@ -65,13 +60,12 @@ class HostMemManager { static HostMemManager &Instance(); Status Initialize(); void Finalize() noexcept; - Status MallocSharedMemory(SharedMemInfo &mem_nfo); + Status MallocMemoryForHostVar(const string &op_name, uint64_t tensor_size, uint8_t *&var_addr); Status QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size); private: - static string OpNameToShmName(const string &op_name); - std::unordered_map var_memory_base_map_; - std::unique_ptr allocator_; + std::unordered_map var_memory_base_map_; + std::unique_ptr allocator_; mutable std::recursive_mutex mutex_; }; } // namespace ge diff --git a/ge/graph/manager/memory_api.cc b/ge/graph/manager/memory_api.cc index 45e4bb65..0a98e983 100644 --- a/ge/graph/manager/memory_api.cc +++ b/ge/graph/manager/memory_api.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,13 +18,11 @@ #include -#include "common/ge/plugin_manager.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/rdma_pool_allocator.h" -#include "graph/utils/type_utils.h" #include "hccl/base.h" -#include "hccl/hccl_types.h" +#include "hccl/hcom.h" namespace ge { Status InitRdmaPool(size_t size, rtMemType_t mem_type) { @@ -37,71 +35,6 @@ Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t uint64_t device_base = 0; uint64_t device_size = 0; GE_CHK_STATUS_RET(MemManager::Instance().RdmaPoolInstance(mem_type).GetBaseAddr(device_base, device_size)); - auto table_len = var_info.size() + 1; - std::unique_ptr reg_addrs(new (std::nothrow) MemRegisterAddr[table_len]); - GE_CHECK_NOTNULL(reg_addrs); - for (size_t i = 0; i < var_info.size(); ++i) { - reg_addrs[i] = {var_info[i].base_addr, var_info[i].var_size}; - } - reg_addrs[table_len - 1] = {device_base, device_size}; - - std::string file_name = "libhccl.so"; - std::string path = PluginManager::GetPath(); - path.append(file_name); - string canonical_path = RealPath(path.c_str()); - if (canonical_path.empty()) { - GELOGE(FAILED, "Failed to get realpath of %s", path.c_str()); - return FAILED; - } - GELOGI("FileName:%s, Path:%s.", file_name.c_str(), canonical_path.c_str()); - auto handle = dlopen(canonical_path.c_str(), RTLD_NOW | RTLD_GLOBAL); - GE_CHECK_NOTNULL(handle); - GE_MAKE_GUARD(not_used_var, [&] { - if (dlclose(handle) != 0) { - GELOGW("Failed to close handle %s", dlerror()); - } - }); - - auto hcom_remote_mem_register = - (HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "hcom_remote_access_mem_register"); - if (hcom_remote_mem_register == nullptr) { - GELOGE(FAILED, "Failed to invoke hcom_remote_mem_register function."); - return FAILED; - } - - HcclResult hccl_ret = hcom_remote_mem_register(reg_addrs.get(), table_len); - if (hccl_ret != HCCL_SUCCESS) { - GELOGE(HCCL_E_INTERNAL, "Rdma mem register failed, ret: 
0x%X", hccl_ret); - return HCCL_E_INTERNAL; - } - return SUCCESS; -} - -Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size) { - GELOGD("MallocSharedMemory in"); - uint32_t type_size = 0; - bool result = TypeUtils::GetDataTypeLength(tensor_info.data_type, type_size); - if (!result) { - GELOGE(GRAPH_FAILED, "GetDataTypeLength failed, data_type=(%s).", - TypeUtils::DataTypeToSerialString(tensor_info.data_type).c_str()); - return GRAPH_FAILED; - } - memory_size = type_size; - for (auto dim : tensor_info.dims) { - if (dim <= 0) { - GELOGE(GRAPH_FAILED, "Tensor dims should be positive"); - return GRAPH_FAILED; - } - memory_size *= dim; - } - SharedMemInfo mem_info(tensor_info.var_name, memory_size); - Status ret = HostMemManager::Instance().MallocSharedMemory(mem_info); - if (ret != SUCCESS) { - GELOGE(GRAPH_FAILED, "MallocSharedMemory failed op name [%s]", tensor_info.var_name.c_str()); - return GRAPH_FAILED; - } - dev_addr = reinterpret_cast(reinterpret_cast(mem_info.device_address)); - GELOGD("MallocSharedMemory Succeeded"); return SUCCESS; } @@ -109,4 +42,4 @@ Status GetVarBaseAddrAndSize(const string &var_name, uint64_t &base_addr, uint64 GELOGD("GetVarBaseAddrAndSize in"); return HostMemManager::Instance().QueryVarMemInfo(var_name, base_addr, var_size); } -} // namespace ge +} // namespace ge \ No newline at end of file diff --git a/ge/graph/manager/model_manager/event_manager.cc b/ge/graph/manager/model_manager/event_manager.cc index 69a946f9..686eb3d8 100644 --- a/ge/graph/manager/model_manager/event_manager.cc +++ b/ge/graph/manager/model_manager/event_manager.cc @@ -67,7 +67,7 @@ Status EventManager::EventElapsedTime(size_t start_event_idx, size_t stop_event_ GE_CHK_BOOL_RET_STATUS_NOLOG(this->inited_, INTERNAL_ERROR); GE_CHK_BOOL_RET_STATUS_NOLOG(start_event_idx < this->event_list_.size() && - stop_event_idx < this->event_list_.size() && start_event_idx <= stop_event_idx, + stop_event_idx < 
this->event_list_.size() && start_event_idx <= stop_event_idx, PARAM_INVALID); GE_CHK_RT_RET(rtEventElapsedTime(&time, this->event_list_[start_event_idx], this->event_list_[stop_event_idx])); diff --git a/ge/graph/manager/model_manager/event_manager.h b/ge/graph/manager/model_manager/event_manager.h index a7464e0c..a20afead 100644 --- a/ge/graph/manager/model_manager/event_manager.h +++ b/ge/graph/manager/model_manager/event_manager.h @@ -17,7 +17,6 @@ #ifndef GE_GRAPH_MANAGER_MODEL_MANAGER_EVENT_MANAGER_H_ #define GE_GRAPH_MANAGER_MODEL_MANAGER_EVENT_MANAGER_H_ - #include #include "common/fmk_error_codes.h" diff --git a/ge/graph/manager/rdma_pool_allocator.cc b/ge/graph/manager/rdma_pool_allocator.cc index 03e01bd2..ef82deff 100644 --- a/ge/graph/manager/rdma_pool_allocator.cc +++ b/ge/graph/manager/rdma_pool_allocator.cc @@ -126,7 +126,7 @@ uint8_t *RdmaPoolAllocator::Malloc(size_t size, uint32_t device_id) { if (ShouldSplit(block, aligned_size)) { GELOGD("Block will be splited block size = %zu, aligned_size:%zu", block->size, aligned_size); auto *new_block = - new (std::nothrow) Block(device_id, block->size - aligned_size, nullptr, block->ptr + aligned_size); + new (std::nothrow) Block(device_id, block->size - aligned_size, nullptr, block->ptr + aligned_size); if (new_block == nullptr) { GELOGW("Block split failed"); return block->ptr; @@ -140,8 +140,8 @@ uint8_t *RdmaPoolAllocator::Malloc(size_t size, uint32_t device_id) { block->size = aligned_size; block_bin_.insert(new_block); } - GELOGD("Find block size = %zu", block->size); return block->ptr; + GELOGD("Find block size = %zu", block->size); } GELOGW("Memory block not founded."); return nullptr; diff --git a/ge/graph/manager/trans_var_data_utils.cc b/ge/graph/manager/trans_var_data_utils.cc index d6865716..60a0d0db 100644 --- a/ge/graph/manager/trans_var_data_utils.cc +++ b/ge/graph/manager/trans_var_data_utils.cc @@ -122,7 +122,7 @@ Status CopyVarFromDevice(uint64_t session_id, const NodePtr &var, 
std::unique_pt return INTERNAL_ERROR; } - std::unique_ptr var_host(new(std::nothrow) uint8_t[var_size_bytes]); + std::unique_ptr var_host(new (std::nothrow) uint8_t[var_size_bytes]); if (var_host == nullptr) { GELOGE(OUT_OF_MEMORY, "Failed to malloc rt-host memory, size %ld", var_size_bytes); return OUT_OF_MEMORY; @@ -220,9 +220,7 @@ Status TransVarOnHost(uint8_t *var_data, const VarTransRoad &trans_road, formats /// @param var_size_bytes /// @param var_device /// @return -Status ReAssignVarAddr(uint64_t session_id, - const std::string &var_name, - const GeTensorDesc &tensor_desc, +Status ReAssignVarAddr(uint64_t session_id, const std::string &var_name, const GeTensorDesc &tensor_desc, void **var_device) { uint8_t *var_logic = nullptr; Status ret = VarManager::Instance(session_id)->GetVarAddr(var_name, tensor_desc, &var_logic); @@ -310,19 +308,17 @@ Status TransTensor(uint8_t *var_data, const NodePtr &var_src, const NodePtr &var auto src_data_datatype = var_src->GetOpDesc()->GetOutputDesc(0).GetDataType(); auto dst_data_datatype = var_dst->GetOpDesc()->GetOutputDesc(0).GetDataType(); GE_IF_BOOL_EXEC( - src_data_datatype != dst_data_datatype, - auto ret = formats::TransDataType( - {var_data, static_cast(src_data_shape_size), src_data_datatype, dst_data_datatype}, result); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "trans var data on host failed"); - return ret; - }); + src_data_datatype != dst_data_datatype, + auto ret = formats::TransDataType( + {var_data, static_cast(src_data_shape_size), src_data_datatype, dst_data_datatype}, result); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "trans var data on host failed"); + return ret; + }); return SUCCESS; } -Status CopyTensorFromSrcVarNode(const NodePtr &var_src, - const NodePtr &var_dst, - uint64_t session_id, +Status CopyTensorFromSrcVarNode(const NodePtr &var_src, const NodePtr &var_dst, uint64_t session_id, uint32_t device_id) { /// after FE fusion pass, input num of applymomentum op was changed, 0th input 
is var_fp32, 6th input is /// var_fp16(new). @@ -365,7 +361,7 @@ Status CopyTensorFromSrcVarNode(const NodePtr &var_src, GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Failed to send var data to device"); return ret); return SUCCESS; } -} // namespace +} // namespace Status TransVarDataUtils::SyncVarData2BroadCast(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, uint8_t *dst_addr, int64_t dst_addr_size, uint64_t session_id) { GE_CHK_BOOL_RET_STATUS(dst_addr != nullptr, FAILED, "dst addr is null. "); @@ -390,7 +386,7 @@ Status TransVarDataUtils::SyncBroadCastData2Var(uint8_t *src_addr, int64_t src_a GE_CHK_RT_RET(rtMemcpy(host_addr, src_addr_size, src_addr, src_addr_size, RT_MEMCPY_DEVICE_TO_HOST)); GE_CHK_STATUS_RET( - SyncTensorToDevice(var_name, reinterpret_cast(host_addr), src_addr_size, dst_tensor_desc, session_id)); + SyncTensorToDevice(var_name, reinterpret_cast(host_addr), src_addr_size, dst_tensor_desc, session_id)); return SUCCESS; } @@ -402,10 +398,10 @@ Status TransVarDataUtils::SyncTensorToHost(const string &var_name, const ge::GeT uint8_t *src_addr = nullptr; GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr)); uint8_t *mem_addr = - src_addr - - static_cast(reinterpret_cast(VarManager::Instance(session_id)->GetVarMemLogicBase())) + - static_cast( - reinterpret_cast(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); + src_addr - + static_cast(reinterpret_cast(VarManager::Instance(session_id)->GetVarMemLogicBase())) + + static_cast( + reinterpret_cast(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); GE_CHK_RT_RET(rtMallocHost(reinterpret_cast(host_addr), src_tensor_size)); GE_CHK_RT_RET(rtMemcpy(*host_addr, src_tensor_size, mem_addr, src_tensor_size, RT_MEMCPY_DEVICE_TO_HOST)); @@ -419,10 +415,10 @@ Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8 uint8_t *dst_addr = nullptr; 
GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr)); uint8_t *mem_addr = - dst_addr - - static_cast(reinterpret_cast(VarManager::Instance(session_id)->GetVarMemLogicBase())) + - static_cast( - reinterpret_cast(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); + dst_addr - + static_cast(reinterpret_cast(VarManager::Instance(session_id)->GetVarMemLogicBase())) + + static_cast( + reinterpret_cast(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE)); GELOGI("SyncTensorToDevice var_name %s, addr_size %u", var_name.c_str(), addr_size); @@ -430,11 +426,8 @@ Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8 return SUCCESS; } -Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, - uint64_t session_id, - rtContext_t context, - uint32_t graph_id, - uint32_t thread_num) { +Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, uint64_t session_id, + rtContext_t context, uint32_t graph_id, uint32_t thread_num) { ThreadPool executor(thread_num); std::vector> vector_future; for (auto &node : variable_nodes) { @@ -447,40 +440,40 @@ Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, } std::future f = executor.commit( - [](const ge::NodePtr &node, uint64_t session_id, rtContext_t ctx, uint32_t graph_id) -> Status { - rtError_t rt_ret = rtCtxSetCurrent(ctx); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Failed to set context, error_code is: 0x%X.", rt_ret); - return RT_ERROR_TO_GE_STATUS(rt_ret); + [](const ge::NodePtr &node, uint64_t session_id, rtContext_t ctx, uint32_t graph_id) -> Status { + rtError_t rt_ret = rtCtxSetCurrent(ctx); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Failed to set context, error_code is: 0x%X.", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + uint32_t allocated_graph_id 
= 0; + Status ret = VarManager::Instance(session_id)->GetAllocatedGraphId(node->GetName(), allocated_graph_id); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "var has not been allocated, node:%s, graph_id:%u.", node->GetName().c_str(), + graph_id); + return INTERNAL_ERROR; + } + uint32_t changed_graph_id = 0; + ret = VarManager::Instance(session_id)->GetChangedGraphId(node->GetName(), changed_graph_id); + bool call_trans_var = + (ret == SUCCESS && changed_graph_id == graph_id && changed_graph_id != allocated_graph_id); + if (call_trans_var) { + GELOGI("VarManager::GetChangedGraphId() success, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); + VarTransRoad *trans_road = VarManager::Instance(session_id)->GetTransRoad(node->GetName()); + if (trans_road == nullptr) { + GELOGI("The variable %s does not have any trans road", node->GetName().c_str()); + return SUCCESS; } - uint32_t allocated_graph_id = 0; - Status ret = VarManager::Instance(session_id)->GetAllocatedGraphId(node->GetName(), allocated_graph_id); + ret = TransVarData(node, *trans_road, session_id); if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "var has not been allocated, node:%s, graph_id:%u.", node->GetName().c_str(), - graph_id); + GELOGE(INTERNAL_ERROR, "TransVarData failed, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); return INTERNAL_ERROR; } - uint32_t changed_graph_id = 0; - ret = VarManager::Instance(session_id)->GetChangedGraphId(node->GetName(), changed_graph_id); - bool call_trans_var = - (ret == SUCCESS && changed_graph_id == graph_id && changed_graph_id != allocated_graph_id); - if (call_trans_var) { - GELOGI("VarManager::GetChangedGraphId() success, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); - VarTransRoad *trans_road = VarManager::Instance(session_id)->GetTransRoad(node->GetName()); - if (trans_road == nullptr) { - GELOGI("The variable %s does not have any trans road", node->GetName().c_str()); - return SUCCESS; - } - ret = TransVarData(node, 
*trans_road, session_id); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "TransVarData failed, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); - return INTERNAL_ERROR; - } - VarManager::Instance(session_id)->RemoveChangedGraphId(node->GetName()); - } - return SUCCESS; - }, - node, session_id, context, graph_id); + VarManager::Instance(session_id)->RemoveChangedGraphId(node->GetName()); + } + return SUCCESS; + }, + node, session_id, context, graph_id); if (!f.valid()) { GELOGE(FAILED, "Future is invalid"); return FAILED; @@ -514,7 +507,7 @@ Status TransVarDataUtils::CopyVarData(const ComputeGraphPtr &compute_graph, uint GE_IF_BOOL_EXEC(ge::AttrUtils::GetStr(node->GetOpDesc(), "_copy_from_var_node", cp_from_node), GELOGI("Get original type of cp_from_node")); if (cp_from_node.length() != 0) { - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), "_copy_value", copy_value); // no need to check value + (void)ge::AttrUtils::GetBool(node->GetOpDesc(), "_copy_value", copy_value); // no need to check value if (!copy_value) { auto src_node = compute_graph->FindNode(cp_from_node); GE_CHECK_NOTNULL(src_node); @@ -523,7 +516,7 @@ Status TransVarDataUtils::CopyVarData(const ComputeGraphPtr &compute_graph, uint auto ret = CopyTensorFromSrcVarNode(src_node, node, session_id, device_id); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(FAILED, "copy tensor failed!"); return FAILED); // only copy once - (void) ge::AttrUtils::SetBool(node->GetOpDesc(), "_copy_value", true); // no need to check value + (void)ge::AttrUtils::SetBool(node->GetOpDesc(), "_copy_value", true); // no need to check value } } } diff --git a/ge/graph/manager/trans_var_data_utils.h b/ge/graph/manager/trans_var_data_utils.h index 95ebd09a..efdfa51f 100755 --- a/ge/graph/manager/trans_var_data_utils.h +++ b/ge/graph/manager/trans_var_data_utils.h @@ -34,11 +34,8 @@ class TransVarDataUtils { static ge::Status SyncBroadCastData2Var(uint8_t *src_addr, int64_t src_addr_size, const string &var_name, const 
ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id_); - static ge::Status TransAllVarData(const std::vector &variable_nodes, - uint64_t session_id, - rtContext_t context, - uint32_t graph_id, - uint32_t thread_num = 16); + static ge::Status TransAllVarData(const std::vector &variable_nodes, uint64_t session_id, + rtContext_t context, uint32_t graph_id, uint32_t thread_num = 16); static ge::Status CopyVarData(const ComputeGraphPtr &compute_graph, uint64_t session_id, uint32_t device_id); diff --git a/ge/graph/manager/util/hcom_util.cc b/ge/graph/manager/util/hcom_util.cc index d865b40e..614f8527 100644 --- a/ge/graph/manager/util/hcom_util.cc +++ b/ge/graph/manager/util/hcom_util.cc @@ -108,19 +108,19 @@ Status HcomOmeUtil::GetHcomCount(const ge::ConstOpDescPtr &op_desc, HcclDataType } GE_IF_BOOL_EXEC( - op_desc->GetType() == HCOMREDUCESCATTER, int32_t rank_size = 0; - GE_CHK_BOOL_RET_STATUS(ge::AttrUtils::GetInt(op_desc, HCOM_ATTR_RANK_SIZE, rank_size), PARAM_INVALID, - "get HCOM_ATTR_RANK_SIZE failed"); - GE_CHK_BOOL_RET_STATUS(rank_size != 0, PARAM_INVALID, "rank size is zero"); - int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); GE_CHK_STATUS_RET( - ge::CheckInt64Uint32MulOverflow(shape_size, size), "Product of shape size and size beyond INT64_MAX"); - block_size = (shape_size * size) / rank_size; - GE_CHK_STATUS_RET(ge::CheckInt64AddOverflow(total_size, block_size), "Total size is beyond the INT64_MAX"); - total_size = total_size + block_size; continue;); + op_desc->GetType() == HCOMREDUCESCATTER, int32_t rank_size = 0; + GE_CHK_BOOL_RET_STATUS(ge::AttrUtils::GetInt(op_desc, HCOM_ATTR_RANK_SIZE, rank_size), PARAM_INVALID, + "get HCOM_ATTR_RANK_SIZE failed"); + GE_CHK_BOOL_RET_STATUS(rank_size != 0, PARAM_INVALID, "rank size is zero"); + int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); GE_CHK_STATUS_RET( + ge::CheckInt64Uint32MulOverflow(shape_size, size), "Product of shape size and size beyond 
INT64_MAX"); + block_size = (shape_size * size) / rank_size; + GE_CHK_STATUS_RET(ge::CheckInt64AddOverflow(total_size, block_size), "Total size is beyond the INT64_MAX"); + total_size = total_size + block_size; continue;); int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); - GELOGD("hcom util node %s inputsize %ld, shapesize %ld, datasize %d.", - op_desc->GetName().c_str(), input_size, shape_size, size); + GELOGD("hcom util node %s inputsize %ld, shapesize %ld, datasize %d.", op_desc->GetName().c_str(), input_size, + shape_size, size); GE_CHK_STATUS_RET(ge::CheckInt64Int32MulOverflow(shape_size, size), "Product of shape size and size beyond INT64_MAX"); GE_IF_BOOL_EXEC(is_allgather, block_size = shape_size * size;); diff --git a/ge/graph/manager/util/hcom_util.h b/ge/graph/manager/util/hcom_util.h index f80ced35..064058f8 100644 --- a/ge/graph/manager/util/hcom_util.h +++ b/ge/graph/manager/util/hcom_util.h @@ -35,27 +35,23 @@ using std::string; using std::vector; static std::map kConstOpHcclDataType = { - {ge::DT_FLOAT, HCCL_DATA_TYPE_FP32}, - {ge::DT_FLOAT16, HCCL_DATA_TYPE_FP16}, - {ge::DT_INT8, HCCL_DATA_TYPE_INT8}, - {ge::DT_INT32, HCCL_DATA_TYPE_INT32}, - {ge::DT_INT64, HCCL_DATA_TYPE_INT64}, - {ge::DT_UINT64, HCCL_DATA_TYPE_UINT64}, + {ge::DT_FLOAT, HCCL_DATA_TYPE_FP32}, + {ge::DT_FLOAT16, HCCL_DATA_TYPE_FP16}, + {ge::DT_INT8, HCCL_DATA_TYPE_INT8}, + {ge::DT_INT32, HCCL_DATA_TYPE_INT32}, }; static std::map kConstOpHcclDataTypeSize = { - {HCCL_DATA_TYPE_FP32, sizeof(float)}, - {HCCL_DATA_TYPE_FP16, sizeof(float) / 2}, - {HCCL_DATA_TYPE_INT8, sizeof(int8_t)}, - {HCCL_DATA_TYPE_INT32, sizeof(int32_t)}, - {HCCL_DATA_TYPE_INT64, sizeof(int64_t)}, - {HCCL_DATA_TYPE_UINT64, sizeof(uint64_t)}, + {HCCL_DATA_TYPE_FP32, sizeof(float)}, + {HCCL_DATA_TYPE_FP16, sizeof(float) / 2}, + {HCCL_DATA_TYPE_INT8, sizeof(int8_t)}, + {HCCL_DATA_TYPE_INT32, sizeof(int32_t)}, }; static std::map kHorovodRedOpToHcclRedOp = { - {HOROVOD_REDUCE_SUM, 
HCCL_REDUCE_SUM}, {HOROVOD_REDUCE_MIN, HCCL_REDUCE_MIN}, - {HOROVOD_REDUCE_MAX, HCCL_REDUCE_MAX}, {HOROVOD_REDUCE_PROD, HCCL_REDUCE_PROD}, - {HOROVOD_REDUCE_RESERVED, HCCL_REDUCE_RESERVED}, + {HOROVOD_REDUCE_SUM, HCCL_REDUCE_SUM}, {HOROVOD_REDUCE_MIN, HCCL_REDUCE_MIN}, + {HOROVOD_REDUCE_MAX, HCCL_REDUCE_MAX}, {HOROVOD_REDUCE_PROD, HCCL_REDUCE_PROD}, + {HOROVOD_REDUCE_RESERVED, HCCL_REDUCE_RESERVED}, }; class HcomOmeUtil { @@ -154,8 +150,7 @@ class HcomOmeUtil { /// @return SUCCESS /// @return FAIL /// - static Status GetHcomCount(const ge::ConstOpDescPtr &op_desc, HcclDataType data_type, bool is_allgather, - int &count); + static Status GetHcomCount(const ge::ConstOpDescPtr &op_desc, HcclDataType data_type, bool is_allgather, int &count); private: /// diff --git a/ge/graph/manager/util/rt_context_util.cc b/ge/graph/manager/util/rt_context_util.cc index a8aad574..75b25740 100644 --- a/ge/graph/manager/util/rt_context_util.cc +++ b/ge/graph/manager/util/rt_context_util.cc @@ -20,7 +20,7 @@ namespace ge { namespace { - const int64_t kDefaultGraphId = -1; +const int64_t kDefaultGraphId = -1; } void RtContextUtil::AddRtContext(uint64_t session_id, rtContext_t context) { diff --git a/ge/graph/manager/util/rt_context_util.h b/ge/graph/manager/util/rt_context_util.h index 5fbd52be..50f0fbed 100644 --- a/ge/graph/manager/util/rt_context_util.h +++ b/ge/graph/manager/util/rt_context_util.h @@ -52,4 +52,3 @@ class RtContextUtil { } // namespace ge #endif // GE_GRAPH_MANAGER_UTIL_RT_CONTEXT_UTIL_H_ - diff --git a/ge/graph/manager/util/variable_accelerate_ctrl.cc b/ge/graph/manager/util/variable_accelerate_ctrl.cc index 22f9169c..b62be02c 100644 --- a/ge/graph/manager/util/variable_accelerate_ctrl.cc +++ b/ge/graph/manager/util/variable_accelerate_ctrl.cc @@ -25,10 +25,9 @@ namespace { inline bool IsVariable(const std::string &node_type) { return node_type == VARIABLE || node_type == VARIABLEV2 || node_type == VARHANDLEOP; } -} +} // namespace bool 
VarAccelerateCtrl::IsVarPermitToChangeFormats(const std::string &var_name) { - std::lock_guard lock(mutex_); auto iter = var_names_to_change_times_.find(var_name); if (iter == var_names_to_change_times_.end()) { return true; @@ -37,13 +36,13 @@ bool VarAccelerateCtrl::IsVarPermitToChangeFormats(const std::string &var_name) } void VarAccelerateCtrl::SetVarChanged(const std::string &var_name) { - std::lock_guard lock(mutex_); auto times = ++var_names_to_change_times_[var_name]; for (auto &graph_id_to_var_names : graph_ids_to_var_names_) { if (graph_id_to_var_names.second.count(var_name) > 0) { - GELOGI("The format of var %s has been changed, total changed times %d, " - "the graph %u contains which should be re-build before next run", - var_name.c_str(), times, graph_id_to_var_names.first); + GELOGI( + "The format of var %s has been changed, total changed times %d, " + "the graph %u contains which should be re-build before next run", + var_name.c_str(), times, graph_id_to_var_names.first); /// The graph being compiled right now is also added to the rebuild-list /// and can be deleted by calling `SetGraphBuildEnd` at the end of compilation. 
graph_ids_need_rebuild_.insert(graph_id_to_var_names.first); @@ -52,7 +51,6 @@ void VarAccelerateCtrl::SetVarChanged(const std::string &var_name) { } void VarAccelerateCtrl::AddGraph(uint32_t graph_id, const ComputeGraphPtr &compute_graph) { - std::lock_guard lock(mutex_); if (compute_graph == nullptr) { GELOGE(PARAM_INVALID, "Failed to add graph %u, the compute graph is null", graph_id); return; @@ -69,19 +67,14 @@ void VarAccelerateCtrl::AddGraph(uint32_t graph_id, const ComputeGraphPtr &compu } void VarAccelerateCtrl::RemoveGraph(uint32_t graph_id) { - std::lock_guard lock(mutex_); GELOGD("Remove graph %u", graph_id); graph_ids_to_var_names_.erase(graph_id); graph_ids_need_rebuild_.erase(graph_id); } - bool VarAccelerateCtrl::IsGraphNeedRebuild(uint32_t graph_id) const { - std::lock_guard lock(mutex_); return graph_ids_need_rebuild_.count(graph_id) > 0; } - void VarAccelerateCtrl::SetGraphBuildEnd(uint32_t graph_id) { - std::lock_guard lock(mutex_); graph_ids_need_rebuild_.erase(graph_id); GELOGD("The graph %u has built end, remove it from the rebuild-set", graph_id); } diff --git a/ge/graph/manager/util/variable_accelerate_ctrl.h b/ge/graph/manager/util/variable_accelerate_ctrl.h index a7ff04c2..d8504c02 100644 --- a/ge/graph/manager/util/variable_accelerate_ctrl.h +++ b/ge/graph/manager/util/variable_accelerate_ctrl.h @@ -20,7 +20,6 @@ #include #include #include -#include #include "graph/compute_graph.h" #include "graph/node.h" @@ -60,8 +59,6 @@ class VarAccelerateCtrl { /// std::map var_names_to_change_times_; static const int kMaxVarChangeTimes_ = 1; - - mutable std::mutex mutex_; }; } // namespace ge diff --git a/ge/graph/optimize/common/params.h b/ge/graph/optimize/common/params.h index c174a4d1..ee2a735b 100644 --- a/ge/graph/optimize/common/params.h +++ b/ge/graph/optimize/common/params.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 
2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index d0931e24..214f68eb 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,9 @@ GraphOptimize::GraphOptimize() : optimize_type_(domi::FrameworkType::TENSORFLOW), cal_config_(""), insert_op_config_(""), - core_type_("") {} + parse_out_node_(""), + core_type_(""), + graph_context_(nullptr) {} void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { if (compute_graph == nullptr) { @@ -101,12 +103,11 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std return SUCCESS; } - if (build_mode_ == BUILD_MODE_TUNING && - (build_step_ == BUILD_STEP_AFTER_UB_MATCH || build_step_ == BUILD_STEP_AFTER_MERGE)) { + if (build_mode_ == BUILD_MODE_TUNING && build_step_ == BUILD_STEP_AFTER_UB_MATCH) { for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { Status ret = (*iter)->OptimizeFusedGraphAfterGraphSlice(*(compute_graph)); if (ret != SUCCESS) { - GELOGE(ret, "[OptimizeSubGraph][OptimizeFusedGraphAfterGraphSlice]: graph optimize failed, ret:%d", ret); + GELOGE(ret, "[OptimizeSubGraph][OptimizeFusedGraphStage2]: graph optimize failed, ret:%d", ret); return ret; } } @@ -246,8 +247,8 @@ Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_gr graph_optimizer.size()); Status ret = SUCCESS; string exclude_core_Type = (core_type_ == kVectorCore) ? 
kAicoreEngine : kVectorEngine; - GELOGI("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s", - exclude_core_Type.c_str(), core_type_.c_str()); + GELOGI("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s", exclude_core_Type.c_str(), + core_type_.c_str()); if (graph_optimizer.size() != 0) { for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { if (iter->first == exclude_core_Type || iter->second == nullptr) { @@ -319,20 +320,20 @@ Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) { auto input_desc = op_desc->GetInputDesc(name_index.second); input_desc.SetRefPortByIndex({name_index.second}); op_desc->UpdateInputDesc(name_index.second, input_desc); - GELOGI("SetRefPort: set op[%s] input desc[%u-%s] ref.", - op_desc->GetName().c_str(), name_index.second, name_index.first.c_str()); + GELOGI("SetRefPort: set op[%s] input desc[%u-%s] ref.", op_desc->GetName().c_str(), name_index.second, + name_index.first.c_str()); auto output_desc = op_desc->GetOutputDesc(static_cast(out_index)); output_desc.SetRefPortByIndex({name_index.second}); op_desc->UpdateOutputDesc(static_cast(out_index), output_desc); - GELOGI("SetRefPort: set op[%s] output desc[%u-%s] ref.", - op_desc->GetName().c_str(), out_index, name_index.first.c_str()); + GELOGI("SetRefPort: set op[%s] output desc[%u-%s] ref.", op_desc->GetName().c_str(), out_index, + name_index.first.c_str()); is_ref = true; } } if (is_ref) { AttrUtils::SetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); - GELOGI("param [node] %s is reference node, set attribute %s to be true.", - node->GetName().c_str(), ATTR_NAME_REFERENCE.c_str()); + GELOGI("param [node] %s is reference node, set attribute %s to be true.", node->GetName().c_str(), + ATTR_NAME_REFERENCE.c_str()); } } return SUCCESS; diff --git a/ge/graph/optimize/graph_optimize.h b/ge/graph/optimize/graph_optimize.h index 78d580b7..3d2db782 100755 --- 
a/ge/graph/optimize/graph_optimize.h +++ b/ge/graph/optimize/graph_optimize.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -79,8 +79,12 @@ class GraphOptimize { domi::FrameworkType optimize_type_; std::string cal_config_; std::string insert_op_config_; + std::string parse_out_node_; std::string core_type_; + std::vector out_nodes_name_; + std::vector out_nodes_index_; bool train_graph_flag_ = false; + GraphContextPtr graph_context_; bool local_fmk_op_flag_ = false; // record the summary names for filter sumarry result. std::map> summary_output_indexes_ = {}; diff --git a/ge/graph/optimize/mem_rw_conflict_optimize.cc b/ge/graph/optimize/mem_rw_conflict_optimize.cc index 98024de2..3c3419ae 100644 --- a/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -1,11 +1,12 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -86,8 +87,8 @@ OutputRWType GetSingleNodeOutputRWTypeByIndex(const Node &node, uint32_t index) } // check if it is ref switch std::string type; - if ((node.GetType() == FRAMEWORK_OP_TYPE) && AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type) - && (type == REFSWITCH)) { + if ((node.GetType() == FRAMEWORK_OP_TYPE) && AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type) && + (type == REFSWITCH)) { return OutputRWType::kWriteable; } @@ -217,8 +218,8 @@ InputRWType GetSingleNodeInputRWTypeByIndex(const Node &node, uint32_t index) { if (op_desc == nullptr) { return InputRWType::kInvalidRWType; } - if (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HCOMALLGATHER - || op_desc->GetType() == HCOMREDUCESCATTER) { + if (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HCOMALLGATHER || + op_desc->GetType() == HCOMREDUCESCATTER) { return InputRWType::kScopeWriteable; } // check if it is ref input @@ -230,8 +231,8 @@ InputRWType GetSingleNodeInputRWTypeByIndex(const Node &node, uint32_t index) { } // check if it is ref switch std::string type; - if ((node.GetType() == FRAMEWORK_OP_TYPE) && (AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type)) - && (type == REFSWITCH) && (index == 0)) { + if ((node.GetType() == FRAMEWORK_OP_TYPE) && (AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type)) && + (type == REFSWITCH) && (index == 0)) { return InputRWType::kWriteable; } @@ -490,8 +491,8 @@ Status SplitIdentityAlongAnchor(const OutDataAnchorPtr &out_data_anchor, const I if (input_rw_type == InputRWType::kScopeWriteable || input_rw_type == InputRWType::kWriteable) { auto new_identity = CreateIdentityAfterSrcNode(*pre_node, pre_out_data_anchor->GetIdx()); GE_CHECK_NOTNULL(new_identity); - if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS - || GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != 
SUCCESS) { + if (GraphUtils::AddEdge(pre_out_data_anchor, new_identity->GetInDataAnchor(kIdentityAnchorIndex)) != SUCCESS || + GraphUtils::AddEdge(new_identity->GetOutDataAnchor(kIdentityAnchorIndex), peer_in_data_anchor) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to insert Identity between node %s and %s", pre_out_data_anchor->GetOwnerNode()->GetName().c_str(), peer_in_data_anchor->GetOwnerNode()->GetName().c_str()); @@ -509,8 +510,8 @@ Status SplitIdentityAlongAnchor(const OutDataAnchorPtr &out_data_anchor, const I peer_in_data_anchor->GetOwnerNode()->GetName().c_str()); } else { // copy control edge to pre and peer node - if (GraphUtils::CopyInCtrlEdges(old_identity, peer_in_data_node) != SUCCESS - || GraphUtils::CopyOutCtrlEdges(old_identity, pre_node) != SUCCESS) { + if (GraphUtils::CopyInCtrlEdges(old_identity, peer_in_data_node) != SUCCESS || + GraphUtils::CopyOutCtrlEdges(old_identity, pre_node) != SUCCESS) { GELOGW("Fail to copy control edge from node %s.", old_identity->GetName().c_str()); return FAILED; } @@ -567,7 +568,7 @@ Status SplitIdentity(const NodePtr &node) { Status InsertIdentityAsNeeded(const NodePtr &node) { auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - if (node->GetOutDataNodesSize() == 0) { + if (node->GetOutDataNodesSize() == 0 || node->GetInDataNodes().empty()) { return SUCCESS; } for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { diff --git a/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc b/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc index 34c3a0de..be025730 100644 --- a/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc +++ b/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/optimize/optimizer/allreduce_fusion_pass.h b/ge/graph/optimize/optimizer/allreduce_fusion_pass.h index 8b2168d9..2701ba16 100644 --- a/ge/graph/optimize/optimizer/allreduce_fusion_pass.h +++ b/ge/graph/optimize/optimizer/allreduce_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,11 +45,10 @@ class AllReducePass : public GraphPass { vector &peerInControlFromOutControlVec, ge::NodePtr &srcNodePtr); Status GetPeerOutDataToInData(std::unordered_set &anchorSet, - std::vector &peerOutDataAnchorVec, - ge::NodePtr &srcNodePtr); + std::vector &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr); Status GetPeerInAnchorToOutData(std::unordered_set &anchorSet, std::vector> &fusionOpPeerInDataAnchor, - std::vector>&fusionOpPeerInControlFromOutData, + std::vector> &fusionOpPeerInControlFromOutData, ge::NodePtr &srcNodePtr); }; } // namespace ge diff --git a/ge/graph/optimize/summary_optimize.cc b/ge/graph/optimize/summary_optimize.cc index 077ab1b0..a8325da3 100644 --- a/ge/graph/optimize/summary_optimize.cc +++ b/ge/graph/optimize/summary_optimize.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -80,8 +80,8 @@ Status GraphOptimize::HandleSummaryOp(ComputeGraphPtr &compute_graph) { del_nodes.emplace_back(node_ptr); } } - GE_IF_BOOL_EXEC(!summary_output_indexes.empty(), summary_output_indexes_.insert({compute_graph->GetGraphID(), - summary_output_indexes})); + GE_IF_BOOL_EXEC(!summary_output_indexes.empty(), + summary_output_indexes_.insert({compute_graph->GetGraphID(), summary_output_indexes})); // add output nodes for summary std::vector> out_nodes_info; diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 87fac994..d1b00f12 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -282,7 +282,7 @@ static std::string ToString(const std::vector &clusters) { ss << (*iter)->Id() << ")."; return ss.str(); } -} +} // namespace void DynamicShapePartitioner::MergeClustersUnknownShape() { // Merge unknown shape clusters @@ -354,34 +354,19 @@ Status DynamicShapePartitioner::MergeClusters() { return SUCCESS; } -bool DynamicShapePartitioner::JudgeUnknowShapeWithAttr(const OpDescPtr &opdesc) { - bool is_forced_unknown = false; - if (AttrUtils::GetBool(opdesc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_forced_unknown) && is_forced_unknown) { - GELOGD("Collect node %s as unknown as it was marked unknown forcibly.", opdesc->GetName().c_str()); - return true; - } - - bool forced_unknown = false; - if (AttrUtils::GetBool(opdesc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, forced_unknown) && forced_unknown) { - GELOGD("Collect node %s as unknown as it was marked force unknown node forcibly.", opdesc->GetName().c_str()); - return true; - } - return false; -} - Status DynamicShapePartitioner::CollectSpreadUnknownShapeNodes(NodePtr node) { if (unknown_shape_nodes_.count(node) > 0) { return SUCCESS; } auto opdesc = node->GetOpDesc(); - REQUIRE_NOT_NULL(opdesc, "Opdesc is nullptr."); // One can set 'ATTR_NAME_IS_UNKNOWN_SHAPE=true' on node so as to forcing the node flow into the unknown 
subgraph, // ignore the actual shape. - if (JudgeUnknowShapeWithAttr(opdesc)) { + bool is_forced_unknown = false; + if (AttrUtils::GetBool(opdesc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_forced_unknown) && is_forced_unknown) { + GELOGD("Collect node %s as unknown as it was marked unknown forcibly.", node->GetName().c_str()); unknown_shape_nodes_.insert(node); return SUCCESS; } - size_t anchor_index = 0; bool is_unknown = false; for (auto &out_tensor : opdesc->GetAllOutputsDesc()) { @@ -690,10 +675,10 @@ Status Cluster::BuildFrame() { auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()]; if (src_cluster->id_ != id_) { REQUIRE_GRAPH_SUCCESS( - GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor), - "Failed remove edge from node %s index %d to node %s index %d.", - peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(peer_out_control_anchor), - in_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(in_control_anchor)); + GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor), + "Failed remove edge from node %s index %d to node %s index %d.", + peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(peer_out_control_anchor), + in_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(in_control_anchor)); control_inputs_.insert(src_cluster); src_cluster->control_outputs_.insert(peer_out_control_anchor); } @@ -756,9 +741,9 @@ Status Cluster::BuildPartitionFrame() { auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()]; if (src_cluster->id_ != id_) { REQUIRE_GRAPH_SUCCESS( - GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor), - "Failed remove edge from %s:%d to %s:%d.", peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), - peer_out_control_anchor->GetIdx(), node->GetName().c_str(), in_control_anchor->GetIdx()); + GraphUtils::RemoveEdge(peer_out_control_anchor, 
in_control_anchor), + "Failed remove edge from %s:%d to %s:%d.", peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), + peer_out_control_anchor->GetIdx(), node->GetName().c_str(), in_control_anchor->GetIdx()); control_inputs_.insert(src_cluster); src_cluster->control_outputs_.insert(peer_out_control_anchor); } @@ -821,7 +806,7 @@ Status Cluster::BuildPartitionSubgraph() { int64_t parent_node_index = 0; for (auto anchor : inputs_) { auto data_op = - MakeShared(subgraph_->GetName() + std::string("Data_") + std::to_string(parent_node_index), ge::DATA); + MakeShared(subgraph_->GetName() + std::string("Data_") + std::to_string(parent_node_index), ge::DATA); REQUIRE_NOT_NULL(data_op, "Failed new memory for data op."); auto input_desc = anchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(anchor->GetIdx()); REQUIRE_GRAPH_SUCCESS(data_op->AddInputDesc(input_desc), "Failed add input desc."); diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index b0477ae8..06a94833 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -145,7 +145,6 @@ class DynamicShapePartitioner { // Debug functions void DumpGraph(const std::string &suffix); std::string DebugString() const; - bool JudgeUnknowShapeWithAttr(const OpDescPtr &opdesc); // Util functions Status CollectSpreadUnknownShapeNodes(NodePtr node); Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow); diff --git a/ge/graph/partition/engine_place.cc b/ge/graph/partition/engine_place.cc index cdf29e56..80ac355f 100755 --- a/ge/graph/partition/engine_place.cc +++ b/ge/graph/partition/engine_place.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,15 +65,13 @@ Status EnginePlacer::Run() { std::string kernel_name; // Check if this node has assigned engine bool has_engine_attr = - AttrUtils::GetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, engine_name) && !engine_name.empty(); + AttrUtils::GetStr(op_desc, ATTR_NAME_ENGINE_NAME_FOR_LX, engine_name) && !engine_name.empty(); bool has_kernel_attr = - AttrUtils::GetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name) && !kernel_name.empty(); + AttrUtils::GetStr(op_desc, ATTR_NAME_KKERNEL_LIB_NAME_FOR_LX, kernel_name) && !kernel_name.empty(); bool use_exist_engine_name = !op_desc->GetOpKernelLibName().empty() || (has_kernel_attr && has_engine_attr); if (use_exist_engine_name) { if (op_desc->GetOpEngineName().empty()) { - GELOGI("Op %s set engine_name %s engine_name %s from attrs", - op_desc->GetName().c_str(), - engine_name.c_str(), + GELOGI("Op %s set engine_name %s engine_name %s from attrs", op_desc->GetName().c_str(), engine_name.c_str(), kernel_name.c_str()); op_desc->SetOpEngineName(engine_name); op_desc->SetOpKernelLibName(kernel_name); @@ -85,8 +83,8 @@ Status EnginePlacer::Run() { // If can't get op's engine name, keep check support finish and return failed 
if (engine_name.empty()) { is_check_support_success = false; - ErrorManager::GetInstance().ATCReportErrMessage( - "E13003", {"opname", "optype"}, {op_desc->GetName(), op_desc->GetType()}); + ErrorManager::GetInstance().ATCReportErrMessage("E13003", {"opname", "optype"}, + {op_desc->GetName(), op_desc->GetType()}); GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Can not find engine of op type %s", node_ptr->GetOpDesc()->GetType().c_str()); continue; @@ -121,4 +119,3 @@ Status EnginePlacer::AssignEngineAndLog(ge::ConstNodePtr node_ptr, const std::st return SUCCESS; } } // namespace ge - diff --git a/ge/graph/partition/engine_place.h b/ge/graph/partition/engine_place.h index 5dc3e6a0..1672df0d 100755 --- a/ge/graph/partition/engine_place.h +++ b/ge/graph/partition/engine_place.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/partition/graph_partition.cc b/ge/graph/partition/graph_partition.cc index c8e8cda3..b280074e 100755 --- a/ge/graph/partition/graph_partition.cc +++ b/ge/graph/partition/graph_partition.cc @@ -15,14 +15,11 @@ */ #include "graph/partition/graph_partition.h" - #include #include #include #include #include - -#include "analyzer/analyzer.h" #include "common/ge/ge_util.h" #include "common/op/ge_op_utils.h" #include "framework/common/types.h" @@ -152,22 +149,18 @@ Status ge::GraphPartitioner::RemoveNodeAndEdgeBetweenEndPld(ge::ComputeGraphPtr Status ge::GraphPartitioner::MergeAfterSubGraphOptimization(ge::ComputeGraphPtr &output_merged_compute_graph, const ge::ComputeGraphPtr &original_compute_graph) { - Status real_ret = SUCCESS; auto ret = MergeSubGraph(output_merged_compute_graph, original_compute_graph); if (ret != SUCCESS) { - // even though failed, ensure all op do finish check support - real_ret = FAILED; GELOGE(ret, "Graph merging Failed"); + return ret; } - GE_CHECK_NOTNULL(original_compute_graph); // partition sub graph for (const auto &sub_graph : original_compute_graph->GetAllSubgraphs()) { ComputeGraphPtr merged_sub_graph = nullptr; ret = MergeSubGraph(merged_sub_graph, sub_graph); if (ret != SUCCESS) { - real_ret = FAILED; GELOGE(ret, "Sub graph merging Failed"); - continue; + return ret; } // add sub graph output_merged_compute_graph->SetName(original_compute_graph->GetName()); @@ -183,25 +176,24 @@ Status ge::GraphPartitioner::MergeAfterSubGraphOptimization(ge::ComputeGraphPtr GELOGE(FAILED, "Find graph info failed, graph name is %s", original_graph->GetName().c_str()); return FAILED;) auto graph_info = graph_2_graph_partition_info_[original_graph]; - GE_IF_BOOL_EXEC(graph_info.corresponding_node_in_partitions_.count(parent_node) == 0, + GE_IF_BOOL_EXEC( + graph_info.corresponding_node_in_partitions_.find(parent_node) == + graph_info.corresponding_node_in_partitions_.end(), GELOGE(FAILED, "Find corresponding node failed, parent node 
name is %s", parent_node->GetName().c_str()); return FAILED;) auto corresponding_node = graph_info.corresponding_node_in_partitions_[parent_node]; - GE_IF_BOOL_EXEC(corresponding_node == nullptr, GELOGE(FAILED, "Get null node, node name is %s", - parent_node->GetName().c_str()); return FAILED;); + GE_IF_BOOL_EXEC(corresponding_node == nullptr, + GELOGE(FAILED, "Get null node, node name is %s", parent_node->GetName().c_str()); + return FAILED;); merged_sub_graph->SetParentNode(corresponding_node); auto subgraph_parent_graph = corresponding_node->GetOwnerComputeGraph(); merged_sub_graph->SetParentGraph(subgraph_parent_graph); ret = output_merged_compute_graph->AddSubgraph(sub_graph->GetName(), merged_sub_graph); GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, return ret;) } - ClearAllPartitionData(); - if (real_ret != SUCCESS) { - auto root_graph = ge::GraphUtils::FindRootGraph(original_compute_graph); - GE_CHECK_NOTNULL(root_graph); - (void)Analyzer::GetInstance()->SaveAnalyzerDataToFile(root_graph->GetSessionID(), root_graph->GetGraphID()); - } - return real_ret; + graph_2_graph_partition_info_.clear(); + graph_2_subgraph_list_.clear(); + return SUCCESS; } Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_compute_graph, @@ -272,10 +264,10 @@ Status ge::GraphPartitioner::UpdatePldOpDesc(const NodePtr &dst_node, int input_ } const auto &input_desc = dst_node->GetOpDesc()->GetInputDesc(static_cast(input_index)); GE_IF_BOOL_EXEC(pld_op_desc->AddOutputDesc(input_desc) != GRAPH_SUCCESS, GELOGE(FAILED, "AddOutputDesc failed"); - return FAILED;) + return FAILED;) if (pld_op_desc->MutableOutputDesc(0) != nullptr) { ge::TensorUtils::SetRealDimCnt(*(pld_op_desc->MutableOutputDesc(0).get()), - static_cast(input_desc.GetShape().GetDims().size())); + static_cast(input_desc.GetShape().GetDims().size())); } else { GELOGE(GE_GRAPH_ADD_PLC_END_FAILED, "[GraphPartitioner]: pld_op_desc is null."); return FAILED; @@ -290,10 +282,10 @@ Status 
ge::GraphPartitioner::UpdateEndOpDesc(const NodePtr &src_node, int output } const auto &output_desc = src_node->GetOpDesc()->GetOutputDesc(static_cast(output_index)); GE_IF_BOOL_EXEC(end_op_desc->AddInputDesc(output_desc) != GRAPH_SUCCESS, GELOGE(FAILED, "AddInputDesc failed"); - return FAILED;) + return FAILED;) if (end_op_desc->MutableInputDesc(0) != nullptr) { ge::TensorUtils::SetRealDimCnt(*(end_op_desc->MutableInputDesc(0).get()), - static_cast(output_desc.GetShape().GetDims().size())); + static_cast(output_desc.GetShape().GetDims().size())); } else { GELOGE(GE_GRAPH_ADD_PLC_END_FAILED, "[GraphPartitioner]: pld_op_desc is null."); return FAILED; @@ -322,12 +314,12 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr GELOGW("SetInt peerIndex failed");) GE_IF_BOOL_EXEC(!AttrUtils::SetStr(end_op_desc, "parentOpType", dst_node->GetType()), GELOGW("SetStr parentOpType failed");) - GE_IF_BOOL_EXEC(!end_op_desc->SetExtAttr("parentNode", dst_node), - GELOGW("SetEndExtAttr parentNode failed");) + GE_IF_BOOL_EXEC(!end_op_desc->SetExtAttr("parentNode", dst_node), GELOGW("SetEndExtAttr parentNode failed");) OpDescPtr dst_node_op_desc = dst_node->GetOpDesc(); GE_CHECK_NOTNULL(dst_node_op_desc); - GE_IF_BOOL_EXEC(!AttrUtils::SetStr(end_op_desc, ATTR_NAME_END_REAR_NODE_ENGINE_NAME, - dst_node_op_desc->GetOpEngineName()), GELOGW("SetStr rearNodeEngineName failed");) + GE_IF_BOOL_EXEC( + !AttrUtils::SetStr(end_op_desc, ATTR_NAME_END_REAR_NODE_ENGINE_NAME, dst_node_op_desc->GetOpEngineName()), + GELOGW("SetStr rearNodeEngineName failed");) // replace input_desc of end with owner node's desc int output_index = ge::AnchorUtils::GetIdx(out_anchor); bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning); @@ -380,13 +372,13 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr GELOGW("SetStr parentId failed");) GE_IF_BOOL_EXEC(!AttrUtils::SetInt(pld_op_desc, "anchorIndex", 
AnchorUtils::GetIdx(out_anchor)), GELOGW("SetInt anchorIndex failed");) - GE_IF_BOOL_EXEC(!pld_op_desc->SetExtAttr("parentNode", src_node), - GELOGW("SetPldExtAttr parentNode failed");) + GE_IF_BOOL_EXEC(!pld_op_desc->SetExtAttr("parentNode", src_node), GELOGW("SetPldExtAttr parentNode failed");) OpDescPtr src_node_op_desc = src_node->GetOpDesc(); GE_CHECK_NOTNULL(src_node_op_desc); - GE_IF_BOOL_EXEC(!AttrUtils::SetStr(pld_op_desc, ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME, - src_node_op_desc->GetOpEngineName()), GELOGW("SetStr frontNodeEngineName failed");) + GE_IF_BOOL_EXEC( + !AttrUtils::SetStr(pld_op_desc, ATTR_NAME_PLD_FRONT_NODE_ENGINE_NAME, src_node_op_desc->GetOpEngineName()), + GELOGW("SetStr frontNodeEngineName failed");) // do not care over flow graph_info_.num_of_pld_end_++; // replace output_desc of pld with input node's output desc @@ -593,32 +585,30 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vectorSetParentNode(compute_graph->GetParentNode()); - (void) AttrUtils::SetStr(*sub_graph, ATTR_NAME_PARENT_GRAPH_NAME, compute_graph->GetName()); - auto sgi = MakeShared(); - if (sgi == nullptr) { - GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MakeShared sub graph info failed."); - return FAILED; - } - // set engine name - sgi->SetEngineName(engine_name); - // set stream label - string sub_graph_stream; - if (AttrUtils::GetStr(sub_graph->GetDirectNode().at(0)->GetOpDesc(), ATTR_NAME_STREAM_LABEL, sub_graph_stream)) { - sgi->SetStreamLabel(sub_graph_stream); - } - /// for now inputFlag is the same before and after partition. 
It should - /// be changed according to the real partition - std::vector sub_graph_input(graph_info_.input_size_, true); - std::vector sub_graph_output(graph_info_.output_size_, true); - sgi->SetSubGraph(sub_graph); - sgi->SetOutputFlag(sub_graph_output); - sgi->SetInputFlag(sub_graph_input); - sgi->SetOutputContext(graph_info_.output_name_); - AddEndPldInformationToSubGraphInfo(sgi); - GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", - engine_name.c_str(), - sub_graph->GetName().c_str(), - sgi->GetStreamLabel().empty() ? "null" : sgi->GetStreamLabel().c_str()); + (void)AttrUtils::SetStr(*sub_graph, ATTR_NAME_PARENT_GRAPH_NAME, compute_graph->GetName()); + auto sgi = MakeShared(); + if (sgi == nullptr) { + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MakeShared sub graph info failed."); + return FAILED; + } + // set engine name + sgi->SetEngineName(engine_name); + // set stream label + string sub_graph_stream; + if (AttrUtils::GetStr(sub_graph->GetDirectNode().at(0)->GetOpDesc(), ATTR_NAME_STREAM_LABEL, sub_graph_stream)) { + sgi->SetStreamLabel(sub_graph_stream); + } + /// for now inputFlag is the same before and after partition. It should + /// be changed according to the real partition + std::vector sub_graph_input(graph_info_.input_size_, true); + std::vector sub_graph_output(graph_info_.output_size_, true); + sgi->SetSubGraph(sub_graph); + sgi->SetOutputFlag(sub_graph_output); + sgi->SetInputFlag(sub_graph_input); + sgi->SetOutputContext(graph_info_.output_name_); + AddEndPldInformationToSubGraphInfo(sgi); + GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", engine_name.c_str(), + sub_graph->GetName().c_str(), sgi->GetStreamLabel().empty() ? 
"null" : sgi->GetStreamLabel().c_str()); if (engine_name != input_subgraph_name) { // do not add Data subGraph into SubGraphInfo output_subgraphs.push_back(sgi); } else { @@ -844,29 +834,22 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo } Status ge::GraphPartitioner::Partition(ge::ComputeGraphPtr compute_graph, Mode mode) { - ClearAllPartitionData(); - auto real_ret = SUCCESS; + graph_2_graph_partition_info_.clear(); + graph_2_subgraph_list_.clear(); auto ret = PartitionSubGraph(compute_graph, mode); if (ret != SUCCESS) { GELOGE(ret, "Sub graph partition Failed"); - real_ret = ret; + return ret; } - GE_CHECK_NOTNULL(compute_graph); // partition sub graph for (const auto &sub_graph : compute_graph->GetAllSubgraphs()) { ret = PartitionSubGraph(sub_graph, mode); if (ret != SUCCESS) { GELOGE(ret, "Sub graph partition Failed"); - real_ret = ret; + return ret; } } - if (real_ret != SUCCESS) { - auto root_graph = ge::GraphUtils::FindRootGraph(compute_graph); - GE_CHECK_NOTNULL(root_graph); - (void)Analyzer::GetInstance()->SaveAnalyzerDataToFile(root_graph->GetSessionID(), - root_graph->GetGraphID()); - } - return real_ret; + return SUCCESS; } Status ge::GraphPartitioner::PartitionSubGraph(ge::ComputeGraphPtr compute_graph, Mode mode) { @@ -1054,12 +1037,4 @@ void ge::GraphPartitioner::AddEndPldInformationToSubGraphInfo(ge::SubGraphInfoPt } const Graph2SubGraphInfoList &ge::GraphPartitioner::GetSubGraphMap() { return graph_2_subgraph_list_; } - -void ge::GraphPartitioner::ClearAllPartitionData() { - graph_2_graph_partition_info_.clear(); - graph_2_subgraph_list_.clear(); - graph_2_input_subgraph_.clear(); - GELOGD("Clear all partition data success."); - return; -} } // namespace ge diff --git a/ge/graph/partition/graph_partition.h b/ge/graph/partition/graph_partition.h index 703a1570..a363bd9d 100644 --- a/ge/graph/partition/graph_partition.h +++ b/ge/graph/partition/graph_partition.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei 
Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -131,7 +131,7 @@ class GraphPartitioner { Status UpdatePldOpDesc(const NodePtr &dst_node, int input_index, OpDescPtr &end_op_desc); // Clear partition data - void ClearAllPartitionData(); + void ClearAllPartitionData(Mode mode); void SetMergedGraphId(ComputeGraphPtr &output_merged_compute_graph); struct GraphPartitionInfo { diff --git a/ge/graph/partition/stage_partition.cc b/ge/graph/partition/stage_partition.cc deleted file mode 100644 index 93a06afe..00000000 --- a/ge/graph/partition/stage_partition.cc +++ /dev/null @@ -1,377 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/partition/stage_partition.h" - -#include -#include "framework/common/debug/ge_log.h" -#include "graph/debug/ge_attr_define.h" -#include "graph/utils/graph_utils.h" -#include "graph/utils/op_desc_utils.h" -#include "common/util.h" -#include "common/types.h" - -namespace ge { -Status StagePartitioner::Partition() { - GE_CHECK_NOTNULL(root_graph_); - if (root_graph_->GetParentGraph() != nullptr) { - return SUCCESS; - } - - for (const auto &node : root_graph_->GetDirectNode()) { - auto op_desc = node->GetOpDesc(); - uint32_t level = 0; - if (!AttrUtils::GetInt(op_desc, ATTR_STAGE_LEVEL, level)) { - continue; - } - stage_nodes_[level].insert(node); - } - if (stage_nodes_.empty()) { - GELOGI("Graph %s does not set stage_level, it is not_changed.", root_graph_->GetName().c_str()); - return SUCCESS; - } - - if (SplitStageLevel() != SUCCESS) { - GELOGE(FAILED, "Split graph-stage for graph %s failed.", root_graph_->GetName().c_str()); - return FAILED; - } - - if (StagePartition() != SUCCESS) { - GELOGE(FAILED, "Stage partition for graph %s failed.", root_graph_->GetName().c_str()); - return FAILED; - } - - if (root_graph_->TopologicalSorting() != GRAPH_SUCCESS) { - GELOGE(FAILED, "Topological sort for graph %s after stage partition failed, " - "maybe stage_level was not set correctly.", root_graph_->GetName().c_str()); - return FAILED; - } - return SUCCESS; -} - -Status StagePartitioner::SplitStageLevel() { - std::stack nodes; - std::unordered_set visited_stage_nodes; - for (auto &stage : stage_nodes_) { - uint32_t cur_stage_level = stage.first; - const auto &cur_stage_nodes = stage.second; - for (const auto &marked_node : cur_stage_nodes) { - nodes.push(marked_node); - } - visited_stage_nodes.clear(); - while (!nodes.empty()) { - auto node = nodes.top(); - nodes.pop(); - GE_CHECK_NOTNULL(node->GetOpDesc()); - if (node->GetOpDesc()->HasAttr(ATTR_STAGE_LEVEL) && (cur_stage_nodes.count(node) == 0)) { - continue; - } - for (const auto &in_node : 
node->GetInAllNodes()) { - if (visited_stage_nodes.count(in_node) != 0) { - continue; - } - nodes.push(in_node); - } - if (!AttrUtils::SetInt(node->GetOpDesc(), ATTR_STAGE_LEVEL, cur_stage_level)) { - GELOGE(INTERNAL_ERROR, "Set attr ATTR_STAGE_LEVEL on node %s failed.", node->GetName().c_str()); - return INTERNAL_ERROR; - } - GELOGD("Mark stage_level node %s, stage_level=%u", node->GetName().c_str(), cur_stage_level); - visited_stage_nodes.emplace(node); - } - for (const auto &node : visited_stage_nodes) { - stage.second.insert(node); - } - } - - return SUCCESS; -} - -Status StagePartitioner::StagePartition() { - for (const auto &stage : stage_nodes_) { - StageInfo stage_info(stage.first); - FindStageIO(stage.second, stage_info); - - std::string subgraph_name = "Subgraph_Level_" + std::to_string(stage.first); - NodePtr graph_node = BuildSubgraphNode(subgraph_name, stage_info); - if (graph_node == nullptr) { - GELOGE(FAILED, "Build PartitionedCall node for stage %u failed.", stage.first); - return FAILED; - } - - ComputeGraphPtr subgraph = BuildStageGraph(graph_node, stage_info); - if (subgraph == nullptr) { - GELOGE(FAILED, "Build subgraph for stage %u failed.", stage.first); - return FAILED; - } - if (root_graph_->AddSubgraph(subgraph) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Add subgraph of stage %u failed.", stage.first); - return FAILED; - } - - if ((RelinkDataEdges(graph_node, stage_info) != SUCCESS) || - (RelinkCtrlEdges(graph_node, stage_info) != SUCCESS)) { - GELOGE(FAILED, "Relink edges for stage %u failed.", stage.first); - return FAILED; - } - - for (const auto &stage_node : stage.second) { - if (GraphUtils::RemoveNodeWithoutRelink(root_graph_, stage_node) != GRAPH_SUCCESS) { - GELOGW("Remove node %s failed.", stage_node->GetName().c_str()); - } - } - } - - return SUCCESS; -} - -void StagePartitioner::FindStageIO(const std::unordered_set &stage_nodes, StageInfo &stage_info) { - for (const auto &node : stage_nodes) { - // stage nodes - 
stage_info.stage_nodes.emplace(node); - // in data nodes - for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { - OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - if (peer_out_anchor == nullptr) { - continue; - } - if (stage_nodes.count(peer_out_anchor->GetOwnerNode()) == 0) { - stage_info.data_inputs.emplace_back(std::make_pair(peer_out_anchor, in_data_anchor)); - } else { - stage_info.inner_data_edges.emplace_back(std::make_pair(peer_out_anchor, in_data_anchor)); - } - } - // out data nodes - std::list peer_data_anchors; - for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) { - peer_data_anchors.clear(); - for (const auto &peer_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { - if (stage_nodes.count(peer_in_anchor->GetOwnerNode()) == 0) { - peer_data_anchors.emplace_back(peer_in_anchor); - } - } - if (!peer_data_anchors.empty()) { - stage_info.data_outputs.emplace_back(std::make_pair(out_data_anchor, peer_data_anchors)); - } - } - // in ctrl nodes - for (const auto &in_ctrl_node : node->GetInControlNodes()) { - if (stage_nodes.count(in_ctrl_node) == 0) { - stage_info.ctrl_inputs.emplace_back(in_ctrl_node->GetOutControlAnchor(), node->GetInControlAnchor()); - } else { - stage_info.inner_ctrl_edges.emplace_back(std::make_pair(in_ctrl_node->GetOutControlAnchor(), - node->GetInControlAnchor())); - } - } - // out ctrl nodes - for (const auto &out_ctrl_node : node->GetOutControlNodes()) { - if (stage_nodes.count(out_ctrl_node) == 0) { - stage_info.ctrl_outputs.emplace_back(node->GetOutControlAnchor(), out_ctrl_node->GetInControlAnchor()); - } - } - } -} - -NodePtr StagePartitioner::BuildSubgraphNode(const std::string &graph_name, const StageInfo &stage_info) { - OpDescBuilder op_desc_builder(graph_name, PARTITIONEDCALL); - size_t input_num = stage_info.data_inputs.size(); - for (size_t i = 0; i < input_num; i++) { - auto input_desc = stage_info.data_inputs[i].second->GetOwnerNode()->GetOpDesc(); - if (input_desc == 
nullptr) { - GELOGE(PARAM_INVALID, "op_desc is null, node: %s", - stage_info.data_inputs[i].second->GetOwnerNode()->GetName().c_str()); - return nullptr; - } - op_desc_builder.AddInput("args" + std::to_string(i), - input_desc->GetInputDesc(stage_info.data_inputs[i].second->GetIdx())); - } - size_t output_num = stage_info.data_outputs.size(); - for (size_t i = 0; i < output_num; i++) { - auto output_desc = stage_info.data_outputs[i].first->GetOwnerNode()->GetOpDesc(); - if (output_desc == nullptr) { - GELOGE(PARAM_INVALID, "op_desc is null, node: %s", - stage_info.data_outputs[i].first->GetOwnerNode()->GetName().c_str()); - return nullptr; - } - op_desc_builder.AddOutput("output" + std::to_string(i), - output_desc->GetOutputDesc(stage_info.data_outputs[i].first->GetIdx())); - } - - OpDescPtr op_desc = op_desc_builder.Build(); - if (op_desc == nullptr) { - GELOGE(FAILED, "Create op_desc for subgraph node failed, name:%s.", graph_name.c_str()); - return nullptr; - } - - op_desc->AddSubgraphName("f"); - op_desc->SetSubgraphInstanceName(0, graph_name); - - NodePtr subgraph_node = root_graph_->AddNode(op_desc); - if (subgraph_node == nullptr) { - GELOGE(FAILED, "Add node %s failed.", graph_name.c_str()); - return nullptr; - } - if (subgraph_node->SetOwnerComputeGraph(root_graph_) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Set owner graph for node %s failed.", subgraph_node->GetName().c_str()); - return nullptr; - } - - return subgraph_node; -} - -ComputeGraphPtr StagePartitioner::BuildStageGraph(const NodePtr &subgraph_node, const StageInfo &stage_info) { - CompleteGraphBuilder graph_builder(subgraph_node->GetName(), false); - // Add parent node - graph_builder.SetParentNode(subgraph_node); - - // Add node - for (const auto &node : stage_info.stage_nodes) { - graph_builder.AddNode(AttrUtils::CopyOpDesc(node->GetOpDesc())); - } - - // Set Input - size_t data_input_num = stage_info.data_inputs.size(); - for (size_t i = 0; i < data_input_num; i++) { - graph_builder.SetInput(i, { 
stage_info.data_inputs[i].second->GetOwnerNode()->GetName() }, - { static_cast(stage_info.data_inputs[i].second->GetIdx()) }); - } - - // Add Outputs - size_t data_output_num = stage_info.data_outputs.size(); - for (uint32_t i = 0; i < data_output_num; i++) { - graph_builder.AddOutput(stage_info.data_outputs[i].first->GetOwnerNode()->GetName(), - stage_info.data_outputs[i].first->GetIdx()); - } - - // Add Data Edges - for (const auto &data_edge : stage_info.inner_data_edges) { - graph_builder.AddDataLink(data_edge.first->GetOwnerNode()->GetName(), data_edge.first->GetIdx(), - data_edge.second->GetOwnerNode()->GetName(), data_edge.second->GetIdx()); - } - - // Add Ctrl Edges - for (const auto &ctrl_edge : stage_info.inner_ctrl_edges) { - graph_builder.AddControlLink(ctrl_edge.first->GetOwnerNode()->GetName(), - ctrl_edge.second->GetOwnerNode()->GetName()); - } - - // Add Input-Mapping - std::map input_mapping; - for (size_t i = 0; i < data_input_num; i++) { - input_mapping[i] = i; - } - graph_builder.SetInputMapping(input_mapping); - - // Add outputMapping - std::map output_mapping; - for (size_t i = 0; i < data_output_num; i++) { - output_mapping[i] = i; - } - graph_builder.SetOutputMapping(output_mapping); - - graphStatus error_code = GRAPH_SUCCESS; - std::string error_msg; - ComputeGraphPtr subgraph = graph_builder.Build(error_code, error_msg); - if (subgraph == nullptr) { - GELOGE(error_code, "Build subgraph %s failed: %s.", subgraph_node->GetName().c_str(), error_msg.c_str()); - return nullptr; - } - if (!AttrUtils::SetInt(subgraph, ATTR_STAGE_LEVEL, stage_info.stage_level)) { - GELOGE(FAILED, "Set ATTR_STAGE_LEVEL on graph %s failed.", subgraph->GetName().c_str()); - return nullptr; - } - - return subgraph; -} - -Status StagePartitioner::RelinkDataEdges(const NodePtr &subgraph_node, const StageInfo &stage_info) { - // in data nodes - for (size_t i = 0; i < stage_info.data_inputs.size(); i++) { - if 
(stage_info.data_inputs[i].first->Unlink(stage_info.data_inputs[i].second) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Remove data edge %s:%d->%s:%d failed.", - stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(), - stage_info.data_inputs[i].first->GetIdx(), - stage_info.data_inputs[i].second->GetOwnerNode()->GetName().c_str(), - stage_info.data_inputs[i].second->GetIdx()); - return INTERNAL_ERROR; - } - if (stage_info.data_inputs[i].first->LinkTo(subgraph_node->GetInDataAnchor(i)) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add data edge %s:%d->%s:%zu failed.", - stage_info.data_inputs[i].first->GetOwnerNode()->GetName().c_str(), - stage_info.data_inputs[i].first->GetIdx(), - subgraph_node->GetName().c_str(), i); - return INTERNAL_ERROR; - } - } - // out data nodes - for (size_t i = 0; i < stage_info.data_outputs.size(); i++) { - const auto &out_data_anchor = subgraph_node->GetOutDataAnchor(i); - GE_CHECK_NOTNULL(out_data_anchor); - for (const auto &peer_in_anchor : stage_info.data_outputs[i].second) { - if (stage_info.data_outputs[i].first->Unlink(peer_in_anchor) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Remove data edge %s:%d->%s:%d failed.", - stage_info.data_outputs[i].first->GetOwnerNode()->GetName().c_str(), - stage_info.data_outputs[i].first->GetIdx(), - peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx()); - return INTERNAL_ERROR; - } - if (out_data_anchor->LinkTo(peer_in_anchor) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add data edge %s:%zu->%s:%d failed.", subgraph_node->GetName().c_str(), i, - peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx()); - return INTERNAL_ERROR; - } - } - } - - return SUCCESS; -} - -Status StagePartitioner::RelinkCtrlEdges(const NodePtr &subgraph_node, const StageInfo &stage_info) { - // in ctrl nodes - for (const auto &ctrl_input : stage_info.ctrl_inputs) { - if (ctrl_input.first->Unlink(ctrl_input.second) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, 
"Remove ctrl edge %s->%s failed.", - ctrl_input.first->GetOwnerNode()->GetName().c_str(), ctrl_input.second->GetOwnerNode()->GetName().c_str()); - return INTERNAL_ERROR; - } - if (!ctrl_input.first->IsLinkedWith(subgraph_node->GetInControlAnchor())) { - if (ctrl_input.first->LinkTo(subgraph_node->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add ctrl edge %s->%s failed.", - ctrl_input.first->GetOwnerNode()->GetName().c_str(), subgraph_node->GetName().c_str()); - return INTERNAL_ERROR; - } - } - } - // out ctrl nodes - for (const auto &ctrl_output : stage_info.ctrl_outputs) { - if (ctrl_output.first->Unlink(ctrl_output.second) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Remove ctrl edge %s->%s failed.", - ctrl_output.first->GetOwnerNode()->GetName().c_str(), - ctrl_output.second->GetOwnerNode()->GetName().c_str()); - return INTERNAL_ERROR; - } - if (!subgraph_node->GetOutControlAnchor()->IsLinkedWith(ctrl_output.second)) { - if (subgraph_node->GetOutControlAnchor()->LinkTo(ctrl_output.second) != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Add ctrl edge %s->%s failed.", - subgraph_node->GetName().c_str(), ctrl_output.second->GetOwnerNode()->GetName().c_str()); - return INTERNAL_ERROR; - } - } - } - - return SUCCESS; -} -} // namespace ge diff --git a/ge/graph/partition/stage_partition.h b/ge/graph/partition/stage_partition.h deleted file mode 100644 index bac00e6b..00000000 --- a/ge/graph/partition/stage_partition.h +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_GRAPH_PARTITION_STAGE_PARTITION_H_ -#define GE_GRAPH_PARTITION_STAGE_PARTITION_H_ - -#include -#include -#include -#include -#include "common/ge_inner_error_codes.h" -#include "graph/compute_graph.h" - -namespace ge { -struct StageInfo { - explicit StageInfo(uint32_t level) : stage_level(level) {} - uint32_t stage_level; - std::unordered_set stage_nodes; - std::vector> data_inputs; - std::vector>> data_outputs; - std::list> ctrl_inputs; - std::list> ctrl_outputs; - std::list> inner_data_edges; - std::list> inner_ctrl_edges; -}; - -class StagePartitioner { - public: - explicit StagePartitioner(ComputeGraphPtr graph) : root_graph_(std::move(graph)) {} - ~StagePartitioner() = default; - - Status Partition(); - - private: - Status SplitStageLevel(); - - Status StagePartition(); - - static void FindStageIO(const std::unordered_set &stage_nodes, StageInfo &stage_info); - - NodePtr BuildSubgraphNode(const std::string &graph_name, const StageInfo &stage_info); - - static ComputeGraphPtr BuildStageGraph(const NodePtr &subgraph_node, const StageInfo &stage_info); - - static Status RelinkDataEdges(const NodePtr &subgraph_node, const StageInfo &stage_info); - - static Status RelinkCtrlEdges(const NodePtr &subgraph_node, const StageInfo &stage_info); - - ComputeGraphPtr root_graph_; - std::map> stage_nodes_; -}; -} // namespace ge - -#endif // GE_GRAPH_PARTITION_STAGE_PARTITION_H_ diff --git a/ge/graph/passes/addn_pass.cc b/ge/graph/passes/addn_pass.cc index c8f820fc..c0592965 100644 --- a/ge/graph/passes/addn_pass.cc +++ 
b/ge/graph/passes/addn_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/addn_pass.h b/ge/graph/passes/addn_pass.h index 373d1842..dd44e3cd 100644 --- a/ge/graph/passes/addn_pass.h +++ b/ge/graph/passes/addn_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/aicpu_constant_folding_pass.cc b/ge/graph/passes/aicpu_constant_folding_pass.cc index 0331e2e6..4157b5d6 100644 --- a/ge/graph/passes/aicpu_constant_folding_pass.cc +++ b/ge/graph/passes/aicpu_constant_folding_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,10 +29,9 @@ #include "graph/utils/op_desc_utils.h" #include "graph/utils/type_utils.h" #include "init/gelib.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" namespace { -const char *const kKernelLibName = "aicpu_tf_kernel"; +const char *const kKernelLibName = "aicpu_kernel"; const char *const kNotSupported = "0"; const uint64_t kReleaseFlag = 1; const uint64_t kOpsFlag = 1; @@ -315,8 +314,8 @@ Status AicpuConstantFoldingPass::LaunchSingleOpRunTask(const NodePtr &node, cons GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized"); return GE_CLI_GE_NOT_INITIALIZED; } - auto kernel_builder = OpsKernelBuilderManager::Instance().GetOpsKernelBuilder(kKernelLibName); - if (kernel_builder == nullptr) { + OpsKernelInfoStorePtr kernel_info = instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(kKernelLibName); + if (kernel_info == nullptr) { GELOGE(FAILED, "Get op kernel info store failed"); return FAILED; } @@ -326,7 +325,7 @@ Status AicpuConstantFoldingPass::LaunchSingleOpRunTask(const NodePtr &node, cons aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; std::string task_info; - Status ret = kernel_builder->GenSingleOpRunTask(node, aicpu_task, task_info); + Status ret = kernel_info->GenSingleOpRunTask(node, aicpu_task, task_info); if (ret != SUCCESS) { return ret; } @@ -370,8 +369,8 @@ Status AicpuConstantFoldingPass::LaunchMemCopyTask(const vector &data_ GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized"); return GE_CLI_GE_NOT_INITIALIZED; } - auto kernel_builder = OpsKernelBuilderManager::Instance().GetOpsKernelBuilder(kKernelLibName); - if (kernel_builder == nullptr) { + OpsKernelInfoStorePtr kernel_info = instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(kKernelLibName); + if (kernel_info == nullptr) { GELOGE(FAILED, "Get op kernel info store failed"); return FAILED; } @@ -381,7 +380,7 @@ Status AicpuConstantFoldingPass::LaunchMemCopyTask(const vector &data_ 
aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; std::string task_info; - Status ret = kernel_builder->GenMemCopyTask(data_infos.size(), aicpu_task, task_info); + Status ret = kernel_info->GenMemCopyTask(data_infos.size(), aicpu_task, task_info); if (ret != SUCCESS) { return ret; } diff --git a/ge/graph/passes/aicpu_constant_folding_pass.h b/ge/graph/passes/aicpu_constant_folding_pass.h index d584c392..02babd8e 100755 --- a/ge/graph/passes/aicpu_constant_folding_pass.h +++ b/ge/graph/passes/aicpu_constant_folding_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/assert_pass.cc b/ge/graph/passes/assert_pass.cc index 79f75f53..725016a9 100644 --- a/ge/graph/passes/assert_pass.cc +++ b/ge/graph/passes/assert_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/assert_pass.h b/ge/graph/passes/assert_pass.h index 7d8546f2..79955348 100755 --- a/ge/graph/passes/assert_pass.h +++ b/ge/graph/passes/assert_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. 
*/ - #ifndef GE_GRAPH_PASSES_ASSERT_PASS_H_ #define GE_GRAPH_PASSES_ASSERT_PASS_H_ @@ -34,7 +33,7 @@ class AssertPass : public BaseNodePass { /// @param nodes_unused nodes to be deleted /// @return void /// - void CollectUnusedNode(const NodePtr &assert_node, std::vector& nodes_unused); + void CollectUnusedNode(const NodePtr& assert_node, std::vector& nodes_unused); /// /// remove unused nodes from graph diff --git a/ge/graph/passes/assign_pass.cc b/ge/graph/passes/assign_pass.cc index bb7a0f04..fe287f90 100644 --- a/ge/graph/passes/assign_pass.cc +++ b/ge/graph/passes/assign_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ namespace { const uint32_t kValidInputNodeOutputNum = 1; const int32_t kAssignRefInputIndex = 0; const int32_t kAssignValueInputIndex = 1; -} +} // namespace namespace ge { Status AssignPass::Run(NodePtr &node) { diff --git a/ge/graph/passes/assign_pass.h b/ge/graph/passes/assign_pass.h index 11cf1073..d7dc5138 100644 --- a/ge/graph/passes/assign_pass.h +++ b/ge/graph/passes/assign_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/atomic_addr_clean_pass.cc b/ge/graph/passes/atomic_addr_clean_pass.cc index 690dee27..2c7fb9bb 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/ge/graph/passes/atomic_addr_clean_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -50,8 +50,8 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { return SUCCESS; } - bool is_unknown_graph = graph->GetGraphUnknownFlag(); - if (is_unknown_graph) { + bool is_known_graph = graph->GetGraphUnknownFlag(); + if (is_known_graph) { GELOGD("Graph[%s] is unknown graph. It will call fe interface to compile op.", graph->GetName().c_str()); GE_CHK_STATUS_RET(CompileUnknownGraphOp(atomic_node_vec)); return SUCCESS; @@ -196,7 +196,7 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) { GELOGW("Get graph session_graph_id attr failed."); } if (!session_graph_id.empty()) { - (void) AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); + (void)AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); } string node_name = op_desc->GetName(); // Only flush subgraph name @@ -214,20 +214,18 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) { Status AtomicAddrCleanPass::LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node) { GE_IF_BOOL_EXEC(atomic_node == nullptr || atomic_clean_node == nullptr, - DOMI_LOGE("param [atomic_node][atomic_clean_node] must not be null."); return PARAM_INVALID); + DOMI_LOGE("param [atomic_node][atomic_clean_node] must not be null."); + return PARAM_INVALID); InControlAnchorPtr in_ctrl_anchor = atomic_node->GetInControlAnchor(); OutControlAnchorPtr out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor(); if (in_ctrl_anchor == nullptr || out_ctrl_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, - "Get control anchor faild, dst node: %s.", - atomic_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "Get control anchor faild, dst node: %s.", atomic_node->GetName().c_str()); return INTERNAL_ERROR; } graphStatus status = GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor); if (status != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Graph add cleanAddrNode op out ctrl edge fail, dst node: %s.", + GELOGE(INTERNAL_ERROR, 
"Graph add cleanAddrNode op out ctrl edge fail, dst node: %s.", atomic_node->GetName().c_str()); return INTERNAL_ERROR; } @@ -309,7 +307,7 @@ Status AtomicAddrCleanPass::CompileUnknownGraphOp(const vector &atomic_ return ge::GE_CLI_GE_NOT_INITIALIZED; } - for (auto &atomic_node: atomic_node_vec) { + for (auto &atomic_node : atomic_node_vec) { auto op_desc = atomic_node->GetOpDesc(); if (op_desc == nullptr) { GELOGW("op desc is nullptr."); diff --git a/ge/graph/passes/atomic_addr_clean_pass.h b/ge/graph/passes/atomic_addr_clean_pass.h index ad60b7b5..e22c1792 100755 --- a/ge/graph/passes/atomic_addr_clean_pass.h +++ b/ge/graph/passes/atomic_addr_clean_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,10 +41,10 @@ class AtomicAddrCleanPass : public GraphPass { private: /** - * HandleLoopGraph - * @param graph - * @return - */ + * HandleLoopGraph + * @param graph + * @return + */ Status HandleLoopGraph(ComputeGraphPtr &graph, const vector &atomic_node_vec); /** * HandleNormalGraph @@ -84,7 +84,6 @@ class AtomicAddrCleanPass : public GraphPass { Status HandleDispersedAtomicNodes(ComputeGraphPtr &graph, const std::vector &atomic_node_vec, std::vector &common_atomic_nodes); - vector hcom_node_vec_; bool is_loop_graph_ = false; }; diff --git a/ge/graph/passes/attach_stream_label_pass.cc b/ge/graph/passes/attach_stream_label_pass.cc index b04643a4..6b718418 100644 --- a/ge/graph/passes/attach_stream_label_pass.cc +++ b/ge/graph/passes/attach_stream_label_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -266,8 +266,8 @@ Status AttachStreamLabelPass::UpdateLoopBranch(const std::stack &enter_ } std::string out_type = out_desc->GetType(); bool need_skip = - out_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (out_type == ENTER) || (out_type == REFENTER) || - (((cur_node->GetType() == ENTER) || (cur_node->GetType() == REFENTER)) && (out_type == STREAMACTIVE)); + out_desc->HasAttr(ATTR_NAME_STREAM_LABEL) || (out_type == ENTER) || (out_type == REFENTER) || + (((cur_node->GetType() == ENTER) || (cur_node->GetType() == REFENTER)) && (out_type == STREAMACTIVE)); if (need_skip) { continue; } diff --git a/ge/graph/passes/attach_stream_label_pass.h b/ge/graph/passes/attach_stream_label_pass.h index 19f11480..28e828b5 100755 --- a/ge/graph/passes/attach_stream_label_pass.h +++ b/ge/graph/passes/attach_stream_label_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc index 8c808e46..4da51ab0 100755 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -134,8 +134,7 @@ Status BaseNodePass::IsolateAndDeleteNode(NodePtr &node, const std::vector GELOGE(FAILED, "parameter is null."); return FAILED; } - GELOGI("Prepare to isolate and delete node, name:%s, type:%s.", node->GetName().c_str(), - node->GetType().c_str()); + GELOGI("Prepare to isolate and delete node, name:%s, type:%s.", node->GetName().c_str(), node->GetType().c_str()); ComputeGraphPtr graph = node->GetOwnerComputeGraph(); if (graph == nullptr) { GELOGE(FAILED, "[%s] The owner graph must not be null.", node->GetName().c_str()); @@ -170,9 +169,9 @@ Status GEPass::Run(const NamesToPass &names_to_passes) { if (depth_ > kMaxRecursiveDepth) { GELOGE(PARAM_INVALID, - "The pass for root graph %s will be terminated because too many nesting" - " levels(%d) of subgraphs, last subgraph is %s", - root_graph_->GetName().c_str(), depth_, graph_->GetName().c_str()); + "The pass for root graph %s will be terminated because too many nesting" + " levels(%d) of subgraphs, last subgraph is %s", + root_graph_->GetName().c_str(), depth_, graph_->GetName().c_str()); return PARAM_INVALID; } @@ -212,8 +211,8 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { auto ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen); if (ret != SUCCESS) { - GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", - node->GetName().c_str(), node->GetType().c_str(), ret); + GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", node->GetName().c_str(), + node->GetType().c_str(), ret); return ret; } @@ -229,8 +228,8 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { SetFlagOption(kOptimizeAfterSubGraph, names_to_passes); ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen); if (ret != SUCCESS) { - GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u", - node->GetName().c_str(), node->GetType().c_str(), ret); + GELOGE(ret, "Failed to process 
passes on node %s type %s, error code: %u", node->GetName().c_str(), + node->GetType().c_str(), ret); return ret; } @@ -263,8 +262,8 @@ Status GEPass::RunPassesOnSubGraph(const NodePtr &node, const NamesToPass &names for (const auto &name : sub_graph_names) { auto graph = root_graph_->GetSubgraph(name); if (graph == nullptr) { - GELOGW("Can not find the sub graph %s from node %s, the pass-process will skip it", - name.c_str(), node->GetName().c_str()); + GELOGW("Can not find the sub graph %s from node %s, the pass-process will skip it", name.c_str(), + node->GetName().c_str()); continue; } has_sub_graph = true; diff --git a/ge/graph/passes/base_pass.h b/ge/graph/passes/base_pass.h index bb41691d..6e7b292e 100644 --- a/ge/graph/passes/base_pass.h +++ b/ge/graph/passes/base_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/bitcast_pass.cc b/ge/graph/passes/bitcast_pass.cc index 8388b21a..e8e1f84f 100644 --- a/ge/graph/passes/bitcast_pass.cc +++ b/ge/graph/passes/bitcast_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -56,14 +56,12 @@ Status BitcastPass::Run(NodePtr &node) { } Status BitcastPass::CheckDstDataType(const OpDescPtr op_desc, ge::DataType &dst_data_type) { - if (!ge::AttrUtils::GetDataType(op_desc, kAttrNameType, dst_data_type)) { GELOGE(PARAM_INVALID, "Node failed to get attribute type."); return PARAM_INVALID; } if (dst_data_type >= ge::DT_UNDEFINED) { - GELOGE(PARAM_INVALID, "dst_data_type[%s] is not valid.", - TypeUtils::DataTypeToSerialString(dst_data_type).c_str()); + GELOGE(PARAM_INVALID, "dst_data_type[%s] is not valid.", TypeUtils::DataTypeToSerialString(dst_data_type).c_str()); return PARAM_INVALID; } @@ -91,8 +89,7 @@ Status BitcastPass::CheckOutputShape(const OpDescPtr op_desc, const ge::DataType // get origin data_type and shape ge::DataType ori_data_type = input_tensor_desc->GetDataType(); if (ori_data_type >= ge::DT_UNDEFINED) { - GELOGE(PARAM_INVALID, "ori_data_type[%s] is not valid.", - TypeUtils::DataTypeToSerialString(ori_data_type).c_str()); + GELOGE(PARAM_INVALID, "ori_data_type[%s] is not valid.", TypeUtils::DataTypeToSerialString(ori_data_type).c_str()); return PARAM_INVALID; } @@ -148,4 +145,4 @@ Status BitcastPass::CalcAndUpdateShape(BitcastPass::kVecInt64 &dim_vec, ge::Data return SUCCESS; } -} // namespace ge +} // namespace ge diff --git a/ge/graph/passes/bitcast_pass.h b/ge/graph/passes/bitcast_pass.h index 34acaf57..4a9e2e1b 100644 --- a/ge/graph/passes/bitcast_pass.h +++ b/ge/graph/passes/bitcast_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,8 +34,7 @@ class BitcastPass : public BaseNodePass { private: Status CheckDstDataType(const OpDescPtr op_desc, ge::DataType &dst_data_type); Status CheckOutputShape(const OpDescPtr op_desc, const ge::DataType dst_data_type); - Status CalcAndUpdateShape(BitcastPass::kVecInt64 &dim_vec, ge::DataType ori_data_type, - ge::DataType dst_data_type); + Status CalcAndUpdateShape(BitcastPass::kVecInt64 &dim_vec, ge::DataType ori_data_type, ge::DataType dst_data_type); }; } // namespace ge diff --git a/ge/graph/passes/cast_remove_pass.cc b/ge/graph/passes/cast_remove_pass.cc index 62c92866..ab4f2098 100644 --- a/ge/graph/passes/cast_remove_pass.cc +++ b/ge/graph/passes/cast_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/cast_remove_pass.h b/ge/graph/passes/cast_remove_pass.h index 0ee52998..67fa697e 100644 --- a/ge/graph/passes/cast_remove_pass.h +++ b/ge/graph/passes/cast_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/cast_translate_pass.cc b/ge/graph/passes/cast_translate_pass.cc index 01b5c96b..ee67e93d 100644 --- a/ge/graph/passes/cast_translate_pass.cc +++ b/ge/graph/passes/cast_translate_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -52,15 +52,13 @@ bool CastTranslatePass::CheckInAndOutDataAnchor(NodePtr &node) const { bool CastTranslatePass::IsCastNode(NodePtr &node) const { std::string original_type; - GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, - GELOGW("get original type failed"); return false); + GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, GELOGW("get original type failed"); return false); return (original_type == CAST); } bool CastTranslatePass::IsTranslateNode(NodePtr &node) const { std::string original_type; - GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, - GELOGW("get original type failed"); return false); + GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, GELOGW("get original type failed"); return false); return (original_type == TRANSLATE); } @@ -178,13 +176,14 @@ bool CastTranslatePass::IsOpSupportedOptimize(NodePtr &cast_node, NodePtr &trans } if (is_src_cast) { - GE_IF_BOOL_EXEC( - !AttrUtils::SetInt(trans_op_desc, ATTR_NAME_INPUT_DATATYPE, static_cast(cast_in_datatype)), - GELOGW("set ATTR_NAME_INPUT_DATATYPE failed"); return false); + GE_IF_BOOL_EXEC(!AttrUtils::SetInt(trans_op_desc, ATTR_NAME_INPUT_DATATYPE, static_cast(cast_in_datatype)), + GELOGW("set ATTR_NAME_INPUT_DATATYPE failed"); + return false); } else { GE_IF_BOOL_EXEC( - !AttrUtils::SetInt(trans_op_desc, ATTR_NAME_OUTPUT_DATATYPE, static_cast(cast_out_datatype)), - GELOGW("set ATTR_NAME_INPUT_DATATYPE failed"); return false); + !AttrUtils::SetInt(trans_op_desc, ATTR_NAME_OUTPUT_DATATYPE, static_cast(cast_out_datatype)), + GELOGW("set ATTR_NAME_INPUT_DATATYPE failed"); + return false); } GELOGI("CastTranslatePass, translate in %d out %d.", trans_op_indesc->GetDataType(), trans_op_outdesc->GetDataType()); return true; diff --git a/ge/graph/passes/cast_translate_pass.h b/ge/graph/passes/cast_translate_pass.h index 04c03d42..a802fe9e 100755 --- a/ge/graph/passes/cast_translate_pass.h +++ b/ge/graph/passes/cast_translate_pass.h @@ -1,5 +1,5 
@@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc index a4662d5d..4415d144 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.cc +++ b/ge/graph/passes/common_subexpression_elimination_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "common_subexpression_elimination_pass.h" #include @@ -34,9 +35,7 @@ std::string GetCseKey(const NodePtr &node) { if (src_anchor == nullptr) { ss << in_anchor->GetIdx() << "-null-"; } else { - ss << in_anchor->GetIdx() << "-" - << src_anchor->GetOwnerNode()->GetName() << "-" - << src_anchor->GetIdx() << "-"; + ss << in_anchor->GetIdx() << "-" << src_anchor->GetOwnerNode()->GetName() << "-" << src_anchor->GetIdx() << "-"; } } @@ -75,13 +74,13 @@ Status CommonSubexpressionEliminationPass::Run(ComputeGraphPtr graph) { bool is_unknown = false; auto ret = NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown); if (ret != GRAPH_SUCCESS) { - GELOGW("Get node unknown status failed, node name:%s, type:%s.", - node->GetName().c_str(), node->GetType().c_str()); + GELOGW("Get node unknown status failed, node name:%s, type:%s.", node->GetName().c_str(), + node->GetType().c_str()); continue; } if (is_unknown) { - GELOGI("Current node %s, type %s is unknown shape which should be skip.", - node->GetName().c_str(), node->GetType().c_str()); + GELOGI("Current node %s, type %s is 
unknown shape which should be skip.", node->GetName().c_str(), + node->GetType().c_str()); continue; } auto key = GetCseKey(node); @@ -94,7 +93,7 @@ Status CommonSubexpressionEliminationPass::Run(ComputeGraphPtr graph) { if (node->GetAllOutDataAnchorsSize() != iter->second->GetAllOutDataAnchorsSize()) { GELOGW("The node %s and %s have the same CSE key, but different output anchor count, skip to fusion them", - iter->second->GetName().c_str(), node->GetName().c_str()); + iter->second->GetName().c_str(), node->GetName().c_str()); continue; } @@ -105,8 +104,8 @@ Status CommonSubexpressionEliminationPass::Run(ComputeGraphPtr graph) { ret = GraphUtils::ReplaceNodeAnchors(iter->second, node, {}, output_map); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s error node %u", - node->GetName().c_str(), iter->second->GetName().c_str(), ret); + GELOGE(INTERNAL_ERROR, "Failed to replace node %s by node %s error node %u", node->GetName().c_str(), + iter->second->GetName().c_str(), ret); return INTERNAL_ERROR; } @@ -118,11 +117,9 @@ Status CommonSubexpressionEliminationPass::Run(ComputeGraphPtr graph) { return INTERNAL_ERROR; } - GELOGI("Remove node %s by the CSE process, replace it with node %s", - node->GetName().c_str(), iter->second->GetName().c_str()); + GELOGI("Remove node %s by the CSE process, replace it with node %s", node->GetName().c_str(), + iter->second->GetName().c_str()); } return SUCCESS; } } // namespace ge - - diff --git a/ge/graph/passes/common_subexpression_elimination_pass.h b/ge/graph/passes/common_subexpression_elimination_pass.h index 83bfbace..b5aecf6b 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.h +++ b/ge/graph/passes/common_subexpression_elimination_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance 
with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_COMMON_SUBEXPRESSION_ELIMINATION_H_ #define GE_COMMON_SUBEXPRESSION_ELIMINATION_H_ @@ -22,7 +23,7 @@ namespace ge { class CommonSubexpressionEliminationPass : public GraphPass { public: - Status Run(ge::ComputeGraphPtr graph) override ; + Status Run(ge::ComputeGraphPtr graph) override; }; } // namespace ge -#endif //GE_COMMON_SUBEXPRESSION_ELIMINATION_H_ +#endif // GE_COMMON_SUBEXPRESSION_ELIMINATION_H_ diff --git a/ge/graph/passes/compile_nodes_pass.cc b/ge/graph/passes/compile_nodes_pass.cc index 9faa35ae..a93671c7 100755 --- a/ge/graph/passes/compile_nodes_pass.cc +++ b/ge/graph/passes/compile_nodes_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "graph/passes/compile_nodes_pass.h" #include @@ -29,7 +30,7 @@ using domi::ImplyType; namespace { const char *const kAICPUEngineName = "DNN_VM_AICPU"; -const char *const kAICPUKernelLibName = "aicpu_tf_kernel"; +const char *const kAICPUKernelLibName = "aicpu_kernel"; } // namespace namespace ge { diff --git a/ge/graph/passes/compile_nodes_pass.h b/ge/graph/passes/compile_nodes_pass.h index e2fb59c2..70f8cbf5 100644 --- a/ge/graph/passes/compile_nodes_pass.h +++ b/ge/graph/passes/compile_nodes_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/cond_pass.cc b/ge/graph/passes/cond_pass.cc index a2d77a1b..c3a421b1 100644 --- a/ge/graph/passes/cond_pass.cc +++ b/ge/graph/passes/cond_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "graph/passes/cond_pass.h" #include "common/op/ge_op_utils.h" #include "graph/utils/graph_utils.h" @@ -20,9 +21,9 @@ #include "graph/utils/node_utils.h" namespace { - const std::string kStringLength = "StringLength"; - const size_t kScalarDimNum = 1; -} +const std::string kStringLength = "StringLength"; +const size_t kScalarDimNum = 1; +} // namespace namespace ge { Status CondPass::Run(NodePtr &node) { @@ -171,8 +172,8 @@ Status CondPass::GetCondInfoForWhile(const NodePtr &node, ComputeGraphPtr &graph // cond_graph has and only has one output uint32_t output_num = net_output_node->GetAllInDataAnchorsSize(); if (output_num != 1) { - GELOGE(FAILED, "output size of cond_graph is invalid, expect 1 but %u exactly, while_node:%s.", - output_num, node->GetName().c_str()); + GELOGE(FAILED, "output size of cond_graph is invalid, expect 1 but %u exactly, while_node:%s.", output_num, + node->GetName().c_str()); return FAILED; } @@ -233,10 +234,9 @@ Status CondPass::HandleScalarCond(const ComputeGraphPtr &graph, const OutDataAnc return FAILED; } - if (GraphUtils::InsertNodeAfter(out_anchor, { in_anchor }, cast_node) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Insert Cast node %s between %s->%s failed.", - cast_node->GetName().c_str(), out_anchor->GetOwnerNode()->GetName().c_str(), - in_anchor->GetOwnerNode()->GetName().c_str()); + if (GraphUtils::InsertNodeAfter(out_anchor, {in_anchor}, cast_node) != GRAPH_SUCCESS) { + 
GELOGE(FAILED, "Insert Cast node %s between %s->%s failed.", cast_node->GetName().c_str(), + out_anchor->GetOwnerNode()->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); return FAILED; } @@ -279,10 +279,9 @@ Status CondPass::InsertNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr } AddRePassNode(new_node); - if (GraphUtils::InsertNodeAfter(out_anchor, { in_anchor }, new_node) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Insert %s node %s between %s->%s failed.", type.c_str(), - new_node->GetName().c_str(), out_anchor->GetOwnerNode()->GetName().c_str(), - in_anchor->GetOwnerNode()->GetName().c_str()); + if (GraphUtils::InsertNodeAfter(out_anchor, {in_anchor}, new_node) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Insert %s node %s between %s->%s failed.", type.c_str(), new_node->GetName().c_str(), + out_anchor->GetOwnerNode()->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str()); return FAILED; } @@ -314,8 +313,7 @@ NodePtr CondPass::AddCastNode(const ComputeGraphPtr &graph, const std::string &n GELOGE(FAILED, "Create cast op_desc failed, name: %s.", name.c_str()); return nullptr; } - if (!(AttrUtils::SetInt(cast_desc, CAST_ATTR_SRCT, src) && - AttrUtils::SetInt(cast_desc, CAST_ATTR_DSTT, dst) && + if (!(AttrUtils::SetInt(cast_desc, CAST_ATTR_SRCT, src) && AttrUtils::SetInt(cast_desc, CAST_ATTR_DSTT, dst) && AttrUtils::SetInt(cast_desc, CAST_ATTR_DST_TYPE, dst) && AttrUtils::SetBool(cast_desc, CAST_ATTR_TRUNCATE, false))) { GELOGE(FAILED, "Set CAST_ATTR failed, node: %s.", name.c_str()); diff --git a/ge/graph/passes/cond_pass.h b/ge/graph/passes/cond_pass.h index 5c0c83bc..fead8474 100644 --- a/ge/graph/passes/cond_pass.h +++ b/ge/graph/passes/cond_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_COND_PASS_H #define GE_GRAPH_PASSES_COND_PASS_H @@ -33,7 +34,7 @@ class CondPass : public BaseNodePass { /// @return Status /// static Status GetCondInfo(const NodePtr &node, ComputeGraphPtr &graph, OutDataAnchorPtr &cond_out_anchor, - InDataAnchorPtr &cond_in_anchor); + InDataAnchorPtr &cond_in_anchor); /// /// @brief Get cond info for if node @@ -44,7 +45,7 @@ class CondPass : public BaseNodePass { /// @return Status /// static Status GetCondInfoForIf(const NodePtr &node, ComputeGraphPtr &graph, OutDataAnchorPtr &cond_out_anchor, - InDataAnchorPtr &cond_in_anchor); + InDataAnchorPtr &cond_in_anchor); /// /// @brief Get cond info for while node @@ -55,7 +56,7 @@ class CondPass : public BaseNodePass { /// @return Status /// static Status GetCondInfoForWhile(const NodePtr &node, ComputeGraphPtr &graph, OutDataAnchorPtr &cond_out_anchor, - InDataAnchorPtr &cond_in_anchor); + InDataAnchorPtr &cond_in_anchor); /// /// @brief Process Cond Op with non-scalar cond_input @@ -96,8 +97,8 @@ class CondPass : public BaseNodePass { /// @param [in] type /// @return Status /// - Status InsertNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, - const InDataAnchorPtr &in_anchor, const std::string &type); + Status InsertNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor, const InDataAnchorPtr &in_anchor, + const std::string &type); /// /// @brief Add cast node @@ -108,8 +109,8 @@ class CondPass : public BaseNodePass { /// @param [in] dst /// @return NodePtr /// - NodePtr AddCastNode(const ComputeGraphPtr &graph, const std::string &name, const GeTensorDesc &tensor, - DataType src, DataType dst); + NodePtr AddCastNode(const ComputeGraphPtr &graph, const std::string &name, const GeTensorDesc &tensor, DataType src, + DataType dst); }; } // namespace ge -#endif //GE_GRAPH_PASSES_COND_PASS_H +#endif // 
GE_GRAPH_PASSES_COND_PASS_H diff --git a/ge/graph/passes/cond_remove_pass.cc b/ge/graph/passes/cond_remove_pass.cc index ec26ba3e..1650be92 100644 --- a/ge/graph/passes/cond_remove_pass.cc +++ b/ge/graph/passes/cond_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "graph/passes/cond_remove_pass.h" #include "common/op/ge_op_utils.h" #include "graph/utils/graph_utils.h" @@ -28,7 +29,7 @@ const uint32_t kFalseIndex = 0; /// Extra 1 byte store '\0' const int32_t kStrHeadLen = 9; const int32_t kInvalidRetVal = -1; -} +} // namespace namespace ge { Status CondRemovePass::Run(NodePtr &node) { @@ -228,17 +229,16 @@ Status CondRemovePass::ReplaceIfCaseNodeWithPartitioncall(const NodePtr &node, c const auto &output_desc_size = node->GetOpDesc()->GetOutputsSize(); // Create subgraph opdesc & node auto partitioncall_opdesc = - CreateSubgraphOpDesc(save_branch->GetName(), input_desc_size - kConditionIndexNum, output_desc_size); + CreateSubgraphOpDesc(save_branch->GetName(), input_desc_size - kConditionIndexNum, output_desc_size); auto partitioncall_node = node->GetOwnerComputeGraph()->AddNode(partitioncall_opdesc); // Link node's peerout anchors to new node's inanchors for (const auto &input_anchor : node->GetAllInAnchors()) { for (const auto &peerout_anchor : input_anchor->GetPeerAnchors()) { if (GraphUtils::AddEdge(peerout_anchor, partitioncall_node->GetInAnchor( - input_anchor->GetIdx() - kConditionIndexNum)) != ge::GRAPH_SUCCESS) { + input_anchor->GetIdx() - kConditionIndexNum)) != ge::GRAPH_SUCCESS) { GELOGE(FAILED, "Add edge failed, from node:%s idx:%d to node:%s idx:%d, input num:%d, output num:%d", 
peerout_anchor->GetOwnerNode()->GetName().c_str(), peerout_anchor->GetIdx(), - partitioncall_node->GetName().c_str(), input_anchor->GetIdx(), input_desc_size, - output_desc_size); + partitioncall_node->GetName().c_str(), input_anchor->GetIdx(), input_desc_size, output_desc_size); return FAILED; } } @@ -332,4 +332,4 @@ Status CondRemovePass::GetCondInfo(const NodePtr &node, ComputeGraphPtr &graph, return SUCCESS; } -} +} // namespace ge diff --git a/ge/graph/passes/cond_remove_pass.h b/ge/graph/passes/cond_remove_pass.h index 72ca64b8..69dd7195 100644 --- a/ge/graph/passes/cond_remove_pass.h +++ b/ge/graph/passes/cond_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_COND_REMOVE_PASS_H #define GE_GRAPH_PASSES_COND_REMOVE_PASS_H diff --git a/ge/graph/passes/constant_folding_pass.cc b/ge/graph/passes/constant_folding_pass.cc index 4db14fc3..80bf7867 100644 --- a/ge/graph/passes/constant_folding_pass.cc +++ b/ge/graph/passes/constant_folding_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,46 +17,27 @@ #include "graph/passes/constant_folding_pass.h" #include + +#include "common/debug/log.h" +#include "common/types.h" +#include "framework/common/debug/ge_log.h" #include "graph/operator_factory.h" +#include "graph/utils/attr_utils.h" #include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" #include "graph/utils/type_utils.h" -#include "init/gelib.h" +#include "inc/kernel.h" namespace ge { const int64_t kStartCallNum = 1; -const std::string kKernelLibName = "aicpu_tf_kernel"; -// tf_kernel.json opsFlag config -const std::string kOpsFlagClose = "0"; - -Status RunOpKernelWithCheck(NodePtr &node, - const vector &inputs, - std::vector &outputs) { - std::shared_ptr instance_ptr = ge::GELib::GetInstance(); - if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized or is finalized."); - return UNSUPPORTED; - } - OpsKernelInfoStorePtr kernel_info = instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(kKernelLibName); - if (kernel_info == nullptr) { - GELOGE(FAILED, "Get op kernel info store %s failed", kKernelLibName.c_str()); - return UNSUPPORTED; - } - - std::string ops_flag; - kernel_info->opsFlagCheck(*node, ops_flag); - if (ops_flag == kOpsFlagClose) { - return UNSUPPORTED; - } - return FoldingPass::RunOpKernel(node, inputs, outputs); -} const std::unordered_map> - &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { + &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { return statistic_of_ge_constant_folding_; } const std::unordered_map> - &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { + &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { return statistic_of_op_constant_folding_; } @@ -82,8 +63,8 @@ Status ConstantFoldingPass::Run(ge::NodePtr &node) { auto inputs = OpDescUtils::GetInputData(input_nodes); vector outputs; // Statistic of ge constant folding kernel - uint64_t start_time = 
GetCurrentTimestamp(); - auto ret = RunOpKernelWithCheck(node, inputs, outputs); + uint64_t start_time = GetCurrentTimestap(); + auto ret = RunOpKernel(node, inputs, outputs); if (ret != SUCCESS) { auto op_kernel = folding_pass::GetKernelByType(node); if (op_kernel == nullptr) { @@ -93,9 +74,9 @@ Status ConstantFoldingPass::Run(ge::NodePtr &node) { } // Statistic of op and fe constant folding kernel - start_time = GetCurrentTimestamp(); + start_time = GetCurrentTimestap(); ret = op_kernel->Compute(node_desc, inputs, outputs); - uint64_t cost_time = GetCurrentTimestamp() - start_time; + uint64_t cost_time = GetCurrentTimestap() - start_time; if (statistic_of_ge_constant_folding_.find(node->GetType()) != statistic_of_ge_constant_folding_.end()) { uint64_t &cnt = statistic_of_ge_constant_folding_[node->GetType()].first; uint64_t &cur_cost_time = statistic_of_ge_constant_folding_[node->GetType()].second; @@ -119,10 +100,10 @@ Status ConstantFoldingPass::Run(ge::NodePtr &node) { uint64_t &cnt = statistic_of_op_constant_folding_[node->GetType()].first; uint64_t &cost_time = statistic_of_op_constant_folding_[node->GetType()].second; cnt++; - cost_time += GetCurrentTimestamp() - start_time; + cost_time += GetCurrentTimestap() - start_time; } else { statistic_of_op_constant_folding_[node->GetType()] = - std::pair(kStartCallNum, GetCurrentTimestamp() - start_time); + std::pair(kStartCallNum, GetCurrentTimestap() - start_time); } } diff --git a/ge/graph/passes/constant_folding_pass.h b/ge/graph/passes/constant_folding_pass.h index c977157e..683b66f1 100644 --- a/ge/graph/passes/constant_folding_pass.h +++ b/ge/graph/passes/constant_folding_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,6 +28,7 @@ class ConstantFoldingPass : public FoldingPass { Status Run(ge::NodePtr &node) override; const std::unordered_map> &GetGeConstantFoldingPerfStatistic() const; const std::unordered_map> &GetOpConstantFoldingPerfStatistic() const; + private: std::unordered_map> statistic_of_op_constant_folding_; std::unordered_map> statistic_of_ge_constant_folding_; diff --git a/ge/graph/passes/constant_fuse_same_pass.cc b/ge/graph/passes/constant_fuse_same_pass.cc index d0970c59..4197f429 100644 --- a/ge/graph/passes/constant_fuse_same_pass.cc +++ b/ge/graph/passes/constant_fuse_same_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -132,11 +132,11 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph, fuse_nodes[map_key].emplace_back(node); GELOGD("ConstantFuseSamePass, format %s, datatype %s, data_size %d, shape_size %zu. 
node name %s", TypeUtils::FormatToSerialString(map_key.format).c_str(), - TypeUtils::DataTypeToSerialString(map_key.data_type).c_str(), - map_key.data_size, map_key.shape.size(), node->GetName().c_str()); + TypeUtils::DataTypeToSerialString(map_key.data_type).c_str(), map_key.data_size, map_key.shape.size(), + node->GetName().c_str()); } - GELOGI("ConstantFuseSamePass, total_const_nums %d, insert_const_nums %d, fuse_nodes size is %zu.", - total_const_nums, insert_const_nums, fuse_nodes.size()); + GELOGI("ConstantFuseSamePass, total_const_nums %d, insert_const_nums %d, fuse_nodes size is %zu.", total_const_nums, + insert_const_nums, fuse_nodes.size()); } Status ConstantFuseSamePass::MoveOutDataEdges(NodePtr &src_node, NodePtr &dst_node) { diff --git a/ge/graph/passes/constant_fuse_same_pass.h b/ge/graph/passes/constant_fuse_same_pass.h index 4935da84..fffb784c 100755 --- a/ge/graph/passes/constant_fuse_same_pass.h +++ b/ge/graph/passes/constant_fuse_same_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,7 +34,7 @@ struct SameConstKey { std::vector shape; public: - bool operator< (const SameConstKey &key) const { + bool operator<(const SameConstKey &key) const { if (data_size != key.data_size) { return data_size < key.data_size; } @@ -66,11 +66,9 @@ class ConstantFuseSamePass : public GraphPass { Status Run(ge::ComputeGraphPtr graph) override; private: - void GetFuseConstNodes(ComputeGraphPtr &graph, - std::map> &fuse_nodes); + void GetFuseConstNodes(ComputeGraphPtr &graph, std::map> &fuse_nodes); Status MoveOutDataEdges(NodePtr &src_node, NodePtr &dst_node); - Status FuseConstNodes(ComputeGraphPtr &graph, - std::map> &fuse_nodes); + Status FuseConstNodes(ComputeGraphPtr &graph, std::map> &fuse_nodes); }; -} // namespace ge -#endif // GE_GRAPH_PASSES_CONSTANT_FUSE_SAME_PASS_H_ +} // namespace ge +#endif // GE_GRAPH_PASSES_CONSTANT_FUSE_SAME_PASS_H_ diff --git a/ge/graph/passes/control_trigger_pass.cc b/ge/graph/passes/control_trigger_pass.cc index e179c64e..0c00d553 100644 --- a/ge/graph/passes/control_trigger_pass.cc +++ b/ge/graph/passes/control_trigger_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/control_trigger_pass.h b/ge/graph/passes/control_trigger_pass.h index 03ddbbd2..44d11cad 100755 --- a/ge/graph/passes/control_trigger_pass.h +++ b/ge/graph/passes/control_trigger_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,15 +25,7 @@ #include "inc/graph_pass.h" namespace ge { -enum ControlNodeType { - kNotControlOp, - kCondSwitch, - kCondMerge, - kLoopSwitchT, - kLoopSwitchF, - kEnter, - kInvalidType -}; +enum ControlNodeType { kNotControlOp, kCondSwitch, kCondMerge, kLoopSwitchT, kLoopSwitchF, kEnter, kInvalidType }; class ControlTriggerPass : public GraphPass { public: diff --git a/ge/graph/passes/ctrl_edge_transfer_pass.cc b/ge/graph/passes/ctrl_edge_transfer_pass.cc index f53dc7be..9454c00d 100755 --- a/ge/graph/passes/ctrl_edge_transfer_pass.cc +++ b/ge/graph/passes/ctrl_edge_transfer_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,6 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/util.h" #include "graph/utils/graph_utils.h" -#include "graph/debug/ge_attr_define.h" namespace ge { /* Pass Explaination: @@ -43,12 +42,6 @@ Status CtrlEdgeTransferPass::Run(ge::ComputeGraphPtr graph) { GELOGD("CtrlEdgeTransferPass start running"); GE_CHECK_NOTNULL(graph); - bool is_dynamic_shape = false; - (void)AttrUtils::GetBool(graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); - if (!is_dynamic_shape) { - return SUCCESS; - } - for (ge::NodePtr &n : graph->GetDirectNode()) { auto op_desc = n->GetOpDesc(); if (op_desc == nullptr) { @@ -65,14 +58,15 @@ Status CtrlEdgeTransferPass::Run(ge::ComputeGraphPtr graph) { for (auto &in_control_node : n->GetInControlNodes()) { GE_CHECK_NOTNULL(in_control_node); - GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(), - n->GetInControlAnchor()), "remove edge failed"); + GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(), n->GetInControlAnchor()), + "remove edge failed"); for (auto &out_node : n->GetOutNodes()) { if 
(out_node == nullptr) { continue; } - GE_CHK_STATUS_RET(ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(), - out_node->GetInControlAnchor()), "add edge failed."); + GE_CHK_STATUS_RET( + ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(), out_node->GetInControlAnchor()), + "add edge failed."); } } } diff --git a/ge/graph/passes/ctrl_edge_transfer_pass.h b/ge/graph/passes/ctrl_edge_transfer_pass.h index 1b6a624c..ee981012 100755 --- a/ge/graph/passes/ctrl_edge_transfer_pass.h +++ b/ge/graph/passes/ctrl_edge_transfer_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. */ - #ifndef GE_GRAPH_PASSES_CTRL_EDGE_TRANSFER_PASS_H_ #define GE_GRAPH_PASSES_CTRL_EDGE_TRANSFER_PASS_H_ diff --git a/ge/graph/passes/data_pass.cc b/ge/graph/passes/data_pass.cc index 38688848..517e7737 100644 --- a/ge/graph/passes/data_pass.cc +++ b/ge/graph/passes/data_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ namespace ge { Status DataPass::Run(ComputeGraphPtr compute_graph) { GE_CHECK_NOTNULL(compute_graph); - if (compute_graph->GetParentNode() == nullptr) { // for subgraph post process. + if (compute_graph->GetParentNode() == nullptr) { // for subgraph post process. return SUCCESS; } @@ -34,10 +34,10 @@ Status DataPass::Run(ComputeGraphPtr compute_graph) { if (node->GetType() == DATA) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - break; // parent_index not set, Graph from IR. 
+ break; // parent_index not set, Graph from IR. } - return SUCCESS; // Graph from Parser. + return SUCCESS; // Graph from Parser. } } @@ -65,16 +65,16 @@ Status DataPass::Run(ComputeGraphPtr compute_graph) { auto post_func = domi::OpRegistry::Instance()->GetParseSubgraphPostFunc(parent_node->GetType()); if (post_func == nullptr) { - GELOGW("The subgraph post func for node %s type %s is null.", - parent_node->GetName().c_str(), parent_node->GetType().c_str()); + GELOGW("The subgraph post func for node %s type %s is null.", parent_node->GetName().c_str(), + parent_node->GetType().c_str()); return SUCCESS; } auto graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); auto ret = post_func(subgraph_name, graph); if (ret != SUCCESS) { - GELOGE(FAILED, "Failed to post-process subgraph %s on node %s type %s", - graph.GetName().c_str(), parent_node->GetName().c_str(), parent_node->GetType().c_str()); + GELOGE(FAILED, "Failed to post-process subgraph %s on node %s type %s", graph.GetName().c_str(), + parent_node->GetName().c_str(), parent_node->GetType().c_str()); return FAILED; } diff --git a/ge/graph/passes/data_pass.h b/ge/graph/passes/data_pass.h index bce2fd5a..1f6d0f0b 100644 --- a/ge/graph/passes/data_pass.h +++ b/ge/graph/passes/data_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/dimension_adjust_pass.cc b/ge/graph/passes/dimension_adjust_pass.cc index fc5fe69f..a734ddc3 100755 --- a/ge/graph/passes/dimension_adjust_pass.cc +++ b/ge/graph/passes/dimension_adjust_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,8 +58,8 @@ Status DimensionAdjustPass::Run(ge::NodePtr &node) { return INTERNAL_ERROR; } if (is_unknown) { - GELOGI("Current node %s, type %s is unknown shape which should be skip.", - node->GetName().c_str(), node->GetType().c_str()); + GELOGI("Current node %s, type %s is unknown shape which should be skip.", node->GetName().c_str(), + node->GetType().c_str()); return SUCCESS; } diff --git a/ge/graph/passes/dimension_adjust_pass.h b/ge/graph/passes/dimension_adjust_pass.h index 685d9694..fa9d2320 100755 --- a/ge/graph/passes/dimension_adjust_pass.h +++ b/ge/graph/passes/dimension_adjust_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dimension_compute_pass.cc b/ge/graph/passes/dimension_compute_pass.cc index dfa2d404..a429e69d 100755 --- a/ge/graph/passes/dimension_compute_pass.cc +++ b/ge/graph/passes/dimension_compute_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. 
*/ - #include "graph/passes/dimension_compute_pass.h" #include diff --git a/ge/graph/passes/dimension_compute_pass.h b/ge/graph/passes/dimension_compute_pass.h index ba1a057c..40110757 100644 --- a/ge/graph/passes/dimension_compute_pass.h +++ b/ge/graph/passes/dimension_compute_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dropout_pass.cc b/ge/graph/passes/dropout_pass.cc index 09c297a6..ab88aa23 100644 --- a/ge/graph/passes/dropout_pass.cc +++ b/ge/graph/passes/dropout_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/dropout_pass.h b/ge/graph/passes/dropout_pass.h index f127224e..506ee5d6 100755 --- a/ge/graph/passes/dropout_pass.h +++ b/ge/graph/passes/dropout_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/end_of_sequence_add_control_pass.cc b/ge/graph/passes/end_of_sequence_add_control_pass.cc index d6503d0d..90c0841c 100755 --- a/ge/graph/passes/end_of_sequence_add_control_pass.cc +++ b/ge/graph/passes/end_of_sequence_add_control_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/end_of_sequence_add_control_pass.h b/ge/graph/passes/end_of_sequence_add_control_pass.h index dcc65848..2540a988 100644 --- a/ge/graph/passes/end_of_sequence_add_control_pass.h +++ b/ge/graph/passes/end_of_sequence_add_control_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,22 +33,22 @@ class EndOfSequenceAddControlPass : public GraphPass { private: /** - * Get EndOfSequence node in graph, nullptr if not exist. - * @param graph - * @return EndOfSequence node - */ + * Get EndOfSequence node in graph, nullptr if not exist. + * @param graph + * @return EndOfSequence node + */ inline NodePtr GetEndOfSequence(const ComputeGraphPtr &graph) const; /** - * Check whether this node is a data-like node. - * @param node - * @return - */ + * Check whether this node is a data-like node. + * @param node + * @return + */ bool IsDataLikeNode(const NodePtr &node); /** - * Check whether this node is a data-like node. - * @param node - * @return - */ + * Check whether this node is a data-like node. + * @param node + * @return + */ Status AddControlEdge(NodePtr &end_of_sequence, std::vector &target_nodes); }; } // namespace ge diff --git a/ge/graph/passes/enter_pass.cc b/ge/graph/passes/enter_pass.cc index 206d271c..ad3d78fc 100644 --- a/ge/graph/passes/enter_pass.cc +++ b/ge/graph/passes/enter_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/enter_pass.h b/ge/graph/passes/enter_pass.h index 677516ff..73702c38 100644 --- a/ge/graph/passes/enter_pass.h +++ b/ge/graph/passes/enter_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/flow_ctrl_pass.cc b/ge/graph/passes/flow_ctrl_pass.cc index ce114d86..430cf86d 100755 --- a/ge/graph/passes/flow_ctrl_pass.cc +++ b/ge/graph/passes/flow_ctrl_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -84,22 +84,6 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { return graph_change ? SUCCESS : NOT_CHANGED; } -bool FlowCtrlPass::CheckMultiDataSet(ComputeGraphPtr &compute_graph) { - int data_set_num = 0; - for (auto &node : compute_graph->GetDirectNode()) { - if (node == nullptr) { - continue; - } - string type; - bool is_found = AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type); - if (is_found && type == "IteratorV2") { - data_set_num++; - } - } - GELOGI("The ComputeGraph contain %d dataSet.", data_set_num); - return (data_set_num > 1) ? 
true : false; -} - NodePtr FlowCtrlPass::InsertOp(ComputeGraphPtr &compute_graph, const string &node_type, const string &node_name, const std::vector &input_list, const std::vector &output_list) { @@ -138,10 +122,11 @@ NodePtr FlowCtrlPass::InsertOp(ComputeGraphPtr &compute_graph, const string &nod NodePtr FlowCtrlPass::InsertStreamSwitchOp(ComputeGraphPtr &compute_graph, const string &switch_name, const NodePtr &loop_cond, const NodePtr &iter_per_loop) { - GE_IF_BOOL_EXEC(loop_cond == nullptr || loop_cond->GetOpDesc() == nullptr, - GELOGE(FAILED, "loop_cond is null"); return nullptr); + GE_IF_BOOL_EXEC(loop_cond == nullptr || loop_cond->GetOpDesc() == nullptr, GELOGE(FAILED, "loop_cond is null"); + return nullptr); GE_IF_BOOL_EXEC(iter_per_loop == nullptr || iter_per_loop->GetOpDesc() == nullptr, - GELOGE(FAILED, "iter_per_loop is nullptr"); return nullptr); + GELOGE(FAILED, "iter_per_loop is nullptr"); + return nullptr); std::vector input_desc_list = {loop_cond->GetOpDesc()->GetOutputDesc(0), iter_per_loop->GetOpDesc()->GetOutputDesc(0)}; std::vector output_desc_list; @@ -166,9 +151,10 @@ NodePtr FlowCtrlPass::InsertStreamSwitchOp(ComputeGraphPtr &compute_graph, const } // stream switch op need switch cond by attr. 
- GE_IF_BOOL_EXEC(!AttrUtils::SetInt(stream_switch->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_COND, - static_cast(RT_LESS)), - DOMI_LOGE("set ATTR_NAME_STREAM_SWITCH_COND failed"); return nullptr); + GE_IF_BOOL_EXEC( + !AttrUtils::SetInt(stream_switch->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_COND, static_cast(RT_LESS)), + DOMI_LOGE("set ATTR_NAME_STREAM_SWITCH_COND failed"); + return nullptr); return stream_switch; } @@ -218,8 +204,7 @@ Status FlowCtrlPass::AddGlobalStepVariableNode(ComputeGraphPtr &compute_graph) { GeTensorDesc tensor_desc(GeShape({1}), FORMAT_ND, DT_UINT64); std::vector input_desc_list = {}; std::vector output_desc_list = {tensor_desc}; - NodePtr global_step = InsertOp(compute_graph, VARIABLE, NODE_NAME_GLOBAL_STEP, - input_desc_list, output_desc_list); + NodePtr global_step = InsertOp(compute_graph, VARIABLE, NODE_NAME_GLOBAL_STEP, input_desc_list, output_desc_list); if (global_step == nullptr) { GELOGE(FAILED, "Add global_step node failed, global_step is null."); return FAILED; @@ -237,8 +222,8 @@ Status FlowCtrlPass::AddGlobalStepVariableNode(ComputeGraphPtr &compute_graph) { NodePtr FlowCtrlPass::InsertAssignOp(ge::ComputeGraphPtr &compute_graph, const string &node_type, const string &node_name, const NodePtr &ref_node, const NodePtr &value_node) { - GE_IF_BOOL_EXEC(ref_node == nullptr || value_node == nullptr || - ref_node->GetOpDesc() == nullptr || value_node->GetOpDesc() == nullptr, + GE_IF_BOOL_EXEC(ref_node == nullptr || value_node == nullptr || ref_node->GetOpDesc() == nullptr || + value_node->GetOpDesc() == nullptr, GELOGE(FAILED, "ref node or value node is null"); return nullptr); GeTensorDesc ref_tensor_desc = ref_node->GetOpDesc()->GetOutputDesc(0); @@ -280,7 +265,7 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co */ // Insert AssignAdd node NodePtr assign_add_node = - InsertAssignOp(compute_graph, ASSIGNADD, NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD, loop_cond_node, loop_inc_node); + InsertAssignOp(compute_graph, 
ASSIGNADD, NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD, loop_cond_node, loop_inc_node); if (assign_add_node == nullptr || switch_node == nullptr) { GELOGE(PARAM_INVALID, "assign add node or switch node is null"); return FAILED; @@ -291,7 +276,7 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co GE_CHK_STATUS_RET(SetStreamLabel(assign_add_node, active_name), "set stream label failed"); // used for stream assign to find true branch - GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); + GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, {active_name}), "set active label list failed"); // 2. Insert active node NodePtr active_node = InsertOp(compute_graph, STREAMACTIVE, active_name, {}, {}); @@ -301,7 +286,8 @@ Status FlowCtrlPass::CreateIterCtrlTrueBranch(ComputeGraphPtr &compute_graph, co } GE_CHK_STATUS_RET(SetStreamLabel(active_node, active_name), "set stream label failed"); GE_IF_BOOL_EXEC(!AttrUtils::SetBool(active_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, true), - DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); + DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); + return FAILED); // add ctrl edges graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), assign_add_node->GetInControlAnchor()); @@ -326,14 +312,14 @@ Status FlowCtrlPass::CreateIterCtrlFalseBranch(ComputeGraphPtr &compute_graph, c * loopCond * | * v - * switch --> Assign --> active --> ModelExit + * switch --> Assign * ^ * | * loopReset */ - // Insert Assign node and ctrl edge + // Insert Assign node NodePtr assign_node = - InsertAssignOp(compute_graph, ASSIGN, NODE_NAME_FLOWCTRL_LOOP_ASSIGN, loop_cond_node, loop_reset_node); + InsertAssignOp(compute_graph, ASSIGN, NODE_NAME_FLOWCTRL_LOOP_ASSIGN, loop_cond_node, loop_reset_node); if (assign_node == nullptr || switch_node == nullptr) { GELOGE(PARAM_INVALID, "assign_node or switch node is null"); return FAILED; @@ -341,49 +327,13 @@ Status 
FlowCtrlPass::CreateIterCtrlFalseBranch(ComputeGraphPtr &compute_graph, c GE_CHK_STATUS_RET(SetStreamLabel(assign_node, switch_node->GetName()), "set stream label failed"); + // 3. Insert ctrl edges graphStatus add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), assign_node->GetInControlAnchor()); if (add_ret != GRAPH_SUCCESS) { GELOGE(FAILED, "Add switch_node to assign_node ctrl edge failed, add_ret=%u.", add_ret); return FAILED; } - if (CheckMultiDataSet(compute_graph)) { - GELOGI("Multi dataSae exist, model_exit node is need."); - // 2. Insert active node and add ctrl edge - string active_name = switch_node->GetName() + "_StreamExitActive"; - NodePtr active_node = InsertOp(compute_graph, STREAMACTIVE, active_name, {}, {}); - if (active_node == nullptr) { - GELOGE(FAILED, "Insert stream active node:%s for IterCtrlTrueStream failed.", active_name.c_str()); - return FAILED; - } - GE_CHK_STATUS_RET(SetStreamLabel(active_node, switch_node->GetName()), "set stream label failed"); - GE_IF_BOOL_EXEC(!AttrUtils::SetBool(active_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, true), - DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); - - string model_exit_name = switch_node->GetName() + "_ModelExit"; - GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { model_exit_name }), "set active label list failed"); - - add_ret = GraphUtils::AddEdge(assign_node->GetOutControlAnchor(), active_node->GetInControlAnchor()); - if (add_ret != GRAPH_SUCCESS) { - GELOGE(FAILED, "Add assign_node to active_node ctrl edge failed, add_ret=%u.", add_ret); - return FAILED; - } - - // 3. 
Insert model exit node and add ctrl edge - NodePtr model_exit_node = InsertOp(compute_graph, MODELEXIT, model_exit_name, {}, {}); - if (model_exit_node == nullptr) { - GELOGE(FAILED, "Insert model_exit node:%s for IterCtrlTrueStream failed.", model_exit_name.c_str()); - return FAILED; - } - GE_CHK_STATUS_RET(SetStreamLabel(model_exit_node, model_exit_name), "set stream label failed"); - - add_ret = GraphUtils::AddEdge(active_node->GetOutControlAnchor(), model_exit_node->GetInControlAnchor()); - if (add_ret != GRAPH_SUCCESS) { - GELOGE(FAILED, "Add active_node to model_exit_node ctrl edge failed, add_ret=%u.", add_ret); - return FAILED; - } - } - GELOGI("CreateIterCtrlFalseBranch success."); return SUCCESS; } @@ -465,7 +415,8 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, * itersPerLoop loopCond */ GE_IF_BOOL_EXEC(loop_after_node == nullptr || compute_graph == nullptr, - DOMI_LOGE("loop after node or compute graph is null"); return FAILED); + DOMI_LOGE("loop after node or compute graph is null"); + return FAILED); InDataAnchorPtr in_anchor = loop_after_node->GetInDataAnchor(0); if (in_anchor == nullptr || in_anchor->GetPeerOutAnchor() == nullptr) { GELOGE(FAILED, "Find %s in data anchor failed.", loop_after_node->GetName().c_str()); @@ -520,7 +471,8 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, GE_CHK_STATUS_RET(SetStreamLabel(active_node, active_name), "set stream label failed"); GE_IF_BOOL_EXEC(!AttrUtils::SetBool(active_node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, true), - DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); return FAILED); + DOMI_LOGE("set ATTR_NAME_IS_LOOP_ACTIVE failed"); + return FAILED); add_ret = GraphUtils::AddEdge(switch_node->GetOutControlAnchor(), active_node->GetInControlAnchor()); if (add_ret != GRAPH_SUCCESS) { @@ -530,9 +482,9 @@ Status FlowCtrlPass::AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, } // used for stream assign to find true branch - 
GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, { active_name }), "set active label list failed"); + GE_CHK_STATUS_RET(SetActiveLabelList(switch_node, {active_name}), "set active label list failed"); // used for stream assign to find active stream - GE_CHK_STATUS_RET(SetActiveLabelList(active_node, { loop_pre_node->GetName() }), "set active label list failed"); + GE_CHK_STATUS_RET(SetActiveLabelList(active_node, {loop_pre_node->GetName()}), "set active label list failed"); return SUCCESS; } } // namespace ge diff --git a/ge/graph/passes/flow_ctrl_pass.h b/ge/graph/passes/flow_ctrl_pass.h index 35270946..a928aaa7 100755 --- a/ge/graph/passes/flow_ctrl_pass.h +++ b/ge/graph/passes/flow_ctrl_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -134,14 +134,6 @@ class FlowCtrlPass : public GraphPass { /// Other: failed /// Status AddSpecialNodeIteratorCtrl(ComputeGraphPtr &compute_graph, NodePtr &loop_after_node); - - /// - /// add special iterator ctrl nodes(small cycle). 
- /// @param compute_graph graph - /// @return true: two or more dataSet exist - /// false: only one dataSet exist - /// - bool CheckMultiDataSet(ComputeGraphPtr &compute_graph); }; } // namespace ge diff --git a/ge/graph/passes/folding_pass.cc b/ge/graph/passes/folding_pass.cc index 93dc2c40..b52a3226 100755 --- a/ge/graph/passes/folding_pass.cc +++ b/ge/graph/passes/folding_pass.cc @@ -30,7 +30,6 @@ #include "graph/debug/ge_attr_define.h" #include "ge_local_engine/engine/host_cpu_engine.h" - namespace ge { namespace folding_pass { shared_ptr GetKernelByType(const NodePtr &node) { @@ -84,7 +83,7 @@ NodePtr AddConstNodeToGraph(GeTensorPtr &tensor, ComputeGraphPtr &graph) { } GE_IF_BOOL_EXEC(graph == nullptr, GELOGW("input param graph is null"); return nullptr); - (void) AttrUtils::SetListStr(const_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())); + (void)AttrUtils::SetListStr(const_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())); return graph->AddNodeFront(const_desc); } @@ -113,8 +112,7 @@ NodePtr AddIdentityNodeToGraph(const std::string &name, const GeTensorDesc &tens } } // namespace -Status FoldingPass::RunOpKernel(NodePtr &node, - const vector &inputs, +Status FoldingPass::RunOpKernel(NodePtr &node, const vector &inputs, std::vector &outputs) { return HostCpuEngine::GetInstance().Run(node, inputs, outputs); } @@ -137,8 +135,8 @@ Status FoldingPass::Folding(NodePtr &node, vector &outputs) { auto in_data_nodes = node->GetInDataNodes(); std::unordered_set in_data_nodes_set(in_data_nodes.begin(), in_data_nodes.end()); if (IsolateAndDeleteNode(node, {}) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to isolate and delete node %s, type %s.", - node->GetName().c_str(), node->GetType().c_str()); + GELOGE(INTERNAL_ERROR, "Failed to isolate and delete node %s, type %s.", node->GetName().c_str(), + node->GetType().c_str()); return INTERNAL_ERROR; } for (auto iter = in_data_nodes_set.begin(); iter != in_data_nodes_set.end(); 
++iter) { @@ -149,8 +147,8 @@ Status FoldingPass::Folding(NodePtr &node, vector &outputs) { continue; } if (IsolateAndDeleteNode(pre_node, {}) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to isolate and delete in data node %s, type %s.", - pre_node->GetName().c_str(), pre_node->GetType().c_str()); + GELOGE(INTERNAL_ERROR, "Failed to isolate and delete in data node %s, type %s.", pre_node->GetName().c_str(), + pre_node->GetType().c_str()); return INTERNAL_ERROR; } } @@ -188,7 +186,7 @@ Status FoldingPass::DealWithInNodes(NodePtr &node) { node->GetName().c_str()); auto identity_name = node->GetName() + "_ctrl_identity_" + std::to_string(in_data_anchor->GetIdx()); auto identity = - AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph); + AddIdentityNodeToGraph(identity_name, node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx()), graph); if (identity == nullptr) { GELOGE(INTERNAL_ERROR, "Failed to add identity node to graph."); return INTERNAL_ERROR; @@ -237,8 +235,8 @@ Status FoldingPass::AddConstNode(NodePtr &node, IndexsToAnchors indexes_to_ancho auto const_node = AddConstNodeToGraph(weight, graph); if (const_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to add dynamic const node, node name:%s, index:%zu.", - node->GetName().c_str(), index); + GELOGE(INTERNAL_ERROR, "Failed to add dynamic const node, node name:%s, index:%zu.", node->GetName().c_str(), + index); return INTERNAL_ERROR; } GELOGI("add const_node:%s, replace node %s, type %s, index %zu.", const_node->GetName().c_str(), diff --git a/ge/graph/passes/folding_pass.h b/ge/graph/passes/folding_pass.h index 745cffd7..0ffd2eb2 100755 --- a/ge/graph/passes/folding_pass.h +++ b/ge/graph/passes/folding_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the 
License. @@ -14,7 +14,6 @@ * limitations under the License. */ - #ifndef GE_GRAPH_PASSES_FOLDING_PASS_H_ #define GE_GRAPH_PASSES_FOLDING_PASS_H_ @@ -29,19 +28,19 @@ namespace ge { namespace folding_pass { shared_ptr GetKernelByType(const NodePtr &node); bool IsNoNeedConstantFolding(const NodePtr &node); -} +} // namespace folding_pass using IndexsToAnchors = std::map>; class FoldingPass : public BaseNodePass { public: static Status RunOpKernel(NodePtr &node, const vector &inputs, vector &outputs); + protected: Status Folding(NodePtr &node, vector &outputs); + private: - Status AddConstNode(NodePtr &node, - IndexsToAnchors indexes_to_anchors, - std::vector &v_weight); + Status AddConstNode(NodePtr &node, IndexsToAnchors indexes_to_anchors, std::vector &v_weight); Status DealWithInNodes(NodePtr &node); Status RemoveNodeKeepingCtrlEdges(NodePtr &node); Status ConnectNodeToInAnchor(InDataAnchorPtr &in_anchor, NodePtr &node, int node_index); diff --git a/ge/graph/passes/for_pass.cc b/ge/graph/passes/for_pass.cc index f3caea35..e913985b 100644 --- a/ge/graph/passes/for_pass.cc +++ b/ge/graph/passes/for_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,17 +28,17 @@ #include "graph/utils/op_desc_utils.h" namespace { - const uint32_t kWhileIInputIndex = 0; - const uint32_t kWhileAbsDeltaInputIndex = 1; - const uint32_t kWhileRangeInputIndex = 2; - const uint32_t kWhileStartInputIndex = 3; - const uint32_t kWhileDeltaInputIndex = 4; - const uint32_t kWhileDataInputIndex = 5; - const uint32_t kSubgraphLoopVarInputIndex = 0; - const uint32_t kSubgraphInputIndex = 1; - const uint32_t kWhileOutputIndex = 5; - const std::string kAbs = "Abs"; -} +const uint32_t kWhileIInputIndex = 0; +const uint32_t kWhileAbsDeltaInputIndex = 1; +const uint32_t kWhileRangeInputIndex = 2; +const uint32_t kWhileStartInputIndex = 3; +const uint32_t kWhileDeltaInputIndex = 4; +const uint32_t kWhileDataInputIndex = 5; +const uint32_t kSubgraphLoopVarInputIndex = 0; +const uint32_t kSubgraphInputIndex = 1; +const uint32_t kWhileOutputIndex = 5; +const std::string kAbs = "Abs"; +} // namespace namespace ge { Status ForPass::Run(NodePtr &node) { @@ -54,12 +54,12 @@ Status ForPass::Run(NodePtr &node) { GE_CHECK_NOTNULL(root_graph); ForInfo for_info; - GE_CHK_STATUS_RET(BuildForInfo(root_graph, node, for_info), - "Build ForInfo failed, node:%s.", node->GetName().c_str()); + GE_CHK_STATUS_RET(BuildForInfo(root_graph, node, for_info), "Build ForInfo failed, node:%s.", + node->GetName().c_str()); WhileInfo while_info; - GE_CHK_STATUS_RET(TranWhileInfo(graph, for_info, while_info), - "Transfer WhileInfo from ForInfo failed, node:%s.", node->GetName().c_str()); + GE_CHK_STATUS_RET(TranWhileInfo(graph, for_info, while_info), "Transfer WhileInfo from ForInfo failed, node:%s.", + node->GetName().c_str()); ComputeGraphPtr cond_graph = BuildCondGraph(while_info); if ((cond_graph == nullptr) || (root_graph->AddSubgraph(cond_graph) != GRAPH_SUCCESS)) { @@ -73,8 +73,8 @@ Status ForPass::Run(NodePtr &node) { return FAILED; } - GE_CHK_STATUS_RET(UpdateForBodyInputMapping(while_info), - "Update InputMapping for for-body-graph failed, node:%s.", 
node->GetName().c_str()); + GE_CHK_STATUS_RET(UpdateForBodyInputMapping(while_info), "Update InputMapping for for-body-graph failed, node:%s.", + node->GetName().c_str()); // for node has and only has one subgraph GE_CHECK_NOTNULL(node->GetOpDesc()); @@ -190,10 +190,10 @@ Status ForPass::FindInputsAndOutputs(const NodePtr &node, std::vectorGetName().c_str(), index); return FAILED; } - GE_IF_BOOL_EXEC(in_data_anchor->GetPeerOutAnchor() == nullptr, - GELOGW("Get null input by index %d from node %s ", - in_data_anchor->GetIdx(), node->GetName().c_str()); - continue); + GE_IF_BOOL_EXEC( + in_data_anchor->GetPeerOutAnchor() == nullptr, + GELOGW("Get null input by index %d from node %s ", in_data_anchor->GetIdx(), node->GetName().c_str()); + continue); data_inputs.emplace_back(in_data_anchor->GetPeerOutAnchor()); } @@ -270,8 +270,8 @@ Status ForPass::TranWhileInfo(const ComputeGraphPtr &graph, const ForInfo &for_i return FAILED; } - GELOGI("Transfer for_info to while_info succ, for_node:%s, while_node:%s.", - for_name.c_str(), while_info.while_node->GetName().c_str()); + GELOGI("Transfer for_info to while_info succ, for_node:%s, while_node:%s.", for_name.c_str(), + while_info.while_node->GetName().c_str()); return SUCCESS; } @@ -316,8 +316,8 @@ OpDescPtr ForPass::CreateConstDesc(const std::string &name, int32_t value) { /// @param [out] abs_delta_input /// @return Status /// -Status ForPass::CreateLoopInput(const ComputeGraphPtr &graph, const ForInfo &for_info, - OutDataAnchorPtr &range_input, OutDataAnchorPtr &abs_delta_input) { +Status ForPass::CreateLoopInput(const ComputeGraphPtr &graph, const ForInfo &for_info, OutDataAnchorPtr &range_input, + OutDataAnchorPtr &abs_delta_input) { std::string for_name = for_info.for_node->GetName(); GELOGD("Begin to create loop_count input, node:%s", for_name.c_str()); @@ -332,16 +332,16 @@ Status ForPass::CreateLoopInput(const ComputeGraphPtr &graph, const ForInfo &for // i * |delta| < |limit-start| PartialGraphBuilder 
graph_builder; graph_builder.SetOwnerGraph(graph) - .AddExistNode(for_info.start->GetOwnerNode()) - .AddExistNode(for_info.limit->GetOwnerNode()) - .AddExistNode(for_info.delta->GetOwnerNode()) - .AddNode(CreateOpDesc(sub_name_0, SUB, false)) - .AddNode(CreateOpDesc(abs_name_0, kAbs, true)) - .AddNode(CreateOpDesc(abs_name_1, kAbs, true)) - .AddDataLink(delta->GetOwnerNode()->GetName(), delta->GetIdx(), abs_name_0, 0) - .AddDataLink(limit->GetOwnerNode()->GetName(), limit->GetIdx(), sub_name_0, 0) - .AddDataLink(start->GetOwnerNode()->GetName(), start->GetIdx(), sub_name_0, 1) - .AddDataLink(sub_name_0, 0, abs_name_1, 0); + .AddExistNode(for_info.start->GetOwnerNode()) + .AddExistNode(for_info.limit->GetOwnerNode()) + .AddExistNode(for_info.delta->GetOwnerNode()) + .AddNode(CreateOpDesc(sub_name_0, SUB, false)) + .AddNode(CreateOpDesc(abs_name_0, kAbs, true)) + .AddNode(CreateOpDesc(abs_name_1, kAbs, true)) + .AddDataLink(delta->GetOwnerNode()->GetName(), delta->GetIdx(), abs_name_0, 0) + .AddDataLink(limit->GetOwnerNode()->GetName(), limit->GetIdx(), sub_name_0, 0) + .AddDataLink(start->GetOwnerNode()->GetName(), start->GetIdx(), sub_name_0, 1) + .AddDataLink(sub_name_0, 0, abs_name_1, 0); graphStatus error_code = GRAPH_SUCCESS; std::string error_msg; @@ -380,12 +380,9 @@ Status ForPass::CreateLoopInput(const ComputeGraphPtr &graph, const ForInfo &for OpDescPtr ForPass::CreateOpDesc(const std::string &name, const std::string &type, bool io_equal_flag) { OpDescBuilder op_desc_builder(name, type); if (io_equal_flag) { - op_desc_builder.AddInput("x") - .AddOutput("y"); + op_desc_builder.AddInput("x").AddOutput("y"); } else { - op_desc_builder.AddInput("x1") - .AddInput("x2") - .AddOutput("y"); + op_desc_builder.AddInput("x1").AddInput("x2").AddOutput("y"); } return op_desc_builder.Build(); @@ -480,8 +477,7 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { if (peer_out_anchor == nullptr) { continue; } - 
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_data_anchor), - "Add data-edge %s:%d->%s:%d failed.", + GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_data_anchor), "Add data-edge %s:%d->%s:%d failed.", peer_out_anchor->GetOwnerNode()->GetName().c_str(), peer_out_anchor->GetIdx(), while_node->GetName().c_str(), i); } @@ -492,17 +488,16 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { GE_CHECK_NOTNULL(out_data_anchor); for (auto &peer_in_anchor : while_info.data_outputs[i]) { GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_data_anchor, peer_in_anchor), - "Add data-edge %s:%d->%s:%d failed.", - while_node->GetName().c_str(), i + kWhileOutputIndex, - peer_in_anchor->GetOwnerNode()->GetName().c_str(), peer_in_anchor->GetIdx()); + "Add data-edge %s:%d->%s:%d failed.", while_node->GetName().c_str(), + i + kWhileOutputIndex, peer_in_anchor->GetOwnerNode()->GetName().c_str(), + peer_in_anchor->GetIdx()); } } InControlAnchorPtr in_ctrl_anchor = while_node->GetInControlAnchor(); GE_CHECK_NOTNULL(in_ctrl_anchor); for (auto &peer_out_anchor : while_info.ctrl_inputs) { - GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_ctrl_anchor), - "Add ctrl-edge %s->%s failed.", + GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(peer_out_anchor, in_ctrl_anchor), "Add ctrl-edge %s->%s failed.", peer_out_anchor->GetOwnerNode()->GetName().c_str(), in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); } @@ -510,8 +505,7 @@ Status ForPass::BuildWhileLink(const WhileInfo &while_info) { OutControlAnchorPtr out_ctrl_anchor = while_node->GetOutControlAnchor(); GE_CHECK_NOTNULL(out_ctrl_anchor); for (auto &peer_in_anchor : while_info.ctrl_outputs) { - GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_ctrl_anchor, peer_in_anchor), - "Add ctrl-edge %s->%s failed.", + GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_ctrl_anchor, peer_in_anchor), "Add ctrl-edge %s->%s failed.", out_ctrl_anchor->GetOwnerNode()->GetName().c_str(), 
peer_in_anchor->GetOwnerNode()->GetName().c_str()); } @@ -538,11 +532,11 @@ ComputeGraphPtr ForPass::BuildCondGraph(WhileInfo &while_info) { graph_builder.AddNode(CreateOpDesc(less_name, LESS, false)); // Set Input - graph_builder.SetInput(kWhileIInputIndex, { mul_name }, { 0 }) - .SetInput(kWhileAbsDeltaInputIndex, { mul_name }, { 1 }) - .SetInput(kWhileRangeInputIndex, { less_name }, { 1 }) - .SetUselessInput(kWhileStartInputIndex) - .SetUselessInput(kWhileDeltaInputIndex); + graph_builder.SetInput(kWhileIInputIndex, {mul_name}, {0}) + .SetInput(kWhileAbsDeltaInputIndex, {mul_name}, {1}) + .SetInput(kWhileRangeInputIndex, {less_name}, {1}) + .SetUselessInput(kWhileStartInputIndex) + .SetUselessInput(kWhileDeltaInputIndex); size_t input_num = while_info.data_inputs.size(); for (size_t i = kWhileDataInputIndex; i < input_num; i++) { graph_builder.SetUselessInput(i); @@ -594,9 +588,9 @@ ComputeGraphPtr ForPass::BuildBodyGraph(WhileInfo &while_info) { std::string mul_name = "Mul"; std::string add_name_1 = "Add_1"; graph_builder.AddNode(CreateConstDesc(const_name, 1)) - .AddNode(CreateOpDesc(add_name_0, ADD, false)) - .AddNode(CreateOpDesc(mul_name, MUL, false)) - .AddNode(CreateOpDesc(add_name_1, ADD, false)); + .AddNode(CreateOpDesc(add_name_0, ADD, false)) + .AddNode(CreateOpDesc(mul_name, MUL, false)) + .AddNode(CreateOpDesc(add_name_1, ADD, false)); // Add Subgraph node auto input_num = static_cast(while_info.data_inputs.size()); @@ -606,13 +600,13 @@ ComputeGraphPtr ForPass::BuildBodyGraph(WhileInfo &while_info) { graph_builder.AddNode(CreateSubgraphOpDesc(sub_graph_node_name, sub_graph_input_num, sub_graph_output_num)); // Set Input - graph_builder.SetInput(kWhileIInputIndex, { add_name_0, mul_name }, { 0, 0 }) - .SetUselessInput(kWhileAbsDeltaInputIndex) - .SetUselessInput(kWhileRangeInputIndex) - .SetInput(kWhileStartInputIndex, { add_name_1 }, { 0 }) - .SetInput(kWhileDeltaInputIndex, { mul_name }, { 1 }); + graph_builder.SetInput(kWhileIInputIndex, 
{add_name_0, mul_name}, {0, 0}) + .SetUselessInput(kWhileAbsDeltaInputIndex) + .SetUselessInput(kWhileRangeInputIndex) + .SetInput(kWhileStartInputIndex, {add_name_1}, {0}) + .SetInput(kWhileDeltaInputIndex, {mul_name}, {1}); for (uint32_t i = 0; i < input_num - kWhileDataInputIndex; i++) { - graph_builder.SetInput(i + kWhileDataInputIndex, { sub_graph_node_name }, { i + kSubgraphInputIndex }); + graph_builder.SetInput(i + kWhileDataInputIndex, {sub_graph_node_name}, {i + kSubgraphInputIndex}); } // Add Outputs @@ -626,8 +620,8 @@ ComputeGraphPtr ForPass::BuildBodyGraph(WhileInfo &while_info) { // Add Edges graph_builder.AddDataLink(const_name, 0, add_name_0, 1) - .AddDataLink(mul_name, 0, add_name_1, 1) - .AddDataLink(add_name_1, 0, sub_graph_node_name, kSubgraphLoopVarInputIndex); + .AddDataLink(mul_name, 0, add_name_1, 1) + .AddDataLink(add_name_1, 0, sub_graph_node_name, kSubgraphLoopVarInputIndex); // Add Input-Mapping std::map input_mapping; @@ -674,8 +668,7 @@ ComputeGraphPtr ForPass::BuildBodyGraph(WhileInfo &while_info) { /// OpDescPtr ForPass::CreateSubgraphOpDesc(const std::string &name, uint32_t input_num, uint32_t output_num) { OpDescBuilder op_desc_builder(name, PARTITIONEDCALL); - op_desc_builder.AddDynamicInput("args", input_num) - .AddDynamicOutput("output", output_num); + op_desc_builder.AddDynamicInput("args", input_num).AddDynamicOutput("output", output_num); OpDescPtr op_desc = op_desc_builder.Build(); if (op_desc == nullptr) { @@ -717,4 +710,3 @@ Status ForPass::UpdateForBodyInputMapping(const WhileInfo &while_info) { return SUCCESS; } } // namespace ge - diff --git a/ge/graph/passes/for_pass.h b/ge/graph/passes/for_pass.h index d6f307d1..f25655f8 100644 --- a/ge/graph/passes/for_pass.h +++ b/ge/graph/passes/for_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in 
compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_FOR_PASS_H #define GE_GRAPH_PASSES_FOR_PASS_H @@ -34,8 +35,16 @@ struct ForInfo { struct WhileInfo { WhileInfo() - : while_node(nullptr), sub_graph_node(nullptr), i(nullptr), abs_delta(nullptr), range(nullptr), - start(nullptr), delta(nullptr), for_body(nullptr), while_cond(nullptr), while_body(nullptr) {} + : while_node(nullptr), + sub_graph_node(nullptr), + i(nullptr), + abs_delta(nullptr), + range(nullptr), + start(nullptr), + delta(nullptr), + for_body(nullptr), + while_cond(nullptr), + while_body(nullptr) {} ge::NodePtr while_node; ge::NodePtr sub_graph_node; ge::OutDataAnchorPtr i; @@ -187,4 +196,4 @@ class ForPass : public BaseNodePass { static OpDescPtr CreateSubgraphOpDesc(const std::string &name, uint32_t input_num, uint32_t output_num); }; } // namespace ge -#endif //GE_GRAPH_PASSES_FOR_PASS_H +#endif // GE_GRAPH_PASSES_FOR_PASS_H diff --git a/ge/graph/passes/get_original_format_pass.cc b/ge/graph/passes/get_original_format_pass.cc index e743f190..8c3c84f9 100644 --- a/ge/graph/passes/get_original_format_pass.cc +++ b/ge/graph/passes/get_original_format_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -97,9 +97,9 @@ Status GetOriginalFormatPass::SetOriginalFormat(const ge::ComputeGraphPtr &graph OpDescPtr tmpSecondOpPtr = bias_node_ptr->GetInDataNodes().at(1)->GetOpDesc(); GE_CHECK_NOTNULL(tmpSecondOpPtr); GE_IF_BOOL_EXEC( - !AttrUtils::GetInt(tmp_first_op_ptr, ATTR_NAME_FORMAT, first_input_format), continue_flag = true; break); + !AttrUtils::GetInt(tmp_first_op_ptr, ATTR_NAME_FORMAT, first_input_format), continue_flag = true; break); GE_IF_BOOL_EXEC( - !AttrUtils::GetInt(tmpSecondOpPtr, ATTR_NAME_FORMAT, second_input_format), continue_flag = true; break); + !AttrUtils::GetInt(tmpSecondOpPtr, ATTR_NAME_FORMAT, second_input_format), continue_flag = true; break); if (first_input_format != second_input_format) { GELOGW("biasadd node is followed two nodes with different format, get original format failed"); diff --git a/ge/graph/passes/get_original_format_pass.h b/ge/graph/passes/get_original_format_pass.h index 66e0222e..813fb2bf 100755 --- a/ge/graph/passes/get_original_format_pass.h +++ b/ge/graph/passes/get_original_format_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/global_step_insert_pass.cc b/ge/graph/passes/global_step_insert_pass.cc index 4431fc3d..460f6ad6 100755 --- a/ge/graph/passes/global_step_insert_pass.cc +++ b/ge/graph/passes/global_step_insert_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,13 +28,11 @@ #include "graph/passes/pass_utils.h" namespace ge { -NodePtr GlobalStepInsertPass::InsertOp(ComputeGraphPtr &compute_graph, - const string &node_type, - const string &node_name, +NodePtr GlobalStepInsertPass::InsertOp(ComputeGraphPtr &compute_graph, const string &node_type, const string &node_name, const std::vector &input_list, const std::vector &output_list) { OpDescPtr op_desc = MakeShared(node_name, node_type); - GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(FAILED,"Make OpDesc failed"); return nullptr); + GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(FAILED, "Make OpDesc failed"); return nullptr); for (auto &input_desc : input_list) { graphStatus graph_status = op_desc->AddInputDesc(input_desc); @@ -52,11 +50,11 @@ NodePtr GlobalStepInsertPass::InsertOp(ComputeGraphPtr &compute_graph, } } - GE_IF_BOOL_EXEC(compute_graph == nullptr, GELOGE(FAILED,"compute_graph is nullptr"); return nullptr); + GE_IF_BOOL_EXEC(compute_graph == nullptr, GELOGE(FAILED, "compute_graph is nullptr"); return nullptr); NodePtr node = compute_graph->AddNode(op_desc); GE_IF_BOOL_EXEC(node == nullptr, - GELOGE(FAILED, "add node failed, name:%s, type:%s.", node_name.c_str(), node_type.c_str()); - return nullptr); + GELOGE(FAILED, "add node failed, name:%s, type:%s.", node_name.c_str(), node_type.c_str()); + return nullptr); GELOGI("Insert op success, name:%s, type:%s.", node_name.c_str(), node_type.c_str()); return node; @@ -83,8 +81,7 @@ Status GlobalStepInsertPass::Run(ComputeGraphPtr compute_graph) { GeTensorDesc tensor_desc(GeShape({1}), FORMAT_ND, DT_UINT64); std::vector input_desc_list = {}; std::vector output_desc_list = {tensor_desc}; - NodePtr global_step = InsertOp(compute_graph, VARIABLE, NODE_NAME_GLOBAL_STEP, - input_desc_list, output_desc_list); + NodePtr global_step = InsertOp(compute_graph, VARIABLE, NODE_NAME_GLOBAL_STEP, input_desc_list, output_desc_list); if (global_step == nullptr) { GELOGE(FAILED, "Add global_step node failed, global_step is null."); 
return FAILED; diff --git a/ge/graph/passes/global_step_insert_pass.h b/ge/graph/passes/global_step_insert_pass.h index da83e93a..46bc85d6 100755 --- a/ge/graph/passes/global_step_insert_pass.h +++ b/ge/graph/passes/global_step_insert_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ namespace ge { /// in order to make the global step variable place in known subgraph /// class GlobalStepInsertPass : public GraphPass { -public: + public: /// /// @param compute_graph graph /// @return SUCCESS: do success @@ -38,7 +38,8 @@ public: /// Other: failed /// Status Run(ComputeGraphPtr compute_graph) override; -private: + + private: /// /// Universal insert node to graph. /// @param compute_graph graph @@ -48,12 +49,9 @@ private: /// @param output_list output desc list /// @return the inserted node. if insert failed return nullptr. 
/// - NodePtr InsertOp(ComputeGraphPtr &compute_graph, - const string &node_type, - const string &node_name, - const std::vector &input_list, - const std::vector &output_list); + NodePtr InsertOp(ComputeGraphPtr &compute_graph, const string &node_type, const string &node_name, + const std::vector &input_list, const std::vector &output_list); }; -} // namespace ge +} // namespace ge #endif // GE_GRAPH_PASSES_GLOBAL_STEP_INSERT_PASS_H_ \ No newline at end of file diff --git a/ge/graph/passes/guarantee_const_pass.cc b/ge/graph/passes/guarantee_const_pass.cc index a2d8f262..f099c01d 100644 --- a/ge/graph/passes/guarantee_const_pass.cc +++ b/ge/graph/passes/guarantee_const_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/guarantee_const_pass.h b/ge/graph/passes/guarantee_const_pass.h index 1f297944..7f289a10 100755 --- a/ge/graph/passes/guarantee_const_pass.h +++ b/ge/graph/passes/guarantee_const_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/hccl_group_pass.cc b/ge/graph/passes/hccl_group_pass.cc index bbfd9b56..d8f11434 100644 --- a/ge/graph/passes/hccl_group_pass.cc +++ b/ge/graph/passes/hccl_group_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/hccl_group_pass.h b/ge/graph/passes/hccl_group_pass.h index dbe15e96..059710ce 100644 --- a/ge/graph/passes/hccl_group_pass.h +++ b/ge/graph/passes/hccl_group_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ namespace ge { class HcclGroupPass : public BaseNodePass { public: Status Run(NodePtr &node) override; + private: Status MarkGroupForFusedNode(NodePtr &fused_node); }; diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index 21747f42..b8787476 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,8 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { } GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable), - GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED); + GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); + return FAILED); if (!node_input_mutable) { continue; } diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h index e73a5483..44b40241 100755 --- a/ge/graph/passes/hccl_memcpy_pass.h +++ b/ge/graph/passes/hccl_memcpy_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,7 +35,7 @@ class HcclMemcpyPass : public GraphPass { std::string CheckDuplicateName(const std::string &node_name); Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, - const InDataAnchorPtr &hccl_in_anchor); + const InDataAnchorPtr &hccl_in_anchor); std::unordered_map node_num_map_; }; diff --git a/ge/graph/passes/identity_pass.cc b/ge/graph/passes/identity_pass.cc index 5a54e391..57b7c46d 100755 --- a/ge/graph/passes/identity_pass.cc +++ b/ge/graph/passes/identity_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,14 +47,14 @@ Status CheckIdentityUsable(const NodePtr &node, bool &usable) { auto in_node_opdesc = in_node->GetOpDesc(); GE_CHECK_NOTNULL(in_node_opdesc); // near entrance of subgraph || near subgraph - if ((in_node->GetType() == DATA && NodeUtils::IsSubgraphInput(in_node)) - || !in_node_opdesc->GetSubgraphInstanceNames().empty()) { + if ((in_node->GetType() == DATA && NodeUtils::IsSubgraphInput(in_node)) || + !in_node_opdesc->GetSubgraphInstanceNames().empty()) { usable = true; return SUCCESS; } - GE_CHK_STATUS_RET(GetOriginalType(in_node, node_type), - "Failed to get node type from node %s", node->GetName().c_str()); + GE_CHK_STATUS_RET(GetOriginalType(in_node, node_type), "Failed to get node type from node %s", + node->GetName().c_str()); bool need_skip = (node_type != SWITCH) && (node_type != REFSWITCH) && (node_type != SWITCHN); if (need_skip) { GELOGD("skip identity %s connected to switch", node->GetName().c_str()); @@ -70,13 +70,12 @@ Status CheckIdentityUsable(const NodePtr &node, bool &usable) { auto out_node_opdesc = out_node->GetOpDesc(); GE_CHECK_NOTNULL(out_node_opdesc); // near output of subgraph || near subgraph - if (NodeUtils::IsSubgraphOutput(out_node) - || 
!out_node_opdesc->GetSubgraphInstanceNames().empty()) { + if (NodeUtils::IsSubgraphOutput(out_node) || !out_node_opdesc->GetSubgraphInstanceNames().empty()) { usable = true; return SUCCESS; } - GE_CHK_STATUS_RET(GetOriginalType(out_node, node_type), - "Failed to get node type from node %s", node->GetName().c_str()); + GE_CHK_STATUS_RET(GetOriginalType(out_node, node_type), "Failed to get node type from node %s", + node->GetName().c_str()); if ((node_type != MERGE) && (node_type != REFMERGE)) { GELOGD("skip identity %s connected to merge", node->GetName().c_str()); break; diff --git a/ge/graph/passes/identity_pass.h b/ge/graph/passes/identity_pass.h index a0d3f032..a4a80efc 100644 --- a/ge/graph/passes/identity_pass.h +++ b/ge/graph/passes/identity_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/infershape_pass.cc b/ge/graph/passes/infershape_pass.cc index 7b8f7b50..cacca584 100755 --- a/ge/graph/passes/infershape_pass.cc +++ b/ge/graph/passes/infershape_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,7 +21,6 @@ #include "analyzer/analyzer.h" #include "framework/common/util.h" #include "graph/shape_refiner.h" -#include "graph/utils/graph_utils.h" namespace ge { Status InferShapePass::Run(NodePtr &node) { @@ -30,13 +29,9 @@ Status InferShapePass::Run(NodePtr &node) { // select INFERSHAPE failed info auto graph = node->GetOwnerComputeGraph(); GE_CHECK_NOTNULL(graph); - auto root_graph = ge::GraphUtils::FindRootGraph(graph); - GE_CHECK_NOTNULL(root_graph); - analyzer::DataInfo analyze_info{root_graph->GetSessionID(), root_graph->GetGraphID(), - analyzer::INFER_SHAPE, node, "InferShapeFailed!"}; + analyzer::DataInfo analyze_info{graph->GetSessionID(), graph->GetGraphID(), analyzer::INFER_SHAPE, node, + "InferShapeFailed!"}; (void)Analyzer::GetInstance()->DoAnalyze(analyze_info); - (void)Analyzer::GetInstance()->SaveAnalyzerDataToFile(root_graph->GetSessionID(), - root_graph->GetGraphID()); GELOGE(GE_GRAPH_INFERSHAPE_FAILED, "infershape failed. node: %s", node->GetName().c_str()); return GE_GRAPH_INFERSHAPE_FAILED; diff --git a/ge/graph/passes/infershape_pass.h b/ge/graph/passes/infershape_pass.h index 30cf0472..9e4df9a6 100644 --- a/ge/graph/passes/infershape_pass.h +++ b/ge/graph/passes/infershape_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/input_output_connection_identify_pass.cc b/ge/graph/passes/input_output_connection_identify_pass.cc index 0d198dfb..45560bf5 100644 --- a/ge/graph/passes/input_output_connection_identify_pass.cc +++ b/ge/graph/passes/input_output_connection_identify_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -165,8 +165,8 @@ Status InputOutputConnectionIdentifyPass::ProcessOutputNode(const NodePtr &node, } Status InputOutputConnectionIdentifyPass::SetNodeAttrOfConnectingInputOutput( - const map> &connect_input_node_idx, - const map> &connect_output_node_idx) { + const map> &connect_input_node_idx, + const map> &connect_output_node_idx) { for (const auto &iter : connect_input_node_idx) { GE_CHECK_NOTNULL(iter.first); if (iter.first->GetOpDesc() != nullptr) { diff --git a/ge/graph/passes/input_output_connection_identify_pass.h b/ge/graph/passes/input_output_connection_identify_pass.h index 97ed315d..0dd32102 100755 --- a/ge/graph/passes/input_output_connection_identify_pass.h +++ b/ge/graph/passes/input_output_connection_identify_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/isolated_op_remove_pass.cc b/ge/graph/passes/isolated_op_remove_pass.cc index 5c9093e9..152104eb 100644 --- a/ge/graph/passes/isolated_op_remove_pass.cc +++ b/ge/graph/passes/isolated_op_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/isolated_op_remove_pass.h b/ge/graph/passes/isolated_op_remove_pass.h index 3b7fe7d1..f17df21a 100755 --- a/ge/graph/passes/isolated_op_remove_pass.h +++ b/ge/graph/passes/isolated_op_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/iterator_op_pass.cc b/ge/graph/passes/iterator_op_pass.cc index 1ec2bba9..656ed390 100644 --- a/ge/graph/passes/iterator_op_pass.cc +++ b/ge/graph/passes/iterator_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -68,8 +68,8 @@ Status IteratorOpPass::Run(ge::ComputeGraphPtr graph) { int64_t loop_per_iter = 0; ge::GeTensorDesc ge_tensor_desc; - Status status = VarManager::Instance(graph->GetSessionID())->GetCurVarDesc(NODE_NAME_FLOWCTRL_LOOP_PER_ITER, - ge_tensor_desc); + Status status = + VarManager::Instance(graph->GetSessionID())->GetCurVarDesc(NODE_NAME_FLOWCTRL_LOOP_PER_ITER, ge_tensor_desc); GE_IF_BOOL_EXEC(status != SUCCESS, GELOGW("Fail to Get var_desc of NODE_NAME_FLOWCTRL_LOOP_PER_ITER failed."); continue); Status ret; @@ -78,8 +78,8 @@ Status IteratorOpPass::Run(ge::ComputeGraphPtr graph) { // EOS will not be considered if ret is not SUCCESS. GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGW("Set rt context RT_CTX_NORMAL_MODE failed."); continue); - status = GetVariableValue(graph->GetSessionID(), ge_tensor_desc, NODE_NAME_FLOWCTRL_LOOP_PER_ITER, - &loop_per_iter); + status = + GetVariableValue(graph->GetSessionID(), ge_tensor_desc, NODE_NAME_FLOWCTRL_LOOP_PER_ITER, &loop_per_iter); ret = SetRtContext(graph->GetSessionID(), graph->GetGraphID(), rtContext_t(), RT_CTX_GEN_MODE); // The following process will be affected if ret is not SUCCESS. 
@@ -144,8 +144,7 @@ ge::NodePtr IteratorOpPass::InsertEndOfSequenceNode(const ge::NodePtr &pre_node, auto out_anchor = pre_node->GetOutDataAnchor(0); ge::graphStatus status; status = GraphUtils::AddEdge(out_anchor, end_of_seq_node->GetInDataAnchor(0)); - GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, - "Graph add EndOfSequence op input edge fail, dst node: %s.", + GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, "Graph add EndOfSequence op input edge fail, dst node: %s.", end_of_seq_node->GetName().c_str()); // EOS(control) --> subsequent of memcpy OutControlAnchorPtr out_ctrl_anchor = end_of_seq_node->GetOutControlAnchor(); @@ -158,10 +157,8 @@ ge::NodePtr IteratorOpPass::InsertEndOfSequenceNode(const ge::NodePtr &pre_node, } status = GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor); GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, - "Graph add EndOfSequence op out ctrl edge fail, dst node: %s.", - out_node->GetName().c_str()); - GELOGI("Graph add EndOfSequence op out ctrl edge, dst node: %s.", - out_node->GetName().c_str()); + "Graph add EndOfSequence op out ctrl edge fail, dst node: %s.", out_node->GetName().c_str()); + GELOGI("Graph add EndOfSequence op out ctrl edge, dst node: %s.", out_node->GetName().c_str()); } return end_of_seq_node; @@ -232,18 +229,19 @@ ge::NodePtr IteratorOpPass::InsertMemcpyAsyncNode(const ge::NodePtr &pre_node, c } // Control out OutControlAnchorPtr out_ctrl_anchor = pre_node->GetOutControlAnchor(); - GE_IF_BOOL_EXEC(out_ctrl_anchor != nullptr, - for (auto &peer_in_ctrl_anchor : out_ctrl_anchor->GetPeerInControlAnchors()) { - ge::graphStatus status = GraphUtils::RemoveEdge(out_ctrl_anchor, peer_in_ctrl_anchor); - GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, "Remove edge failed, dst node: %s.", - peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); - status = GraphUtils::AddEdge(memcpy_async_node->GetOutControlAnchor(), peer_in_ctrl_anchor); - GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, 
return nullptr, - "Graph add memcpyAsync op out ctrl edge fail, dst node: %s.", - peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); - GELOGI("Graph add memcpyAsync op out ctrl edge, dst node: %s.", - peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); - }); + GE_IF_BOOL_EXEC( + out_ctrl_anchor != nullptr, for (auto &peer_in_ctrl_anchor + : out_ctrl_anchor->GetPeerInControlAnchors()) { + ge::graphStatus status = GraphUtils::RemoveEdge(out_ctrl_anchor, peer_in_ctrl_anchor); + GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, "Remove edge failed, dst node: %s.", + peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); + status = GraphUtils::AddEdge(memcpy_async_node->GetOutControlAnchor(), peer_in_ctrl_anchor); + GE_CHK_BOOL_EXEC(status == GRAPH_SUCCESS, return nullptr, + "Graph add memcpyAsync op out ctrl edge fail, dst node: %s.", + peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); + GELOGI("Graph add memcpyAsync op out ctrl edge, dst node: %s.", + peer_in_ctrl_anchor->GetOwnerNode()->GetName().c_str()); + }); GELOGI("Insert memcpyAsync op success."); return memcpy_async_node; @@ -282,8 +280,8 @@ ge::OpDescPtr IteratorOpPass::CreateMemcpyAsyncOp(const ge::NodePtr &pre_node) { } Status IteratorOpPass::SetRtContext(uint64_t session_id, uint32_t graph_id, rtContext_t rt_context, rtCtxMode_t mode) { - GELOGI("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, - graph_id, static_cast(mode), ge::GetContext().DeviceId()); + GELOGI("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id, + static_cast(mode), ge::GetContext().DeviceId()); GE_CHK_RT_RET(rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId())); GE_CHK_RT_RET(rtCtxSetCurrent(rt_context)); diff --git a/ge/graph/passes/iterator_op_pass.h b/ge/graph/passes/iterator_op_pass.h index d9303358..77e80600 100644 --- a/ge/graph/passes/iterator_op_pass.h +++ b/ge/graph/passes/iterator_op_pass.h @@ -1,5 +1,5 @@ /** - * 
Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/link_gen_mask_nodes_pass.cc b/ge/graph/passes/link_gen_mask_nodes_pass.cc index 9bd991aa..4f122fb2 100755 --- a/ge/graph/passes/link_gen_mask_nodes_pass.cc +++ b/ge/graph/passes/link_gen_mask_nodes_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/link_gen_mask_nodes_pass.h b/ge/graph/passes/link_gen_mask_nodes_pass.h index 12d68f1b..f9979ab1 100644 --- a/ge/graph/passes/link_gen_mask_nodes_pass.h +++ b/ge/graph/passes/link_gen_mask_nodes_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/mark_agnostic_pass.cc b/ge/graph/passes/mark_agnostic_pass.cc index 77fa64fb..6f520dd8 100644 --- a/ge/graph/passes/mark_agnostic_pass.cc +++ b/ge/graph/passes/mark_agnostic_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,42 +13,23 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/passes/mark_agnostic_pass.h" -#include "graph/utils/node_utils.h" +#include "utils/node_utils.h" namespace ge { Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { for (const auto &node : graph->GetDirectNode()) { auto node_type = NodeUtils::GetNodeType(*node); if (node_type == SWITCH || node_type == REFSWITCH || node_type == SWITCHN) { - GELOGD("Mark format agnostic and continuous for switch node %s", node->GetName().c_str()); - const OpDescPtr op_desc = node->GetOpDesc(); - const GeTensorDescPtr op_tensor = op_desc->MutableInputDesc(0); - if (op_tensor == nullptr) { - GELOGD("Op: %s, Index:0,has no input", node->GetName().c_str()); - continue; - } - AttrUtils::SetInt(op_tensor, "_format_continuous", 1); - AttrUtils::SetInt(node->GetOpDesc(), "_format_agnostic", 1); - AttrUtils::SetListInt(node->GetOpDesc(), "_format_agnostic_except_input", std::vector({1})); - continue; - } - if (node_type == IDENTITY) { - GELOGD("Mark format agnostic for identity node %s", node->GetName().c_str()); + GELOGD("Mark format agnostic for switch ndoe %s", node->GetName().c_str()); AttrUtils::SetInt(node->GetOpDesc(), "_format_agnostic", 1); AttrUtils::SetListInt(node->GetOpDesc(), "_format_agnostic_except_input", std::vector({1})); continue; } if (node_type == MERGE || node_type == REFMERGE) { - GELOGD("Mark format agnostic and continuous for merge node %s", node->GetName().c_str()); - const OpDescPtr op_desc = node->GetOpDesc(); - const GeTensorDescPtr op_tensor = op_desc->MutableOutputDesc(0); - if (op_tensor == nullptr) { - GELOGD("Op: %s, Index:0,has no output", node->GetName().c_str()); - continue; - } - AttrUtils::SetInt(op_tensor, "_format_continuous", 1); + GELOGD("Mark format agnostic for merge node %s", node->GetName().c_str()); AttrUtils::SetInt(node->GetOpDesc(), "_format_agnostic", 1); AttrUtils::SetListInt(node->GetOpDesc(), "_format_agnostic_except_output", std::vector({1})); continue; diff --git a/ge/graph/passes/mark_agnostic_pass.h 
b/ge/graph/passes/mark_agnostic_pass.h index 9c581abe..7fd3189d 100644 --- a/ge/graph/passes/mark_agnostic_pass.h +++ b/ge/graph/passes/mark_agnostic_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_MARK_AGNOSTIC_PASS_H_ #define GE_MARK_AGNOSTIC_PASS_H_ @@ -23,6 +24,6 @@ class MarkAgnosticPass : public GraphPass { public: Status Run(ComputeGraphPtr graph) override; }; -} +} // namespace ge -#endif //GE_MARK_AGNOSTIC_PASS_H_ +#endif // GE_MARK_AGNOSTIC_PASS_H_ diff --git a/ge/graph/passes/mark_graph_unknown_status_pass.cc b/ge/graph/passes/mark_graph_unknown_status_pass.cc index d8f5feff..7106e58c 100644 --- a/ge/graph/passes/mark_graph_unknown_status_pass.cc +++ b/ge/graph/passes/mark_graph_unknown_status_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,24 +16,17 @@ #include "graph/passes/mark_graph_unknown_status_pass.h" #include "graph/utils/node_utils.h" -#include "graph/debug/ge_attr_define.h" namespace ge { Status MarkGraphUnknownStatusPass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); bool is_unknown_shape = false; - bool forced_unknown = false; for (const auto &node : graph->GetDirectNode()) { GE_CHK_STATUS_RET(ge::NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), "Get node[%s] shape status failed!", node->GetName().c_str()); if (is_unknown_shape) { break; } - if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, forced_unknown) && forced_unknown) { - GELOGD("node %s was marked as unknown shape.", node->GetName().c_str()); - is_unknown_shape = true; - break; - } } graph->SetGraphUnknownFlag(is_unknown_shape); GELOGD("mark graph [%s] unknown status success! value is %d", graph->GetName().c_str(), is_unknown_shape); diff --git a/ge/graph/passes/mark_graph_unknown_status_pass.h b/ge/graph/passes/mark_graph_unknown_status_pass.h index a1148c6e..662e321c 100644 --- a/ge/graph/passes/mark_graph_unknown_status_pass.h +++ b/ge/graph/passes/mark_graph_unknown_status_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/mark_same_addr_pass.cc b/ge/graph/passes/mark_same_addr_pass.cc index 2441d0bd..0ed151d3 100644 --- a/ge/graph/passes/mark_same_addr_pass.cc +++ b/ge/graph/passes/mark_same_addr_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/mark_same_addr_pass.h b/ge/graph/passes/mark_same_addr_pass.h index 518fe418..ebfcf6b2 100644 --- a/ge/graph/passes/mark_same_addr_pass.h +++ b/ge/graph/passes/mark_same_addr_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/memcpy_addr_async_pass.cc b/ge/graph/passes/memcpy_addr_async_pass.cc index 3ede39a7..934f4737 100755 --- a/ge/graph/passes/memcpy_addr_async_pass.cc +++ b/ge/graph/passes/memcpy_addr_async_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -99,13 +99,13 @@ Status MemcpyAddrAsyncPass::AddMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, GELOGI("Insert memcpy_addr_async for known graph."); auto sub_graph = user_data_for_known_->GetOwnerComputeGraph(); NodePtr memcpy_addr_async_node = - CreateMemcpyAddrAsyncNode(sub_graph, peer_out_anchor_for_known_, out_of_user_data_for_known_); + CreateMemcpyAddrAsyncNode(sub_graph, peer_out_anchor_for_known_, out_of_user_data_for_known_); GE_IF_BOOL_EXEC(memcpy_addr_async_node == nullptr, GELOGE(INTERNAL_ERROR, "CreateMemcpyAddrAsyncNode for known failed."); return INTERNAL_ERROR); Status ret = - InsertMemcpyAddrAsyncNode(peer_out_anchor_for_known_, in_anchor_for_known_, memcpy_addr_async_node); + InsertMemcpyAddrAsyncNode(peer_out_anchor_for_known_, in_anchor_for_known_, memcpy_addr_async_node); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "InsertMemcpyAddrAsyncNode for known failed."); return ret); } } @@ -136,7 +136,7 @@ void MemcpyAddrAsyncPass::FindUserDataForNonDynamic(const ge::NodePtr &parent_no OutDataAnchorPtr out_anchor = 
in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(out_anchor == nullptr, GELOGE(INTERNAL_ERROR, "Cannot find out_anchor of %s.", parent_node->GetName().c_str()); - return); + return ); NodePtr in_node = out_anchor->GetOwnerNode(); GELOGI("in_node of parent_node is %s.", in_node->GetName().c_str()); if (in_node->GetType() == DATA) { @@ -261,9 +261,7 @@ Status MemcpyAddrAsyncPass::InsertMemAddrAsyncNodeBeforeNetoutput(const ComputeG auto in_node = NodeUtils::GetInDataNodeByIndex(*node, in_data_anchor->GetIdx()); GE_CHECK_NOTNULL(in_node); auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); - if ((in_node->GetType() != CONSTANT) && - (in_node->GetType() != CONSTANTOP) && - (in_node->GetType() != DATA)) { + if ((in_node->GetType() != CONSTANT) && (in_node->GetType() != CONSTANTOP) && (in_node->GetType() != DATA)) { continue; } auto desc = in_node->GetOpDesc(); diff --git a/ge/graph/passes/memcpy_addr_async_pass.h b/ge/graph/passes/memcpy_addr_async_pass.h index 0f22d10b..a70fcbdd 100755 --- a/ge/graph/passes/memcpy_addr_async_pass.h +++ b/ge/graph/passes/memcpy_addr_async_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/merge_pass.cc b/ge/graph/passes/merge_pass.cc index 61aab4aa..8e691518 100644 --- a/ge/graph/passes/merge_pass.cc +++ b/ge/graph/passes/merge_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -97,9 +97,9 @@ bool MergePass::IsNeedChangeIndexToConstant(NodePtr &node) const { for (const auto &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { if (peer_in_anchor != nullptr && peer_in_anchor->GetOwnerNode() != nullptr) { GELOGI( - "[%s] MergePass, value_index link to other node, " - "change it to be Constant.", - node->GetName().c_str()); + "[%s] MergePass, value_index link to other node, " + "change it to be Constant.", + node->GetName().c_str()); return true; } } @@ -159,14 +159,15 @@ Status MergePass::CreateConstByValue(NodePtr &node, int value_index, OpDescPtr & // 3. create attr value of Constant, is a tensor GeTensorPtr const_tensor_ptr = - MakeShared(original_out_tensor_desc, reinterpret_cast(&value_index), sizeof(int)); + MakeShared(original_out_tensor_desc, reinterpret_cast(&value_index), sizeof(int)); if (const_tensor_ptr == nullptr) { GELOGE(FAILED, "[%s] Make shared of Constant tensor failed.", constant_name.c_str()); return FAILED; } GE_IF_BOOL_EXEC(!AttrUtils::SetTensor(op_desc, ATTR_NAME_WEIGHTS, const_tensor_ptr), - GELOGE(FAILED, "get ATTR_NAME_WEIGHTS failed"); return FAILED); + GELOGE(FAILED, "get ATTR_NAME_WEIGHTS failed"); + return FAILED); // 4. set Constant output desc GE_CHK_STATUS_RET(op_desc->AddOutputDesc(original_out_tensor_desc), "add out put desc failed"); diff --git a/ge/graph/passes/merge_pass.h b/ge/graph/passes/merge_pass.h index 53582ff6..ef586713 100755 --- a/ge/graph/passes/merge_pass.h +++ b/ge/graph/passes/merge_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/merge_to_stream_merge_pass.cc b/ge/graph/passes/merge_to_stream_merge_pass.cc index 0ff05c23..34daa681 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.cc +++ b/ge/graph/passes/merge_to_stream_merge_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -134,7 +134,7 @@ Status MergeToStreamMergePass::AddMemcpyAsyncNodes(const ComputeGraphPtr &graph, GE_CHK_BOOL_EXEC(active_node != nullptr, return FAILED, "Create StreamActive node failed."); GE_CHK_STATUS(GraphUtils::AddEdge(active_node->GetOutControlAnchor(), node->GetInControlAnchor()), "StreamActive add ctrl edge failed."); - if (SetActiveLabelList(active_node, { node->GetName() }) != SUCCESS) { + if (SetActiveLabelList(active_node, {node->GetName()}) != SUCCESS) { GELOGE(FAILED, "SetActiveLabelList for node %s failed.", active_node->GetName().c_str()); return FAILED; } @@ -193,7 +193,7 @@ NodePtr MergeToStreamMergePass::CreateActiveNode(const ComputeGraphPtr &graph, c GE_CHK_BOOL_EXEC(active_node != nullptr, return nullptr, "Create StreamActive node failed."); GE_IF_BOOL_EXEC(GraphUtils::AddEdge(node->GetOutControlAnchor(), active_node->GetInControlAnchor()) != SUCCESS, GELOGE(INTERNAL_ERROR, "add edge failed"); - return nullptr); + return nullptr); GE_IF_BOOL_EXEC(SetSwitchBranchNodeLabel(active_node, node_name) != SUCCESS, GELOGE(INTERNAL_ERROR, "set switch branch node label failed"); return nullptr); diff --git a/ge/graph/passes/merge_to_stream_merge_pass.h b/ge/graph/passes/merge_to_stream_merge_pass.h index 6eb2b22c..9f713989 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.h +++ b/ge/graph/passes/merge_to_stream_merge_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed 
under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index 732844e5..80355ca7 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,6 @@ #include "common/formats/utils/formats_trans_utils.h" #include "common/ge/ge_util.h" -#include "graph/common/local_context.h" #include "graph/preprocess/multi_batch_options.h" #include "graph/utils/node_utils.h" #include "graph/utils/op_desc_utils.h" @@ -34,7 +33,6 @@ const std::string kMultiBatchCaseNode = "ascend_mbatch_shape_case"; const std::string kMultiBatchDataNode = "ascend_mbatch_shape_data"; const std::string kMultiBatchConstNode = "ascend_mbatch_shape_const"; const std::string kMultiBatchMapIndexNode = "ascend_mbatch_shape_mapindex"; -const std::string kMultiBatchNodePostfix = "_ascend_mbatch_batch_"; } // namespace Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { @@ -55,13 +53,6 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { return INTERNAL_ERROR; } - // parser data dynamic info from atc parameter --input_shape - if (multibatch::ParserDataToDynmaicInfo(batch_shapes_, GetLocalOmgContext().user_input_dims, - data_to_dynamic_info_) != SUCCESS) { - GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed"); - return PARAM_INVALID; - } - (void)AttrUtils::GetStr(graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); ComputeGraphPtr branch = MakeShared(graph->GetName()); if (branch == nullptr) { @@ -127,8 +118,8 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { if (data_node->GetType() == 
DATA) { direct_output_[i] = data_node->GetName(); GE_CHK_GRAPH_STATUS_RET( - GraphUtils::RemoveEdge(data_node->GetOutDataAnchor(kDataOutIndex), output->GetInDataAnchor(i)), - "Remove edge failed"); + GraphUtils::RemoveEdge(data_node->GetOutDataAnchor(kDataOutIndex), output->GetInDataAnchor(i)), + "Remove edge failed"); } } @@ -174,15 +165,6 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { } } - std::vector data_name_order; - for (auto &item : GetLocalOmgContext().user_input_dims) { - data_name_order.push_back(item.first); - } - if (!AttrUtils::SetListStr(op_desc, ATTR_USER_DESIGNEATE_SHAPE_ORDER, data_name_order)) { - GELOGE(FAILED, "Failed to add user designate shape order attr on case node %s", - op_desc->GetName().c_str()); - return FAILED; - } GE_CHK_STATUS_RET(multibatch::StampDynamicType(op_desc), "Set dynamic type failed"); GE_CHK_STATUS_RET(CreateIndexNode(graph), "Create index node failed"); @@ -293,8 +275,8 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { OpDescBuilder op_builder(kMultiBatchMapIndexNode, "MapIndex"); op_builder.AddInput("x", data_node->GetOpDesc()->GetOutputDesc(0)) - .AddInput("data_seq", const_node->GetOpDesc()->GetOutputDesc(0)) - .AddOutput("y", GeTensorDesc(GeShape(), FORMAT_ND, DT_INT32)); + .AddInput("data_seq", const_node->GetOpDesc()->GetOutputDesc(0)) + .AddOutput("y", GeTensorDesc(GeShape(), FORMAT_ND, DT_INT32)); const OpDescPtr op_desc = op_builder.Build(); if (op_desc == nullptr) { @@ -352,8 +334,8 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) { const NodePtr &data = graph->AddNode(op_desc); GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str()); if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", - data->GetName().c_str(), case_node_->GetName().c_str()); 
+ GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", data->GetName().c_str(), + case_node_->GetName().c_str()); return FAILED; } @@ -393,8 +375,8 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) { const NodePtr &data = graph->AddNode(op_desc); GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str()); if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s", - data->GetName().c_str(), case_node_->GetName().c_str()); + GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s", data->GetName().c_str(), + case_node_->GetName().c_str()); return FAILED; } all_const_nodes.emplace_back(data); @@ -409,7 +391,6 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) { // Const no InputDesc, Data need InputDesc. (void)op_desc->AddInputDesc(op_desc->GetOutputDesc(kDataOutIndex)); (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index); - (void)NodeUtils::AppendInputAnchor(all_const_nodes_[i], 1); } all_const_nodes_.swap(all_const_nodes); @@ -442,8 +423,8 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { const auto it = direct_output_.find(i); if (it == direct_output_.end()) { if (GraphUtils::AddEdge(case_node_->GetOutDataAnchor(i), node->GetInDataAnchor(i)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between Case:%s to NetOutput:%s", - case_node_->GetName().c_str(), node->GetName().c_str()); + GELOGE(FAILED, "Failed to add edge between Case:%s to NetOutput:%s", case_node_->GetName().c_str(), + node->GetName().c_str()); return FAILED; } } else { @@ -453,8 +434,8 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { return GE_GRAPH_GRAPH_NODE_NULL; } if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(kDataOutIndex), node->GetInDataAnchor(i)) != GRAPH_SUCCESS) { - 
GELOGE(FAILED, "Failed to add edge between Data:%s to NetOutput:%s", - data_node->GetName().c_str(), node->GetName().c_str()); + GELOGE(FAILED, "Failed to add edge between Data:%s to NetOutput:%s", data_node->GetName().c_str(), + node->GetName().c_str()); return FAILED; } } @@ -473,7 +454,6 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { /// Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); - auto data_name = data->GetName(); const auto &dims = data_shape.GetDims(); if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { return SUCCESS; @@ -484,10 +464,9 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { int64_t max_size = 0; for (size_t i = 0; i < batch_shapes_.size(); ++i) { int64_t size = 1; - for (auto dim : data_to_dynamic_info_.at(data_name).at(i)) { + for (auto dim : batch_shapes_[i]) { if (INT64_MAX / dim < size) { - GELOGE(PARAM_INVALID, "The shape %s size overflow", - formats::ShapeToString(data_to_dynamic_info_.at(data_name).at(i)).c_str()); + GELOGE(PARAM_INVALID, "The shape %s size overflow", formats::ShapeToString(batch_shapes_[i]).c_str()); return PARAM_INVALID; } size *= dim; @@ -498,17 +477,17 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { } } - return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), data, data_shape); + return SetShapeToData(batch_shapes_[max_shape_index], data, data_shape); } /// /// @ingroup ge /// @brief Set shape to Data node in branch. /// @param [in] const NodePtr &data: data in branch. -/// @param [in] size_t index: The batch index. +/// @param [in] const std::vector &shapes: dims of shape. 
/// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::UpdateShapeToData(const NodePtr &data, size_t index) { +Status MultiBatchClonePass::UpdataShapeToData(const NodePtr &data, const vector &shapes) { auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); const auto &dims = data_shape.GetDims(); if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { @@ -516,16 +495,7 @@ Status MultiBatchClonePass::UpdateShapeToData(const NodePtr &data, size_t index) } (void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); - auto data_name = data->GetName(); - size_t pos = data_name.find(kMultiBatchNodePostfix); - if (pos == string::npos) { - GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.", - kMultiBatchNodePostfix.c_str(), data_name.c_str()); - return FAILED; - } - - auto parent_name = data_name.substr(0, pos); - return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(index), data, data_shape); + return SetShapeToData(shapes, data, data_shape); } /// @@ -564,38 +534,42 @@ Status MultiBatchClonePass::SetShapeToData(const vector &shapes, const /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const ComputeGraphPtr &branch) { + const std::string name = graph->GetName() + "_branche_"; const auto &op_desc = case_node_->GetOpDesc(); for (size_t i = 0; i < batch_shapes_.size(); ++i) { std::vector input_nodes; std::vector output_nodes; - const std::string postfix = kMultiBatchNodePostfix + std::to_string(i); - ComputeGraphPtr subgraph = (i == 0) ? branch : GraphUtils::CloneGraph(branch, postfix, input_nodes, output_nodes); + const std::string prefix = "branche_" + std::to_string(i) + "_"; + ComputeGraphPtr subgraph = (i == 0) ? 
branch : GraphUtils::CloneGraph(branch, prefix, input_nodes, output_nodes); if (subgraph == nullptr) { GELOGE(FAILED, "Create multi-batch case node failed"); return FAILED; } - subgraph->SetName("Batch_" + std::to_string(i)); + subgraph->SetName(name + std::to_string(i)); subgraph->SetParentNode(case_node_); subgraph->SetParentGraph(graph); - graph->AddSubgraph(subgraph->GetName(), subgraph); + (void)AttrUtils::SetStr(subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); all_branch_output_[subgraph] = subgraph->FindFirstNodeMatchType(NETOUTPUT); - const string key_name = "branches" + std::to_string(i); + graph->AddSubgraph(subgraph->GetName(), subgraph); + + const std::string key_name = "branches" + std::to_string(i); op_desc->AddSubgraphName(key_name); op_desc->SetSubgraphInstanceName(i, subgraph->GetName()); for (const auto &data : input_nodes) { - GE_CHK_STATUS_RET(UpdateShapeToData(data, i), "Update %s failed", subgraph->GetName().c_str()); + GE_CHK_STATUS_RET(UpdataShapeToData(data, batch_shapes_[i]), "Update %s failed", subgraph->GetName().c_str()); } } // Origninal graph take as first subgraph, update node name. 
for (const auto &n : branch->GetDirectNode()) { const auto &op_desc = n->GetOpDesc(); - op_desc->SetName(n->GetName() + kMultiBatchNodePostfix + "0"); + op_desc->SetName("branche_0_" + n->GetName()); + if (n->GetType() == DATA) { - GE_CHK_STATUS_RET(UpdateShapeToData(n, 0), "Update %s failed", branch->GetName().c_str()); + GE_CHK_STATUS_RET(UpdataShapeToData(n, batch_shapes_[0]), "Update %s failed", branch->GetName().c_str()); } } diff --git a/ge/graph/passes/multi_batch_clone_pass.h b/ge/graph/passes/multi_batch_clone_pass.h index 1155dfc8..0d52b738 100755 --- a/ge/graph/passes/multi_batch_clone_pass.h +++ b/ge/graph/passes/multi_batch_clone_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -107,10 +107,10 @@ class MultiBatchClonePass : public GraphPass { /// @ingroup ge /// @brief Set shape to Data node in branch. /// @param [in] const NodePtr &data: data in branch. - /// @param [in] size_t index: The batch index. + /// @param [in] const std::vector &shapes: dims of shape. 
/// @return 0: SUCCESS / others: FAILED /// - Status UpdateShapeToData(const NodePtr &data, size_t index); + Status UpdataShapeToData(const NodePtr &data, const std::vector &shapes); /// /// @ingroup ge @@ -165,7 +165,6 @@ class MultiBatchClonePass : public GraphPass { std::map direct_output_; std::map all_branch_output_; - std::map>> data_to_dynamic_info_; NodePtr case_node_; }; diff --git a/ge/graph/passes/multi_batch_pass.cc b/ge/graph/passes/multi_batch_pass.cc index 70a09065..32152a6f 100644 --- a/ge/graph/passes/multi_batch_pass.cc +++ b/ge/graph/passes/multi_batch_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -96,34 +96,6 @@ Status MultiBatchPass::ClearStatus() { } /// -/// @ingroup ge -/// @brief Set batch label for Case mode. -/// @param [in] const ComputeGraphPtr &graph: Root/Case graph. -/// @param [in] const NodePtr &case_node: Case Node. 
-/// @return 0: SUCCESS / others: FAILED -/// -Status MultiBatchPass::SetCaseLabel(const ComputeGraphPtr &graph, const NodePtr &case_node) { - const auto &func_desc = case_node->GetOpDesc(); - if (!func_desc->HasAttr(ATTR_NAME_BATCH_NUM)) { - GELOGD("Graph: %s Not multi-batch, Node: %s", graph->GetName().c_str(), case_node->GetName().c_str()); - return SUCCESS; - } - - const auto &dynamic_branch_names = func_desc->GetSubgraphInstanceNames(); - for (size_t i = 0; i < dynamic_branch_names.size(); ++i) { - const auto &subgraph = graph->GetSubgraph(dynamic_branch_names[i]); - GE_CHECK_NOTNULL(subgraph); - - const string batch_label = "Batch_" + std::to_string(i); - for (const auto &node : subgraph->GetDirectNode()) { - (void)AttrUtils::SetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label); - } - } - - return SUCCESS; -} - -/// /// @brief Replace & Combine SwitchN nodes /// @param [in] graph /// @param [out] pred_value @@ -131,10 +103,6 @@ Status MultiBatchPass::SetCaseLabel(const ComputeGraphPtr &graph, const NodePtr /// Status MultiBatchPass::FindPredValue(const ComputeGraphPtr &graph, OutDataAnchorPtr &pred_value) { for (const NodePtr &node : graph->GetDirectNode()) { - if (node->GetType() == CASE) { - GE_CHK_STATUS_RET(SetCaseLabel(graph, node), "Set batch label failed"); - continue; - } if (node->GetType() != SWITCHN) { continue; } diff --git a/ge/graph/passes/multi_batch_pass.h b/ge/graph/passes/multi_batch_pass.h index a714992a..1806229f 100644 --- a/ge/graph/passes/multi_batch_pass.h +++ b/ge/graph/passes/multi_batch_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -43,8 +43,7 @@ class MultiBatchPass : public GraphPass { bool CheckDims(const std::vector> &output_shape) const; NodePtr CreateSwitchCaseNode(const ComputeGraphPtr &graph, const std::string &name, - const OutDataAnchorPtr &pred_value, - const std::vector> &batch_shape, + const OutDataAnchorPtr &pred_value, const std::vector> &batch_shape, const std::vector> &combined_batch); Status BypassSwitchN(const NodePtr &switch_n_node, const NodePtr &switch_case_node); Status AttachLabel(const NodePtr &switch_case_node); @@ -54,15 +53,6 @@ class MultiBatchPass : public GraphPass { Status AttachLabelOnly(uint32_t batch_num); Status GetUserDesignateShape(); - /// - /// @ingroup ge - /// @brief Set batch label for Case mode. - /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. - /// @param [in] const NodePtr &case_node: Case Node. - /// @return 0: SUCCESS / others: FAILED - /// - Status SetCaseLabel(const ComputeGraphPtr &graph, const NodePtr &case_node); - std::vector switch_n_nodes_; std::vector bypass_nodes_; std::vector> batch_head_nodes_; diff --git a/ge/graph/passes/net_output_pass.cc b/ge/graph/passes/net_output_pass.cc index e3f2b71a..8ded625c 100644 --- a/ge/graph/passes/net_output_pass.cc +++ b/ge/graph/passes/net_output_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,9 +34,9 @@ namespace ge { static std::map output_type_str_to_datatype = { - {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, - {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, - {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, + {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, + {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; // the size of user defined output datatype or format string after split by ":". const size_t kUserDefinedElementCount = 2; @@ -436,7 +436,7 @@ Status NetOutputPass::AddCtrlEdgesBetweenLeafAndNetOutput(const ge::ComputeGraph Status NetOutputPass::CreateNetOutputNode(OpDescPtr &net_output_desc, const ge::ComputeGraphPtr &graph) { // Only flush subgraph name string node_name = - (graph->GetParentGraph() != nullptr) ? (graph->GetName() + "_" + NODE_NAME_NET_OUTPUT) : NODE_NAME_NET_OUTPUT; + (graph->GetParentGraph() != nullptr) ? 
(graph->GetName() + "_" + NODE_NAME_NET_OUTPUT) : NODE_NAME_NET_OUTPUT; net_output_desc = MakeShared(node_name, NETOUTPUT); if (net_output_desc == nullptr) { GELOGE(MEMALLOC_FAILED, "Make shared net output op failed."); @@ -629,7 +629,7 @@ Status NetOutputPass::SetUserDefDTypeAndFormatFromAtcParams(const NodePtr &outpu GELOGD("Add user-define datatype:%s to netoutput node.", TypeUtils::DataTypeToSerialString(output_data_type).c_str()); userdef_dtypes.push_back( - std::to_string(index).append(":").append(TypeUtils::DataTypeToSerialString(output_data_type))); + std::to_string(index).append(":").append(TypeUtils::DataTypeToSerialString(output_data_type))); continue; } // Output_node is not set,check if is_output_adjust_hw_layout is set @@ -638,7 +638,7 @@ Status NetOutputPass::SetUserDefDTypeAndFormatFromAtcParams(const NodePtr &outpu // Set DT_FLOAT16 & FORMAT_NC1HWC0 userdef_dtypes.push_back(std::to_string(index).append(":").append(TypeUtils::DataTypeToSerialString(DT_FLOAT16))); userdef_formats.push_back( - std::to_string(index).append(":").append(TypeUtils::FormatToSerialString(FORMAT_NC1HWC0))); + std::to_string(index).append(":").append(TypeUtils::FormatToSerialString(FORMAT_NC1HWC0))); } } if (!userdef_dtypes.empty() && !ge::AttrUtils::SetListStr(op_desc, ATTR_ATC_USER_DEFINE_DATATYPE, userdef_dtypes)) { diff --git a/ge/graph/passes/net_output_pass.h b/ge/graph/passes/net_output_pass.h index b959bd96..567d1246 100644 --- a/ge/graph/passes/net_output_pass.h +++ b/ge/graph/passes/net_output_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/next_iteration_pass.cc b/ge/graph/passes/next_iteration_pass.cc index 5cd0f29f..73b3b77e 100644 --- a/ge/graph/passes/next_iteration_pass.cc +++ b/ge/graph/passes/next_iteration_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/next_iteration_pass.h b/ge/graph/passes/next_iteration_pass.h index f8223c20..6f28a618 100755 --- a/ge/graph/passes/next_iteration_pass.h +++ b/ge/graph/passes/next_iteration_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/no_use_reshape_remove_pass.cc b/ge/graph/passes/no_use_reshape_remove_pass.cc index 66a798a5..07f58417 100644 --- a/ge/graph/passes/no_use_reshape_remove_pass.cc +++ b/ge/graph/passes/no_use_reshape_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/no_use_reshape_remove_pass.h b/ge/graph/passes/no_use_reshape_remove_pass.h index c142d8d2..7ca36807 100755 --- a/ge/graph/passes/no_use_reshape_remove_pass.h +++ b/ge/graph/passes/no_use_reshape_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/parallel_concat_start_op_pass.cc b/ge/graph/passes/parallel_concat_start_op_pass.cc index 508d9b19..0ac26b91 100755 --- a/ge/graph/passes/parallel_concat_start_op_pass.cc +++ b/ge/graph/passes/parallel_concat_start_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/parallel_concat_start_op_pass.h b/ge/graph/passes/parallel_concat_start_op_pass.h index db9d235a..0f6e754a 100755 --- a/ge/graph/passes/parallel_concat_start_op_pass.h +++ b/ge/graph/passes/parallel_concat_start_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/pass_manager.cc b/ge/graph/passes/pass_manager.cc index 59ede66b..5be54f0a 100644 --- a/ge/graph/passes/pass_manager.cc +++ b/ge/graph/passes/pass_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,7 +23,7 @@ #include "omg/omg_inner_types.h" namespace ge { -const vector>& PassManager::GraphPasses() const { return names_to_graph_passes_; } +const vector> &PassManager::GraphPasses() const { return names_to_graph_passes_; } Status PassManager::AddPass(const string &pass_name, GraphPass *pass) { GE_CHECK_NOTNULL(pass); @@ -53,7 +53,7 @@ Status PassManager::Run(const ComputeGraphPtr &graph, vectorGetName().c_str()); return status; } - for (const auto &subgraph :graph->GetAllSubgraphs()) { + for (const auto &subgraph : graph->GetAllSubgraphs()) { GE_CHECK_NOTNULL(subgraph); GE_CHK_STATUS_RET(pass->ClearStatus(), "pass clear status failed for subgraph %s", subgraph->GetName().c_str()); string subgraph_pass_name = pass_name + "::" + graph->GetName(); diff --git a/ge/graph/passes/pass_utils.cc b/ge/graph/passes/pass_utils.cc index 5359ff63..a51b4e29 100644 --- a/ge/graph/passes/pass_utils.cc +++ b/ge/graph/passes/pass_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -93,8 +93,8 @@ Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, T *b GeTensorDesc output_tensor_desc(out_desc); output_tensor_desc.SetShape(out_shape); - GeTensorPtr output_tensor_ptr = MakeShared( - output_tensor_desc, reinterpret_cast(buf), sizeof(T) * len); + GeTensorPtr output_tensor_ptr = + MakeShared(output_tensor_desc, reinterpret_cast(buf), sizeof(T) * len); if (output_tensor_ptr == nullptr) { GELOGE(MEMALLOC_FAILED, "Make shared failed"); return MEMALLOC_FAILED; diff --git a/ge/graph/passes/pass_utils.h b/ge/graph/passes/pass_utils.h index fbfb3b47..b889a056 100755 --- a/ge/graph/passes/pass_utils.h +++ b/ge/graph/passes/pass_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ class PassUtils { static Status RemoveBranch(const NodePtr &node, std::vector &delete_nodes, std::vector &end_nodes); static Status RemoveInactiveBranchToMerge(const OutDataAnchorPtr &inactive_output_anchor, - std::vector &delete_nodes, std::vector &end_nodes); + std::vector &delete_nodes, std::vector &end_nodes); /// /// check is need iter flow ctrl. diff --git a/ge/graph/passes/permute_pass.cc b/ge/graph/passes/permute_pass.cc index 73d9a7f1..e55edbb2 100644 --- a/ge/graph/passes/permute_pass.cc +++ b/ge/graph/passes/permute_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,7 +43,7 @@ Status PermutePass::Run(ComputeGraphPtr graph) { /// Input format 5D means NHWC in 4D way. So if input origin foramt is NCHW and /// permute paramter list is [0,3,1,2], this permute can be optimised. 
GE_IF_BOOL_EXEC( - GetLocalOmgContext().format != DOMI_TENSOR_ND, + GetLocalOmgContext().format != DOMI_TENSOR_ND, // Get input origin foramt for (NodePtr &n : graph->GetDirectNode()) { diff --git a/ge/graph/passes/permute_pass.h b/ge/graph/passes/permute_pass.h index 9c4b911e..e4415b6e 100755 --- a/ge/graph/passes/permute_pass.h +++ b/ge/graph/passes/permute_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/placeholder_with_default_pass.cc b/ge/graph/passes/placeholder_with_default_pass.cc index 4c902322..7a72fc36 100644 --- a/ge/graph/passes/placeholder_with_default_pass.cc +++ b/ge/graph/passes/placeholder_with_default_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/placeholder_with_default_pass.h b/ge/graph/passes/placeholder_with_default_pass.h index f2b26933..d48a0a5a 100644 --- a/ge/graph/passes/placeholder_with_default_pass.h +++ b/ge/graph/passes/placeholder_with_default_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/prevent_gradient_pass.cc b/ge/graph/passes/prevent_gradient_pass.cc index 402529c3..87c1b3a1 100644 --- a/ge/graph/passes/prevent_gradient_pass.cc +++ b/ge/graph/passes/prevent_gradient_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/prevent_gradient_pass.h b/ge/graph/passes/prevent_gradient_pass.h index f1542c22..8fe02b96 100755 --- a/ge/graph/passes/prevent_gradient_pass.h +++ b/ge/graph/passes/prevent_gradient_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/print_op_pass.cc b/ge/graph/passes/print_op_pass.cc index 28b2332b..fba7b712 100755 --- a/ge/graph/passes/print_op_pass.cc +++ b/ge/graph/passes/print_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/print_op_pass.h b/ge/graph/passes/print_op_pass.h index deaf559b..15b0badc 100755 --- a/ge/graph/passes/print_op_pass.h +++ b/ge/graph/passes/print_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/prune_pass.cc b/ge/graph/passes/prune_pass.cc index f5f4cbcb..af10c54f 100644 --- a/ge/graph/passes/prune_pass.cc +++ b/ge/graph/passes/prune_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/prune_pass.h b/ge/graph/passes/prune_pass.h index c8cf8247..4bc6f184 100755 --- a/ge/graph/passes/prune_pass.h +++ b/ge/graph/passes/prune_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/ref_identity_delete_op_pass.cc b/ge/graph/passes/ref_identity_delete_op_pass.cc index 95f710f2..5bc0fad6 100644 --- a/ge/graph/passes/ref_identity_delete_op_pass.cc +++ b/ge/graph/passes/ref_identity_delete_op_pass.cc @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ #include "ref_identity_delete_op_pass.h" #include diff --git a/ge/graph/passes/ref_identity_delete_op_pass.h b/ge/graph/passes/ref_identity_delete_op_pass.h index 8363528e..3e42def4 100644 --- a/ge/graph/passes/ref_identity_delete_op_pass.h +++ b/ge/graph/passes/ref_identity_delete_op_pass.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef GE_GRAPH_PASSES_REF_IDENTITY_DELETE_OP_PASS_H_ #define GE_GRAPH_PASSES_REF_IDENTITY_DELETE_OP_PASS_H_ diff --git a/ge/graph/passes/remove_nodes_pass.cc b/ge/graph/passes/remove_nodes_pass.cc index c238f003..b29d6af3 100644 --- a/ge/graph/passes/remove_nodes_pass.cc +++ b/ge/graph/passes/remove_nodes_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "remove_nodes_pass.h" #include "debug/ge_log.h" #include "inc/framework/common/util.h" diff --git a/ge/graph/passes/remove_nodes_pass.h b/ge/graph/passes/remove_nodes_pass.h index 1d4fced9..32acda1b 100644 --- a/ge/graph/passes/remove_nodes_pass.h +++ b/ge/graph/passes/remove_nodes_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_REMOVE_NODES_PASS_H_ #define GE_REMOVE_NODES_PASS_H_ #include "graph/passes/base_pass.h" @@ -29,4 +30,4 @@ class RemoveNodesPass : public BaseNodePass { std::map> remove_node_attr_names_to_arg_; }; } // namespace ge -#endif //GE_REMOVE_NODES_PASS_H_ +#endif // GE_REMOVE_NODES_PASS_H_ diff --git a/ge/graph/passes/replace_transshape_pass.cc b/ge/graph/passes/replace_transshape_pass.cc index 9004df4e..28a8244d 100644 --- a/ge/graph/passes/replace_transshape_pass.cc +++ b/ge/graph/passes/replace_transshape_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -96,7 +96,7 @@ Status ReplaceTransShapePass::ReplaceTransShapeNode(ComputeGraphPtr &graph, Node void ReplaceTransShapePass::CopyControlEdges(NodePtr &old_node, NodePtr &new_node, bool input_check_flag) { GE_CHECK_NOTNULL_JUST_RETURN(old_node); GE_CHECK_NOTNULL_JUST_RETURN(new_node); - GE_IF_BOOL_EXEC(old_node == new_node, return); + GE_IF_BOOL_EXEC(old_node == new_node, return ); for (NodePtr &node : old_node->GetInControlNodes()) { auto out_control_anchor = node->GetOutControlAnchor(); GE_IF_BOOL_EXEC(!out_control_anchor->IsLinkedWith(new_node->GetInControlAnchor()), { @@ -133,8 +133,8 @@ void ReplaceTransShapePass::RemoveControlEdges(NodePtr &node) { } void ReplaceTransShapePass::ReplaceControlEdges(NodePtr &old_node, NodePtr &new_node) { - GE_IF_BOOL_EXEC(old_node == new_node, return); + GE_IF_BOOL_EXEC(old_node == new_node, return ); CopyControlEdges(old_node, new_node); RemoveControlEdges(old_node); } -} +} // namespace ge diff --git a/ge/graph/passes/replace_transshape_pass.h b/ge/graph/passes/replace_transshape_pass.h index 0620ed2d..6673b11d 100644 --- a/ge/graph/passes/replace_transshape_pass.h +++ b/ge/graph/passes/replace_transshape_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/replace_with_empty_const_pass.cc b/ge/graph/passes/replace_with_empty_const_pass.cc index 171c76d0..212b1979 100644 --- a/ge/graph/passes/replace_with_empty_const_pass.cc +++ b/ge/graph/passes/replace_with_empty_const_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -89,7 +89,7 @@ Status ReplaceWithEmptyConstPass::ReplaceWithEmptyConst(NodePtr &node_to_replace } // Repalce data anchors - for (const auto &anchor_idx: shape_2_out_idx.second) { + for (const auto &anchor_idx : shape_2_out_idx.second) { if (GraphUtils::ReplaceNodeDataAnchors(const_node, node_to_replace, {}, {anchor_idx}) != GRAPH_SUCCESS) { GELOGE(FAILED, "[%s] ReplaceNodeAnchors failed.", node_to_replace->GetName().c_str()); return FAILED; diff --git a/ge/graph/passes/replace_with_empty_const_pass.h b/ge/graph/passes/replace_with_empty_const_pass.h index 5083c699..495b75b3 100644 --- a/ge/graph/passes/replace_with_empty_const_pass.h +++ b/ge/graph/passes/replace_with_empty_const_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/reshape_recovery_pass.cc b/ge/graph/passes/reshape_recovery_pass.cc index f0987ff5..013c8af4 100644 --- a/ge/graph/passes/reshape_recovery_pass.cc +++ b/ge/graph/passes/reshape_recovery_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/passes/reshape_recovery_pass.h" #include "common/ge/ge_util.h" @@ -49,8 +50,8 @@ Status InsertReshapeIfNeed(const NodePtr &node) { GE_CHECK_NOTNULL(src_tensor); for (auto dst_anchor : src_anchor->GetPeerInDataAnchors()) { auto dst_node = dst_anchor->GetOwnerNode(); - GELOGD("Try insert reshape between %s[%d] and %s[%d] to keep the shape continues", - node->GetName().c_str(), src_anchor->GetIdx(), dst_node->GetName().c_str(), dst_anchor->GetIdx()); + GELOGD("Try insert reshape between %s[%d] and %s[%d] to keep the shape continues", node->GetName().c_str(), + src_anchor->GetIdx(), dst_node->GetName().c_str(), dst_anchor->GetIdx()); GE_CHECK_NOTNULL(dst_node); GE_CHECK_NOTNULL(dst_node->GetOpDesc()); auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); @@ -63,12 +64,12 @@ Status InsertReshapeIfNeed(const NodePtr &node) { GE_CHECK_NOTNULL(reshape); auto ret = GraphUtils::InsertNodeBetweenDataAnchors(src_anchor, dst_anchor, reshape); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to insert reshape between node %s and %s", - node->GetName().c_str(), dst_node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "Failed to insert reshape between node %s and %s", node->GetName().c_str(), + dst_node->GetName().c_str()); return INTERNAL_ERROR; } - GELOGI("Insert reshape between %s and %s to keep the shape continues", - node->GetName().c_str(), dst_node->GetName().c_str()); + GELOGI("Insert reshape between %s and %s to keep the shape continues", node->GetName().c_str(), + dst_node->GetName().c_str()); } } } diff --git a/ge/graph/passes/reshape_recovery_pass.h b/ge/graph/passes/reshape_recovery_pass.h index f16d5efb..b3ab1baa 100644 --- a/ge/graph/passes/reshape_recovery_pass.h +++ b/ge/graph/passes/reshape_recovery_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not 
use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_RESHAPE_RECOVERY_PASS_H_ #define GE_RESHAPE_RECOVERY_PASS_H_ #include "inc/graph_pass.h" @@ -23,4 +24,4 @@ class ReshapeRecoveryPass : public GraphPass { }; } // namespace ge -#endif //GE_RESHAPE_RECOVERY_PASS_H_ +#endif // GE_RESHAPE_RECOVERY_PASS_H_ diff --git a/ge/graph/passes/reshape_remove_pass.cc b/ge/graph/passes/reshape_remove_pass.cc index ffa6a485..0f6d52d1 100755 --- a/ge/graph/passes/reshape_remove_pass.cc +++ b/ge/graph/passes/reshape_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,8 +34,7 @@ Status ReshapeRemovePass::Run(NodePtr &node) { bool is_shape_unknown = false; if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) { if (is_shape_unknown) { - GELOGI("op:%s is unknown shape, can not be deleted.", - node->GetName().c_str()); + GELOGI("op:%s is unknown shape, can not be deleted.", node->GetName().c_str()); return SUCCESS; } } diff --git a/ge/graph/passes/reshape_remove_pass.h b/ge/graph/passes/reshape_remove_pass.h index c89caf86..044bbdb7 100644 --- a/ge/graph/passes/reshape_remove_pass.h +++ b/ge/graph/passes/reshape_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/resource_pair_add_control_pass.cc b/ge/graph/passes/resource_pair_add_control_pass.cc index 432bff9e..bba8ee71 100755 --- a/ge/graph/passes/resource_pair_add_control_pass.cc +++ b/ge/graph/passes/resource_pair_add_control_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/resource_pair_add_control_pass.h b/ge/graph/passes/resource_pair_add_control_pass.h index 5e1a4465..02ebd78f 100644 --- a/ge/graph/passes/resource_pair_add_control_pass.h +++ b/ge/graph/passes/resource_pair_add_control_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/resource_pair_remove_control_pass.cc b/ge/graph/passes/resource_pair_remove_control_pass.cc index 83fc7081..00d97798 100755 --- a/ge/graph/passes/resource_pair_remove_control_pass.cc +++ b/ge/graph/passes/resource_pair_remove_control_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/resource_pair_remove_control_pass.h b/ge/graph/passes/resource_pair_remove_control_pass.h index 80f6b3ef..ab40b130 100644 --- a/ge/graph/passes/resource_pair_remove_control_pass.h +++ b/ge/graph/passes/resource_pair_remove_control_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/same_transdata_breadth_fusion_pass.cc b/ge/graph/passes/same_transdata_breadth_fusion_pass.cc index 5709dcb7..2146a35d 100644 --- a/ge/graph/passes/same_transdata_breadth_fusion_pass.cc +++ b/ge/graph/passes/same_transdata_breadth_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -223,7 +223,7 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkDataOutput2PreNode(const NodeP } graphStatus SameTransdataBreadthFusionPass::ReLinkOutDataPeerInControlNodes2PreNode( - const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor) { + const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor) { GE_CHECK_NOTNULL(pre_out_anchor); GE_CHECK_NOTNULL(transdata_node); auto transdata_peer_out_control_anchor = pre_out_anchor->GetOwnerNode()->GetOutControlAnchor(); @@ -278,8 +278,8 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkTransdataOutput2PreNode(const } graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInControlAnchors( - const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, - const OutControlAnchorPtr &transdata_peer_out_control_anchor) { + const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, + const OutControlAnchorPtr &transdata_peer_out_control_anchor) { GE_CHECK_NOTNULL(transdata_node_keep); GE_CHECK_NOTNULL(pre_out_anchor); auto out_control_anchor = transdata_node_keep->GetOutControlAnchor(); @@ -315,8 +315,8 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInControlAnchors } graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInDataAnchors( - const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, - const OutControlAnchorPtr &transdata_peer_out_control_anchor) { + const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, + const OutControlAnchorPtr &transdata_peer_out_control_anchor) { GE_CHECK_NOTNULL(transdata_node_keep); GE_CHECK_NOTNULL(pre_out_anchor); auto out_control_anchor = transdata_node_keep->GetOutControlAnchor(); @@ -354,8 +354,8 @@ graphStatus SameTransdataBreadthFusionPass::ReLinkOutControlPeerInDataAnchors( } graphStatus SameTransdataBreadthFusionPass::ReLinkTransdataControlOutput2PreNode( - const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, - const 
OutControlAnchorPtr &transdata_peer_out_control_anchor) { + const NodePtr &transdata_node_keep, const OutDataAnchorPtr &pre_out_anchor, + const OutControlAnchorPtr &transdata_peer_out_control_anchor) { if (ReLinkOutControlPeerInControlAnchors(transdata_node_keep, pre_out_anchor, transdata_peer_out_control_anchor) != GRAPH_SUCCESS) { return GRAPH_FAILED; @@ -595,8 +595,8 @@ void SameTransdataBreadthFusionPass::CopyTensorDesc(const ConstGeTensorDescPtr & } graphStatus SameTransdataBreadthFusionPass::LinkNewCastNode2RemainTransdata( - const ComputeGraphPtr &graph, const vector &same_transdata_nodes, const OutDataAnchorPtr &transdata_out_anchor, - const NodePtr &transdata_node_keep) { + const ComputeGraphPtr &graph, const vector &same_transdata_nodes, const OutDataAnchorPtr &transdata_out_anchor, + const NodePtr &transdata_node_keep) { for (size_t i = 1; i < same_transdata_nodes.size(); ++i) { int anchors_index = same_transdata_nodes[i]; bool reuse_nodes = AllNodeBeforeTransdataHasOneDataOut(anchors_index); @@ -734,9 +734,8 @@ graphStatus SameTransdataBreadthFusionPass::AddCastNode(const ComputeGraphPtr &g } graphStatus SameTransdataBreadthFusionPass::GetSubGraphsBetweenNormalAndTransdataNode( - OutDataAnchorPtr &out_anchor, - std::vector>> &sub_graphs_out, - std::vector> &nodes_list) { + OutDataAnchorPtr &out_anchor, std::vector>> &sub_graphs_out, + std::vector> &nodes_list) { graphStatus ret = GRAPH_SUCCESS; if (out_anchor == nullptr) { GELOGE(GRAPH_FAILED, "out data anchor is null!This should not happen!"); diff --git a/ge/graph/passes/same_transdata_breadth_fusion_pass.h b/ge/graph/passes/same_transdata_breadth_fusion_pass.h index 92e559a0..a6a3bb26 100755 --- a/ge/graph/passes/same_transdata_breadth_fusion_pass.h +++ b/ge/graph/passes/same_transdata_breadth_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you 
may not use this file except in compliance with the License. @@ -34,9 +34,10 @@ class SameTransdataBreadthFusionPass : public GraphPass { private: graphStatus ExtractTransNode(const ComputeGraphPtr &graph); - graphStatus GetSubGraphsBetweenNormalAndTransdataNode(OutDataAnchorPtr &out_anchor, - std::vector>> &sub_graphs_out, - std::vector> &nodes_list); + graphStatus GetSubGraphsBetweenNormalAndTransdataNode( + OutDataAnchorPtr &out_anchor, + std::vector>> &sub_graphs_out, + std::vector> &nodes_list); void GetSubGraphNodesInfo(); @@ -44,9 +45,7 @@ class SameTransdataBreadthFusionPass : public GraphPass { std::set GetInControlIdentityNodes(const NodePtr &node, int subgraph_index); OpDescPtr GetCastOp(const GeTensorDesc &in_desc, const GeTensorDesc &out_desc); - graphStatus AddCastNode(const ComputeGraphPtr &graph, - int anchors_index, - OutDataAnchorPtr &pre_out_anchor, + graphStatus AddCastNode(const ComputeGraphPtr &graph, int anchors_index, OutDataAnchorPtr &pre_out_anchor, NodePtr &first_link_node); void GetSameTransdataNode(vector &same_transdata_nodes); @@ -54,12 +53,10 @@ class SameTransdataBreadthFusionPass : public GraphPass { graphStatus ReLinkTransdataOutput2PreNode(const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor, const NodePtr &relink_node); - graphStatus RelinkTransdataControlEdge(ComputeGraphPtr graph, - NodePtr transdata_node_remove, + graphStatus RelinkTransdataControlEdge(ComputeGraphPtr graph, NodePtr transdata_node_remove, NodePtr transdata_node_keep); - graphStatus LinkNewCastNode2RemainTransdata(const ComputeGraphPtr &graph, - const vector &same_transdata_nodes, + graphStatus LinkNewCastNode2RemainTransdata(const ComputeGraphPtr &graph, const vector &same_transdata_nodes, const OutDataAnchorPtr &transdata_out_anchor, const NodePtr &transdata_node_keep); @@ -79,8 +76,7 @@ class SameTransdataBreadthFusionPass : public GraphPass { graphStatus RelinkInControlEdge(const NodePtr &node_src, const NodePtr &node_dst); - graphStatus 
ReLinkDataOutput2PreNode(const NodePtr &transdata_node, - const OutDataAnchorPtr &pre_out_anchor, + graphStatus ReLinkDataOutput2PreNode(const NodePtr &transdata_node, const OutDataAnchorPtr &pre_out_anchor, const NodePtr &relink_node); graphStatus ReLinkOutDataPeerInControlNodes2PreNode(const NodePtr &transdata_node, diff --git a/ge/graph/passes/save_pass.cc b/ge/graph/passes/save_pass.cc index a2e34b1d..49196206 100755 --- a/ge/graph/passes/save_pass.cc +++ b/ge/graph/passes/save_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,7 +47,8 @@ Status SavePass::Run(ge::ComputeGraphPtr graph) { out_index.emplace_back(out_anchor->GetIdx()); ge::OpDescPtr op_desc = peer_node->GetOpDesc(); GE_IF_BOOL_EXEC(!ge::AttrUtils::SetStr(op_desc, kVarAttrVarIsSave, kVarIsSave), - GELOGE(INTERNAL_ERROR, "get kVarAttrVarIsSave failed"); return INTERNAL_ERROR); + GELOGE(INTERNAL_ERROR, "get kVarAttrVarIsSave failed"); + return INTERNAL_ERROR); } } } diff --git a/ge/graph/passes/save_pass.h b/ge/graph/passes/save_pass.h index 512dfa62..ce8c8a7a 100755 --- a/ge/graph/passes/save_pass.h +++ b/ge/graph/passes/save_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/set_input_output_offset_pass.cc b/ge/graph/passes/set_input_output_offset_pass.cc index beac831c..58c3be85 100644 --- a/ge/graph/passes/set_input_output_offset_pass.cc +++ b/ge/graph/passes/set_input_output_offset_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -80,9 +80,9 @@ Status SetInputOutputOffsetPass::SetInputOffsetForFusion(const std::vectorGetName().c_str(), parent_index, data->GetName().c_str()); } } for (const auto &data_nodes : graph_nodes) { if (data_nodes.second.size() != graph_nodes.begin()->second.size()) { - GELOGE(FAILED, "Subgraph %s has invalid Data nodes[%zu != %zu]", - data_nodes.first->GetName().c_str(), data_nodes.second.size(), graph_nodes.begin()->second.size()); + GELOGE(FAILED, "Subgraph %s has invalid Data nodes[%zu != %zu]", data_nodes.first->GetName().c_str(), + data_nodes.second.size(), graph_nodes.begin()->second.size()); return FAILED; } } @@ -195,8 +195,8 @@ bool SubexpressionMigrationPass::GetAssociatedNodes(const NodePtr &node, mapGetInDataAnchor(i); const auto &out_anchor = in_anchor->GetPeerOutAnchor(); if (out_anchor == nullptr) { - inputs[i] = kInvalidParent; - continue; + inputs[i] = kInvalidParent; + continue; } // Has none Data input node, Can not move to parent. 
@@ -302,7 +302,7 @@ Status SubexpressionMigrationPass::GraphNodeMigration(const ComputeGraphPtr &gra continue; } - GELOGI("Move to parent: %s, parent index: %u", base_node->GetName().c_str(), base_idx); + GELOGI("Move to parent: %s", base_node->GetName().c_str()); if (AppendParallelNode(graph_nodes, func_node, outputs) != SUCCESS) { return FAILED; } @@ -335,12 +335,12 @@ Status SubexpressionMigrationPass::AppendParallelNode(map append_num; for (auto &groups : graph_nodes) { const auto &subgraph = groups.first; auto &data_nodes = groups.second; - item.second = func_node->GetAllInDataAnchorsSize() + append_num[subgraph]; // Update to valid parent index. + uint32_t data_index = data_nodes.size(); + item.second = data_index + kCaseInputBase; // Update to valid parent index. std::string data_name = subgraph->GetName() + "_data_" + std::to_string(item.second); OpDescBuilder op_builder(data_name, DATA); @@ -350,7 +350,6 @@ Status SubexpressionMigrationPass::AppendParallelNode(mapGetName().c_str()); return FAILED; @@ -361,13 +360,11 @@ Status SubexpressionMigrationPass::AppendParallelNode(mapAddNode(op_desc); - GELOGI("Add Node: %s, parent index: %u", op_desc->GetName().c_str(), item.second); } // Add InputTensor to functional Node. 
- GE_CHK_GRAPH_STATUS_RET(NodeUtils::AppendInputAnchor(func_node, item.second + 1), "Append input failed"); + NodeUtils::AppendInputAnchor(func_node, item.second + 1); migration_append_ = true; } @@ -388,7 +385,7 @@ Status SubexpressionMigrationPass::DetachParallelNode(const mapGetAllInDataAnchors()) { const auto &out_anchor = in_anchor->GetPeerOutAnchor(); if (out_anchor == nullptr) { - continue; + continue; } GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); @@ -415,12 +412,12 @@ Status SubexpressionMigrationPass::DetachParallelNode(const mapGetOpDesc()->GetOutputDesc(i); const auto &data_desc = data_node->GetOpDesc(); - (void)data_desc->UpdateInputDesc(kDataOutIndex, out_desc); // Set Data Input to new connect Node. - (void)data_desc->UpdateOutputDesc(kDataOutIndex, out_desc); // Set Data Output to new connect Node. + (void)data_desc->UpdateInputDesc(kDataOutIndex, out_desc); // Set Data Input to new connect Node. + (void)data_desc->UpdateOutputDesc(kDataOutIndex, out_desc); // Set Data Output to new connect Node. for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { if (in_anchor == nullptr) { - continue; + continue; } GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); const auto &owner_node = in_anchor->GetOwnerNode(); @@ -455,7 +452,7 @@ Status SubexpressionMigrationPass::AttachParallelNode(const ComputeGraphPtr &gra GELOGE(FAILED, "Node: %s parent index %u not found", attach->GetName().c_str(), i); return FAILED; } - if (it_idx->second == kInvalidParent) { // Not connect, Skip. + if (it_idx->second == kInvalidParent) { // Not connect, Skip. continue; } @@ -471,13 +468,13 @@ Status SubexpressionMigrationPass::AttachParallelNode(const ComputeGraphPtr &gra if (it_idx == outputs.end()) { return FAILED; } - if (it_idx->second == kInvalidParent) { // Not connect, Skip. + if (it_idx->second == kInvalidParent) { // Not connect, Skip. 
continue; } const auto &out_desc = attach->GetOpDesc()->GetOutputDesc(i); const auto &func_desc = func_node->GetOpDesc(); - (void)func_desc->UpdateInputDesc(it_idx->second, out_desc); // Set Data Input to new connect Node. + (void)func_desc->UpdateInputDesc(it_idx->second, out_desc); // Set Data Input to new connect Node. const auto &in_anchor = func_node->GetInDataAnchor(it_idx->second); const auto &out_anchor = in_anchor->GetPeerOutAnchor(); diff --git a/ge/graph/passes/subexpression_migration_pass.h b/ge/graph/passes/subexpression_migration_pass.h index d2733fcf..fbe28cae 100755 --- a/ge/graph/passes/subexpression_migration_pass.h +++ b/ge/graph/passes/subexpression_migration_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,8 +25,8 @@ #include #include -using std::set; using std::map; +using std::set; namespace ge { class SubexpressionMigrationPass : public GraphPass { @@ -64,8 +64,8 @@ class SubexpressionMigrationPass : public GraphPass { /// @param [in] anchor_idx: Anchor index of node. 
/// @return true: Same / false: not same /// - bool IsParallelNodeSame(const map> &graph_nodes, - const NodePtr &base_node, uint32_t node_idx, uint32_t anchor_idx); + bool IsParallelNodeSame(const map> &graph_nodes, const NodePtr &base_node, + uint32_t node_idx, uint32_t anchor_idx); /// /// @ingroup ge @@ -78,8 +78,8 @@ class SubexpressionMigrationPass : public GraphPass { /// @return 0: SUCCESS / others: FAILED /// Status GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node, - map> &graph_nodes, - const NodePtr &data_base, uint32_t data_idx); + map> &graph_nodes, const NodePtr &data_base, + uint32_t data_idx); /// /// @ingroup ge @@ -104,8 +104,8 @@ class SubexpressionMigrationPass : public GraphPass { /// @param [in] outputs: Parent index of Node output. /// @return 0: SUCCESS / others: FAILED /// - Status AppendParallelNode(map> &graph_nodes, - const NodePtr &func_node, map &outputs); + Status AppendParallelNode(map> &graph_nodes, const NodePtr &func_node, + map &outputs); /// /// @ingroup ge diff --git a/ge/graph/passes/subgraph_const_migration_pass.cc b/ge/graph/passes/subgraph_const_migration_pass.cc deleted file mode 100644 index 579b2424..00000000 --- a/ge/graph/passes/subgraph_const_migration_pass.cc +++ /dev/null @@ -1,569 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "subgraph_const_migration_pass.h" - -#include "graph/utils/node_utils.h" -#include "ge_local_engine/engine/host_cpu_engine.h" -#include "graph/passes/folding_pass.h" - -namespace ge { -constexpr uint32_t kDataOutIndex = 0; -constexpr uint32_t kCaseInputBase = 1; -constexpr uint32_t kInvalidParent = 0x7fffffffU; - -bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) { - if ((src_node == nullptr) && (dst_node == nullptr)) { - return true; - } - - if ((src_node == nullptr) || (dst_node == nullptr)) { - return false; - } - - if (src_node->GetType() != dst_node->GetType()) { - return false; - } - - if ((src_node->GetInControlNodes().size() != dst_node->GetInControlNodes().size()) || - (src_node->GetOutDataNodesSize() != dst_node->GetOutDataNodesSize())) { - return false; - } - - set related_parent; - const auto in_nodes = src_node->GetInControlNodes(); - for (uint32_t i = 0; i < in_nodes.size(); ++i) { - const auto owner_node = in_nodes.at(i); - uint32_t parent_index = 0; - if (!AttrUtils::GetInt(owner_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - return false; - } - - related_parent.insert(parent_index); - } - - for (const auto &in_node : dst_node->GetInControlNodes()) { - uint32_t parent_index = 0; - if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - return false; - } - - if (related_parent.count(parent_index) == 0) { - return false; - } - } - - return true; -} - -/*********************************************************************************************************************** - +-----------+ - | Data | - +-----------+ - | - | - +-----------+ - | Cast | - +-----------+ - | - | - +-----------+ +-----------+ +-----------+ - | TransData | | Data | | Data | - +-----------+ +-----------+ +-----------+ - \ | / - \ | / - \ | / - \ | / - +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ - | Data | | Data | | Data | | Data | | Data 
| | Data | | Conv2D | - +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ - \ \ | / / | | - \ \ | / / | | - \ \ | / / | | - \ \ | / / | | - \ +-----------+ / | +-----------+ - +---------------| Const |----------------+ | | Pooling | - +-----------+ | +-----------+ - \ | / - \ | / - \ +-----------+ / - +-----------------------------------| Conv2D |------+ - +-----------+ - | - | - +-----------+ - | Node | - +-----------+ -***********************************************************************************************************************/ -Status SubgraphConstMigrationPass::Run(ComputeGraphPtr graph) { - GE_CHECK_NOTNULL(graph); - if (graph->GetParentGraph() != nullptr) { - GELOGD("Subgraph %s skip the SubgraphConstMigrationPass", graph->GetName().c_str()); - return SUCCESS; - } - - GELOGD("Begin to run Subgraph Const Migration on graph: %s", graph->GetName().c_str()); - for (const auto &node : graph->GetDirectNode()) { - if (node->GetType() != CASE) { - continue; - } - - const auto &func_desc = node->GetOpDesc(); - if (!func_desc->HasAttr(ATTR_NAME_BATCH_NUM)) { - GELOGD("Not multi-batch, Skip Case: %s", node->GetName().c_str()); - continue; - } - - do { - migration_append_ = false; - map> graph_datas; - if (ClassifyDataNodes(graph, func_desc, graph_datas) != SUCCESS) { - return FAILED; - } - - if (graph_datas.empty()) { - GELOGW("Graph: %s subgraph is empty", graph->GetName().c_str()); - break; - } - - // {subgraph0, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}} - // {subgraph1, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}} - // {subgraph2, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}} - const auto base_nodes = graph_datas.begin()->second; // Need copy. 
- for (const auto &node_item : base_nodes) { - if (GraphNodeMigration(graph, node, graph_datas, node_item.second, node_item.first) != SUCCESS) { - return FAILED; - } - } - } while (migration_append_); - } - - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief Get all Data nodes for all subgraph. -/// @param [in] graph: Root compute graph. -/// @param [in] func_desc: functional OpDesc of Case. -/// @param [out] graph_datas: Data groups of subgraph. -/// @return 0: SUCCESS / others: FAILED -/// -Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc, - map> &graph_datas) { - for (const auto &name : func_desc->GetSubgraphInstanceNames()) { - const auto &subgraph = graph->GetSubgraph(name); - if (subgraph == nullptr) { - GELOGE(GE_GRAPH_EMPTY_SUBGRAPH, "Subgraph not found, name: %s", name.c_str()); - return GE_GRAPH_EMPTY_SUBGRAPH; - } - - auto &data_nodes = graph_datas[subgraph]; - for (auto &data : subgraph->GetDirectNode()) { - if (data->GetType() != DATA) { - continue; - } - - uint32_t parent_index = 0; - if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGE(FAILED, "Parent index not found, name: %s", data->GetName().c_str()); - return FAILED; - } - - data_nodes[parent_index] = data; - GELOGD("%s, Parent index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, data->GetName().c_str()); - } - } - - auto iter = graph_datas.begin(); - if (iter == graph_datas.end()) { - return SUCCESS; - } - for (const auto &data_nodes : graph_datas) { - if (data_nodes.second.size() != iter->second.size()) { - GELOGE(FAILED, "Subgraph %s has invalid Data nodes[%zu != %zu]", - data_nodes.first->GetName().c_str(), data_nodes.second.size(), iter->second.size()); - return FAILED; - } - } - - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief Get all Data nodes for all subgraph. -/// @param [in] node: Const node of subgraph. -/// @param [out] inputs: parent index to Const. 
-/// @param [out] outputs: Data groups of subgraph. -/// @return true: SUCCESS / false: FAILED -/// -bool SubgraphConstMigrationPass::GetAssociatedNodes(const NodePtr &node, map &inputs, - map &outputs) { - for (uint32_t i = 0; i < node->GetAllOutDataAnchorsSize(); ++i) { - outputs[i] = kInvalidParent; - } - - uint32_t out_index = 0; - const auto in_nodes = node->GetInAllNodes(); - for (size_t i = 0; i < in_nodes.size(); ++i) { - const auto owner_node = in_nodes.at(i); - if (owner_node->GetType() != DATA) { - return false; - } - - uint32_t parent_index = 0; - if (!AttrUtils::GetInt(owner_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - return false; - } - - // Input Data feed other Node, need add new Data. - inputs[i] = parent_index; - if ((out_index == outputs.size()) && owner_node->GetOutDataNodes().empty()) { - outputs[out_index] = parent_index; - ++out_index; - } - } - - return true; -} - -/// -/// @ingroup ge -/// @brief Get all Data nodes for all subgraph. -/// @param [in] graph_nodes: Data groups of subgraph. -/// @param [in] data_base: Data Node for migration. -/// @param [in] data_idx: Data groups of subgraph. -/// @param [in] data_idx: Data groups of subgraph. 
-/// @return true: Same / false: not same -/// -bool SubgraphConstMigrationPass::IsParallelNodeSame(const map> &graph_datas, - const NodePtr &const_node, uint32_t parent_index, size_t index) { - auto it = graph_datas.begin(); - for (++it; it != graph_datas.end(); ++it) { - const auto &data_nodes = it->second; - auto data_it = data_nodes.find(parent_index); - if (data_it == data_nodes.end()) { - GELOGE(FAILED, "Data: %s not fount, index: %u", const_node->GetName().c_str(), parent_index); - return false; - } - - const auto &work_data = data_it->second; - const auto &out_anchor = work_data->GetOutControlAnchor(); - const auto &in_anchors = out_anchor->GetPeerInControlAnchors(); - if (in_anchors.size() <= index || in_anchors.at(index) == nullptr) { - GELOGW("Node anchors not same, Data: %s -> %s anchor size: %zu, index: %zu", - work_data->GetName().c_str(), const_node->GetName().c_str(), in_anchors.size(), index); - return false; - } - - const auto &in_anchor = in_anchors.at(index); - const auto &work_node = in_anchor->GetOwnerNode(); - if (work_node == nullptr) { - GELOGE(FAILED, "Data: %s not found, parent: %u, index: %zu", const_node->GetName().c_str(), parent_index, index); - return false; - } - - if (!IsSameOpNode(const_node, work_node)) { - GELOGI("OpDesc not same: %s %s, parent: %u, index: %zu", - const_node->GetName().c_str(), work_node->GetName().c_str(), parent_index, index); - return false; - } - } - - return true; -} - -/// -/// @ingroup ge -/// @brief Migration subgraph Node to Root -/// @param [in] graph: Root compute graph. -/// @param [in] func_node: functional Node of Case. -/// @param [in] graph_nodes: Data groups of subgraph. -/// @param [in] data_base: Data Node for migration. -/// @param [in] data_idx: Data groups of subgraph. 
-/// @return 0: SUCCESS / others: FAILED -/// -Status SubgraphConstMigrationPass::GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node, - map> &graph_datas, - const NodePtr &data_node, uint32_t parent_index) { - bool can_extrapolation = false; - do { - can_extrapolation = false; - const auto &out_anchor = data_node->GetOutControlAnchor(); - const auto &in_anchors = out_anchor->GetPeerInControlAnchors(); - for (size_t i = in_anchors.size(); i > 0; --i) { - const auto &in_anchor = in_anchors.at(i - 1); - const auto &work_node = in_anchor->GetOwnerNode(); - GELOGD("Data: %s, node: %s, parent: %u, index: %zu", - data_node->GetName().c_str(), work_node->GetName().c_str(), parent_index, i); - if (work_node->GetType() != CONSTANT) { - continue; - } - - // Get associated Data, if Data feed other nodes, need append new Data. - map inputs; - map outputs; - if (!GetAssociatedNodes(work_node, inputs, outputs)) { - continue; - } - - if (!IsParallelNodeSame(graph_datas, work_node, parent_index, i - 1)) { - continue; - } - - GELOGI("Move node: %s, parent: %u, index: %zu", work_node->GetName().c_str(), parent_index, i); - if (AppendParallelNode(graph_datas, func_node, outputs) != SUCCESS) { - return FAILED; - } - - if (MoveNodeToParent(graph, func_node, graph_datas, parent_index, i - 1, inputs, outputs) != SUCCESS) { - return FAILED; - } - can_extrapolation = true; - break; - } - } while (can_extrapolation); - - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief Append Input Tensor for functional node. -/// @param [in] graph_nodes: Data groups of subgraph. -/// @param [in] func_node: functional Node of Case. -/// @param [in] outputs: Parent index of Node output. -/// @return 0: SUCCESS / others: FAILED -/// -Status SubgraphConstMigrationPass::AppendParallelNode(map> &graph_datas, - const NodePtr &func_node, map &outputs) { - // If outputs index invalid, add Data and Input Tensor. 
- for (auto &item : outputs) { - if (item.second != kInvalidParent) { - continue; - } - - // Add Data to subgraph. - map append_num; - for (auto &groups : graph_datas) { - const auto &subgraph = groups.first; - auto &data_nodes = groups.second; - - item.second = func_node->GetAllInDataAnchorsSize() + append_num[subgraph]; // Update to valid parent index. - const auto data_name = subgraph->GetName() + "_data_" + std::to_string(item.second); - - OpDescBuilder op_builder(data_name, DATA); - const OpDescPtr op_desc = op_builder.AddInput("x").AddOutput("y").Build(); - if (op_desc == nullptr) { - GELOGE(OUT_OF_MEMORY, "Create multi-batch subgraph data desc failed"); - return OUT_OF_MEMORY; - } - - uint32_t data_index = item.second - kCaseInputBase; - if (!AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index)) { - GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str()); - return FAILED; - } - - if (!AttrUtils::SetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, item.second)) { - GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str()); - return FAILED; - } - - append_num[subgraph]++; - data_nodes[item.second] = subgraph->AddNode(op_desc); - GELOGI("Add Node: %s, parent index: %u", op_desc->GetName().c_str(), item.second); - } - - // Add InputTensor to functional Node. - NodeUtils::AppendInputAnchor(func_node, item.second + 1); - } - - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief Delete Node from all subgraph. -/// @param [in] graph_nodes: Data groups of subgraph. -/// @param [in] detach: Node will move to parent. -/// @param [in] outputs: Parent index of Node output. -/// @return 0: SUCCESS / others: FAILED -/// -Status SubgraphConstMigrationPass::DetachParallelNode(const map &graph_datas, const NodePtr &detach, - const map &outputs) { - // Break Data and Move node. 
- const auto &in_anchor = detach->GetInControlAnchor(); - const auto &out_anchors = in_anchor->GetPeerOutControlAnchors(); - for (size_t i = out_anchors.size(); i > 0; --i) { - const auto &out_anchor = out_anchors.at(i - 1); - GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); - const auto &owner_node = out_anchor->GetOwnerNode(); - GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), detach->GetName().c_str()); - } - - // Break Move and follow, Link Data and follow. - for (uint32_t i = 0; i < detach->GetAllOutDataAnchorsSize(); ++i) { - auto it_idx = outputs.find(i); - if (it_idx == outputs.end()) { - GELOGE(FAILED, "Node: %s parent index %u not found", detach->GetName().c_str(), i); - return FAILED; - } - - auto it_data = graph_datas.find(it_idx->second); - if (it_data == graph_datas.end()) { - GELOGE(FAILED, "Node: %s parent index %u not found", detach->GetName().c_str(), i); - return FAILED; - } - - const auto &data_node = it_data->second; - const auto &out_anchor = detach->GetOutDataAnchor(i); - - const auto &out_desc = detach->GetOpDesc()->GetOutputDesc(i); - const auto &data_desc = data_node->GetOpDesc(); - (void)data_desc->UpdateInputDesc(kDataOutIndex, out_desc); // Set Data Input to new connect Node. - (void)data_desc->UpdateOutputDesc(kDataOutIndex, out_desc); // Set Data Output to new connect Node. 
- - for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { - if (in_anchor == nullptr) { - continue; - } - GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); - const auto &owner_node = in_anchor->GetOwnerNode(); - GELOGI("Remove Edge: %s %s", detach->GetName().c_str(), owner_node->GetName().c_str()); - - const auto &data_out_anchor = data_node->GetOutDataAnchor(kDataOutIndex); - GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(data_out_anchor, in_anchor), "Add edge failed"); - GELOGI("Add Edge: %s %s", data_node->GetName().c_str(), owner_node->GetName().c_str()); - } - } - - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief Move Node to Parent Graph. -/// @param [in] graph: Parent compute graph. -/// @param [in] func_node: functional Node of Case. -/// @param [in] attach: Node will move to parent. -/// @param [in] inputs: Parent index of Node input. -/// @param [in] outputs: Parent index of Node output. -/// @return 0: SUCCESS / others: FAILED -/// -Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node, - const NodePtr &attach, const map &inputs, - const map &outputs) { - GE_CHECK_NOTNULL(attach); - for (const auto item : inputs) { - if (item.second == kInvalidParent) { // Not connect, Skip. 
- continue; - } - - const auto &in_anchor = func_node->GetInDataAnchor(item.second); - const auto &out_anchor = in_anchor->GetPeerOutAnchor(); - const auto &owner_node = out_anchor->GetOwnerNode(); - const auto &in_control = attach->GetInControlAnchor(); - GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(owner_node->GetOutControlAnchor(), in_control), "Add edge failed"); - GELOGI("Add Edge: %s %s", owner_node->GetName().c_str(), attach->GetName().c_str()); - } - - for (const auto &item : outputs) { - const auto &func_desc = func_node->GetOpDesc(); - const auto &out_desc = attach->GetOpDesc()->GetOutputDesc(item.second); - (void)func_desc->UpdateInputDesc(item.second, out_desc); // Set Data Input to new connect Node. - - const auto &in_anchor = func_node->GetInDataAnchor(item.second); - const auto &out_anchor = in_anchor->GetPeerOutAnchor(); - if (out_anchor != nullptr) { - GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed"); - const auto &owner_node = out_anchor->GetOwnerNode(); - GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str()); - } - GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(attach->GetOutDataAnchor(item.first), in_anchor), "Add edge failed"); - GELOGI("Add Edge: %s %s", attach->GetName().c_str(), func_node->GetName().c_str()); - } - - (void)graph->AddNode(attach); - (void)attach->SetOwnerComputeGraph(graph); - GELOGI("Add Node: %s %s", graph->GetName().c_str(), attach->GetName().c_str()); - return SUCCESS; -} - -/// -/// @ingroup ge -/// @brief Move node to Parent graph. -/// @param [in] graph: Root compute graph. -/// @param [in] func_node: functional Node of Case. -/// @param [in] graph_nodes: Data groups of subgraph. -/// @param [in] index: anchor index of move Node. -/// @param [in] inputs: Parent index of Node input. -/// @param [in] outputs: Parent index of Node output. 
-/// @return 0: SUCCESS / others: FAILED -/// -Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph, const NodePtr &func_node, - const map> &graph_datas, - uint32_t parent_index, uint32_t index, - const map &inputs, - const map &outputs) { - if (inputs.empty()) { - GELOGE(FAILED, "Graph: %s, inputs is empty", graph->GetName().c_str()); - return FAILED; - } - - NodePtr move_node; - for (auto &groups : graph_datas) { - const auto &subgraph = groups.first; - const auto &data_nodes = groups.second; - auto it = data_nodes.find(parent_index); - if (it == data_nodes.end()) { - GELOGE(FAILED, "Graph: %s, Data: %u node not found", subgraph->GetName().c_str(), parent_index); - return FAILED; - } - - const auto &base_data = it->second; - const auto &out_anchor = base_data->GetOutControlAnchor(); - const auto &in_anchors = out_anchor->GetPeerInControlAnchors(); - if (in_anchors.size() <= index || in_anchors.at(index) == nullptr) { - GELOGE(FAILED, "Data: %s, anchor size: %zu, index: %u not found", - base_data->GetName().c_str(), in_anchors.size(), index); - return FAILED; - } - - const auto &in_anchor = in_anchors.at(index); - move_node = in_anchor->GetOwnerNode(); - if (move_node == nullptr) { - GELOGE(FAILED, "Data: %s not found, index: %u", base_data->GetName().c_str(), parent_index); - return FAILED; - } - - if (DetachParallelNode(data_nodes, move_node, outputs) != SUCCESS) { - GELOGE(FAILED, "Data: %s not found, index: %u", base_data->GetName().c_str(), parent_index); - return FAILED; - } - - GE_CHK_GRAPH_STATUS_RET(subgraph->RemoveNode(move_node), "Remove node failed"); - GELOGI("Remove Node: %s %s", subgraph->GetName().c_str(), move_node->GetName().c_str()); - } - - if (AttachParallelNode(graph, func_node, move_node, inputs, outputs) != SUCCESS) { - return FAILED; - } - - migration_append_ = true; - return SUCCESS; -} -} // namespace ge diff --git a/ge/graph/passes/subgraph_const_migration_pass.h 
b/ge/graph/passes/subgraph_const_migration_pass.h deleted file mode 100755 index 3c087852..00000000 --- a/ge/graph/passes/subgraph_const_migration_pass.h +++ /dev/null @@ -1,138 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_COMMON_SUBGRAPH_CONST_MIGRATION_H_ -#define GE_COMMON_SUBGRAPH_CONST_MIGRATION_H_ - -#include "graph/types.h" -#include "inc/graph_pass.h" - -#include -#include -#include -#include - -using std::set; -using std::map; - -namespace ge { -class SubgraphConstMigrationPass : public GraphPass { - public: - Status Run(ComputeGraphPtr graph) override; - - private: - /// - /// @ingroup ge - /// @brief Get all Data nodes for all subgraph. - /// @param [in] graph: Root compute graph. - /// @param [in] func_desc: functional OpDesc of Case. - /// @param [out] graph_datas: Data groups of subgraph. - /// @return 0: SUCCESS / others: FAILED - /// - Status ClassifyDataNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc, - map> &graph_datas); - - /// - /// @ingroup ge - /// @brief Get all Data nodes for all subgraph. - /// @param [in] node: Const node of subgraph. - /// @param [in] func_desc: functional OpDesc of Case. - /// @param [out] graph_nodes: Data groups of subgraph. 
- /// @return true: SUCCESS / false: FAILED - /// - bool GetAssociatedNodes(const NodePtr &node, map &inputs, map &outputs); - - /// - /// @ingroup ge - /// @brief Get all Data nodes for all subgraph. - /// @param [in] graph_nodes: Data groups of subgraph. - /// @param [in] data_base: Data Node for migration. - /// @param [in] data_idx: Data groups of subgraph. - /// @param [in] data_idx: Data groups of subgraph. - /// @return true: Same / false: not same - /// - bool IsParallelNodeSame(const map> &graph_nodes, - const NodePtr &const_node, uint32_t parent_index, size_t index); - - /// - /// @ingroup ge - /// @brief Migration subgraph Node to Root - /// @param [in] graph: Root compute graph. - /// @param [in] func_node: functional Node of Case. - /// @param [in] graph_nodes: Data groups of subgraph. - /// @param [in] data_base: Data Node for migration. - /// @param [in] data_idx: Data groups of subgraph. - /// @return 0: SUCCESS / others: FAILED - /// - Status GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node, - map> &graph_nodes, - const NodePtr &data_base, uint32_t data_idx); - - /// - /// @ingroup ge - /// @brief Move node to Parent graph. - /// @param [in] graph: Root compute graph. - /// @param [in] func_node: functional Node of Case. - /// @param [in] graph_nodes: Data groups of subgraph. - /// @param [in] anchor_idx: anchor index of move Node. - /// @param [in] inputs: Parent index of Node input. - /// @param [in] outputs: Parent index of Node output. - /// @return 0: SUCCESS / others: FAILED - /// - Status MoveNodeToParent(const ComputeGraphPtr &graph, const NodePtr &func_node, - const map> &graph_nodes, - uint32_t parent_index, uint32_t anchor_idx, - const map &inputs, const map &outputs); - - /// - /// @ingroup ge - /// @brief Append Input Tensor for functional node. - /// @param [in] graph_nodes: Data groups of subgraph. - /// @param [in] func_node: functional Node of Case. - /// @param [in] outputs: Parent index of Node output. 
- /// @return 0: SUCCESS / others: FAILED - /// - Status AppendParallelNode(map> &graph_nodes, - const NodePtr &func_node, map &outputs); - - /// - /// @ingroup ge - /// @brief Delete Node from all subgraph. - /// @param [in] graph_nodes: Data groups of subgraph. - /// @param [in] detach: Node will move to parent. - /// @param [in] outputs: Parent index of Node output. - /// @return 0: SUCCESS / others: FAILED - /// - Status DetachParallelNode(const map &graph_datas, const NodePtr &detach, - const map &outputs); - - /// - /// @ingroup ge - /// @brief Move Node to Parent Graph. - /// @param [in] graph: Parent compute graph. - /// @param [in] func_node: functional Node of Case. - /// @param [in] attach: Node will move to parent. - /// @param [in] inputs: Parent index of Node input. - /// @param [in] outputs: Parent index of Node output. - /// @return 0: SUCCESS / others: FAILED - /// - Status AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node, const NodePtr &attach, - const map &inputs, const map &outputs); - - bool migration_append_{false}; -}; -} // namespace ge -#endif // GE_COMMON_SUBGRAPH_CONST_MIGRATION_H_ \ No newline at end of file diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index 04e28aaf..fd71e65b 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -79,7 +79,7 @@ Status SubgraphPass::SubgraphInputNode(const ComputeGraphPtr &graph, const NodeP std::vector in_anchors; for (const InDataAnchorPtr &peer_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { input_continues_required_flag = - input_continues_required_flag || IsInputContinuesRequired(peer_in_anchor->GetOwnerNode()); + input_continues_required_flag || IsInputContinuesRequired(peer_in_anchor->GetOwnerNode()); in_anchors.emplace_back(peer_in_anchor); } // Data->InputContinuesRequiredOp in subgraph need memcpy. @@ -262,7 +262,7 @@ Status SubgraphPass::InsertInputMemcpy(const ComputeGraphPtr &graph, const std:: for (size_t i = 0; i < data_nodes.size(); i++) { // Data node has and only has one output in_builder.AddInput("x" + std::to_string(i), data_nodes[i]->GetOpDesc()->GetOutputDesc(0)) - .AddOutput("y" + std::to_string(i), data_nodes[i]->GetOpDesc()->GetOutputDesc(0)); + .AddOutput("y" + std::to_string(i), data_nodes[i]->GetOpDesc()->GetOutputDesc(0)); } GELOGD("Insert memcpy after data_nodes of while_body %s.", graph->GetName().c_str()); NodePtr in_memcpy = graph->AddNode(in_builder.Build()); @@ -304,7 +304,7 @@ Status SubgraphPass::InsertOutputMemcpy(const ComputeGraphPtr &graph, const Node for (size_t i = 0; i < output_node->GetAllInDataAnchorsSize(); i++) { if (bypass_index.count(i) == 0) { out_builder.AddInput("x" + std::to_string(i), output_node->GetOpDesc()->GetInputDesc(i)) - .AddOutput("y" + std::to_string(i), output_node->GetOpDesc()->GetInputDesc(i)); + .AddOutput("y" + std::to_string(i), output_node->GetOpDesc()->GetInputDesc(i)); } } GELOGD("Insert memcpy before NetOutput of while_body %s.", graph->GetName().c_str()); @@ -440,8 +440,8 @@ Status SubgraphPass::InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDat NodePtr in_node = out_anchor->GetOwnerNode(); OpDescBuilder op_desc_builder(name, IDENTITY); OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) - .AddOutput("y", 
in_node->GetOpDesc()->GetOutputDesc(0)) - .Build(); + .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) + .Build(); (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false); if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) { GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str()); @@ -463,8 +463,8 @@ Status SubgraphPass::InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDat Status SubgraphPass::InsertNodeBetween(const OutDataAnchorPtr &src, const std::vector &dsts, const NodePtr &insert_node, uint32_t input_index, uint32_t output_index) { if (GraphUtils::AddEdge(src, insert_node->GetInDataAnchor(input_index)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Add data_edge %s:%d->%s:%u failed.", - src->GetOwnerNode()->GetName().c_str(), src->GetIdx(), insert_node->GetName().c_str(), input_index); + GELOGE(FAILED, "Add data_edge %s:%d->%s:%u failed.", src->GetOwnerNode()->GetName().c_str(), src->GetIdx(), + insert_node->GetName().c_str(), input_index); return FAILED; } for (const auto &dst : dsts) { @@ -472,11 +472,9 @@ Status SubgraphPass::InsertNodeBetween(const OutDataAnchorPtr &src, const std::v dst->GetOwnerNode()->GetName().c_str()); if ((GraphUtils::RemoveEdge(src, dst) != GRAPH_SUCCESS) || (GraphUtils::AddEdge(insert_node->GetOutDataAnchor(output_index), dst) != GRAPH_SUCCESS)) { - GELOGE(FAILED, "Replace data_edge %s:%d->%s:%d by %s:%u->%s:%d failed.", - src->GetOwnerNode()->GetName().c_str(), src->GetIdx(), - dst->GetOwnerNode()->GetName().c_str(), dst->GetIdx(), - insert_node->GetName().c_str(), output_index, - dst->GetOwnerNode()->GetName().c_str(), dst->GetIdx()); + GELOGE(FAILED, "Replace data_edge %s:%d->%s:%d by %s:%u->%s:%d failed.", src->GetOwnerNode()->GetName().c_str(), + src->GetIdx(), dst->GetOwnerNode()->GetName().c_str(), dst->GetIdx(), insert_node->GetName().c_str(), + output_index, dst->GetOwnerNode()->GetName().c_str(), 
dst->GetIdx()); return FAILED; } } diff --git a/ge/graph/passes/subgraph_pass.h b/ge/graph/passes/subgraph_pass.h index 6e518ace..7ff2019f 100644 --- a/ge/graph/passes/subgraph_pass.h +++ b/ge/graph/passes/subgraph_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/switch_data_edges_bypass.cc b/ge/graph/passes/switch_data_edges_bypass.cc index ce2b715b..d7f5d90f 100644 --- a/ge/graph/passes/switch_data_edges_bypass.cc +++ b/ge/graph/passes/switch_data_edges_bypass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,7 +102,7 @@ NodePtr AddIdentityAfterNode(const NodePtr &node, int index) { } auto identity_opdesc = - MakeShared("SwitchDataEdgesByPass_Identity_" + std::to_string(identity_counter), IDENTITY); + MakeShared("SwitchDataEdgesByPass_Identity_" + std::to_string(identity_counter), IDENTITY); if (identity_opdesc == nullptr) { GELOGE(OUT_OF_MEMORY, "Failed to add identity after node %s index %d", node->GetName().c_str(), index); return nullptr; diff --git a/ge/graph/passes/switch_data_edges_bypass.h b/ge/graph/passes/switch_data_edges_bypass.h index 25f71d20..8c2f492a 100644 --- a/ge/graph/passes/switch_data_edges_bypass.h +++ b/ge/graph/passes/switch_data_edges_bypass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_SWITCH_DATA_EDGES_BYPASS_H_ #define GE_SWITCH_DATA_EDGES_BYPASS_H_ @@ -22,9 +23,10 @@ namespace ge { class SwitchDataEdgesBypass : public GraphPass { public: Status Run(ComputeGraphPtr graph) override; + private: Status BypassSwitch(const NodePtr &node); }; } // namespace ge -#endif //GE_SWITCH_DATA_EDGES_BYPASS_H_ \ No newline at end of file +#endif // GE_SWITCH_DATA_EDGES_BYPASS_H_ \ No newline at end of file diff --git a/ge/graph/passes/switch_dead_branch_elimination.cc b/ge/graph/passes/switch_dead_branch_elimination.cc index 9358c9c3..dd7ace60 100644 --- a/ge/graph/passes/switch_dead_branch_elimination.cc +++ b/ge/graph/passes/switch_dead_branch_elimination.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/switch_dead_branch_elimination.h b/ge/graph/passes/switch_dead_branch_elimination.h index fdefb5c0..4f2b9f02 100644 --- a/ge/graph/passes/switch_dead_branch_elimination.h +++ b/ge/graph/passes/switch_dead_branch_elimination.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_GRAPH_PASSES_SWITCH_DEAD_BRANCH_ELIMINATION_H_ #define GE_GRAPH_PASSES_SWITCH_DEAD_BRANCH_ELIMINATION_H_ @@ -24,8 +25,7 @@ class SwitchDeadBranchElimination : public BaseNodePass { Status Run(NodePtr &node) override; private: - Status DeleteSwitchNode(NodePtr &node, NodePtr &pred_node, - const OutDataAnchorPtr &active_out_data_anchor); + Status DeleteSwitchNode(NodePtr &node, NodePtr &pred_node, const OutDataAnchorPtr &active_out_data_anchor); }; } // namespace ge diff --git a/ge/graph/passes/switch_logic_remove_pass.cc b/ge/graph/passes/switch_logic_remove_pass.cc index a6758e86..dafa3ae1 100644 --- a/ge/graph/passes/switch_logic_remove_pass.cc +++ b/ge/graph/passes/switch_logic_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,9 +37,7 @@ char const *GetOutputNameFromIndex(int index) { return "UNKNOWN"; } -inline bool IsSwitch(const std::string &type) { - return type == SWITCH || type == REFSWITCH; -} +inline bool IsSwitch(const std::string &type) { return type == SWITCH || type == REFSWITCH; } Status GetPredNode(const NodePtr &switch_node, PredNodeAndOut &pred_node_index) { GE_CHECK_NOTNULL(switch_node); @@ -50,16 +48,13 @@ Status GetPredNode(const NodePtr &switch_node, PredNodeAndOut &pred_node_index) } auto pred_node_anchor = pred_in_anchor->GetPeerOutAnchor(); if (pred_node_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, - "Failed to get pred node for switch %s, node peer out anchor", + GELOGE(INTERNAL_ERROR, "Failed to get pred node for switch %s, node peer out anchor", switch_node->GetName().c_str()); return INTERNAL_ERROR; } auto pred_node = pred_node_anchor->GetOwnerNode(); if (pred_node == nullptr) { - GELOGE(INTERNAL_ERROR, - "Failed to get pred node for switch %s, null node", - switch_node->GetName().c_str()); + 
GELOGE(INTERNAL_ERROR, "Failed to get pred node for switch %s, null node", switch_node->GetName().c_str()); return INTERNAL_ERROR; } pred_node_index.first = pred_node; @@ -111,8 +106,8 @@ Status SwitchLogicRemovePass::Run(NodePtr &node) { continue; } GELOGI("The switch nodes cascaded %s and %s have the save pred node %s, the %s can be remove", - node->GetName().c_str(), dst_node->GetName().c_str(), - pred_node_and_out.first->GetName().c_str(), dst_node->GetName().c_str()); + node->GetName().c_str(), dst_node->GetName().c_str(), pred_node_and_out.first->GetName().c_str(), + dst_node->GetName().c_str()); ret = RemoveSwitchNodeLogically(i, dst_node); if (ret != SUCCESS) { return ret; @@ -137,8 +132,8 @@ Status SwitchLogicRemovePass::RemoveSwitchNodeLogically(int parent_index, NodePt continue; } - GELOGI("Remove inactivate branch %s(%d) from switch %s", - GetOutputNameFromIndex(i), i, switch_node->GetName().c_str()); + GELOGI("Remove inactivate branch %s(%d) from switch %s", GetOutputNameFromIndex(i), i, + switch_node->GetName().c_str()); std::vector deleted_nodes; std::vector end_nodes; auto ret = PassUtils::RemoveInactiveBranchToMerge(out_anchor, deleted_nodes, end_nodes); @@ -148,20 +143,18 @@ Status SwitchLogicRemovePass::RemoveSwitchNodeLogically(int parent_index, NodePt for (auto &node : deleted_nodes) { GE_CHECK_NOTNULL(node); - GELOGD("Remove node %s from inactivate branch from switch %s", - node->GetName().c_str(), switch_node->GetName().c_str()); + GELOGD("Remove node %s from inactivate branch from switch %s", node->GetName().c_str(), + switch_node->GetName().c_str()); AddNodeDeleted(node); } for (auto &node : end_nodes) { GE_CHECK_NOTNULL(node); - GELOGD("Add end node %s to re-pass list, for inactivate branch from switch %s", - node->GetName().c_str(), switch_node->GetName().c_str()); + GELOGD("Add end node %s to re-pass list, for inactivate branch from switch %s", node->GetName().c_str(), + switch_node->GetName().c_str()); AddRePassNode(node); } } - 
GELOGI("Remove switch node cascaded %s, replace out index %d", - switch_node->GetName().c_str(), parent_index); + GELOGI("Remove switch node cascaded %s, replace out index %d", switch_node->GetName().c_str(), parent_index); return IsolateAndDeleteNode(switch_node, isolate_map); } } // namespace ge - diff --git a/ge/graph/passes/switch_logic_remove_pass.h b/ge/graph/passes/switch_logic_remove_pass.h index dc679978..b711cc73 100644 --- a/ge/graph/passes/switch_logic_remove_pass.h +++ b/ge/graph/passes/switch_logic_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_SWITCH_LOGIC_REMOVE_PASS_H_ #define GE_GRAPH_PASSES_SWITCH_LOGIC_REMOVE_PASS_H_ #include "graph/passes/base_pass.h" @@ -21,6 +22,7 @@ namespace ge { class SwitchLogicRemovePass : public BaseNodePass { public: Status Run(NodePtr &node) override; + private: Status RemoveSwitchNodeLogically(int parent_index, NodePtr &switch_node); }; diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc b/ge/graph/passes/switch_to_stream_switch_pass.cc index 529480a6..6c0d545d 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.cc +++ b/ge/graph/passes/switch_to_stream_switch_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -85,7 +85,7 @@ Status SwitchToStreamSwitchPass::CheckCycleDependence(const ComputeGraphPtr &gra NodePtr cond_node = peer_out_anchor->GetOwnerNode(); auto iter = cond_switch_map.find(cond_node); if (iter == cond_switch_map.end()) { - cond_switch_map[cond_node] = { node }; + cond_switch_map[cond_node] = {node}; } else { iter->second.emplace_back(node); } @@ -104,7 +104,7 @@ Status SwitchToStreamSwitchPass::CheckCycleDependence(const ComputeGraphPtr &gra /// @return void /// void SwitchToStreamSwitchPass::MarkCycleDependence( - const std::unordered_map> &cond_switch_map) { + const std::unordered_map> &cond_switch_map) { std::stack out_nodes; NodePtr tmp_node = nullptr; std::unordered_set visited; @@ -130,8 +130,8 @@ void SwitchToStreamSwitchPass::MarkCycleDependence( out_nodes.push(out_node); continue; } - GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS, - GELOGW("set cyclic dependence attr failed."); return ); + GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS, GELOGW("set cyclic dependence attr failed."); + return ); auto map_iter = switch_cyclic_map_.find(out_node); if (map_iter == switch_cyclic_map_.end()) { switch_cyclic_map_[out_node] = {tmp_node->GetName()}; @@ -442,7 +442,7 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph) GE_CHK_BOOL_EXEC(active_node != nullptr, return FAILED, "Create StreamActive node failed."); GE_CHK_STATUS(GraphUtils::AddEdge(cast_node->GetOutControlAnchor(), active_node->GetInControlAnchor()), "StreamActive add ctl edge failed."); - if (SetActiveLabelList(active_node, { cast_node->GetName() }) != SUCCESS) { + if (SetActiveLabelList(active_node, {cast_node->GetName()}) != SUCCESS) { GELOGE(FAILED, "Set active_label_list attr for node %s failed.", active_node->GetName().c_str()); return FAILED; } @@ -541,8 +541,7 @@ NodePtr SwitchToStreamSwitchPass::CreateCastOp(const ComputeGraphPtr &graph, con GeTensorDesc tensor_desc = cond_desc->GetOutputDesc(peer_cond_anchor->GetIdx()); 
tensor_desc.SetDataType(DT_BOOL); - GE_CHK_BOOL_EXEC(cast_desc->AddInputDesc(tensor_desc) == SUCCESS, return nullptr, - "Cast_node add input desc failed."); + GE_CHK_BOOL_EXEC(cast_desc->AddInputDesc(tensor_desc) == SUCCESS, return nullptr, "Cast_node add input desc failed."); tensor_desc.SetDataType(DT_INT32); GE_CHK_BOOL_EXEC(cast_desc->AddOutputDesc(tensor_desc) == SUCCESS, return nullptr, "Cast_node add output desc failed."); @@ -578,7 +577,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons auto resize_value = (int32_t)value; GeTensorDesc data_desc = op_desc->GetInputDesc(1); GeTensorPtr const_value = - MakeShared(data_desc, reinterpret_cast(&resize_value), sizeof(int32_t)); + MakeShared(data_desc, reinterpret_cast(&resize_value), sizeof(int32_t)); if (const_value == nullptr) { GELOGE(FAILED, "Create tensor failed."); return FAILED; @@ -737,8 +736,7 @@ void SwitchToStreamSwitchPass::MoveCtrlEdges(const NodePtr &old_node, const Node } } else { GE_IF_BOOL_EXEC(!out_ctrl_anchor->IsLinkedWith(new_node->GetInControlAnchor()), { - GE_CHK_STATUS(GraphUtils::AddEdge(out_ctrl_anchor, new_node->GetInControlAnchor()), - "Add in ctrl edge failed."); + GE_CHK_STATUS(GraphUtils::AddEdge(out_ctrl_anchor, new_node->GetInControlAnchor()), "Add in ctrl edge failed."); }); } GE_CHK_STATUS(GraphUtils::RemoveEdge(out_ctrl_anchor, old_node->GetInControlAnchor()), diff --git a/ge/graph/passes/switch_to_stream_switch_pass.h b/ge/graph/passes/switch_to_stream_switch_pass.h index 48725230..15fe9dce 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.h +++ b/ge/graph/passes/switch_to_stream_switch_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/transop_breadth_fusion_pass.cc b/ge/graph/passes/transop_breadth_fusion_pass.cc index 21fb1eaf..5c754f4f 100644 --- a/ge/graph/passes/transop_breadth_fusion_pass.cc +++ b/ge/graph/passes/transop_breadth_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,9 +41,9 @@ Status TransOpBreadthFusionPass::Run(ge::ComputeGraphPtr graph) { for (auto const &id_to_trans_nodes : ids_to_trans_nodes) { if (id_to_trans_nodes.second.size() > 1) { GELOGI( - "Begin to breath fusion output trans-op-nodes for %s, " - "trans id %s, trans-op count %zu", - node->GetName().c_str(), id_to_trans_nodes.first.c_str(), id_to_trans_nodes.second.size()); + "Begin to breath fusion output trans-op-nodes for %s, " + "trans id %s, trans-op count %zu", + node->GetName().c_str(), id_to_trans_nodes.first.c_str(), id_to_trans_nodes.second.size()); graphStatus status = Fusion(id_to_trans_nodes.second, graph); if (status != GRAPH_SUCCESS) { return FAILED; diff --git a/ge/graph/passes/transop_breadth_fusion_pass.h b/ge/graph/passes/transop_breadth_fusion_pass.h index 9a82259c..8e7799e1 100755 --- a/ge/graph/passes/transop_breadth_fusion_pass.h +++ b/ge/graph/passes/transop_breadth_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/transop_depth_fusion_pass.cc b/ge/graph/passes/transop_depth_fusion_pass.cc index 85106e08..afeca3c4 100755 --- a/ge/graph/passes/transop_depth_fusion_pass.cc +++ b/ge/graph/passes/transop_depth_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,11 +71,11 @@ graphStatus TransOpDepthFusionPass::RecursiveInDepth(const InDataAnchorPtr &dst_ temp_depth++; if (temp_depth >= max_recursive_depth) { GELOGI( - "Caution: recursive depth is become %u." - "It's abnormally to have so many trans ops between two normal ops" - "Please check your graph in detail!" - "The search terminate here and continue to another branch.", - temp_depth); + "Caution: recursive depth is become %u." + "It's abnormally to have so many trans ops between two normal ops" + "Please check your graph in detail!" 
+ "The search terminate here and continue to another branch.", + temp_depth); temp_depth--; return GRAPH_SUCCESS; } @@ -166,8 +166,7 @@ bool TransOpDepthFusionPass::CheckNodeCanBeDeleted(const NodePtr &node) { bool is_shape_unknown = false; if (NodeUtils::GetNodeUnknownShapeStatus(*node, is_shape_unknown) == GRAPH_SUCCESS) { if (is_shape_unknown) { - GELOGI("op:%s is unknown shape, can not be deleted.", - node->GetName().c_str()); + GELOGI("op:%s is unknown shape, can not be deleted.", node->GetName().c_str()); return false; } } @@ -268,15 +267,15 @@ graphStatus TransOpDepthFusionPass::RelinkEdges(const OutDataAnchorPtr &new_out_ GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(old_out_anchor, in_data_anchor), "remove edge failed"); GE_CHK_STATUS_RET(GraphUtils::AddEdge(new_out_anchor, in_data_anchor), "add edge failed"); GELOGD( - "relink edges before remove node, remove data edge between node: %s, " - "type: %s and node: %s, type: %s.", - old_out_anchor->GetOwnerNode()->GetName().c_str(), old_out_anchor->GetOwnerNode()->GetType().c_str(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); + "relink edges before remove node, remove data edge between node: %s, " + "type: %s and node: %s, type: %s.", + old_out_anchor->GetOwnerNode()->GetName().c_str(), old_out_anchor->GetOwnerNode()->GetType().c_str(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); GELOGD( - "relink edges before remove node, add data edge between node: %s, " - "type: %s and node: %s, type: %s.", - new_out_anchor->GetOwnerNode()->GetName().c_str(), new_out_anchor->GetOwnerNode()->GetType().c_str(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); + "relink edges before remove node, add data edge between node: %s, " + "type: %s and node: %s, type: %s.", + new_out_anchor->GetOwnerNode()->GetName().c_str(), 
new_out_anchor->GetOwnerNode()->GetType().c_str(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetOwnerNode()->GetType().c_str()); bool is_linked = false; auto dst_node = in_data_anchor->GetOwnerNode(); @@ -291,10 +290,10 @@ graphStatus TransOpDepthFusionPass::RelinkEdges(const OutDataAnchorPtr &new_out_ auto in_ctrl_anchor = dst_node->GetInControlAnchor(); GE_CHK_STATUS_RET(GraphUtils::AddEdge(out_ctrl_anchor, in_ctrl_anchor), "add edge failed"); GELOGD( - "relink edges before remove node, add control edge between node: %s," - " type: %s and node: %s, type: %s.", - src_node->GetName().c_str(), src_node->GetType().c_str(), dst_node->GetName().c_str(), - dst_node->GetType().c_str()); + "relink edges before remove node, add control edge between node: %s," + " type: %s and node: %s, type: %s.", + src_node->GetName().c_str(), src_node->GetType().c_str(), dst_node->GetName().c_str(), + dst_node->GetType().c_str()); } return GRAPH_SUCCESS; } diff --git a/ge/graph/passes/transop_depth_fusion_pass.h b/ge/graph/passes/transop_depth_fusion_pass.h index 831e7138..cc449893 100755 --- a/ge/graph/passes/transop_depth_fusion_pass.h +++ b/ge/graph/passes/transop_depth_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc index b207abe9..53c9deca 100644 --- a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc +++ b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,18 +66,16 @@ bool TransOpNearbyAllreduceFusionPass::IsSymmetricTransOps(const NodePtr &node1, GE_CHECK_NOTNULL_EXEC(node2_output_desc, return false); // two symmetric trans ops should have symmetric input/output datatype - GELOGD("format: nod1_input=%d, nod1_output=%d, nod2_input=%d, nod2_output=%d", - node1_input_desc->GetFormat(), node1_output_desc->GetFormat(), node2_input_desc->GetFormat(), - node2_output_desc->GetFormat()); + GELOGD("format: nod1_input=%d, nod1_output=%d, nod2_input=%d, nod2_output=%d", node1_input_desc->GetFormat(), + node1_output_desc->GetFormat(), node2_input_desc->GetFormat(), node2_output_desc->GetFormat()); if (node1_input_desc->GetFormat() != node2_output_desc->GetFormat() || node1_output_desc->GetFormat() != node2_input_desc->GetFormat()) { return false; } // two symmetric trans ops should have symmetric input/output format - GELOGD("datatype: nod1_input=%d, nod1_output=%d, nod2_input=%d, nod2_output=%d", - node1_input_desc->GetDataType(), node1_output_desc->GetDataType(), node2_input_desc->GetDataType(), - node2_output_desc->GetDataType()); + GELOGD("datatype: nod1_input=%d, nod1_output=%d, nod2_input=%d, nod2_output=%d", node1_input_desc->GetDataType(), + node1_output_desc->GetDataType(), node2_input_desc->GetDataType(), node2_output_desc->GetDataType()); if (node1_input_desc->GetDataType() != node2_output_desc->GetDataType() || node1_output_desc->GetDataType() != node2_input_desc->GetDataType()) { 
return false; @@ -136,8 +134,8 @@ Status TransOpNearbyAllreduceFusionPass::RemoveNearbyPairedTransOps(const NodePt GELOGI("in_node=%s, out_node=%s", in_node->GetName().c_str(), out_node->GetName().c_str()); if (!IsSymmetricTransOps(in_node, out_node)) { - GELOGD("ignore asymmetric transop %s and %s for node %s", - in_node->GetName().c_str(), out_node->GetName().c_str(), node->GetName().c_str()); + GELOGD("ignore asymmetric transop %s and %s for node %s", in_node->GetName().c_str(), out_node->GetName().c_str(), + node->GetName().c_str()); continue; } @@ -167,8 +165,8 @@ Status TransOpNearbyAllreduceFusionPass::RemoveNearbyPairedTransOps(const NodePt if (node->GetOpDesc()->UpdateOutputDesc(static_cast(i), output_desc) != GRAPH_SUCCESS) { GELOGE(FAILED, "UpdateOutputDesc"); } - GELOGI("successfully remove paired transop (%s and %s) for node %s", - in_node->GetName().c_str(), out_node->GetName().c_str(), node->GetName().c_str()); + GELOGI("successfully remove paired transop (%s and %s) for node %s", in_node->GetName().c_str(), + out_node->GetName().c_str(), node->GetName().c_str()); } GELOGI("successfully remove %zu pair of transops in total for node %s", removed_node_count, node->GetName().c_str()); return SUCCESS; diff --git a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h index 0cacf062..1cd1eeec 100755 --- a/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h +++ b/ge/graph/passes/transop_nearby_allreduce_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/transop_symmetry_elimination_pass.cc b/ge/graph/passes/transop_symmetry_elimination_pass.cc index 9db3aea1..e217656c 100644 --- a/ge/graph/passes/transop_symmetry_elimination_pass.cc +++ b/ge/graph/passes/transop_symmetry_elimination_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,7 +32,9 @@ namespace ge { Status TransOpSymmetryEliminationPass::Run(NodePtr &node) { GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); - if (white_list_op.find(node->GetType()) == white_list_op.end()) { return SUCCESS; } + if (white_list_op.find(node->GetType()) == white_list_op.end()) { + return SUCCESS; + } GELOGD("Symmetry Elimination Pass in."); for (const auto &out_anchor : node->GetAllOutDataAnchors()) { GE_CHECK_NOTNULL(out_anchor); @@ -40,7 +42,9 @@ Status TransOpSymmetryEliminationPass::Run(NodePtr &node) { GE_CHECK_NOTNULL(peer_in_anchor); GE_CHECK_NOTNULL(peer_in_anchor->GetOwnerNode()); GE_CHECK_NOTNULL(peer_in_anchor->GetOwnerNode()->GetOpDesc()); - if (!CheckCanBeEliminated(node, peer_in_anchor)) { continue; } + if (!CheckCanBeEliminated(node, peer_in_anchor)) { + continue; + } auto dst_node = peer_in_anchor->GetOwnerNode(); Status ret = EliminateTransOp(node, out_anchor, dst_node, peer_in_anchor); if (ret != SUCCESS) { @@ -72,9 +76,10 @@ bool TransOpSymmetryEliminationPass::CheckCanBeEliminated(const ge::NodePtr &src GE_CHECK_NOTNULL(src_node->GetOpDesc()); auto unknown_dims_num = GetUnknownDimsNum(src_node->GetOpDesc()->GetInputDesc(0)); if (unknown_dims_num != 0 && (unknown_dims_num == UNKNOWN_DIM_NUM || unknown_dims_num > 1)) { - GELOGD("Pre node %s is reshape op which input is dynamic shape and has more than one unknown dimension. 
" - "Ignore pass.", - src_node->GetName().c_str()); + GELOGD( + "Pre node %s is reshape op which input is dynamic shape and has more than one unknown dimension. " + "Ignore pass.", + src_node->GetName().c_str()); return false; } } else if (src_node->GetType() == ge::TRANSPOSED) { @@ -109,26 +114,26 @@ bool TransOpSymmetryEliminationPass::DescAreSymmetry(const NodePtr &src_node, co bool is_symmetry = true; if (src_node->GetType() == CAST && dst_node->GetType() == CAST) { bool is_format_symmetry = - (src_input_format == dst_output_format) || (dst_output_format == FORMAT_ND) || (src_input_format == FORMAT_ND); + (src_input_format == dst_output_format) || (dst_output_format == FORMAT_ND) || (src_input_format == FORMAT_ND); is_symmetry = (src_input_dtype == dst_output_dtype) && is_format_symmetry; } else { - is_symmetry = (src_input_dtype == dst_output_dtype) && (src_input_shape == dst_output_shape) - && (src_input_format == dst_output_format); + is_symmetry = (src_input_dtype == dst_output_dtype) && (src_input_shape == dst_output_shape) && + (src_input_format == dst_output_format); } if (!is_symmetry) { - GELOGD("Not satisfied symmetry. ignore pass.\n" - "Src node %s input type: %s format: %s shape: %s, " - "dst node %s output type: %s format: %s shape: %s. ", - src_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(src_input_dtype).c_str(), - TypeUtils::FormatToSerialString(src_input_format).c_str(), formats::ShapeToString(src_input_shape).c_str(), - dst_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(dst_output_dtype).c_str(), - TypeUtils::FormatToSerialString(dst_output_format).c_str(), - formats::ShapeToString(dst_output_shape).c_str()); + GELOGD( + "Not satisfied symmetry. ignore pass.\n" + "Src node %s input type: %s format: %s shape: %s, " + "dst node %s output type: %s format: %s shape: %s. 
", + src_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(src_input_dtype).c_str(), + TypeUtils::FormatToSerialString(src_input_format).c_str(), formats::ShapeToString(src_input_shape).c_str(), + dst_node->GetName().c_str(), TypeUtils::DataTypeToSerialString(dst_output_dtype).c_str(), + TypeUtils::FormatToSerialString(dst_output_format).c_str(), formats::ShapeToString(dst_output_shape).c_str()); } return is_symmetry; } -int TransOpSymmetryEliminationPass::GetUnknownDimsNum(const GeTensorDesc& node_desc){ +int TransOpSymmetryEliminationPass::GetUnknownDimsNum(const GeTensorDesc &node_desc) { // // unknown_dims_num != 0 , is dynamic shape // unknown_dims_num = UNKNOWN_DIM_NUM , all dims are unknown @@ -137,8 +142,12 @@ int TransOpSymmetryEliminationPass::GetUnknownDimsNum(const GeTensorDesc& node_d int unknown_dims_num = 0; auto ge_shape = node_desc.GetShape(); for (const auto dim : ge_shape.GetDims()) { - if (dim == UNKNOWN_DIM_NUM) { return UNKNOWN_DIM_NUM; } - if (dim == UNKNOWN_DIM) { ++unknown_dims_num; } + if (dim == UNKNOWN_DIM_NUM) { + return UNKNOWN_DIM_NUM; + } + if (dim == UNKNOWN_DIM) { + ++unknown_dims_num; + } } return unknown_dims_num; } @@ -158,10 +167,16 @@ bool TransOpSymmetryEliminationPass::JudgeTransposeDBack2Raw(const NodePtr &src_ vector dst_node_perm; (void)AttrUtils::GetListInt(dst_node->GetOpDesc(), ge::PERMUTE_ATTR_PERM, dst_node_perm); - if (src_node_perm.size() != dst_node_perm.size()) { return false; } + if (src_node_perm.size() != dst_node_perm.size()) { + return false; + } for (size_t src_index = 0; src_index < src_node_perm.size(); ++src_index) { - if (dst_node_perm[src_index] >= static_cast(src_node_perm.size())) { return false; } - if (static_cast(src_index) != src_node_perm[dst_node_perm[src_index]]) { return false; } + if (dst_node_perm[src_index] >= static_cast(src_node_perm.size())) { + return false; + } + if (static_cast(src_index) != src_node_perm[dst_node_perm[src_index]]) { + return false; + } } return true; } @@ 
-195,7 +210,9 @@ Status TransOpSymmetryEliminationPass::EliminateTransOp(NodePtr &src_node, const } // 4.Add control edge from T1 other input to T2, like reshape second input for (const auto &in_node : src_node->GetInDataNodes()) { - if (in_node->GetName() == pre_normal_node->GetName()) { continue; } + if (in_node->GetName() == pre_normal_node->GetName()) { + continue; + } ret = GraphUtils::AddEdge(in_node->GetOutControlAnchor(), dst_node->GetInControlAnchor()); if (ret != GRAPH_SUCCESS) { GELOGE(FAILED, "Add control edge from %s to %s failed.", in_node->GetName().c_str(), dst_node->GetName().c_str()); diff --git a/ge/graph/passes/transop_symmetry_elimination_pass.h b/ge/graph/passes/transop_symmetry_elimination_pass.h index 3a80ada5..2c89ed48 100644 --- a/ge/graph/passes/transop_symmetry_elimination_pass.h +++ b/ge/graph/passes/transop_symmetry_elimination_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_SYMMETRY_ELIMINATION_PASS_H #define GE_SYMMETRY_ELIMINATION_PASS_H @@ -47,8 +48,7 @@ class TransOpSymmetryEliminationPass : public BaseNodePass { /// @param node_desc: node to be checked /// @return 0 , is not dynamic shape; UNKNOWN_DIM_NUM , all dims are unknown; n , n > 0 , has n dims unknown /// - static int GetUnknownDimsNum(const GeTensorDesc& node_desc); - + static int GetUnknownDimsNum(const GeTensorDesc &node_desc); /// /// judge after two transposed op transform the raw data will be the same diff --git a/ge/graph/passes/transop_without_reshape_fusion_pass.cc b/ge/graph/passes/transop_without_reshape_fusion_pass.cc index d2b3f1b1..61bca6b8 100644 --- a/ge/graph/passes/transop_without_reshape_fusion_pass.cc +++ b/ge/graph/passes/transop_without_reshape_fusion_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/passes/transop_without_reshape_fusion_pass.h" #include #include @@ -130,14 +131,6 @@ graphStatus TransOpWithoutReshapeFusionPass::GetSubGraphNodesInfo() { sub_graph_has_reshape_node[i] = true; break; } - if (in_node->GetType() == TRANSPOSE || in_node->GetType() == TRANSPOSED) { - auto input_format = in_node->GetOpDesc()->GetInputDescPtr(0)->GetFormat(); - auto output_format = in_node->GetOpDesc()->GetOutputDescPtr(0)->GetFormat(); - if (input_format == output_format) { - sub_graph_has_reshape_node[i] = true; - break; - } - } auto out_anchor = iter->first; GE_CHECK_NOTNULL(out_anchor); diff --git a/ge/graph/passes/transop_without_reshape_fusion_pass.h b/ge/graph/passes/transop_without_reshape_fusion_pass.h index 2aa2d0f7..4d037957 100755 --- a/ge/graph/passes/transop_without_reshape_fusion_pass.h +++ b/ge/graph/passes/transop_without_reshape_fusion_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #ifndef GE_GRAPH_PASSES_TRANSOP_WITHOUT_RESHAPE_FUSION_PASS_H_ #define GE_GRAPH_PASSES_TRANSOP_WITHOUT_RESHAPE_FUSION_PASS_H_ @@ -37,19 +38,13 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { void RemoveNousedNodes(const ComputeGraphPtr &graph); void GetBeginOutDescAndEndInDesc(const int index, GeTensorDesc &out_desc, GeTensorDesc &in_desc); - void GetFormatTransferDesc(const GeTensorDesc &out_desc, - const GeTensorDesc &in_desc, - GeTensorDesc &format_transfer_input, - GeTensorDesc &format_transfer_output); + void GetFormatTransferDesc(const GeTensorDesc &out_desc, const GeTensorDesc &in_desc, + GeTensorDesc &format_transfer_input, GeTensorDesc &format_transfer_output); - void GetCastOpDesc(const GeTensorDesc &out_desc, - const GeTensorDesc &in_desc, - GeTensorDesc &cast_input, + void GetCastOpDesc(const GeTensorDesc &out_desc, const GeTensorDesc &in_desc, GeTensorDesc &cast_input, GeTensorDesc &cast_output); - graphStatus FormatFusion(const int index, - OpDescPtr &format_transfer_op, - int32_t &fusion_op_count, + graphStatus FormatFusion(const int index, OpDescPtr &format_transfer_op, int32_t &fusion_op_count, bool &fusion_continue); graphStatus DataTypeFusion(const int index, OpDescPtr &cast_op, int32_t &fusion_op_count); @@ -57,33 +52,27 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { void GetOutDataPeerInControlAnchors(const size_t index, vector> &out_data_peer_in_control_anchors); - void GetInControlPeerOutControlAnchors( - const size_t index, - vector> &in_control_peer_out_control_anchors); + void GetInControlPeerOutControlAnchors(const size_t index, + vector> &in_control_peer_out_control_anchors); - void GetOutControlPeerAnchors( - const size_t index, - vector> &out_control_peer_in_control_anchors, - vector> &out_control_peer_in_data_anchors); + void GetOutControlPeerAnchors(const size_t index, + vector> &out_control_peer_in_control_anchors, + vector> &out_control_peer_in_data_anchors); graphStatus TransOpFuse(const 
ComputeGraphPtr &graph); bool OpAccuracyAbilityCheck(const OpDescPtr &op_desc); graphStatus GetSubGraphsBetweenNormalNode( - const OutDataAnchorPtr &out_anchor, - vector> - >& sub_graphs_out, - vector> &nodes_list - ); + const OutDataAnchorPtr &out_anchor, vector>> &sub_graphs_out, + vector> &nodes_list); graphStatus GetSubGraphNodesInfo(); void GetControlAnchors(); graphStatus InsertNewTransOp(const ComputeGraphPtr &graph, const OpDescPtr &cast_op, - const OpDescPtr &format_transfer_op, const int index, - const bool insert_cast_first); + const OpDescPtr &format_transfer_op, const int index, const bool insert_cast_first); void EraseInvalidAnchorsPair(); @@ -108,11 +97,8 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { graphStatus RelinkControlEdge(const int index, const OutDataAnchorPtr &out_anchor, const vector &new_trans_nodes); - graphStatus GetTransNode(const ComputeGraphPtr &graph, - const OpDescPtr &cast_op, - const OpDescPtr &format_transfer_op, - const bool insert_cast_first, - std::vector &new_trans_nodes); + graphStatus GetTransNode(const ComputeGraphPtr &graph, const OpDescPtr &cast_op, const OpDescPtr &format_transfer_op, + const bool insert_cast_first, std::vector &new_trans_nodes); void UpdateOutputName(const OutDataAnchorPtr &out_anchor, const InDataAnchorPtr &old_peer_in_anchor, const NodePtr &in_owner_node); @@ -135,8 +121,7 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { static bool FusionFormatSupport(Format format); - vector>> - sub_graph_anchors_; + vector>> sub_graph_anchors_; vector> sub_graph_nodes_; vector transop_num_count_; vector sub_graph_has_reshape_node_; @@ -150,4 +135,3 @@ class TransOpWithoutReshapeFusionPass : public GraphPass { } // namespace ge #endif // GE_GRAPH_PASSES_TRANSOP_WITHOUT_RESHAPE_FUSION_PASS_H_ - diff --git a/ge/graph/passes/transpose_transdata_pass.cc b/ge/graph/passes/transpose_transdata_pass.cc index 7348f143..b9bd59be 100644 --- a/ge/graph/passes/transpose_transdata_pass.cc +++ 
b/ge/graph/passes/transpose_transdata_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,15 +46,6 @@ Status TransposeTransDataPass::Run(NodePtr &node) { if (op_desc->GetType() != TRANSPOSED) { return SUCCESS; } - auto input_format = op_desc->GetInputDescPtr(0)->GetFormat(); - auto output_format = op_desc->GetOutputDescPtr(0)->GetFormat(); - if (input_format == output_format) { - GELOGW("Node %s input format is %s, output format is %s, should not happend. Ignore pass.", - op_desc->GetName().c_str(), - TypeUtils::FormatToSerialString(input_format).c_str(), - TypeUtils::FormatToSerialString(output_format).c_str()); - return SUCCESS; - } if (CheckOneInAndOneOutDataAnchor(node) != SUCCESS) { return FAILED; } diff --git a/ge/graph/passes/transpose_transdata_pass.h b/ge/graph/passes/transpose_transdata_pass.h index a72893f6..bf42f5de 100644 --- a/ge/graph/passes/transpose_transdata_pass.h +++ b/ge/graph/passes/transpose_transdata_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/unused_args_clean_pass.cc b/ge/graph/passes/unused_args_clean_pass.cc index 83fd0438..62094631 100755 --- a/ge/graph/passes/unused_args_clean_pass.cc +++ b/ge/graph/passes/unused_args_clean_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "unused_args_clean_pass.h" #include "graph/utils/node_utils.h" @@ -69,7 +70,7 @@ bool UnusedArgsCleanPass::UnusedInputTensor(const mapsecond; @@ -160,13 +161,9 @@ Status UnusedArgsCleanPass::UpdateInputTensor(const mapGetPeerOutAnchor(); const auto &out_node = out_anchor->GetOwnerNode(); - const auto &func_desc = func_node->GetOpDesc(); - const auto &old_desc = func_desc->GetInputDesc(parent_index); - (void)func_desc->UpdateInputDesc(update_index, old_desc); - GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(out_anchor, new_anchor), "Add edge failed"); - GELOGI("Add edge success, func node: %s, node: %s, parent index: %u, update index: %u", - func_node->GetName().c_str(), out_node->GetName().c_str(), parent_index, update_index); + GELOGI("Add edge success, func node: %s, node: %s, parent index: %u, update index: %u", func_node->GetName().c_str(), + out_node->GetName().c_str(), parent_index, update_index); GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, old_anchor), "Remove edge failed"); GELOGI("Remove edge success, func node: %s, node: %s", func_node->GetName().c_str(), out_node->GetName().c_str()); @@ -188,7 +185,7 @@ Status UnusedArgsCleanPass::RemoveInputTensor(const map #include -using std::set; using std::map; +using std::set; namespace ge { class UnusedArgsCleanPass : public GraphPass { @@ -41,8 +42,8 @@ class UnusedArgsCleanPass : public GraphPass { /// @param [in] parent_index: parent index for check. /// @return true: unused / false: used /// - bool UnusedInputTensor(const map> &graph_nodes, - const NodePtr &func_node, uint32_t parent_index); + bool UnusedInputTensor(const map> &graph_nodes, const NodePtr &func_node, + uint32_t parent_index); /// /// @ingroup ge @@ -63,8 +64,8 @@ class UnusedArgsCleanPass : public GraphPass { /// @param [in] parent_index: parent index for remove. 
/// @return 0: SUCCESS / others: FAILED /// - Status RemoveInputTensor(const map> &graph_nodes, - const NodePtr &func_node, uint32_t parent_index); + Status RemoveInputTensor(const map> &graph_nodes, const NodePtr &func_node, + uint32_t parent_index); /// /// @ingroup ge @@ -75,8 +76,8 @@ class UnusedArgsCleanPass : public GraphPass { /// @param [in] unused_num: unused args num. /// @return 0: SUCCESS / others: FAILED /// - Status UpdateInputTensor(const map> &graph_nodes, - const NodePtr &func_node, uint32_t parent_index, uint32_t unused_num); + Status UpdateInputTensor(const map> &graph_nodes, const NodePtr &func_node, + uint32_t parent_index, uint32_t unused_num); }; } // namespace ge #endif // GE_COMMON_CASE_ARGS_CLEAN_H_ diff --git a/ge/graph/passes/unused_const_pass.cc b/ge/graph/passes/unused_const_pass.cc index 7c57c53e..386633b5 100644 --- a/ge/graph/passes/unused_const_pass.cc +++ b/ge/graph/passes/unused_const_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/unused_const_pass.h b/ge/graph/passes/unused_const_pass.h index 6b99f058..3c7f3460 100755 --- a/ge/graph/passes/unused_const_pass.h +++ b/ge/graph/passes/unused_const_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/unused_op_remove_pass.cc b/ge/graph/passes/unused_op_remove_pass.cc index 41f7c828..45bbc291 100644 --- a/ge/graph/passes/unused_op_remove_pass.cc +++ b/ge/graph/passes/unused_op_remove_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -122,11 +122,10 @@ bool UnusedOpRemovePass::IsExceptions(const NodePtr &node) { GE_CHK_BOOL_EXEC(op_def != nullptr, return false, "opdesc is nullptr"); // permute optimised in permute_pass.cpp if (op_def->GetType() == PERMUTE) { - GE_IF_BOOL_EXEC( - (node->GetInDataNodes().size() != 0 && - (node->GetInDataNodes().at(0) != nullptr && node->GetInDataNodes().at(0)->GetOpDesc() != nullptr && - node->GetInDataNodes().at(0)->GetOpDesc()->GetType() == ATTENTIONDECODER)), - return false); + GE_IF_BOOL_EXEC((node->GetInDataNodes().size() != 0 && + (node->GetInDataNodes().at(0) != nullptr && node->GetInDataNodes().at(0)->GetOpDesc() != nullptr && + node->GetInDataNodes().at(0)->GetOpDesc()->GetType() == ATTENTIONDECODER)), + return false); return true; } return false; diff --git a/ge/graph/passes/unused_op_remove_pass.h b/ge/graph/passes/unused_op_remove_pass.h index b9429cfd..bbc43af5 100755 --- a/ge/graph/passes/unused_op_remove_pass.h +++ b/ge/graph/passes/unused_op_remove_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/var_is_initialized_op_pass.cc b/ge/graph/passes/var_is_initialized_op_pass.cc index b9c752d8..73456a7b 100644 --- a/ge/graph/passes/var_is_initialized_op_pass.cc +++ b/ge/graph/passes/var_is_initialized_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,12 +48,10 @@ Status VarIsInitializedOpPass::Run(NodePtr &node) { if (CheckSrcNode(node, inited) != SUCCESS) { return FAILED; } - GELOGI("The variable inited status %s on node %s", - inited ? "true" : "false", node->GetName().c_str()); + GELOGI("The variable inited status %s on node %s", inited ? "true" : "false", node->GetName().c_str()); ret = ChangeNodeToConstant(node, inited); - GELOGI("Change VarIsInitializedOp %s to be Constant %s end.", - node->GetName().c_str(), inited ? "true" : "false"); + GELOGI("Change VarIsInitializedOp %s to be Constant %s end.", node->GetName().c_str(), inited ? 
"true" : "false"); return ret; } @@ -61,9 +59,7 @@ Status VarIsInitializedOpPass::CheckSrcNode(const NodePtr &node, bool &inited) c GE_CHECK_NOTNULL(node); auto input_nodes = node->GetInDataNodes(); if (input_nodes.size() != kVarIsInitializedIOCnt) { - GELOGE(FAILED, - "[%s] Node input data nodes size [%zu] is not equal 1.", - node->GetName().c_str(), + GELOGE(FAILED, "[%s] Node input data nodes size [%zu] is not equal 1.", node->GetName().c_str(), input_nodes.size()); return FAILED; } @@ -129,8 +125,7 @@ Status VarIsInitializedOpPass::ProcessInAnchor(NodePtr &node, NodePtr &new_node) GE_CHECK_NOTNULL(new_node); auto in_anchors = node->GetAllInDataAnchors(); auto out_anchors = node->GetAllOutDataAnchors(); - if ((in_anchors.size() != kVarIsInitializedIOCnt) || - (out_anchors.size() != kVarIsInitializedIOCnt)) { + if ((in_anchors.size() != kVarIsInitializedIOCnt) || (out_anchors.size() != kVarIsInitializedIOCnt)) { GELOGE(FAILED, "[%s] Node input/output data anchors" " size [%lu][%lu] is not all equal 1.", @@ -149,8 +144,8 @@ Status VarIsInitializedOpPass::ProcessInAnchor(NodePtr &node, NodePtr &new_node) } auto src_node = peer_out_anchor->GetOwnerNode(); if (GraphUtils::AddEdge(src_node->GetOutControlAnchor(), new_node->GetInControlAnchor()) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to link control edges from var %s to new const %s", - src_node->GetName().c_str(), new_node->GetName().c_str()); + GELOGE(FAILED, "Failed to link control edges from var %s to new const %s", src_node->GetName().c_str(), + new_node->GetName().c_str()); return FAILED; } @@ -253,15 +248,15 @@ Status VarIsInitializedOpPass::UpdateInitedVars(const NodePtr &node) { if (inited_vars != nullptr) { GE_CHECK_NOTNULL(node->GetOpDesc()); nodes_to_inited_vars_[node->GetOpDesc()->GetId()] = inited_vars; - GELOGD("Inited vars on this graph when node %s, inited vars count %zu", - node->GetName().c_str(), inited_vars->size()); + GELOGD("Inited vars on this graph when node %s, inited vars count %zu", 
node->GetName().c_str(), + inited_vars->size()); } return SUCCESS; } std::set *VarIsInitializedOpPass::CreateInitedVars() { - std::unique_ptr> inited_vars_keeper(new(std::nothrow) std::set()); + std::unique_ptr> inited_vars_keeper(new (std::nothrow) std::set()); if (inited_vars_keeper == nullptr) { GELOGE(OUT_OF_MEMORY, "Failed to alloc set memory"); return nullptr; diff --git a/ge/graph/passes/var_is_initialized_op_pass.h b/ge/graph/passes/var_is_initialized_op_pass.h index 9cfa7b99..37b3f49b 100755 --- a/ge/graph/passes/var_is_initialized_op_pass.h +++ b/ge/graph/passes/var_is_initialized_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_format_pass.cc b/ge/graph/passes/variable_format_pass.cc index bd5300a5..28f6a4f7 100644 --- a/ge/graph/passes/variable_format_pass.cc +++ b/ge/graph/passes/variable_format_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_format_pass.h b/ge/graph/passes/variable_format_pass.h index e2c32903..1a0abe2e 100755 --- a/ge/graph/passes/variable_format_pass.h +++ b/ge/graph/passes/variable_format_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/graph/passes/variable_op_pass.cc b/ge/graph/passes/variable_op_pass.cc index f1843d94..8c34cd36 100644 --- a/ge/graph/passes/variable_op_pass.cc +++ b/ge/graph/passes/variable_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,9 +73,9 @@ Status ByPassTransNode(NodePtr &trans_node, NodePtr &ref_node) { auto prev_trans_node_out_anchor = trans_in_anchor->GetPeerOutAnchor(); if (prev_trans_node_out_anchor == nullptr) { GELOGW( - "The trans node %s does not have an input, so the ref node %s does" - " not have any inputs after bypass", - trans_node->GetName().c_str(), trans_node->GetName().c_str()); + "The trans node %s does not have an input, so the ref node %s does" + " not have any inputs after bypass", + trans_node->GetName().c_str(), trans_node->GetName().c_str()); } else { ret = GraphUtils::AddEdge(prev_trans_node_out_anchor, ref_in_anchor); if (ret != GRAPH_SUCCESS) { @@ -162,14 +162,14 @@ Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { auto start_iter = fusion_road.begin(); auto end_iter = fusion_road.rbegin(); GELOGD( - "Trans variable data for %s from format %s to %s, shape %s to %s " - "data-type %s to %s, path len %zu success", - node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), - TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), - formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), - formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), - TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), - TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); + "Trans variable data for %s from format %s to %s, shape %s to %s " + "data-type %s to %s, path len %zu 
success", + node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), + TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), + formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), + formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), + TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), + TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); ret = VarManager::Instance(graph->GetSessionID())->SetTransRoad(node->GetName(), fusion_road); if (ret != SUCCESS) { @@ -230,9 +230,9 @@ Status VariableOpPass::DealFusion(const ge::NodePtr &var_node) { trans_node->GetType().c_str(), var_node->GetName().c_str()); if (trans_node->GetOutDataNodes().size() > 1) { GELOGD( - "The trans node %s type %s connecting with var-ref %s has more" - " than one output data nodes, unlink the edge between them", - trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); + "The trans node %s type %s connecting with var-ref %s has more" + " than one output data nodes, unlink the edge between them", + trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); if (ByPassTransNode(trans_node, ref_node) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to bypass trans node %s to ref %s", trans_node->GetName().c_str(), ref_node->GetName().c_str()); @@ -240,9 +240,9 @@ Status VariableOpPass::DealFusion(const ge::NodePtr &var_node) { } } else { GELOGD( - "The trans node %s type %s connecting with var-ref %s has only" - " one output data nodes, isolate and remove it.", - trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); + "The trans node %s type %s connecting with var-ref %s has only" + " one output data nodes, isolate and remove it.", + trans_node->GetName().c_str(), trans_node->GetType().c_str(), ref_node->GetName().c_str()); if 
(GraphUtils::IsolateNode(trans_node, {0}) != SUCCESS) { return GE_GRAPH_VARIABLE_OP_PASS_FAILED; } @@ -280,9 +280,9 @@ Status VariableOpPass::CheckSameAndTransOp(const ge::NodePtr &var_node, bool &is } if (data_index != in_anchor->GetIdx()) { GELOGD( - "Variables only can be fusion with trans nodes, the next node %s" - " type %s index %d does not trans anything(correct index %d)", - out_node->GetName().c_str(), out_node->GetType().c_str(), in_anchor->GetIdx(), data_index); + "Variables only can be fusion with trans nodes, the next node %s" + " type %s index %d does not trans anything(correct index %d)", + out_node->GetName().c_str(), out_node->GetType().c_str(), in_anchor->GetIdx(), data_index); return SUCCESS; } @@ -312,9 +312,9 @@ Status VariableOpPass::CheckSameAndTransOp(const ge::NodePtr &var_node, bool &is } GELOGW( - "trans_op type size for var Node(%s) is over 1, Currently not" - " supported, dataTypeAndFormats is %s.", - var_node->GetName().c_str(), type_and_formats_stream.str().c_str()); + "trans_op type size for var Node(%s) is over 1, Currently not" + " supported, dataTypeAndFormats is %s.", + var_node->GetName().c_str(), type_and_formats_stream.str().c_str()); return SUCCESS; } @@ -591,7 +591,7 @@ Status VariableOpPass::RenewVarDesc(ge::ComputeGraphPtr &graph) { Status ret = SUCCESS; for (auto &node : graph->GetDirectNode()) { bool is_var_node = - (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); + (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); if (is_var_node) { if (!ge::VarManager::Instance(graph->GetSessionID())->IsVarExist(node->GetName())) { GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); diff --git a/ge/graph/passes/variable_op_pass.h b/ge/graph/passes/variable_op_pass.h index 3b18882c..e17980e9 100755 --- a/ge/graph/passes/variable_op_pass.h +++ b/ge/graph/passes/variable_op_pass.h @@ -1,5 +1,5 @@ /** - * 
Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_op_pass_bak.cc b/ge/graph/passes/variable_op_pass_bak.cc deleted file mode 100644 index 3e40e686..00000000 --- a/ge/graph/passes/variable_op_pass_bak.cc +++ /dev/null @@ -1,812 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/passes/variable_op_pass.h" -#include -#include - -#include "common/formats/formats.h" -#include "common/formats/utils/formats_trans_utils.h" -#include "graph/ge_context.h" -#include "graph/graph.h" -#include "graph/manager/graph_var_manager.h" -#include "graph/utils/graph_utils.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/type_utils.h" - -namespace ge { -namespace { -const int kTransOpOutIndex = 0; - -Status ByPassTransNode(NodePtr &front_node, NodePtr &back_node) { - GE_CHECK_NOTNULL(front_node); - GE_CHECK_NOTNULL(back_node); - GELOGD("Begin to bypass trans node %s", front_node->GetName().c_str()); - auto ret = GraphUtils::CopyInCtrlEdges(front_node, back_node); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Failed to move control edges from trans " - "node %s to var-ref %s", - front_node->GetName().c_str(), back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - auto back_node_in_anchor = back_node->GetInDataAnchor(0); - if (back_node_in_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, - "The back node %s does not have an " - "input anchor", - back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - back_node_in_anchor->UnlinkAll(); - auto trans_in_anchor = front_node->GetInDataAnchor(0); - if (trans_in_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, - "Failed to get the in data anchor from trans" - " node %s type %s", - front_node->GetName().c_str(), front_node->GetType().c_str()); - return INTERNAL_ERROR; - } - auto prev_trans_node_out_anchor = trans_in_anchor->GetPeerOutAnchor(); - if (prev_trans_node_out_anchor == nullptr) { - GELOGW( - "The trans node %s does not have an input, so the ref node %s does" - " not have any inputs after bypass", - front_node->GetName().c_str(), front_node->GetName().c_str()); - } else { - ret = GraphUtils::AddEdge(prev_trans_node_out_anchor, back_node_in_anchor); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Failed to add edge between ref node %s " - "and 
the prev node of trans node %s", - back_node->GetName().c_str(), front_node->GetName().c_str()); - return INTERNAL_ERROR; - } - } - return SUCCESS; -} - -bool IsTransSupport(const TransNodeInfo &trans_info) { - if (trans_info.output.GetShape().IsUnknownShape()) { - return false; - } - if (trans_info.node_type == RESHAPE || trans_info.node_type == REFORMAT) { - return true; - } else if (trans_info.node_type == TRANSDATA || trans_info.node_type == TRANSPOSED) { - formats::TransArgs args{nullptr, - trans_info.input.GetFormat(), - trans_info.output.GetFormat(), - trans_info.input.GetShape().GetDims(), - trans_info.output.GetShape().GetDims(), - trans_info.input.GetDataType()}; - return formats::IsTransFormatSupport(args); - } else if (trans_info.node_type == CAST) { - formats::CastArgs datatype_args{nullptr, static_cast(trans_info.input.GetShape().GetShapeSize()), - trans_info.input.GetDataType(), trans_info.output.GetDataType()}; - return formats::IsTransDataTypeSupport(datatype_args); - } else { - return false; - } -} - -std::string GetInAndOutDecsDiff(NodePtr &trans_node, bool reverse = false) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - auto op_desc = trans_node->GetOpDesc(); - GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index); - GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex); - if (reverse) { - GeTensorDesc tmp_desc = input_desc; - input_desc = output_desc; - output_desc = tmp_desc; - } - auto input_format = input_desc.GetFormat(); - auto input_type = input_desc.GetDataType(); - auto input_shape = input_desc.GetShape(); - auto output_format = output_desc.GetFormat(); - auto output_type = output_desc.GetDataType(); - auto output_shape = output_desc.GetShape(); - std::stringstream diff_key; - diff_key.str(""); - if (input_format != output_format) { - diff_key << static_cast(input_format) << '-' << static_cast(output_format) << '-'; - } else { - diff_key << "*-"; - } - if (input_type != 
output_type) { - diff_key << static_cast(input_type) << '-' << static_cast(output_type) << '-'; - } else { - diff_key << "*-"; - } - if (!ge::formats::IsShapeEqual(input_shape, output_shape)) { - for (auto dim : input_shape.GetDims()) { - diff_key << dim << '-'; - } - for (auto dim : output_shape.GetDims()) { - diff_key << dim << '-'; - } - } else { - diff_key << "*"; - } - return diff_key.str(); -} -} // namespace - -Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { - if (graph == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to run variable op pass, null graph"); - return INTERNAL_ERROR; - } - - GELOGD("Begin to run variable op pass on graph %s, session %lu, graph id %u", graph->GetName().c_str(), - GetContext().SessionId(), graph->GetGraphID()); - - if (var_accelerate_ctrl_ == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to run var op pass, the variable accelerate control is null"); - return INTERNAL_ERROR; - } - - GELOGD("Begin to generate ref map for variable and refs, graph name:%s.", graph->GetName().c_str()); - if (RenewVarDesc(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to renew var desc on graph"); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GenerateVariableVariableRefMap(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - GELOGD("Begin to fusion variables and trans nodes"); - for (auto &var_to_refs : var_and_var_ref_map_) { - auto &node = var_to_refs.first; - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(var_accelerate_ctrl_); - if (!var_accelerate_ctrl_->IsVarPermitToChangeFormats(node->GetName())) { - GELOGD("The var %s does not permit to change formats, skip it", node->GetName().c_str()); - continue; - } - - VarTransRoad fusion_road; - auto ret = FusionIfNeed(node, fusion_road); - if (ret != SUCCESS) { - return ret; - } - - if (fusion_road.empty()) { - GELOGD("No need to fusion variable %s because it's fusion 
road is empty", node->GetName().c_str()); - continue; - } - - ret = RenewTransRoadDesc(node, fusion_road); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to renew description fusion road for var %s", node->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - auto start_iter = fusion_road.begin(); - auto end_iter = fusion_road.rbegin(); - GELOGD( - "Trans variable data for %s from format %s to %s, shape %s to %s " - "data-type %s to %s, path len %zu success", - node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), - TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), - formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), - formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), - TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), - TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); - - ret = VarManager::Instance(graph->GetSessionID())->SetTransRoad(node->GetName(), fusion_road); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to update the format fusion road for var %s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - ret = VarManager::Instance(graph->GetSessionID())->SetChangedGraphId(node->GetName(), graph->GetGraphID()); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to update the graph id for var %s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - var_accelerate_ctrl_->SetVarChanged(node->GetName()); - - GELOGD("Begin to update format info for var %s.", node->GetName().c_str()); - std::set node_set({node}); - if (UpdateIOFormatInfo(end_iter->output, node_set) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - // renew var desc if the trans_road is all reshape or reformat - ret = RenewVarDesc(graph->GetSessionID(), node, fusion_road); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", 
node->GetName().c_str()); - return FAILED; - } - } - - return SUCCESS; -} - -Status VariableOpPass::RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road) { - auto var_desc = var->GetOpDesc(); - GE_CHECK_NOTNULL(var_desc); - TransNodeInfo prev_node_info; - prev_node_info.node_type = var->GetType(); - prev_node_info.output = var_desc->GetOutputDesc(0); - // two cases - // fisrt Var->cast->transdata which transdata in fusion road - // the input of transdata is not equal with output of var - // case 1 : suppose input dtype of transdata equal with out dtype - // but not equal with var - // so we make input dtype and output dytpe of transroad equal with var - // case 2: suppose input format of transdata not equal with out format - // and input format not equal with var - // so we make input format equal with var - - for (auto &cur_trans : fusion_road) { - if (cur_trans.input.GetFormat() == cur_trans.output.GetFormat()) { - cur_trans.output.SetFormat(prev_node_info.output.GetFormat()); - } - if (cur_trans.input.GetDataType() == cur_trans.output.GetDataType()) { - cur_trans.output.SetDataType(prev_node_info.output.GetDataType()); - } - if (ge::formats::IsShapeEqual(cur_trans.input.GetShape(), cur_trans.output.GetShape())) { - cur_trans.output.SetShape(prev_node_info.output.GetShape()); - } - cur_trans.input = prev_node_info.output; - prev_node_info.output = cur_trans.output; - } - return SUCCESS; -} - -Status VariableOpPass::FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road) { - bool can_fusion = false; - while (true) { - map> trans_type_to_trans_ops ; - map> trans_type_to_changed_desc; - // record the order of trans op in first path - vector first_path_trans_order; - auto ret = CheckIfCouldBeOptimized(var, first_path_trans_order, trans_type_to_changed_desc, - trans_type_to_trans_ops, can_fusion); - if (ret != SUCCESS) { - GELOGE(FAILED, "Check trans ops after vatiable could be optimized or not failed"); - return ret; - } - - if (!can_fusion) { - 
break; - } - - vector> delete_var_ref_trans_nodes; - ret = GetAndCheckTransOpOfVarRef(var, can_fusion, trans_type_to_changed_desc, delete_var_ref_trans_nodes); - if (ret != SUCCESS) { - GELOGE(FAILED, "get and check trans op of varref failed"); - return ret; - } - - if (!can_fusion) { - break; - } - - ret = UpdateTransRoad(fusion_road, first_path_trans_order, - trans_type_to_changed_desc, trans_type_to_trans_ops); - if (ret != SUCCESS) { - GELOGE(FAILED, "Update trans road failed"); - return ret; - } - - if (fusion_road.empty()) { - return SUCCESS; - } - - ret = DealFusion(var, fusion_road, trans_type_to_changed_desc, - trans_type_to_trans_ops, delete_var_ref_trans_nodes); - if (ret != SUCCESS) { - return ret; - } - } - return SUCCESS; -} - -Status VariableOpPass::UpdateTransRoad(VarTransRoad &fusion_road, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops){ - vector delete_trans_type; - for (auto &trans_type : first_path_trans_order) { - if (trans_type_to_changed_desc.find(trans_type) == trans_type_to_changed_desc.end()) { - continue; - } - bool delete_flag = false; - for (auto &trans_node : trans_type_to_trans_ops[trans_type]) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - auto out_op_desc = trans_node->GetOpDesc(); - GE_CHECK_NOTNULL(out_op_desc); - TransNodeInfo trans_node_info; - trans_node_info.node_type = trans_node->GetType(); - trans_node_info.input = out_op_desc->GetInputDesc(tran_in_index); - trans_node_info.output = out_op_desc->GetOutputDesc(kTransOpOutIndex); - if (!IsTransSupport(trans_node_info)) { - delete_flag = true; - GELOGD("The trans node %s does not support, skip the variable accelerating", trans_node_info.node_type.c_str()); - break; - } - } - if (delete_flag) { - delete_trans_type.push_back(trans_type); - } else { - auto &trans_node = *trans_type_to_trans_ops[trans_type].begin(); - auto out_op_desc = trans_node->GetOpDesc(); - int tran_in_index = 
TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - TransNodeInfo trans_node_info; - trans_node_info.node_type = trans_node->GetType(); - trans_node_info.input = out_op_desc->GetInputDesc(tran_in_index); - trans_node_info.output = out_op_desc->GetOutputDesc(kTransOpOutIndex); - fusion_road.emplace_back(trans_node_info); - } - } - for (auto &trans_type : delete_trans_type) { - trans_type_to_changed_desc.erase(trans_type); - } - return SUCCESS; -} - -Status VariableOpPass::DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, - map> trans_type_to_changed_desc, - map> trans_type_to_trans_ops, - vector> &delete_trans_nodes) { - GE_CHECK_NOTNULL(var_node); - GELOGD("Begin to fusion var %s with trans", var_node->GetName().c_str()); - auto graph = var_node->GetOwnerComputeGraph(); - for (auto &trans_type : trans_type_to_changed_desc) { - for (auto &trans_node : trans_type_to_trans_ops[trans_type.first]) { - GELOGD("Remove node %s type %s when fusion with variable %s", trans_node->GetName().c_str(), - trans_node->GetType().c_str(), var_node->GetName().c_str()); - if (RenewTransOpDesc(trans_node, true) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GraphUtils::IsolateNode(trans_node, {0}) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GraphUtils::RemoveNodeWithoutRelink(graph, trans_node) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - } - - // Iterate delete_trans_nodes backward, eg a->b->c, delete_trans_nodes:{{b,c},{a,b}} - // we should delete {a,b} first , then b->c,then we can delete {b,c} - // if we delete {b,c} first, then a->c, then we can not get b when we delete {a,b} - for (auto iter = delete_trans_nodes.rbegin(); iter != delete_trans_nodes.rend(); ++iter) { - auto front_node = iter->first; - auto back_node = iter->second; - if (RenewTransOpDesc(front_node, false) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - if (front_node->GetOutDataNodes().size() > 1) { - 
GELOGD("The trans node %s type %s connecting with var-ref %s has more" - " than one output data nodes, unlink the edge between them", - front_node->GetName().c_str(), front_node->GetType().c_str(), back_node->GetName().c_str()); - if (ByPassTransNode(front_node, back_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to bypass trans node %s to node %s", front_node->GetName().c_str(), - back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - } else { - GELOGD("The trans node %s type %s connecting with %s has only" - " one output data nodes, isolate and remove it.", - front_node->GetName().c_str(), front_node->GetType().c_str(), back_node->GetName().c_str()); - if (GraphUtils::IsolateNode(front_node, {0}) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - if (GraphUtils::RemoveNodeWithoutRelink(graph, front_node) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewTransOpDesc(ge::NodePtr &node, bool is_reverse) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(node->GetType()); - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index); - GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex); - GeTensorDesc renew_desc = is_reverse ? output_desc : input_desc; - bool format_changed = false; - bool shape_changed = false; - bool dtype_changed = false; - if (input_desc.GetFormat() != output_desc.GetFormat()) { - format_changed = true; - } - if (input_desc.GetDataType() != output_desc.GetDataType()) { - dtype_changed = true; - } - if (!ge::formats::IsShapeEqual(input_desc.GetShape(), output_desc.GetShape())) { - shape_changed = true; - } - auto cur_node = node; - while (TransOpUtil::IsTransOp(cur_node)) { - tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType()); - auto next_node = is_reverse ? 
NodeUtils::GetInDataNodeByIndex(*cur_node, tran_in_index) : - cur_node->GetOutDataNodes().at(kTransOpOutIndex); - if (!TransOpUtil::IsTransOp(next_node)) { - break; - } - auto prev_desc = next_node->GetOpDesc(); - tran_in_index = TransOpUtil::GetTransOpDataIndex(next_node->GetType()); - auto mutable_output_desc = prev_desc->MutableOutputDesc(kTransOpOutIndex); - auto mutable_input_desc = prev_desc->MutableInputDesc(tran_in_index); - GE_CHECK_NOTNULL(prev_desc->MutableOutputDesc(kTransOpOutIndex)); - GE_CHECK_NOTNULL(prev_desc->MutableInputDesc(tran_in_index)); - if (shape_changed) { - mutable_input_desc->SetShape(renew_desc.GetShape()); - mutable_output_desc->SetShape(renew_desc.GetShape()); - } - if (dtype_changed) { - mutable_input_desc->SetDataType(renew_desc.GetDataType()); - mutable_output_desc->SetDataType(renew_desc.GetDataType()); - } - if (format_changed) { - mutable_input_desc->SetFormat(renew_desc.GetFormat()); - mutable_output_desc->SetFormat(renew_desc.GetFormat()); - } - cur_node = next_node; - } - return SUCCESS; -} - -Status VariableOpPass::CheckIfCouldBeOptimized(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &flag) { - bool is_match = true; - auto ret = GetSameTransOP(var, first_path_trans_order, trans_type_to_changed_desc, - trans_type_to_trans_ops, is_match); - - if (ret != SUCCESS) { - GELOGE(FAILED, "Get same trans op of variable node: %s failed", var->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (!is_match) { - flag = false; - GELOGI("trans nodes after variable do not meet the condition"); - return SUCCESS; - } - - flag = true; - return SUCCESS; -} - -Status VariableOpPass::GetSameTransOP(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match) { - GELOGD("Begin to get Node: %s trans op info of first path", var->GetName().c_str()); - auto ret = 
GetFisrtPathTransInfo(var, first_path_trans_order, - trans_type_to_changed_desc, trans_type_to_trans_ops); - if (ret != SUCCESS) { - GELOGE(FAILED, "Get var: %s first path trans info failed", var->GetName().c_str()); - return FAILED; - } - - if (first_path_trans_order.empty()) { - GELOGD("var %s first path has no trans op, not need to pass", var->GetName().c_str()); - is_match = false; - return SUCCESS; - } - - GELOGD("Begin to depth first search Node: %s ", var->GetName().c_str()); - VariableDFS(var, trans_type_to_changed_desc, trans_type_to_trans_ops, is_match); - - return SUCCESS; -} - -void VariableOpPass::VariableDFS(const NodePtr &node, map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match) { - std::stack node_stack; - std::stack> path_stack; - for (auto &out_node : node->GetOutDataNodes()) { - if (!is_match) { - break; - } - if (out_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(out_node)) { - is_match = false; - break; - } - node_stack.push(out_node); - path_stack.emplace(vector{out_node}); - while (!node_stack.empty() && is_match) { - auto cur_node = node_stack.top(); - auto cur_path = path_stack.top(); - node_stack.pop(); - path_stack.pop(); - if (cur_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(cur_node)) { - UpdateTransInfo(cur_path, is_match, trans_type_to_changed_desc, trans_type_to_trans_ops); - continue; - } - for (auto &next_node : cur_node->GetOutDataNodes()) { - node_stack.push(next_node); - auto next_path = cur_path; - next_path.push_back(next_node); - path_stack.emplace(next_path); - } - } - } -} - -Status VariableOpPass::UpdateTransInfo(vector &cur_path, bool& is_match, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops) { - GELOGD("Begin to update trans info by path"); - std::set trans_op_occured; - for (auto &trans_node : cur_path) { - auto trans_node_type = trans_node->GetType(); - if (trans_op_occured.find(trans_node_type) != trans_op_occured.end() || - 
!ge::TransOpUtil::IsTransOp(trans_node_type)) { - continue; - } - trans_op_occured.insert(trans_node_type); - auto desc_diff = GetInAndOutDecsDiff(trans_node); - if (trans_type_to_changed_desc.find(trans_node_type) != trans_type_to_changed_desc.end() && - desc_diff == trans_type_to_changed_desc[trans_node_type].first) { - trans_type_to_changed_desc[trans_node_type].second = true; - auto iter = find(trans_type_to_trans_ops[trans_node_type].begin(), - trans_type_to_trans_ops[trans_node_type].end(), - trans_node); - if (iter == trans_type_to_trans_ops[trans_node_type].end()) { - trans_type_to_trans_ops[trans_node_type].push_back(trans_node); - } - } - } - std::set delete_trans_types; - for (auto &trans_item : trans_type_to_changed_desc) { - if (!trans_item.second.second) { - delete_trans_types.insert(trans_item.first); - } else { - trans_item.second.second = false; - } - } - for (auto& delete_item : delete_trans_types) { - trans_type_to_changed_desc.erase(delete_item); - } - if (trans_type_to_changed_desc.empty()) { - is_match = false; - } - return SUCCESS; -} - -Status VariableOpPass::GetFisrtPathTransInfo(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops) { - auto cur_node = var; - while (cur_node->GetOutDataNodesSize() != 0) { - cur_node = cur_node->GetOutDataNodes().at(0); - GE_CHECK_NOTNULL(cur_node); - if (!ge::TransOpUtil::IsTransOp(cur_node)) { - break; - } - auto cur_node_type = cur_node->GetType(); - // only get the the first occurrence operator of same type - if (trans_type_to_changed_desc.find(cur_node_type) == trans_type_to_changed_desc.end()) { - auto desc_diff = GetInAndOutDecsDiff(cur_node); - trans_type_to_changed_desc[cur_node->GetType()] = make_pair(desc_diff, false); - trans_type_to_trans_ops[cur_node->GetType()] = vector{cur_node}; - first_path_trans_order.push_back(cur_node->GetType()); - } - } - GELOGD("get var %s first path trans info success", var->GetName().c_str()); - 
return SUCCESS; -} - -Status VariableOpPass::GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check, - map> &trans_type_to_changed_desc, - vector> &delete_var_ref_trans_nodes) { - auto iterator = var_and_var_ref_map_.find(var_node); - if (iterator == var_and_var_ref_map_.end()) { - GELOGD("there is no var_ref of node %s", var_node->GetName().c_str()); - return SUCCESS; - } - vector delete_trans_type; - for (auto &trans_type : trans_type_to_changed_desc) { - delete_trans_type.push_back(trans_type.first); - } - for (auto &ref_node : iterator->second) { - GE_CHECK_NOTNULL(ref_node); - auto cur_node = *ref_node->GetInDataNodes().begin(); - auto behind_node = ref_node; - GE_CHECK_NOTNULL(cur_node); - vector tmp_delete_trans_type = delete_trans_type; - while (TransOpUtil::IsTransOp(cur_node)) { - GE_CHECK_NOTNULL(cur_node); - auto iter = find(tmp_delete_trans_type.begin(), tmp_delete_trans_type.end(), cur_node->GetType()); - if (iter != tmp_delete_trans_type.end()) { - CheckTransOpOfVarAndVarRefSymmetry(cur_node, trans_type_to_changed_desc[cur_node->GetType()].first, - pass_check); - if (!pass_check) { - GELOGD("trans op : %s of var ref %s is illegal", cur_node->GetName().c_str(), ref_node->GetName().c_str()); - return SUCCESS; - } - tmp_delete_trans_type.erase(iter); - delete_var_ref_trans_nodes.emplace_back(std::make_pair(cur_node, behind_node)); - } - int tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType()); - behind_node = cur_node; - cur_node = cur_node->GetInDataNodes().at(tran_in_index); - } - if (!tmp_delete_trans_type.empty()) { - pass_check = false; - return SUCCESS; - } - } - return SUCCESS; -} - -Status VariableOpPass::CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, - bool &is_symmetry){ - auto var_ref_trans_op_desc_diff = GetInAndOutDecsDiff(var_ref_trans_op, true); - is_symmetry = (var_ref_trans_op_desc_diff == desc_diff); - return SUCCESS; -} - -Status 
VariableOpPass::UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node) { - if (node == nullptr || node->GetOpDesc() == nullptr) { - GELOGE(FAILED, "node or opdesc is nullptr"); - return FAILED; - } - const Format &format = final_output.GetFormat(); - const DataType &data_type = final_output.GetDataType(); - const GeShape &shape = final_output.GetShape(); - GELOGD("last ref is (%s, %s, %lu), var_ref_name is %s.", TypeUtils::DataTypeToSerialString(data_type).c_str(), - TypeUtils::FormatToSerialString(format).c_str(), shape.GetDims().size(), node->GetName().c_str()); - - auto node_desc = node->GetOpDesc()->GetOutputDesc(0); - CopyVariableFormatDataTypeAndShape(final_output, node_desc); - if (node->GetOpDesc()->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGE(FAILED, "update output desc fail."); - return FAILED; - } - GELOGD("node ref is (%s, %s, %lu), var_ref_name is %s.", - TypeUtils::DataTypeToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetDataType()).c_str(), - TypeUtils::FormatToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetFormat()).c_str(), - node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims().size(), node->GetName().c_str()); - - auto iterator = var_and_var_ref_map_.find(node); - if (iterator == var_and_var_ref_map_.end()) { - auto graph = node->GetOwnerComputeGraph(); - if (GenerateVariableVariableRefMap(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - iterator = var_and_var_ref_map_.find(node); - if (iterator == var_and_var_ref_map_.end()) { - GELOGW("The var node %s which belongs to graph %s can not be found on the graph", node->GetName().c_str(), - node->GetOwnerComputeGraph()->GetName().c_str()); - return SUCCESS; - } - - for (const auto &var_ref_node : iterator->second) { - auto var_ref_node_description = var_ref_node->GetOpDesc(); - 
GE_CHECK_NOTNULL(var_ref_node_description); - - GELOGD("var_ref_node before is (%s, %s, %zu), var_ref_name is %s.", - TypeUtils::DataTypeToSerialString(data_type).c_str(), TypeUtils::FormatToSerialString(format).c_str(), - shape.GetDims().size(), var_ref_node->GetName().c_str()); - if (var_ref_node_description->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGW("UpdateOutputDesc fail."); - } - if (var_ref_node_description->UpdateInputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGW("UpdateInputDesc fail."); - } - const auto &input_desc = var_ref_node_description->MutableInputDesc(0); - const auto &output_desc = var_ref_node_description->MutableOutputDesc(0); - GE_CHECK_NOTNULL(input_desc); - GE_CHECK_NOTNULL(output_desc); - GELOGD("var_ref_node ref is (%s, %s, %zu), var_ref_name is %s.", - TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str(), - TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), output_desc->GetShape().GetDims().size(), - var_ref_node->GetName().c_str()); - } - - return SUCCESS; -} - -Status VariableOpPass::GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph) { - std::map names_to_var; - std::map> names_to_refs; - GE_CHECK_NOTNULL(compute_graph); - for (auto &node : compute_graph->GetDirectNode()) { - if (node->GetType() != VARIABLE) { - continue; - } - std::string ref_var_name; - if (!ge::AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_name)) { - names_to_var[node->GetName()] = node; - } else { - names_to_refs[ref_var_name].insert(node); - } - } - - for (auto &name_to_var : names_to_var) { - var_and_var_ref_map_[name_to_var.second] = names_to_refs[name_to_var.first]; - } - return SUCCESS; -} - -void VariableOpPass::CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, - GeTensorDesc &dst_tensor_desc) { - dst_tensor_desc.SetShape(src_tensor_desc.GetShape()); - dst_tensor_desc.SetFormat(src_tensor_desc.GetFormat()); - 
dst_tensor_desc.SetDataType(src_tensor_desc.GetDataType()); -} - -Status VariableOpPass::UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set &nodes) { - for (auto &need_set_node : nodes) { - auto ret = UpdateVarAndRefOutputFormatInfo(final_output, need_set_node); - if (ret != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewVarDesc(ge::ComputeGraphPtr &graph) { - GE_CHECK_NOTNULL(graph); - // renew var manager desc - Status ret = SUCCESS; - for (auto &node : graph->GetDirectNode()) { - bool is_var_node = - (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); - if (is_var_node) { - if (!ge::VarManager::Instance(graph->GetSessionID())->IsVarExist(node->GetName())) { - GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); - continue; - } - GELOGD("var manager exist var node[%s], graph name[%s]", node->GetName().c_str(), graph->GetName().c_str()); - GE_CHECK_NOTNULL(node->GetOpDesc()); - ret = ge::VarManager::Instance(graph->GetSessionID())->RenewCurVarDesc(node->GetName(), node->GetOpDesc()); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road) { - // renew var desc if the trans_road is all reshape or reformat - for (auto &road : fusion_road) { - if (road.node_type != RESHAPE && road.node_type != REFORMAT) { - return SUCCESS; - } - } - - if (!ge::VarManager::Instance(session_id)->IsVarExist(node->GetName())) { - GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); - return SUCCESS; - } - GELOGD("var manager exist var node[%s]", node->GetName().c_str()); - GE_CHECK_NOTNULL(node->GetOpDesc()); - Status ret = ge::VarManager::Instance(session_id)->RenewCurVarDesc(node->GetName(), 
node->GetOpDesc()); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - - return SUCCESS; -} - -} // namespace ge diff --git a/ge/graph/passes/variable_op_pass_bak.h b/ge/graph/passes/variable_op_pass_bak.h deleted file mode 100644 index b9fbb90e..00000000 --- a/ge/graph/passes/variable_op_pass_bak.h +++ /dev/null @@ -1,104 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ -#define GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ -#include -#include -#include -#include "graph/common/transop_util.h" -#include "common/formats/utils/formats_trans_utils.h" -#include "graph/utils/node_utils.h" -#include "graph/graph.h" -#include "graph/manager/graph_var_manager.h" -#include "graph/manager/util/variable_accelerate_ctrl.h" -#include "inc/graph_pass.h" - -namespace ge { -namespace variable_op { -struct NodeDesc { - ge::GeTensorDesc input; - ge::GeTensorDesc output; - bool is_update = false; -}; -} // namespace variable_op -class VariableOpPass : public GraphPass { - public: - explicit VariableOpPass(VarAccelerateCtrl *ctrl) : var_accelerate_ctrl_(ctrl) {} - - ~VariableOpPass() override = default; - - Status Run(ge::ComputeGraphPtr graph) override; - - private: - Status UpdateTransRoad(VarTransRoad &fusion_road, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - Status DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, - map> trans_type_to_changed_desc, - map> trans_type_to_trans_ops, - vector> &delete_trans_nodes); - - Status RenewTransOpDesc(ge::NodePtr &node, bool is_reverse); - - Status RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road); - - Status CheckIfCouldBeOptimized(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &flag); - - Status FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road); - - Status GetSameTransOP(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match); - - Status GetFisrtPathTransInfo(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - void VariableDFS(const NodePtr &node, map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match); - - Status 
UpdateTransInfo(vector &cur_path, bool& is_match, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - Status GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check, - map> &trans_type_to_changed_desc, - vector> &delete_var_ref_trans_nodes); - - Status CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, bool &is_symmetry); - - Status UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node); - - Status GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph); - - void CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, GeTensorDesc &dst_tensor_desc); - - Status UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set &nodes); - - Status RenewVarDesc(ge::ComputeGraphPtr &graph); - - Status RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road); - - map> var_and_var_ref_map_; - - VarAccelerateCtrl *var_accelerate_ctrl_; -}; -} // namespace ge -#endif // GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ diff --git a/ge/graph/passes/variable_prepare_op_pass.cc b/ge/graph/passes/variable_prepare_op_pass.cc index 9231e4eb..f0e11735 100644 --- a/ge/graph/passes/variable_prepare_op_pass.cc +++ b/ge/graph/passes/variable_prepare_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ namespace ge { std::map>> VariablePrepareOpPass::ref_node_without_prototype_map_ = { - {REFSWITCH, {{0, {0, 1}}}}}; + {REFSWITCH, {{0, {0, 1}}}}}; Status VariablePrepareOpPass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); @@ -261,13 +261,13 @@ Status VariablePrepareOpPass::InsertVariableRef(ge::NodePtr &node, int in_index, // Add edge from ref identity node to variable ref node. 
CHECK_FALSE_EXEC( - ge::GraphUtils::AddEdge(ref_identity_node->GetOutDataAnchor(0), variable_ref_node->GetInDataAnchor(0)) == SUCCESS, - GELOGE(FAILED, "Add data edge between ref_identity and variable_ref failed"); - return FAILED); + ge::GraphUtils::AddEdge(ref_identity_node->GetOutDataAnchor(0), variable_ref_node->GetInDataAnchor(0)) == SUCCESS, + GELOGE(FAILED, "Add data edge between ref_identity and variable_ref failed"); + return FAILED); CHECK_FALSE_EXEC( - ge::GraphUtils::AddEdge(node->GetOutControlAnchor(), variable_ref_node->GetInControlAnchor()) == SUCCESS, - GELOGE(FAILED, "Add control edge between ref_identity and variable_ref failed"); - return FAILED); + ge::GraphUtils::AddEdge(node->GetOutControlAnchor(), variable_ref_node->GetInControlAnchor()) == SUCCESS, + GELOGE(FAILED, "Add control edge between ref_identity and variable_ref failed"); + return FAILED); return SUCCESS; } @@ -280,9 +280,9 @@ Status VariablePrepareOpPass::AddControlEdge(const ge::NodePtr &node, const ge:: NodePtr peer_node = peer_in_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(peer_node); CHECK_FALSE_EXEC( - ge::GraphUtils::AddEdge(variable_ref_node->GetOutControlAnchor(), peer_node->GetInControlAnchor()) == SUCCESS, - GELOGE(FAILED, "Add control edge between variable_ref and ref node's peer node failed"); - return FAILED); + ge::GraphUtils::AddEdge(variable_ref_node->GetOutControlAnchor(), peer_node->GetInControlAnchor()) == SUCCESS, + GELOGE(FAILED, "Add control edge between variable_ref and ref node's peer node failed"); + return FAILED); } } return SUCCESS; diff --git a/ge/graph/passes/variable_prepare_op_pass.h b/ge/graph/passes/variable_prepare_op_pass.h index 4cef5b59..563a9be5 100644 --- a/ge/graph/passes/variable_prepare_op_pass.h +++ b/ge/graph/passes/variable_prepare_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not 
use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_ref_delete_op_pass.cc b/ge/graph/passes/variable_ref_delete_op_pass.cc index 8e625857..90cfd747 100644 --- a/ge/graph/passes/variable_ref_delete_op_pass.cc +++ b/ge/graph/passes/variable_ref_delete_op_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/graph/passes/variable_ref_delete_op_pass.h b/ge/graph/passes/variable_ref_delete_op_pass.h index 7f6d1274..43db2703 100755 --- a/ge/graph/passes/variable_ref_delete_op_pass.h +++ b/ge/graph/passes/variable_ref_delete_op_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,8 +27,7 @@ class VariableRefDeleteOpPass : public GraphPass { Status Run(ge::ComputeGraphPtr graph); private: - Status DealVariableRef(ge::ComputeGraphPtr &graph, - ge::NodePtr &variable_ref, + Status DealVariableRef(ge::ComputeGraphPtr &graph, ge::NodePtr &variable_ref, const std::string &ref_var_src_var_name); }; } // namespace ge diff --git a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc index 1c8eb0ec..1321cf20 100644 --- a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc +++ b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "variable_ref_useless_control_out_delete_pass.h" namespace ge { @@ -28,8 +29,8 @@ Status VariableRefUselessControlOutDeletePass::Run(ge::ComputeGraphPtr graph) { } auto src_nodes = node->GetInDataNodes(); if (src_nodes.empty()) { - GELOGW("The variable ref name %s(ref %s) does not has a input node", - node->GetName().c_str(), src_var_name.c_str()); + GELOGW("The variable ref name %s(ref %s) does not has a input node", node->GetName().c_str(), + src_var_name.c_str()); continue; } auto &src_node = src_nodes.at(0); @@ -39,14 +40,12 @@ Status VariableRefUselessControlOutDeletePass::Run(ge::ComputeGraphPtr graph) { auto out_control_anchor = node->GetOutControlAnchor(); for (const auto &dst_node_anchor : out_control_anchor->GetPeerInControlAnchors()) { if (controlled_nodes.count(dst_node_anchor->GetOwnerNode()) > 0) { - GELOGI("Unlink the duplicated control edge from variable ref %s to %s, prev node %s", - node->GetName().c_str(), - dst_node_anchor->GetOwnerNode()->GetName().c_str(), - src_node->GetName().c_str()); + GELOGI("Unlink the duplicated control edge from variable ref %s to %s, prev node %s", node->GetName().c_str(), + dst_node_anchor->GetOwnerNode()->GetName().c_str(), src_node->GetName().c_str()); out_control_anchor->Unlink(dst_node_anchor); } } } return SUCCESS; } -} \ No newline at end of file +} // namespace ge \ No newline at end of file diff --git a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h index fd9dbb00..307754da 100644 --- a/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h +++ b/ge/graph/passes/variable_ref_useless_control_out_delete_pass.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the 
"License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_VARIABLE_REF_USELESS_CONTROL_OUT_DELETE_PASS_H_ #define GE_VARIABLE_REF_USELESS_CONTROL_OUT_DELETE_PASS_H_ @@ -28,4 +29,4 @@ class VariableRefUselessControlOutDeletePass : public GraphPass { Status Run(ge::ComputeGraphPtr graph); }; } // namespace ge -#endif //GE_VARIABLE_REF_USELESS_CONTROL_OUT_DELETE_PASS_H_ +#endif // GE_VARIABLE_REF_USELESS_CONTROL_OUT_DELETE_PASS_H_ diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 20964b6c..20216941 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -92,7 +92,6 @@ #include "graph/passes/unused_op_remove_pass.h" #include "graph/passes/var_is_initialized_op_pass.h" #include "graph/passes/variable_prepare_op_pass.h" -#include "graph/passes/mark_agnostic_pass.h" #include "graph/preprocess/insert_op/util_insert_aipp_op.h" #include "graph/types.h" #include "graph/utils/tensor_utils.h" @@ -119,13 +118,12 @@ #include "graph/passes/variable_prepare_op_pass.h" #include "graph/passes/variable_ref_delete_op_pass.h" - namespace ge { namespace { static std::map output_type_str_to_datatype = { - {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, - {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, - {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"INT8", ge::DT_INT8}, {"INT16", ge::DT_INT16}, + {"UINT16", ge::DT_UINT16}, {"UINT8", ge::DT_UINT8}, {"INT32", ge::DT_INT32}, {"INT64", ge::DT_INT64}, + {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; const char *const kMbatchSwitchnName = "mbatch-switch-name"; @@ -161,9 +159,9 @@ OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) { } GE_IF_BOOL_EXEC( - tensor->SetData(reinterpret_cast(dst_shape.get()), dim_cnt * sizeof(int32_t)) != GRAPH_SUCCESS, - GELOGE(INTERNAL_ERROR, "tensor set data failed"); - return nullptr;) + tensor->SetData(reinterpret_cast(dst_shape.get()), dim_cnt * sizeof(int32_t)) != GRAPH_SUCCESS, + GELOGE(INTERNAL_ERROR, "tensor set data failed"); + return nullptr;) } GELOGD("Create shape input dim [%s]", dst_ge_shape.ToString().c_str()); @@ -175,11 +173,11 @@ void AddTransNodeAttr(const std::string &node_type, const GeTensorDesc &input, c // For format transfer node, the IR definition has src/dst format attrs if (node_type == TRANSDATA) { GE_IF_BOOL_EXEC( - !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_SRC_FORMAT, 
TypeUtils::FormatToSerialString(input.GetFormat())), - GELOGW("SetStr FORMAT_TRANSFER_SRC_FORMAT failed");) + !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_SRC_FORMAT, TypeUtils::FormatToSerialString(input.GetFormat())), + GELOGW("SetStr FORMAT_TRANSFER_SRC_FORMAT failed");) GE_IF_BOOL_EXEC( - !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_DST_FORMAT, TypeUtils::FormatToSerialString(output.GetFormat())), - GELOGW("SetStr FORMAT_TRANSFER_DST_FORMAT failed");) + !AttrUtils::SetStr(op_desc, FORMAT_TRANSFER_DST_FORMAT, TypeUtils::FormatToSerialString(output.GetFormat())), + GELOGW("SetStr FORMAT_TRANSFER_DST_FORMAT failed");) } // For TransposeD node, the IR definition has perm attrs @@ -230,8 +228,8 @@ NodePtr CreateTransNode(const std::string &name, const std::string &node_type, c // for data dump GE_IF_BOOL_EXEC( - !AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())), - GELOGW("CreateTransNode: SetListStr failed");) + !AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, std::move(std::vector())), + GELOGW("CreateTransNode: SetListStr failed");) // Default single input and single output auto ret = op_desc->AddInputDesc(input); @@ -556,11 +554,12 @@ Status ModifyDataNetOutputFormatAndShape(OpDescPtr &op_desc, uint32_t index, For ge::TensorUtils::SetSize(*input, size); ge::TensorUtils::SetSize(*output, size); - GELOGI("Modify Data NetOutput format and shape success, node:%s, index:%d, old_shape:%s, old_Format:%s, " - "new_shape:%s, new_format:%s, new_size:%lu", - op_desc->GetName().c_str(), index, formats::JoinToString(old_shape).c_str(), - ge::TypeUtils::FormatToSerialString(old_format).c_str(), formats::JoinToString(dst_shape_dims).c_str(), - ge::TypeUtils::FormatToSerialString(storage_format).c_str(), size); + GELOGI( + "Modify Data NetOutput format and shape success, node:%s, index:%d, old_shape:%s, old_Format:%s, " + "new_shape:%s, new_format:%s, new_size:%lu", + op_desc->GetName().c_str(), index, 
formats::JoinToString(old_shape).c_str(), + ge::TypeUtils::FormatToSerialString(old_format).c_str(), formats::JoinToString(dst_shape_dims).c_str(), + ge::TypeUtils::FormatToSerialString(storage_format).c_str(), size); } return SUCCESS; @@ -743,8 +742,8 @@ Status ProcessDataNodeDynShape(NodePtr &node_ptr) { return SUCCESS; } -Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tensor_desc_ptr, - Format &storage_format, vector &dst_shape_dims) { +Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tensor_desc_ptr, Format &storage_format, + vector &dst_shape_dims) { GE_CHECK_NOTNULL(op_desc); GE_CHECK_NOTNULL(tensor_desc_ptr); @@ -762,7 +761,8 @@ Status GetStorageFormatAndShape(OpDescPtr &op_desc, const GeTensorDescPtr &tenso op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str(), formats::JoinToString(storage_shape).c_str()); } else { - GELOGE(PARAM_INVALID, "Update node by storage format failed, storage_shape not set. " + GELOGE(PARAM_INVALID, + "Update node by storage format failed, storage_shape not set. 
" "node: [%s], storage_format [%s]", op_desc->GetName().c_str(), TypeUtils::FormatToSerialString(storage_format).c_str()); return FAILED; @@ -1066,7 +1066,7 @@ Status GraphPrepare::CheckRefOp() { GELOGE(PARAM_INVALID, "CheckRefInputNode failed."); return PARAM_INVALID; } - (void)ref_nodes.insert(node); // no need to check value + (void)ref_nodes.insert(node); // no need to check value } } } @@ -1099,8 +1099,8 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { int64_t tensor_size = 0; graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(output, tensor_size); if (graph_status != GRAPH_SUCCESS) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E19012", {"function", "reason"}, {"GetTensorMemorySizeInBytes", "opname is " + node->GetName()}); + ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, + {"GetTensorMemorySizeInBytes", "opname is " + node->GetName()}); GELOGE(graph_status, "GetTensorMemorySizeInBytes failed!"); return FAILED; } @@ -1627,7 +1627,6 @@ Status GraphPrepare::PrepareOptimize() { try { (void)original_graph_passes.AddPass("PrepareOptimize::ShapeOperateOpRemovePass", new ShapeOperateOpRemovePass); (void)original_graph_passes.AddPass("PrepareOptimize::ReplaceTransShapePass", new ReplaceTransShapePass); - (void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass", new MarkAgnosticPass); } catch (std::bad_alloc &e) { GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); return INTERNAL_ERROR; diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h index ef0f3ed3..7c6e4013 100755 --- a/ge/graph/preprocess/graph_preprocess.h +++ b/ge/graph/preprocess/graph_preprocess.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -45,10 +45,8 @@ class GraphPrepare { virtual ~GraphPrepare(); GraphPrepare(const GraphPrepare &in) = delete; GraphPrepare &operator=(const GraphPrepare &in) = delete; - Status PrepareDynShape(ConstGraphPtr graph, - const std::vector &user_input, - ge::ComputeGraphPtr &compute_graph, - uint64_t session_id = 0); + Status PrepareDynShape(ConstGraphPtr graph, const std::vector &user_input, + ge::ComputeGraphPtr &compute_graph, uint64_t session_id = 0); Status RecordAIPPInfo(ge::ComputeGraphPtr &compute_graph); Status PrepareRunningFormatRefiner(); void SetOptions(const GraphManagerOptions &options); @@ -58,8 +56,7 @@ class GraphPrepare { private: Status Init(const ge::Graph &graph, uint64_t session_id = 0); Status CheckGraph(); - Status CheckRefInputNode(const NodePtr &node, const std::string &input_name, - const std::set &ref_nodes); + Status CheckRefInputNode(const NodePtr &node, const std::string &input_name, const std::set &ref_nodes); Status CheckRefOp(); Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); Status AdjustDataOpOutput(const NodePtr &node); diff --git a/ge/graph/preprocess/insert_op/base_insert_op.h b/ge/graph/preprocess/insert_op/base_insert_op.h index b0d7a7a6..355aaae6 100644 --- a/ge/graph/preprocess/insert_op/base_insert_op.h +++ b/ge/graph/preprocess/insert_op/base_insert_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -51,8 +51,7 @@ class InsertOpBase { /// @param [in] graph /// @param [in] aippConfigPath aipp /// - virtual Status InsertAippToGraph(ge::ComputeGraphPtr &graph, std::string &aippConfigPath, - const uint32_t index) = 0; + virtual Status InsertAippToGraph(ge::ComputeGraphPtr &graph, std::string &aippConfigPath, const uint32_t index) = 0; /// /// @ingroup ge_omg diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index b8c51ad1..545fe66f 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -124,11 +124,12 @@ Status GetDataDimN(const ge::NodePtr &data_node, ge::Format format, int64_t &bat return PARAM_INVALID; } } - string errormsg = "its shape size must be in range[3,4] which dynamic aipp is linked, " - "maybe this input is not suitable for dynamic aipp"; - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {data_node->GetName() + " shape size", - to_string(shape.size()), errormsg}); + string errormsg = + "its shape size must be in range[3,4] which dynamic aipp is linked, " + "maybe this input is not suitable for dynamic aipp"; + ErrorManager::GetInstance().ATCReportErrMessage( + "E10001", {"parameter", "value", "reason"}, + {data_node->GetName() + " shape size", to_string(shape.size()), errormsg}); GELOGE(PARAM_INVALID, "The shape size of this node [%s] which linked dynamic aipp must be in range[3, 4], but is %zu", data_node->GetName().c_str(), shape.size()); return PARAM_INVALID; @@ -183,11 +184,6 @@ Status AippOp::InsertAippToGraph(ComputeGraphPtr &graph, std::string &aippConfig GE_CHECK_NOTNULL(graph); NodePtr target_input = nullptr; std::vector> target_edges; - 
- if (this->ConvertRelatedInputNameToRank() != SUCCESS) { - GELOGE(FAILED, "AippOp: convert related input name to rank failed."); - return FAILED; - } GE_CHK_STATUS_RET(this->GetTargetPosition(graph, target_input, target_edges), "Get data nodes position failed"); std::map out_anchors_to_aipp; @@ -228,8 +224,8 @@ Status AippOp::InsertAippToGraph(ComputeGraphPtr &graph, std::string &aippConfig return SUCCESS; } -NodePtr AippOp::CreateAipp(const OutDataAnchorPtr &out_anchor, - const std::string &aippConfigPath, const uint32_t &index) { +NodePtr AippOp::CreateAipp(const OutDataAnchorPtr &out_anchor, const std::string &aippConfigPath, + const uint32_t &index) { const auto &node = out_anchor->GetOwnerNode(); std::string current_name = node->GetName() + "_" + std::to_string(out_anchor->GetIdx()) + "_huawei_aipp"; auto aipp_opdesc_ptr = MakeShared(current_name, AIPP); @@ -271,18 +267,18 @@ NodePtr AippOp::CreateAipp(const OutDataAnchorPtr &out_anchor, Status AippOp::AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index) { GeAttrValue::NAMED_ATTRS aipp_attr; ConvertParamToAttr(aipp_attr); - GE_CHK_BOOL_RET_STATUS(AttrUtils::SetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), - INTERNAL_ERROR, "Set name attrs for aipp node failed"); + GE_CHK_BOOL_RET_STATUS(AttrUtils::SetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), INTERNAL_ERROR, + "Set name attrs for aipp node failed"); - GE_CHK_BOOL_RET_STATUS(AttrUtils::SetStr(op_desc, kAippConfigPath, aipp_cfg_path), - INTERNAL_ERROR, "Set config file path attr for aipp node failed"); + GE_CHK_BOOL_RET_STATUS(AttrUtils::SetStr(op_desc, kAippConfigPath, aipp_cfg_path), INTERNAL_ERROR, + "Set config file path attr for aipp node failed"); std::vector empty_names; GE_CHK_BOOL_RET_STATUS(AttrUtils::SetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, empty_names), INTERNAL_ERROR, "Set ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES attr for aipp node failed"); - 
GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), - INTERNAL_ERROR, "Set kCurrentAippIndex attr for aipp node failed"); + GE_CHK_BOOL_RET_STATUS(AttrUtils::SetInt(op_desc, kCurrentAippIndex, index), INTERNAL_ERROR, + "Set kCurrentAippIndex attr for aipp node failed"); // add input/output desc GeTensorDesc tensor; GE_CHK_GRAPH_STATUS_RET(op_desc->AddInputDesc("images", tensor), "Failed to add input images for aipp node"); @@ -393,9 +389,10 @@ Status AippOp::GetStaticTargetNode(const ComputeGraphPtr &graph, NodePtr &data_n return INTERNAL_ERROR; } target = switchn; - GELOGI("Multi-batch/image size and static aipp for data %s, " - "the aipp node will be insert after %s instead of origin data node", - data_node->GetName().c_str(), switchn->GetName().c_str()); + GELOGI( + "Multi-batch/image size and static aipp for data %s, " + "the aipp node will be insert after %s instead of origin data node", + data_node->GetName().c_str(), switchn->GetName().c_str()); return SUCCESS; } @@ -415,38 +412,6 @@ Status AippOp::GetStaticTargetNode(const ComputeGraphPtr &graph, NodePtr &data_n return SUCCESS; } -Status AippOp::ConvertRelatedInputNameToRank() { - GE_CHECK_NOTNULL(aipp_params_); - - string related_input_name = aipp_params_->related_input_name(); - if(related_input_name.empty()) { - return SUCCESS; - } - - std::vector data_top_names = domi::GetContext().data_top_names; - GELOGI("Convert name to rank start: data size[%zu]", data_top_names.size()); - uint32_t index = 0; - bool convert_flag = false; - for (const auto &data_top_name : data_top_names) { - if (related_input_name == data_top_name) { - aipp_params_->set_related_input_rank(index); - convert_flag = true; - GELOGI("AippOp: rank: %u, top name: %s.", index, data_top_name.c_str()); - break; - } - index++; - } - if (!convert_flag) { - string error_msg = "Top name " + related_input_name + "convert rank failed, Please" - " ensure top name in aipp config is the top name of data node."; - 
ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, "Top name[%s] converts rank failed.", related_input_name.c_str()); - return PARAM_INVALID; - } - - return SUCCESS; -} - Status AippOp::GetTargetPosition(ComputeGraphPtr graph, NodePtr &target_input, std::vector> &target_edges) { @@ -590,8 +555,8 @@ Status AippOp::ValidateParams() { const domi::AippOpParams::AippMode aipp_mode = aipp_params_->aipp_mode(); if (aipp_mode == domi::AippOpParams::dynamic) { AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG( - aipp_params_->max_src_image_size() > 0, PARAM_INVALID, - "For dynamic AIPP params, max_src_image_size must be set which number should be greater than 0"); + aipp_params_->max_src_image_size() > 0, PARAM_INVALID, + "For dynamic AIPP params, max_src_image_size must be set which number should be greater than 0"); } else { AIPP_RETURN_STATUS_AND_REPROT_ERRORMSG(aipp_params_->input_format() != domi::AippOpParams::UNDEFINED, PARAM_INVALID, "Input format of AIPP conf is undefined"); @@ -832,18 +797,15 @@ Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp GeTensorDesc input_tensor(input_shape, FORMAT_ND, DT_UINT8); TensorUtils::SetReuseInput(input_tensor, false); TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); - GE_CHECK_NOTNULL(aipp_node); + const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); string node_name; - // First aippdata name should be definite. 
- if (graph->FindFirstNodeMatchType(AIPPDATA) == nullptr) { - GELOGI("Current graph has no aippdata node, so the name of it must be definite."); + if (index == 0) { node_name = kDynamicAippData; } else { - node_name = string(kDynamicAippData) + "_" + aipp_node->GetName(); + node_name = string(kDynamicAippData) + "_" + to_string(index); } - GELOGI("Current add aippdata node name is %s", node_name.c_str()); - + ++index; // new add aipp_data ops for dynamic aipp param input OpDescPtr op_desc_ptr_data = MakeShared(node_name, AIPPDATA); GE_CHECK_NOTNULL(op_desc_ptr_data); diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.h b/ge/graph/preprocess/insert_op/ge_aipp_op.h index 5e509dda..64c89b62 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.h +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -44,7 +44,6 @@ class AippOp : public InsertOpBase { Status ValidateParams() override; protected: - /// /// @ingroup domi_omg /// @brief Generate Op Desc @@ -61,15 +60,13 @@ class AippOp : public InsertOpBase { Status GetTargetPosition(ge::ComputeGraphPtr graph, ge::NodePtr &target_input, std::vector> &target_edges) override; - Status InsertAippToGraph(ge::ComputeGraphPtr &graph, - std::string &aippConfigPath, - const uint32_t index) override ; + Status InsertAippToGraph(ge::ComputeGraphPtr &graph, std::string &aippConfigPath, const uint32_t index) override; domi::AippOpParams::AippMode GetAippMode() override; private: - AippOp& operator=(const AippOp& aipp_op); - AippOp(const AippOp& aipp_op); + AippOp &operator=(const AippOp &aipp_op); + AippOp(const AippOp &aipp_op); void ConvertParamToAttr(ge::GeAttrValue::NAMED_ATTRS &aipp_attrs); void SetCscDefaultValue(); @@ -82,7 +79,6 @@ class AippOp : public InsertOpBase { Status AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size); Status AddAippAttrbutes(const OpDescPtr &op_desc, const std::string &aipp_cfg_path, const uint32_t &index); Status AddAttrToAippData(const OpDescPtr &aipp_data_op_desc); - Status ConvertRelatedInputNameToRank(); domi::AippOpParams *aipp_params_ = nullptr; ge::NodePtr aipp_node_ = nullptr; @@ -91,4 +87,3 @@ class AippOp : public InsertOpBase { } // namespace ge #endif // GE_GRAPH_PREPROCESS_INSERT_OP_GE_AIPP_OP_H_ - diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 7f4dfbe8..83a16e75 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,7 +34,6 @@ #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" -#include "util_insert_aipp_op.h" using domi::AippOpParams; @@ -116,95 +115,25 @@ void InsertNewOpUtil::ClearNewOps() { } } -Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() { - for (int i = 0; i < insert_op_conf_->aipp_op_size(); i++) { - const domi::AippOpParams *item = insert_op_conf_->mutable_aipp_op(i); - GE_CHECK_NOTNULL(item); - - for (int j = i + 1; j < insert_op_conf_->aipp_op_size(); j++) { - const domi::AippOpParams *another_item = insert_op_conf_->mutable_aipp_op(j); - GE_CHECK_NOTNULL(another_item); - if (another_item->related_input_name().empty()) { - string error_msg = "Can not both set related_input_name and related_input_rank!" - " Please ensure param is the same with the first aipp config(related_input_name)."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not both set related_input_rank and related_input_name!" - " Please ensure param is the same with the first aipp config(related_input_name)."); - return PARAM_INVALID; - } - if (item->related_input_name() == another_item->related_input_name()) { - string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name" - " param is different in different aipp config."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! 
Please ensure related_input_rank param " - "is different in different aipp config."); - return PARAM_INVALID; - } - } - } - - return SUCCESS; -} - -Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() { +Status InsertNewOpUtil::CheckPositionNotRepeat() { for (int i = 0; i < insert_op_conf_->aipp_op_size(); i++) { const domi::AippOpParams *item = insert_op_conf_->mutable_aipp_op(i); - GE_CHECK_NOTNULL(item); for (int j = i + 1; j < insert_op_conf_->aipp_op_size(); j++) { const domi::AippOpParams *another_item = insert_op_conf_->mutable_aipp_op(j); - GE_CHECK_NOTNULL(another_item); - if (!another_item->related_input_name().empty()) { - string error_msg = "Can not both set related_input_rank and related_input_name!" - " Please ensure param is the same with the first aipp config(related_input_rank)."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not both set related_input_rank and related_input_name!" - " Please ensure param is the same with the first aipp config(related_input_rank)."); - return PARAM_INVALID; - } - if (item->related_input_rank() == another_item->related_input_rank()) { - string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank" - " param is different in different aipp config."; - ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error_msg}); - GELOGE(PARAM_INVALID, - "Can not insert aipp op to the same postion! Please ensure related_input_rank param " - "is different in different aipp config."); - return PARAM_INVALID; - } + GE_IF_BOOL_EXEC(item->related_input_rank() == another_item->related_input_rank(), + string errormsg = + "Can not insert aipp to the same postion! Please ensure related_input_rank" + " param is different in different aipp config."; + ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {errormsg}); + GELOGE(PARAM_INVALID, + "Can not insert aipp op to the same postion! 
Please ensure related_input_rank param " + "is different in different aipp config."); + return PARAM_INVALID;); } } return SUCCESS; - -} - -Status InsertNewOpUtil::CheckPositionNotRepeat() { - GE_CHECK_NOTNULL(insert_op_conf_); - - if (insert_op_conf_->aipp_op_size() <= 1) { - GELOGI("Aipp op size[%d] less than 2, no need to check position repeat.", insert_op_conf_->aipp_op_size()); - return SUCCESS; - } - - const domi::AippOpParams *item = insert_op_conf_->mutable_aipp_op(0); - GE_CHECK_NOTNULL(item); - - string related_input_name = item->related_input_name(); - Status ret = FAILED; - if (related_input_name.empty()) { - ret = CheckInputRankPositionNoRepeat(); - } else { - ret = CheckInputNamePositionNotRepeat(); - } - if (ret != SUCCESS) { - GELOGE(FAILED, "Check position not repeat failed."); - return FAILED; - } - - return SUCCESS; } Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { @@ -235,24 +164,23 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) { GE_CHECK_NOTNULL(aippParams); GE_IF_BOOL_EXEC( - aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { - std::unique_ptr currAippParam(new (std::nothrow) domi::AippOpParams()); - GE_CHECK_NOTNULL(currAippParam); - GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); - - if (aippMode == domi::AippOpParams::static_) { - GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, - "The input_format of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, - "The src_image_size_w of all aipp_ops after one Data should be the same"); - GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, - "The src_image_size_h of all aipp_ops after one Data should be the same"); - } else { - GE_CHK_BOOL_RET_STATUS(aippParams->max_src_image_size() == 
currAippParam->max_src_image_size(), - PARAM_INVALID, - "The max_src_image_size of all aipp_ops after one Data should be the same"); - } - }); + aippNodes.size() > 1, for (decltype(aippNodes)::size_type i = 1; i < aippNodes.size(); i++) { + std::unique_ptr currAippParam(new (std::nothrow) domi::AippOpParams()); + GE_CHECK_NOTNULL(currAippParam); + GE_CHK_STATUS(GetAippParams(currAippParam, aippNodes[i])); + + if (aippMode == domi::AippOpParams::static_) { + GE_CHK_BOOL_RET_STATUS(aippParams->input_format() == currAippParam->input_format(), PARAM_INVALID, + "The input_format of all aipp_ops after one Data should be the same"); + GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_w() == currAippParam->src_image_size_w(), PARAM_INVALID, + "The src_image_size_w of all aipp_ops after one Data should be the same"); + GE_CHK_BOOL_RET_STATUS(aippParams->src_image_size_h() == currAippParam->src_image_size_h(), PARAM_INVALID, + "The src_image_size_h of all aipp_ops after one Data should be the same"); + } else { + GE_CHK_BOOL_RET_STATUS(aippParams->max_src_image_size() == currAippParam->max_src_image_size(), PARAM_INVALID, + "The max_src_image_size of all aipp_ops after one Data should be the same"); + } + }); } return SUCCESS; @@ -652,7 +580,7 @@ Status InsertNewOpUtil::GetAllAipps(const NodePtr &data_node, const NodePtr &nod Status InsertNewOpUtil::RecordAIPPInfoToData(const ComputeGraphPtr &graph) { GELOGI("Start to record aipp info to Data."); - std::map data_next_node_map; + std::map> data_next_node_map; for (auto &node : graph->GetDirectNode()) { if (node->GetType() == DATA) { GE_RETURN_IF_ERROR(GetDataRelatedNode(node, data_next_node_map)); diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.h b/ge/graph/preprocess/insert_op/util_insert_aipp_op.h index 52e7ed5d..ae431c32 100644 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.h +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd 
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,10 +51,6 @@ class InsertNewOpUtil { Status GetAippParams(const std::unique_ptr &aippParams, const ge::NodePtr &aipp_node); - Status CheckInputNamePositionNotRepeat(); - - Status CheckInputRankPositionNoRepeat(); - Status CheckGraph(const ge::ComputeGraphPtr &graph); InsertNewOpUtil() = default; diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index c0ba89f4..336527fb 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "graph/preprocess/multi_batch_copy_graph.h" #include @@ -54,7 +55,9 @@ const int kDataOutIndex = 0; const int kDataInIndex = 0; const int kMergeDataOutIndex = 0; const int kStaticOutput = -1; - +const int kDynmaicDims = -1; +const int kDynamicBatchDynamicDimsNum = 1; +const int kDynamicImgSizeDynamciDimsNum = 2; inline bool IsDataLikeType(const std::string &node_type) { return (node_type == DATA) || (node_type == AIPP); } @@ -110,9 +113,8 @@ NodePtr InsertCopyNode(const NodePtr &node, size_t n) { desc->CopyAttrsFrom(*src_op_desc); for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { auto input_desc = desc->MutableInputDesc(i); - GE_IF_BOOL_EXEC(input_desc == nullptr, - GELOGW("Get null input desc by index %u from node %s when copy from %s", i, - desc->GetName().c_str(), node->GetName().c_str()); + GE_IF_BOOL_EXEC(input_desc == nullptr, GELOGW("Get null input desc by index %u from node %s when copy from %s", i, + desc->GetName().c_str(), node->GetName().c_str()); continue); input_desc->CopyAttrsFrom(src_op_desc->GetInputDesc(i)); @@ -211,16 +213,16 @@ Status MultiBatchGraphCopyer::CopyGraph() { return ret; } + ret = CheckDataShape(origin_data_nodes_); + if (ret != SUCCESS) { + return ret; + } + if (LabelStatus() != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to label status for all nodes."); return INTERNAL_ERROR; } - ret = CheckAndParseDynamicData(); - if (ret != SUCCESS) { - return ret; - } - ret = CreateNewNodes(); if (ret != SUCCESS) { return ret; @@ -314,62 +316,6 @@ Status MultiBatchGraphCopyer::LabelStatus() { return SUCCESS; } -Status MultiBatchGraphCopyer::CheckAndParseDynamicData(){ - size_t unknown_shape_count = 0; - auto data_name_and_shape = GetLocalOmgContext().user_input_dims; - GELOGD("raw data_name_and_shape size: %zu", data_name_and_shape.size()); - for (const auto &node : origin_all_nodes_) { - auto data_desc = NodeUtils::GetOutputDesc(*node, kDataOutIndex); - auto data_shape = data_desc.GetShape(); - auto data_format = 
data_desc.GetFormat() == Format::FORMAT_NCHW ? "NCHW" : - data_desc.GetFormat() == Format::FORMAT_NHWC ? "NHWC" : "Others"; - - auto data_name = node->GetName(); - auto branch_status = GetNodeStatus(node); - if (branch_status != kNodeStartNode) { - continue; - } - if (IsAllDimsPositive(data_shape.GetDims())) { - continue; - } - ++unknown_shape_count; - auto iter = find(data_name_order_.begin(), data_name_order_.end(), data_name); - if (iter == data_name_order_.end()) { - if (dynamic_type_ == DynamicType::kDynamicBatch) { - auto ret = CheckDynamicBatchShape(data_shape.GetDims(), data_name); - if (!ret) { - return PARAM_INVALID; - } - } else if (dynamic_type_ == DynamicType::kDynamicImageSize) { - auto ret = CheckDynamicImageSizeShape(data_shape.GetDims(), data_name, data_format); - if (!ret) { - return PARAM_INVALID; - } - } else if (dynamic_type_ == DynamicType::kDynamicDims) { - ErrorManager::GetInstance().ATCReportErrMessage("E10001", - {"parameter", "reason"}, - {"--input_shape", - "all dynamic data must be set in --input_shape"}); - GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set int --input_shape", - node->GetName().c_str(), data_shape.ToString().c_str()); - return INTERNAL_ERROR; - } - data_name_and_shape.emplace_back(data_name, data_shape.GetDims()); - } - } - auto ret = ParserDataToDynmaicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_); - if (ret != SUCCESS){ - return ret; - } - if (unknown_shape_count == 0) { - ErrorManager::GetInstance().ATCReportErrMessage("E10040"); - GELOGE(PARAM_INVALID, - "Need unknow shape data when user set --dynamic_batch_size, --dynamic_image_size or --dynamic_dims"); - return PARAM_INVALID; - } - return SUCCESS; -} - Status MultiBatchGraphCopyer::CreateNewNodes() { shape_data_ = InsertShapeDataNode(); if (shape_data_ == nullptr) { @@ -385,6 +331,10 @@ Status MultiBatchGraphCopyer::CreateNewNodes() { switch (branch_status) { case kNodeStartNode: GELOGD("Name: %s, type: %s, status: kNodeStartNode.", 
node->GetName().c_str(), node->GetType().c_str()); + ret = UpdateDataToDynamicInfo(node); + if (ret != SUCCESS) { + break; + } ret = InsertSwitchNForData(node); if (ret == SUCCESS) { ret = UpdateMaxShapeToData(node); @@ -702,6 +652,7 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); auto data_name = data->GetName(); (void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); + if (IsAllDimsPositive(data_shape.GetDims())) { GELOGI("The shape of data %s are positive(%s), skip the multi batch process", data->GetName().c_str(), data_shape.ToString().c_str()); @@ -746,7 +697,7 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { GELOGE(INTERNAL_ERROR, "Failed to add attr value on output %zu tensor", i); return INTERNAL_ERROR; } - (void) AttrUtils::SetListInt(tensor, ATTR_NAME_COMBINED_DYNAMIC_DIMS, shape.GetDims()); + (void)AttrUtils::SetListInt(tensor, ATTR_NAME_COMBINED_DYNAMIC_DIMS, shape.GetDims()); if (switchn_desc->AddOutputDesc("output" + std::to_string(i), tensor) != GRAPH_SUCCESS) { GELOGE(GRAPH_FAILED, "Opdesc AddOutputDesc failed"); return GRAPH_FAILED; @@ -780,6 +731,57 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { data_nodes_to_switchn_[data.get()] = switchn; return SUCCESS; } +Status MultiBatchGraphCopyer::UpdateDataToDynamicInfo(const NodePtr &data) { + auto data_desc = NodeUtils::GetOutputDesc(*data, kDataOutIndex); + auto data_shape = data_desc.GetShape(); + auto data_format = data_desc.GetFormat(); + auto data_name = data->GetName(); + if (IsAllDimsPositive(data_shape.GetDims())) { + return SUCCESS; + } + if (data_to_dynamic_info_.find(data_name) == data_to_dynamic_info_.end()) { + auto data_shape_dims = data_shape.GetDims(); + auto dynamic_dims_num = std::count_if(data_shape_dims.begin(), data_shape_dims.end(), + [&data_shape_dims](int64_t dim) { return 
dim < 0; }); + if (dynamic_type_ == DynamicType::kDynamicBatch) { + if (dynamic_dims_num != kDynamicBatchDynamicDimsNum || data_shape.GetDim(0) != kDynmaicDims) { + GELOGE(INTERNAL_ERROR, "data: %s shape:%s do not satisfy dynamic batch rule", data->GetName().c_str(), + data_shape.ToString().c_str()); + return INTERNAL_ERROR; + } + } else if (dynamic_type_ == DynamicType::kDynamicImageSize) { + int64_t height = 0; + int64_t width = 0; + if (data_format == FORMAT_NCHW) { + height = data_shape.GetDim(NCHW_DIM_H); + width = data_shape.GetDim(NCHW_DIM_W); + } else if (data_format == FORMAT_NHWC) { + height = data_shape.GetDim(NHWC_DIM_H); + width = data_shape.GetDim(NHWC_DIM_W); + } + if (dynamic_dims_num != kDynamicImgSizeDynamciDimsNum || height != kDynmaicDims || width != kDynmaicDims) { + GELOGE(INTERNAL_ERROR, "data: %s shape:%s do not satisfy dynamic image size rule", data->GetName().c_str(), + data_shape.ToString().c_str()); + return INTERNAL_ERROR; + } + } else if (dynamic_type_ == DynamicType::kDynamicDims) { + GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set int --input_shape", data->GetName().c_str(), + data_shape.ToString().c_str()); + return INTERNAL_ERROR; + } + // all data has dynamic dims are not in atc parameter --input_shape + if (data_to_dynamic_info_.empty()) { + vector>> tmp_data_name_and_shape{std::make_pair(data_name, data_shape_dims)}; + auto ret = ParserDataToDynmaicInfo(shapes_, tmp_data_name_and_shape, data_to_dynamic_info_); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "parse data : %s dynamic gear info failed", data_name.c_str()); + return INTERNAL_ERROR; + } + } + data_to_dynamic_info_[data_name] = data_to_dynamic_info_.begin()->second; + } + return SUCCESS; +} Status MultiBatchGraphCopyer::InsertMergeForEdgeNode(const NodePtr &node) { for (auto &in_data_anchor : node->GetAllInDataAnchors()) { auto src_out_anchor = in_data_anchor->GetPeerOutAnchor(); @@ -852,7 +854,7 @@ Status MultiBatchGraphCopyer::LinkEdges() { Status 
MultiBatchGraphCopyer::LinkDataToSwitchN(const NodePtr &data) { auto switchn = data_nodes_to_switchn_[data.get()]; auto ret = - GraphUtils::AddEdge(shape_data_->GetOutDataAnchor(kDataOutIndex), switchn->GetInDataAnchor(kSwitchNPredIndex)); + GraphUtils::AddEdge(shape_data_->GetOutDataAnchor(kDataOutIndex), switchn->GetInDataAnchor(kSwitchNPredIndex)); GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link shape data %s to switchn %s", shape_data_->GetName().c_str(), switchn->GetName().c_str()); return INTERNAL_ERROR); @@ -1030,11 +1032,18 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { GELOGD("There is no multi-batch options, no need to process multi-batch copy"); return SUCCESS; } + map>> data_to_dynamic_info; + // parser data dynamic info from atc parameter --input_shape + if (ParserDataToDynmaicInfo(shapes, GetLocalOmgContext().user_input_dims, data_to_dynamic_info) != SUCCESS) { + GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed"); + return PARAM_INVALID; + } DynamicType dynamic_type = DynamicType::kDynamicUnknown; if (!GetLocalOmgContext().dynamic_batch_size.empty()) { dynamic_type = DynamicType::kDynamicBatch; } else if (!GetLocalOmgContext().dynamic_image_size.empty()) { - dynamic_type = DynamicType::kDynamicImageSize;; + dynamic_type = DynamicType::kDynamicImageSize; + ; } else if (!GetLocalOmgContext().dynamic_dims.empty()) { dynamic_type = DynamicType::kDynamicDims; } @@ -1048,6 +1057,7 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { } copyer.SetDynamicType(dynamic_type); copyer.SetUserDesignateShape(user_designate_shape); + copyer.SetDataToDynamicInfo(data_to_dynamic_info); return copyer.CopyGraph(); } @@ -1067,8 +1077,8 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { // +-----------+ / // | Data | --------------->/ // +-----------+ -void GetDynamicShapeByGraph(const ComputeGraphPtr &graph, const NodePtr &node, - set &dynamic_output_index, vector &dynamic_output_dims) { +void 
GetDynamicShapeByGraph(const ComputeGraphPtr &graph, const NodePtr &node, set &dynamic_output_index, + vector &dynamic_output_dims) { GELOGD("Try get dynamic shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); const auto &func_desc = node->GetOpDesc(); if (!func_desc->HasAttr(ATTR_NAME_BATCH_NUM)) { @@ -1129,8 +1139,8 @@ void GetDynamicShapeByGraph(const ComputeGraphPtr &graph, const NodePtr &node, // +-----------+ /. // | Data | --------------------------------------------------------------------------->/. j = 1 // +-----------+ -void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, - set &dynamic_output_index, vector &dynamic_output_dims) { +void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, set &dynamic_output_index, + vector &dynamic_output_dims) { GELOGD("Try get dynamic shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); const auto &netoutput_desc = node->GetOpDesc(); const auto &inputnode_to_netoutput = node->GetInAllNodes(); @@ -1154,8 +1164,8 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, } // Connect NetOutput directly -void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, - const set &dynamic_output_index, vector &dynamic_output_dims) { +void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, const set &dynamic_output_index, + vector &dynamic_output_dims) { GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); const auto &netoutput_desc = node->GetOpDesc(); const auto &inputnode_to_netoutput = node->GetInAllNodes(); diff --git a/ge/graph/preprocess/multi_batch_copy_graph.h b/ge/graph/preprocess/multi_batch_copy_graph.h index f8aa6ab4..062b98d2 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/ge/graph/preprocess/multi_batch_copy_graph.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei 
Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PREPROCESS_MULTI_BATCH_COPY_GRAPH_H_ #define GE_GRAPH_PREPROCESS_MULTI_BATCH_COPY_GRAPH_H_ #include @@ -58,9 +59,7 @@ class MultiBatchGraphCopyer { void SetDataToDynamicInfo(const map>> &designate_shape) { data_to_dynamic_info_ = designate_shape; } - void SetDynamicType(const DynamicType dynamic_type) { - dynamic_type_ = dynamic_type; - } + void SetDynamicType(const DynamicType dynamic_type) { dynamic_type_ = dynamic_type; } Status CopyGraph(); private: @@ -103,7 +102,7 @@ class MultiBatchGraphCopyer { Status LinkNodeToMerge(const NodePtr &node, int out_index, const NodePtr &merge); Status CopyInDataEdges(const NodePtr &origin_node, int batch_num, const NodePtr ©ed_node); Status CopyInControlEdges(const NodePtr &node, int batch_num, const NodePtr ©ed_node); - Status CheckAndParseDynamicData(); + Status UpdateDataToDynamicInfo(const NodePtr &node); bool IsInBatchBranch(const NodePtr &node); NodeStatus GetNodeStatus(const NodePtr &node) { return origin_nodes_status_[node.get()]; }; Status CheckCopyResult(const std::vector &start_nodes); diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index 9909b0dc..005240ca 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,18 +26,12 @@ #include "graph/utils/node_utils.h" #include "graph/ge_context.h" #include "graph/common/local_context.h" -#include "framework/common/types.h" namespace ge { namespace multibatch { constexpr int kDecimal = 10; constexpr uint8_t kMaxShapesCount = 100; constexpr uint8_t kMinShapesCount = 2; -const int kDynmaicDims = -1; -const int kDynamicBatchDynamicDimsNum = 1; -const int kDynamicImgSizeDynamciDimsNum = 2; -const size_t kMaxNDDimNum = 4; -const size_t kMinNDDimNum = 1; void ParseDynamicSize(string dynamic_size, vector> &shapes) { std::vector shape_strs = ge::StringUtils::Split(dynamic_size, ';'); @@ -107,15 +101,15 @@ bool InitDynamicParams(vector> &shapes) { /// Status ParserDataToDynmaicInfo(const vector> &shapes, vector>> &data_name_and_shape, - map> > &data_to_dynamic_info) { + map>> &data_to_dynamic_info) { size_t cur_data_index = 0; for (size_t index = 0; index < data_name_and_shape.size(); ++index) { auto &cur_item = data_name_and_shape[index]; auto &data_name = cur_item.first; auto &data_shape = cur_item.second; - auto dynamic_dims_num = std::count_if(data_shape.begin(), data_shape.end(), - [&data_shape](int64_t dim){ return dim < 0; }); - vector > dynamic_info; + auto dynamic_dims_num = + std::count_if(data_shape.begin(), data_shape.end(), [&data_shape](int64_t dim) { return dim < 0; }); + vector> dynamic_info; for (auto &dynamic_gear_info : shapes) { vector one_gear; if (dynamic_gear_info.size() == static_cast(dynamic_dims_num)) { @@ -143,7 +137,6 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, return SUCCESS; } - /// /// @ingroup ge /// @brief Check Dynamic Param is invalid. 
@@ -153,7 +146,7 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, Status CheckDynamicParams(const vector> &shapes) { if (shapes.size() < kMinShapesCount) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10035", {"shapesize", "minshapesize"}, {std::to_string(shapes.size()), std::to_string(kMinShapesCount - 1)}); + "E10035", {"shapesize", "minshapesize"}, {std::to_string(shapes.size()), std::to_string(kMinShapesCount - 1)}); GELOGE(PARAM_INVALID, "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s " "value size [%zu] must be greater than [%zu].", @@ -162,7 +155,7 @@ Status CheckDynamicParams(const vector> &shapes) { } if (shapes.size() > kMaxShapesCount) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10036", {"shapesize", "maxshapesize"}, {std::to_string(shapes.size()), std::to_string(kMaxShapesCount + 1)}); + "E10036", {"shapesize", "maxshapesize"}, {std::to_string(shapes.size()), std::to_string(kMaxShapesCount + 1)}); GELOGE(PARAM_INVALID, "Input parameter[--dynamic_batch_size, --dynamic_image_size or --dynamic_dims]'s " "value size [%zu] must be less than [%zu].", @@ -212,9 +205,9 @@ Status CalcShape(const std::vector &batch_shape, GeShape &data_shape) { if (data_shape.GetDim(i) < 0) { if (batch_shape_index >= batch_shape.size()) { ErrorManager::GetInstance().ATCReportErrMessage( - "E19012", {"function", "reason"}, - {"CalcShape", "the batch shape count " + std::to_string(batch_shape.size()) + - " does not match the data shape " + data_shape.ToString()}); + "E19012", {"function", "reason"}, + {"CalcShape", "the batch shape count " + std::to_string(batch_shape.size()) + + " does not match the data shape " + data_shape.ToString()}); GELOGE(PARAM_INVALID, "Failed to calc tensor shape, the batch shape count %zu, does not match the data shape %s", batch_shape.size(), data_shape.ToString().c_str()); @@ -225,8 +218,9 @@ Status CalcShape(const std::vector &batch_shape, GeShape &data_shape) { } if (batch_shape_index != 
batch_shape.size()) { ErrorManager::GetInstance().ATCReportErrMessage( - "E19012", {"function", "reason"}, {"CalcShape", "the batch shape count " + std::to_string(batch_shape.size()) + - " does not match the data shape " + data_shape.ToString()}); + "E19012", {"function", "reason"}, + {"CalcShape", "the batch shape count " + std::to_string(batch_shape.size()) + " does not match the data shape " + + data_shape.ToString()}); GELOGE(PARAM_INVALID, "Failed to calc tensor shape, the batch shape count %zu, does not match the data shape %s", batch_shape.size(), data_shape.ToString().c_str()); return PARAM_INVALID; @@ -258,62 +252,5 @@ Status StampDynamicType(const OpDescPtr &op_desc) { } return SUCCESS; } - -/// -/// @ingroup ge -/// @brief Check dynamic batch Shape. -/// @param [in] const vector &shape: data_shape to be checked. -/// @param [in] const string &data_name: cur data name. -/// @return 0: true/false -/// -bool CheckDynamicBatchShape(const vector &shape, const string &data_name) { - if (shape[0] == kDynmaicDims) { - for (size_t i = 1; i < shape.size(); ++i) { - if (shape[i] < 1) { - ErrorManager::GetInstance().ATCReportErrMessage("E10018", {"index", "shape"}, - {std::to_string(i), std::to_string(shape[i])}); - GELOGE(ge::PARAM_INVALID, - "Only batch N can be -1 when set --dynamic_batch_size, current data: %s shape[%zu] is %ld", - data_name.c_str(), i, shape[i]); - return false; - } - } - return true; - } else { - return false; - } -} - -/// -/// @ingroup ge -/// @brief Check Dynamic image size shape. -/// @param [in] unordered_map> &shape_map: map of data_name and data_shape. -/// @param [in] const std::string &input_format: format of input. 
-/// @return 0: true/false -/// -bool CheckDynamicImageSizeShape(const vector &shape, const string &data_name, - const std::string &input_format) { - int64_t height = 0; - int64_t width = 0; - if (input_format == "NCHW") { - height = shape[NCHW_DIM_H]; - width = shape[NCHW_DIM_W]; - } - - if (input_format == "NHWC") { - height = shape[NHWC_DIM_H]; - width = shape[NHWC_DIM_W]; - } - - if (height == kDynmaicDims && width == kDynmaicDims && - std::count(shape.begin(), shape.end(), kDynmaicDims) == kDynamicImgSizeDynamciDimsNum) { - return true; - } else { - ErrorManager::GetInstance().ATCReportErrMessage("E10019"); - GELOGE(ge::PARAM_INVALID, - "--input_shape's shape is invalid, only height and width can be -1 when set --dynamic_image_size."); - return false; - } -} } // namespace multibatch } // namespace ge diff --git a/ge/graph/preprocess/multi_batch_options.h b/ge/graph/preprocess/multi_batch_options.h index 8563f2f1..18f667ae 100644 --- a/ge/graph/preprocess/multi_batch_options.h +++ b/ge/graph/preprocess/multi_batch_options.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,28 +70,6 @@ Status ParserDataToDynmaicInfo(const vector> &shapes, /// @return 0: SUCCESS / others: INTERNAL_ERROR /// Status StampDynamicType(const OpDescPtr &op_desc); - -/// -/// @ingroup ge -/// @brief Check dynamic batch Shape. -/// @param [in] const vector &shape: data_shape to be checked. -/// @param [in] const string &data_name: cur data name. -/// @return 0: true/false -/// -bool CheckDynamicBatchShape(const vector &shape, const string &data_name); - -/// -/// @ingroup ge -/// @brief Check Dynamic image size shape. -/// @param [in] unordered_map> &shape_map: map of data_name and data_shape. -/// @param [in] const string &data_name: cur data name. 
-/// @param [in] const std::string &input_format: cur data format. -/// @param [in] const std::string &input_format: format of input. -/// @return 0: true/false -/// -bool CheckDynamicImageSizeShape(const vector &shape, const string &data_name, - const std::string &input_format); - } // namespace multibatch } // namespace ge -#endif // GE_GRAPH_PREPROCESS_MULTI_BATCH_OPTIONS_H_ +#endif // GE_GRAPH_PREPROCESS_MULTI_BATCH_OPTIONS_H_ diff --git a/ge/host_cpu_engine/common/constant/constant.h b/ge/host_cpu_engine/common/constant/constant.h index b9603b6a..a3cabdc4 100644 --- a/ge/host_cpu_engine/common/constant/constant.h +++ b/ge/host_cpu_engine/common/constant/constant.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/engine/host_cpu_engine.cc b/ge/host_cpu_engine/engine/host_cpu_engine.cc index cdbad1ed..648e13b1 100644 --- a/ge/host_cpu_engine/engine/host_cpu_engine.cc +++ b/ge/host_cpu_engine/engine/host_cpu_engine.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/engine/host_cpu_engine.h b/ge/host_cpu_engine/engine/host_cpu_engine.h index c8d5608f..ecafd98b 100644 --- a/ge/host_cpu_engine/engine/host_cpu_engine.h +++ b/ge/host_cpu_engine/engine/host_cpu_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_cpu_engine/module.mk b/ge/host_cpu_engine/module.mk index 3c8e0cc9..e35c68c9 100644 --- a/ge/host_cpu_engine/module.mk +++ b/ge/host_cpu_engine/module.mk @@ -15,7 +15,6 @@ local_lib_inc_path := proto/task.proto \ ${TOPDIR}third_party/protobuf/include \ ${TOPDIR}inc/framework \ $(TOPDIR)framework/domi \ - $(TOPDIR)graphengine/ge \ #compiler for host include $(CLEAR_VARS) @@ -56,83 +55,3 @@ LOCAL_SRC_FILES := $(local_lib_src_files) LOCAL_C_INCLUDES := $(local_lib_inc_path) include ${BUILD_HOST_SHARED_LIBRARY} - -#compiler for host ops kernel builder -include $(CLEAR_VARS) -LOCAL_MODULE := libhost_cpu_opskernel_builder -LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 -LOCAL_LDFLAGS := - -LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ - libc_sec \ - libslog \ - libgraph \ - libregister \ - -LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc - -LOCAL_C_INCLUDES := $(local_lib_inc_path) - -include ${BUILD_HOST_SHARED_LIBRARY} - -#compiler for host static lib -include $(CLEAR_VARS) -LOCAL_MODULE := libhost_cpu_opskernel_builder -LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 -LOCAL_LDFLAGS := - -LOCAL_STATIC_LIBRARIES := libprotobuf \ - libgraph \ - libregister \ - -LOCAL_SHARED_LIBRARIES := libc_sec \ - libslog \ - -LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc - -LOCAL_C_INCLUDES := $(local_lib_inc_path) - -include ${BUILD_HOST_STATIC_LIBRARY} - -#compiler for device static lib -include $(CLEAR_VARS) -LOCAL_MODULE := libhost_cpu_opskernel_builder -LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 -LOCAL_LDFLAGS := - -LOCAL_STATIC_LIBRARIES := libprotobuf \ - libgraph \ - libregister \ - -LOCAL_SHARED_LIBRARIES := libc_sec \ - libslog \ - -LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc - -LOCAL_C_INCLUDES := $(local_lib_inc_path) - -include ${BUILD_STATIC_LIBRARY} - -#compiler for atc ops kernel builder -include $(CLEAR_VARS) -LOCAL_MODULE := 
atclib/libhost_cpu_opskernel_builder -LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 -LOCAL_LDFLAGS := - -LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ - libc_sec \ - libslog \ - libgraph \ - libregister \ - -LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc - -LOCAL_C_INCLUDES := $(local_lib_inc_path) - -include ${BUILD_HOST_SHARED_LIBRARY} diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc deleted file mode 100644 index adb252bc..00000000 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.cc +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "host_cpu_ops_kernel_builder.h" -#include -#include "common/ge_inner_error_codes.h" -#include "ge/ge_api_types.h" -#include "graph/utils/node_utils.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/type_utils.h" -#include "framework/common/debug/ge_log.h" -#include "host_cpu_engine/common/constant/constant.h" -#include "register/ops_kernel_builder_registry.h" - -namespace ge { -namespace host_cpu { -REGISTER_OPS_KERNEL_BUILDER(kHostCpuOpKernelLibName, HostCpuOpsKernelBuilder); - -Status HostCpuOpsKernelBuilder::Finalize() { - return SUCCESS; -} -Status HostCpuOpsKernelBuilder::Initialize(const map &options) { - return SUCCESS; -} - -Status HostCpuOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { - OpDescPtr op_desc = ge_node.GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(FAILED, "CalcOpRunningParam failed, as op desc is null"); - return FAILED; - } - - bool is_shape_unknown = false; - if (NodeUtils::GetNodeUnknownShapeStatus(ge_node, is_shape_unknown) == GRAPH_SUCCESS) { - if (is_shape_unknown) { - GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str()); - return SUCCESS; - } - } - - const string name = ge_node.GetName(); - const string type = ge_node.GetType(); - GELOGD("Calc op[%s:%s] running param, output size=%zu.", name.c_str(), type.c_str(), op_desc->GetOutputsSize()); - - for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { - GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast(i)); - Format format = output_tensor.GetFormat(); - DataType data_type = output_tensor.GetDataType(); - - int64_t mem_size = 0; - // If mem size has been set, no need reset. 
- if ((TensorUtils::GetSize(output_tensor, mem_size) == GRAPH_SUCCESS) && (mem_size > 0)) { - GELOGD("Op[%s:%s] out[%zu] mem size has been set, no need calc again, format=%s, data_type=%s, mem_size=%ld.", - name.c_str(), type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str(), mem_size); - continue; - } - - int64_t output_mem_size = 0; - GeShape output_shape = output_tensor.GetShape(); - if ((TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size) != GRAPH_SUCCESS) || - (output_mem_size < 0)) { - GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, mem_size=%ld, format=%s, data_type=%s.", - name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); - return FAILED; - } - GELOGI("Calc op[%s:%s] out[%zu] mem size is %ld, format=%s, data_type=%s.", - name.c_str(), type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); - - TensorUtils::SetSize(output_tensor, output_mem_size); - if (op_desc->UpdateOutputDesc(static_cast(i), output_tensor) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Update op[%s:%s] out[%zu] desc failed, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, - TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); - return FAILED; - } - } - - GELOGD("Calc op[%s:%s] running param success.", name.c_str(), type.c_str()); - return SUCCESS; -} - -Status HostCpuOpsKernelBuilder::GenerateTask(const Node &node, RunContext &context, vector &tasks) { - // no need to generate device task - return SUCCESS; -} -} // namespace host_cpu -} // namespace ge \ No newline at end of file diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h deleted file mode 100644 
index 82375b9f..00000000 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_builder.h +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ -#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ - -#include "common/opskernel/ops_kernel_builder.h" - -namespace ge { -namespace host_cpu { -class HostCpuOpsKernelBuilder : public OpsKernelBuilder { - public: - Status Initialize(const map &options) override; - - Status Finalize() override; - - Status CalcOpRunningParam(Node &node) override; - - Status GenerateTask(const Node &node, RunContext &context, std::vector &tasks) override; -}; -} // namespace host_cpu -} // namespace ge - -#endif // GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ diff --git a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc index de20492c..4e7be2d5 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,11 +18,14 @@ #include #include "common/constant/constant.h" #include "ge/ge_api_types.h" +#include "common/ge/ge_util.h" +#include "common/ge_inner_error_codes.h" #include "framework/common/debug/ge_log.h" #include "graph/utils/node_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "op/op_factory.h" +#include "proto/task.pb.h" namespace ge { namespace host_cpu { @@ -55,8 +58,71 @@ Status HostCpuOpsKernelInfoStore::Finalize() { return SUCCESS; } +Status HostCpuOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) { + OpDescPtr op_desc = ge_node.GetOpDesc(); + if (op_desc == nullptr) { + GELOGE(FAILED, "CalcOpRunningParam failed, as op desc is null"); + return FAILED; + } + + bool is_shape_unknown = false; + if (NodeUtils::GetNodeUnknownShapeStatus(ge_node, is_shape_unknown) == GRAPH_SUCCESS) { + if (is_shape_unknown) { + GELOGI("op:%s is unknown shape, does not need to calc output size.", ge_node.GetName().c_str()); + return SUCCESS; + } + } + + const string name = ge_node.GetName(); + const string type = ge_node.GetType(); + GELOGD("Calc op[%s:%s] running param, output size=%zu.", name.c_str(), type.c_str(), op_desc->GetOutputsSize()); + + for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { + GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast(i)); + Format format = output_tensor.GetFormat(); + DataType data_type = output_tensor.GetDataType(); + + int64_t mem_size = 0; + // If mem size has been set, no need reset. 
+ if ((TensorUtils::GetSize(output_tensor, mem_size) == GRAPH_SUCCESS) && (mem_size > 0)) { + GELOGD("Op[%s:%s] out[%zu] mem size has been set, no need calc again, format=%s, data_type=%s, mem_size=%ld.", + name.c_str(), type.c_str(), i, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), mem_size); + continue; + } + + int64_t output_mem_size = 0; + GeShape output_shape = output_tensor.GetShape(); + if ((TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size) != GRAPH_SUCCESS) || + (output_mem_size < 0)) { + GELOGE(FAILED, "Calc op[%s:%s] out[%zu] mem size failed, mem_size=%ld, format=%s, data_type=%s.", name.c_str(), + type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); + return FAILED; + } + GELOGI("Calc op[%s:%s] out[%zu] mem size is %ld, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, + output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); + + TensorUtils::SetSize(output_tensor, output_mem_size); + if (op_desc->UpdateOutputDesc(static_cast(i), output_tensor) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Update op[%s:%s] out[%zu] desc failed, format=%s, data_type=%s.", name.c_str(), type.c_str(), i, + TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); + return FAILED; + } + } + + GELOGD("Calc op[%s:%s] running param success.", name.c_str(), type.c_str()); + return SUCCESS; +} + void HostCpuOpsKernelInfoStore::GetAllOpsKernelInfo(map &infos) const { infos = op_info_map_; } +Status HostCpuOpsKernelInfoStore::GenerateTask(const Node &node, RunContext &context, vector &tasks) { + // no need to generate device task + return SUCCESS; +} + bool HostCpuOpsKernelInfoStore::CheckSupported(const OpDescPtr &op_desc, std::string &) const { if (op_desc == nullptr) { return false; diff --git 
a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h index f7539f8e..1202cc8a 100644 --- a/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h +++ b/ge/host_cpu_engine/ops_kernel_store/host_cpu_ops_kernel_info.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,6 +57,22 @@ class HostCpuOpsKernelInfoStore : public OpsKernelInfoStore { */ void GetAllOpsKernelInfo(std::map &infos) const override; + /** + * Calc the running size of Operator, + * then GE will alloc the mem size from runtime + * @param ge_node Node information + * @return status whether this operation success + */ + Status CalcOpRunningParam(ge::Node &ge_node) override; + + /** + * call the runtime's interface to generate the task + * @param node Node information + * @param context run context info + * @return status whether this operation success + */ + Status GenerateTask(const ge::Node &ge_node, ge::RunContext &context, std::vector &tasks) override; + HostCpuOpsKernelInfoStore(const HostCpuOpsKernelInfoStore &ops_kernel_store) = delete; HostCpuOpsKernelInfoStore(const HostCpuOpsKernelInfoStore &&ops_kernel_store) = delete; HostCpuOpsKernelInfoStore &operator=(const HostCpuOpsKernelInfoStore &ops_kernel_store) = delete; diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc index a6e00f4a..472fca45 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc +++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance 
with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h index 0f560485..757b96a6 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/host_op.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/host_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op.h b/ge/host_cpu_engine/ops_kernel_store/op/op.h index c094f080..c1e1619c 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/op.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc index 176ae579..efe44f80 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc +++ b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h index 3a235ffd..92f627fd 100644 --- a/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h +++ b/ge/host_cpu_engine/ops_kernel_store/op/op_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_cpu_engine/proto/task.proto b/ge/host_cpu_engine/proto/task.proto index d0c09840..36ae4847 100644 --- a/ge/host_cpu_engine/proto/task.proto +++ b/ge/host_cpu_engine/proto/task.proto @@ -1,165 +1 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -message ModelTaskDef { - string version = 1; - - map attr = 9; // Extended field - repeated TaskDef task = 10; - - uint64 memory_size = 11; - uint32 stream_num = 12; - uint32 event_num = 13; - uint64 weight_size = 14; - - repeated bytes op = 15; // input/output opdef in bytes - - uint64 base_addr = 16; // base addr - uint64 weight_addr = 17; // weight addr - uint32 batch_num = 18; -} - - -message TaskDef { - uint32 id = 1; - uint32 type = 2; - - uint32 stream_id = 10; - uint32 event_id = 11; - - KernelDef kernel = 20; - KernelExDef kernel_ex = 21; - KernelHcclDef kernel_hccl = 25; - EventExDef event_ex = 26; - LogTimeStampDef log_timestamp = 28; - - uint32 label_id = 30; - - MemcpyAsyncDef memcpy_async = 31; - StreamSwitchDef stream_switch = 32; - StreamActiveDef stream_active = 33; - bytes private_def = 34; - uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future - StreamSwitchNDef stream_switch_n = 36; - - LabelSetDef label_set = 37; - LabelGotoExDef label_goto_ex = 38; - LabelSwitchByIndexDef label_switch_by_index = 39; -} - -message KernelDef { - KernelContext context = 1; - - string stub_func = 10; - uint32 block_dim = 11; - uint32 args_size = 12; - bytes args = 13; - bytes sm_desc = 14; - bytes flowtable = 15; - string so_name = 16; - string kernel_name = 17; - bytes kernel_ext_info = 18; - uint32 kernel_ext_info_size = 19; -} - -message KernelContext { - uint32 kernel_type = 1; - uint32 op_id = 2; // OP type in CCE - uint32 kernel_func_id = 3; - uint32 op_index = 4; // TE/Custom operator - bool is_flowtable = 5; // Identify whether args is a flowtable structure - bytes args_offset = 6; // args offset information - uint32 args_count = 7; // args count - repeated uint32 origin_op_index = 8; -} - - -message KernelExDef { - uint32 flags = 1; - - uint32 op_index = 4; - uint32 args_size = 12; - bytes 
args = 13; - bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput - uint32 task_info_size = 15; - bytes kernel_ext_info = 16; - uint32 kernel_ext_info_size = 17; -} - - -message KernelHcclDef { - uint32 op_index = 8; - string hccl_type = 9; -} - - -message EventExDef { - uint32 op_index = 1; - uint32 event_type = 2; -} - -message LogTimeStampDef { - uint64 logid = 1; - bool notify = 2; - uint32 flat = 3; -} - -message MemcpyAsyncDef { - uint64 dst = 1; - uint64 dst_max = 2; - uint64 src = 3; - uint64 count = 4; - uint32 kind = 5; - uint32 op_index = 6; -} - -message StreamSwitchDef { - uint32 op_index = 1; - uint32 true_stream_id = 2; - int64 value = 3; - uint64 value_ptr = 4; - uint32 data_type = 5; -} - -message StreamActiveDef { - uint32 op_index = 1; - uint32 active_stream_id = 2; -} - -message StreamSwitchNDef { - uint32 op_index = 1; - uint32 size = 2; - repeated int64 target_value = 3; - repeated uint32 true_stream_id = 4; - uint32 element_size = 5; - uint32 data_type = 6; -} - -message LabelSetDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelGotoExDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelSwitchByIndexDef { - uint32 op_index = 1; - uint32 label_max = 2; -} +../../proto/task.proto \ No newline at end of file diff --git a/ge/host_kernels/add_kernel.cc b/ge/host_kernels/add_kernel.cc index 1c206018..afef1c37 100644 --- a/ge/host_kernels/add_kernel.cc +++ b/ge/host_kernels/add_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -138,8 +138,7 @@ Status AddKernel::AddCheck(const OpDescPtr &op_desc_ptr, const std::vectorGetOutputsSize() != kAddOutputSize)) { - GELOGW("The number of input for add must be %zu, output number must be %zu.", kAddInputSize, - kAddOutputSize); + GELOGW("The number of input for add must be %zu, output number must be %zu.", kAddInputSize, kAddOutputSize); return PARAM_INVALID; } // input vector elements must not be null diff --git a/ge/host_kernels/add_kernel.h b/ge/host_kernels/add_kernel.h index 70800b66..f8fd272e 100755 --- a/ge/host_kernels/add_kernel.h +++ b/ge/host_kernels/add_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/broadcast_args_kernel.cc b/ge/host_kernels/broadcast_args_kernel.cc index d8880db9..545d4f8e 100644 --- a/ge/host_kernels/broadcast_args_kernel.cc +++ b/ge/host_kernels/broadcast_args_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,12 +41,13 @@ Status BroadcastArgsKernel::Compute(const OpDescPtr op_desc_ptr, const std::vect } // check input size bool size_check = - (op_desc_ptr->GetAllInputsDesc().size() != kBCastArgsInputsSize || input.size() != kBCastArgsInputsSize || - op_desc_ptr->GetAllOutputsDesc().size() != kBCastArgsOutputsSize); + (op_desc_ptr->GetAllInputsDesc().size() != kBCastArgsInputsSize || input.size() != kBCastArgsInputsSize || + op_desc_ptr->GetAllOutputsDesc().size() != kBCastArgsOutputsSize); if (size_check) { - GELOGW("input/output size error. 
InDesc size:%zu," - "OutDesc size:%zu, in size:%zu ", - op_desc_ptr->GetAllInputsDesc().size(), op_desc_ptr->GetAllOutputsDesc().size(), input.size()); + GELOGW( + "input/output size error. InDesc size:%zu," + "OutDesc size:%zu, in size:%zu ", + op_desc_ptr->GetAllInputsDesc().size(), op_desc_ptr->GetAllOutputsDesc().size(), input.size()); return NOT_CHANGED; } diff --git a/ge/host_kernels/broadcast_args_kernel.h b/ge/host_kernels/broadcast_args_kernel.h index eb9a46f4..6d57976c 100755 --- a/ge/host_kernels/broadcast_args_kernel.h +++ b/ge/host_kernels/broadcast_args_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/broadcast_gradient_args_kernel.cc b/ge/host_kernels/broadcast_gradient_args_kernel.cc index 51ff4a4c..ed790dab 100644 --- a/ge/host_kernels/broadcast_gradient_args_kernel.cc +++ b/ge/host_kernels/broadcast_gradient_args_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "host_kernels/broadcast_gradient_args_kernel.h" #include diff --git a/ge/host_kernels/broadcast_gradient_args_kernel.h b/ge/host_kernels/broadcast_gradient_args_kernel.h index 84764228..8f183653 100755 --- a/ge/host_kernels/broadcast_gradient_args_kernel.h +++ b/ge/host_kernels/broadcast_gradient_args_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/cast_kernel.cc b/ge/host_kernels/cast_kernel.cc index 056081a1..106aa1c2 100644 --- a/ge/host_kernels/cast_kernel.cc +++ b/ge/host_kernels/cast_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,22 +59,21 @@ Status CastKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorGetOutputDesc(0); GeTensorDesc op_desc_in = op_desc_ptr->GetInputDesc(0); auto src_data_type = op_desc_in.GetDataType(); - auto src_shape = op_desc_in.GetShape(); - auto src_format = op_desc_in.GetFormat(); - auto data_type = op_desc.GetDataType(); - auto data_shape = op_desc.GetShape(); + auto src_shape = op_desc_in.GetShape(); + auto src_format = op_desc_in.GetFormat(); + auto data_type = op_desc.GetDataType(); + auto data_shape = op_desc.GetShape(); auto data_format = op_desc.GetFormat(); - GELOGD("Current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. 
" - "output format %s, shape %s, data type %s", op_desc_ptr->GetName().c_str(), - TypeUtils::FormatToSerialString(src_format).c_str(), - formats::ShapeToString(src_shape).c_str(), - TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), - formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), - TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), - TypeUtils::FormatToSerialString(data_format).c_str(), - formats::ShapeToString(data_shape).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); + GELOGD( + "Current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. " + "output format %s, shape %s, data type %s", + op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), + formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), + TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), + formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), + TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), + TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); // const_weight_ptr->GetData().GetSize() == 0 is supported auto src_data_size = src_shape.GetShapeSize(); @@ -86,17 +85,16 @@ Status CastKernel::Compute(const OpDescPtr op_desc_ptr, const std::vector(src_data_size), src_data_type, data_type}; formats::TransResult trans_result; GELOGD("Trans data type from %s to %s, shape %s, data size %ld", - TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str(), + TypeUtils::DataTypeToSerialString(src_data_type).c_str(), 
TypeUtils::DataTypeToSerialString(data_type).c_str(), formats::ShapeToString(src_shape).c_str(), src_data_size); if ((src_format != data_format) || (src_shape.GetDims() != data_shape.GetDims()) || (!formats::IsTransDataTypeSupport(cast_args))) { GELOGW("Transfer from data type %s to %s, format %s to %s, shape %s to %s is not supported", TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str(), - TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(data_format).c_str(), - formats::ShapeToString(src_shape).c_str(), formats::ShapeToString(data_shape).c_str()); + TypeUtils::DataTypeToSerialString(data_type).c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), + TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(src_shape).c_str(), + formats::ShapeToString(data_shape).c_str()); return NOT_CHANGED; } if (!KernelUtils::CheckSizeForTransOp(const_weight_ptr, op_desc_ptr)) { @@ -106,8 +104,8 @@ Status CastKernel::Compute(const OpDescPtr op_desc_ptr, const std::vector(input[i + kConcatOffsetInputIndexOne]->GetData().data()); + reinterpret_cast(input[i + kConcatOffsetInputIndexOne]->GetData().data()); int64_t input_dim = input_shape[concat_dim]; // this index is valid, checked before if (input_dim > (INT64_MAX - offset)) { GELOGE(PARAM_INVALID, " %d and %ld addition can result in overflow!.", offset, input_dim); diff --git a/ge/host_kernels/concat_offset_kernel.h b/ge/host_kernels/concat_offset_kernel.h index d2f9422b..b1e0958a 100755 --- a/ge/host_kernels/concat_offset_kernel.h +++ b/ge/host_kernels/concat_offset_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/concat_v2_kernel.cc b/ge/host_kernels/concat_v2_kernel.cc index a9f0da81..c46b4277 100644 --- a/ge/host_kernels/concat_v2_kernel.cc +++ b/ge/host_kernels/concat_v2_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,12 +21,12 @@ #include "common/debug/log.h" #include "common/fp16_t.h" +#include "common/ge_inner_error_codes.h" #include "common/op/ge_op_utils.h" #include "framework/common/debug/ge_log.h" #include "host_kernels/kernel_utils.h" #include "graph/utils/type_utils.h" #include "inc/kernel_factory.h" -#include "framework/common/types.h" namespace ge { namespace { @@ -116,8 +116,7 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector &input, - int &tidx, +Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &input, int &tidx, ConstGeTensorPtr &tensor) { size_t input_size = input.size(); // N >= 2 and N + 1 >= 3 @@ -138,7 +137,7 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &i continue; } if (tensor == nullptr) { - tensor = input.at(i); // get first valid tensor with data + tensor = input.at(i); // get first valid tensor with data } } @@ -161,7 +160,7 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &i GE_CHECK_NOTNULL(tensor_axis); const int *axis = reinterpret_cast(tensor_axis->GetData().data()); GE_CHECK_NOTNULL(axis); - tidx = axis[0]; // [-rank(values), rank(values)) + tidx = axis[0]; // [-rank(values), rank(values)) int rank = static_cast(tensor->GetTensorDesc().GetShape().GetDimNum()); // rank if (tidx < 0) { tidx += rank; @@ -170,8 +169,8 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector &i // 2. 
empty tensor only support case: [n],[m],[] // case: [[],[]] ,[[],[]] ,[] or other case when rank >=2 is not supported if (tidx < 0 || tidx >= rank || (has_empty_tensor && rank > kSupportEmptyTensorRank)) { - GELOGW("ConcatV2 info: tidx[%d]_rank[%d]_has_empty_tensor[bool:%d] cannot be supported, skip fold.", - tidx, rank, has_empty_tensor); + GELOGW("ConcatV2 info: tidx[%d]_rank[%d]_has_empty_tensor[bool:%d] cannot be supported, skip fold.", tidx, rank, + has_empty_tensor); return NOT_CHANGED; } diff --git a/ge/host_kernels/concat_v2_kernel.h b/ge/host_kernels/concat_v2_kernel.h index 90f1899b..353b7ed5 100755 --- a/ge/host_kernels/concat_v2_kernel.h +++ b/ge/host_kernels/concat_v2_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/dynamic_stitch_kernel.cc b/ge/host_kernels/dynamic_stitch_kernel.cc index d26237f4..c1245535 100644 --- a/ge/host_kernels/dynamic_stitch_kernel.cc +++ b/ge/host_kernels/dynamic_stitch_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -200,7 +200,7 @@ Status DynamicStitchKernel::StitchDataFollowIndices(int64_t data_unit, const vec dst_offset = input_indices[j] * data_unit; src_offset = j * data_unit; auto protected_size = - allowance < static_cast(SECUREC_MEM_MAX_LEN) ? allowance : static_cast(SECUREC_MEM_MAX_LEN); + allowance < static_cast(SECUREC_MEM_MAX_LEN) ? 
allowance : static_cast(SECUREC_MEM_MAX_LEN); auto ret = memcpy_s(buf.get() + dst_offset, protected_size, input_data + src_offset, data_unit); if (ret != EOK) { GELOGW("Memory copy failed."); diff --git a/ge/host_kernels/dynamic_stitch_kernel.h b/ge/host_kernels/dynamic_stitch_kernel.h index 2cca94e3..512c731b 100644 --- a/ge/host_kernels/dynamic_stitch_kernel.h +++ b/ge/host_kernels/dynamic_stitch_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/empty_kernel.cc b/ge/host_kernels/empty_kernel.cc index 19e938ce..a5e5fbcf 100644 --- a/ge/host_kernels/empty_kernel.cc +++ b/ge/host_kernels/empty_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -43,8 +43,8 @@ Status EmptyKernel::EmptyCheck(const OpDescPtr &op_desc_ptr, const std::vectorGetAllInputsDesc().size() != kEmptyInputsSize) || (input.size() != kEmptyInputsSize) || - (op_desc_ptr->GetAllOutputsDesc().size() != kEmptyOutputsSize)); + ((op_desc_ptr->GetAllInputsDesc().size() != kEmptyInputsSize) || (input.size() != kEmptyInputsSize) || + (op_desc_ptr->GetAllOutputsDesc().size() != kEmptyOutputsSize)); if (size_check) { GELOGW("Input/Output size error. 
InDesc size:%zu, OutDesc size:%zu, in size:%zu ", op_desc_ptr->GetAllInputsDesc().size(), op_desc_ptr->GetAllOutputsDesc().size(), input.size()); @@ -58,8 +58,7 @@ Status EmptyKernel::EmptyCheck(const OpDescPtr &op_desc_ptr, const std::vectorGetTensorDesc().GetShape().GetDimNum() > kShapeMaxDims) { - GELOGW("Check if the dimension is 1-D failed, dims:%zu", - shape->GetTensorDesc().GetShape().GetDimNum()); + GELOGW("Check if the dimension is 1-D failed, dims:%zu", shape->GetTensorDesc().GetShape().GetDimNum()); return PARAM_INVALID; } return SUCCESS; diff --git a/ge/host_kernels/empty_kernel.h b/ge/host_kernels/empty_kernel.h index 7fd2791c..bc426048 100755 --- a/ge/host_kernels/empty_kernel.h +++ b/ge/host_kernels/empty_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/expanddims_kernel.cc b/ge/host_kernels/expanddims_kernel.cc index f304fbdb..15648573 100644 --- a/ge/host_kernels/expanddims_kernel.cc +++ b/ge/host_kernels/expanddims_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -50,8 +50,7 @@ Status ExpanddimsKernel::Compute(const NodePtr &node_ptr) { GELOGI("Expanddims dimension kernel success."); return SUCCESS; } -Status ExpanddimsKernel::Compute(const ge::OpDescPtr op_desc_ptr, - const std::vector &input, +Status ExpanddimsKernel::Compute(const ge::OpDescPtr op_desc_ptr, const std::vector &input, std::vector &v_output) { GELOGI("Expanddims folding kernel in."); if (op_desc_ptr == nullptr) { diff --git a/ge/host_kernels/expanddims_kernel.h b/ge/host_kernels/expanddims_kernel.h index 77971a29..4970d89c 100755 --- a/ge/host_kernels/expanddims_kernel.h +++ b/ge/host_kernels/expanddims_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/fill_kernel.cc b/ge/host_kernels/fill_kernel.cc index 4e3d4db5..27bcb9aa 100644 --- a/ge/host_kernels/fill_kernel.cc +++ b/ge/host_kernels/fill_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,6 @@ #include "host_kernels/kernel_utils.h" #include "graph/utils/type_utils.h" #include "inc/kernel_factory.h" -#include "framework/common/types.h" namespace { const int kFillInputSize = 2; diff --git a/ge/host_kernels/fill_kernel.h b/ge/host_kernels/fill_kernel.h index 1a4546f2..a1b6b4ef 100755 --- a/ge/host_kernels/fill_kernel.h +++ b/ge/host_kernels/fill_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/floordiv_kernel.cc b/ge/host_kernels/floordiv_kernel.cc index 0574ca3b..5114122c 100644 --- a/ge/host_kernels/floordiv_kernel.cc +++ b/ge/host_kernels/floordiv_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/floordiv_kernel.h b/ge/host_kernels/floordiv_kernel.h index d3dc3ff7..c8505731 100755 --- a/ge/host_kernels/floordiv_kernel.h +++ b/ge/host_kernels/floordiv_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/floormod_kernel.cc b/ge/host_kernels/floormod_kernel.cc index 31e4e19b..7ad746de 100644 --- a/ge/host_kernels/floormod_kernel.cc +++ b/ge/host_kernels/floormod_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/floormod_kernel.h b/ge/host_kernels/floormod_kernel.h index 439fc0a6..faa5c8e2 100755 --- a/ge/host_kernels/floormod_kernel.h +++ b/ge/host_kernels/floormod_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/gather_v2_kernel.cc b/ge/host_kernels/gather_v2_kernel.cc index e52b4534..7413395a 100644 --- a/ge/host_kernels/gather_v2_kernel.cc +++ b/ge/host_kernels/gather_v2_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/gather_v2_kernel.h b/ge/host_kernels/gather_v2_kernel.h index 17fcba59..0bf4e3ee 100755 --- a/ge/host_kernels/gather_v2_kernel.h +++ b/ge/host_kernels/gather_v2_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/greater_kernel.cc b/ge/host_kernels/greater_kernel.cc index a245ec8d..f23eee2f 100644 --- a/ge/host_kernels/greater_kernel.cc +++ b/ge/host_kernels/greater_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/greater_kernel.h b/ge/host_kernels/greater_kernel.h index 6f136462..3697a8e8 100755 --- a/ge/host_kernels/greater_kernel.h +++ b/ge/host_kernels/greater_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,8 +37,8 @@ class GreaterKernel : public Kernel { Status GreaterCheck(const std::vector &input); const std::set greater_supported_type = { - DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, - DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE, + DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, + DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE, }; }; } // namespace ge diff --git a/ge/host_kernels/identity_kernel.cc b/ge/host_kernels/identity_kernel.cc index 702f5c93..16bd3138 100644 --- a/ge/host_kernels/identity_kernel.cc +++ b/ge/host_kernels/identity_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,11 +12,10 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #include "identity_kernel.h" #include "inc/kernel_factory.h" -#include "framework/common/types.h" namespace { constexpr uint32_t kInputDescIndex = 0; diff --git a/ge/host_kernels/identity_kernel.h b/ge/host_kernels/identity_kernel.h index 84cd08bb..2164d880 100644 --- a/ge/host_kernels/identity_kernel.h +++ b/ge/host_kernels/identity_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ #ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_IDENTITY_KERNEL_H_ #define GE_GRAPH_PASSES_FOLDING_KERNEL_IDENTITY_KERNEL_H_ diff --git a/ge/host_kernels/kernel_utils.cc b/ge/host_kernels/kernel_utils.cc index 595f9517..9bcd9e3c 100755 --- a/ge/host_kernels/kernel_utils.cc +++ b/ge/host_kernels/kernel_utils.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -85,8 +85,7 @@ bool KernelUtils::CheckFormatSupported(const NodePtr &node_ptr) { return true; } -bool KernelUtils::CheckSizeForTransOp(const ge::ConstGeTensorPtr &const_weight_ptr, - const ge::OpDescPtr &op_desc_ptr) { +bool KernelUtils::CheckSizeForTransOp(const ge::ConstGeTensorPtr &const_weight_ptr, const ge::OpDescPtr &op_desc_ptr) { if (const_weight_ptr == nullptr || op_desc_ptr == nullptr) { GELOGE(FAILED, "parameter invalid"); return false; diff --git a/ge/host_kernels/kernel_utils.h b/ge/host_kernels/kernel_utils.h index c9c90634..17b645aa 100755 --- a/ge/host_kernels/kernel_utils.h +++ b/ge/host_kernels/kernel_utils.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -42,7 +42,7 @@ class KernelUtils { * @param [out] output the tensor for save sequence of numbers * @author */ - template + template static Status GenData(const int64_t data_num, const T value, const GeTensorPtr &output) { if (data_num > 0) { if (!CheckInt64MulOverflow(data_num, static_cast(sizeof(T)))) { @@ -70,12 +70,12 @@ class KernelUtils { } /** - * Calculate dimension - * @param [in] dims save the tensor of the dimension - * @param [in] vec_dim results of each dimension - * @param [out] data_num total size of data - * @author - */ + * Calculate dimension + * @param [in] dims save the tensor of the dimension + * @param [in] vec_dim results of each dimension + * @param [out] data_num total size of data + * @author + */ template static Status CalcDims(const ConstGeTensorPtr dims, std::vector &vec_dim, int64_t &data_num) { data_num = 1; diff --git a/ge/host_kernels/maximum_kernel.cc b/ge/host_kernels/maximum_kernel.cc index 2ced113f..aca4ec2b 100644 --- a/ge/host_kernels/maximum_kernel.cc +++ b/ge/host_kernels/maximum_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/maximum_kernel.h b/ge/host_kernels/maximum_kernel.h index d7e69f59..feaa91e7 100755 --- a/ge/host_kernels/maximum_kernel.h +++ b/ge/host_kernels/maximum_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/mul_kernel.cc b/ge/host_kernels/mul_kernel.cc index b01a5c79..8dbe83a5 100644 --- a/ge/host_kernels/mul_kernel.cc +++ b/ge/host_kernels/mul_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/mul_kernel.h b/ge/host_kernels/mul_kernel.h index 2d06f676..e7c74c41 100755 --- a/ge/host_kernels/mul_kernel.h +++ b/ge/host_kernels/mul_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/pack_kernel.cc b/ge/host_kernels/pack_kernel.cc index 476005ef..9b62a582 100644 --- a/ge/host_kernels/pack_kernel.cc +++ b/ge/host_kernels/pack_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "host_kernels/pack_kernel.h" #include @@ -27,7 +28,6 @@ #include "host_kernels/kernel_utils.h" #include "graph/utils/type_utils.h" #include "inc/kernel_factory.h" -#include "framework/common/types.h" namespace { const int64_t kShapeItemNumMAX = 2000000000; @@ -79,8 +79,7 @@ Status PackKernel::ValidateKernelParams(const ge::OpDescPtr &op_desc_ptr, return NOT_CHANGED; } if (input.size() != static_cast(n_)) { - GELOGW("The number of input for Pack should be %d, in fact it is %ld ", static_cast(n_), - input.size()); + GELOGW("The number of input for Pack should be %d, in fact it is %ld ", static_cast(n_), input.size()); return PARAM_INVALID; } data_type_ = op_desc_ptr->GetInputDesc(0).GetDataType(); @@ -167,8 +166,7 @@ void PackKernel::ExpandDims(const int64_t axis, const std::vector &input, +Status PackKernel::CopyOutputData(const GeShape &final_shape, const std::vector &input, ge::GeTensorPtr &output_ptr) { output_ptr->MutableTensorDesc().SetShape(final_shape); output_ptr->MutableTensorDesc().SetDataType(DataType(data_type_)); @@ -206,8 +204,8 @@ Status PackKernel::CopyOutputData(const GeShape &final_shape, for (int64_t j = 0; j < n_; j++) { // input range already check before. Range is [0,n_). 
const uint8_t *in_data = input[j]->GetData().data(); - auto ret = memcpy_s(buf.get() + dst_offset, output_size * data_size - dst_offset, in_data + src_offset, - data_size * unit); + auto ret = + memcpy_s(buf.get() + dst_offset, output_size * data_size - dst_offset, in_data + src_offset, data_size * unit); if (ret != EOK) { GELOGW("Memory copy failed."); return NOT_CHANGED; diff --git a/ge/host_kernels/pack_kernel.h b/ge/host_kernels/pack_kernel.h index 87b77a66..708e46c3 100755 --- a/ge/host_kernels/pack_kernel.h +++ b/ge/host_kernels/pack_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_PACK_KERNEL_H_ #define GE_GRAPH_PASSES_FOLDING_KERNEL_PACK_KERNEL_H_ @@ -30,6 +31,7 @@ class PackKernel : public Kernel { public: Status Compute(const ge::OpDescPtr op_desc_ptr, const std::vector &input, std::vector &v_output) override; + private: Status ValidateKernelParams(const ge::OpDescPtr &op_desc_ptr, const std::vector &input); Status ValidateInputs(const ge::OpDescPtr &op_desc_ptr, const std::vector &input); diff --git a/ge/host_kernels/permute_kernel.cc b/ge/host_kernels/permute_kernel.cc index 327c94f8..24bed54d 100755 --- a/ge/host_kernels/permute_kernel.cc +++ b/ge/host_kernels/permute_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,7 +33,6 @@ #include "host_kernels/kernel_utils.h" #include "framework/common/ge_inner_error_codes.h" - namespace ge { namespace { const char *const kAttrOrder = "order"; @@ -75,21 +74,21 @@ Status PermuteKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorGetOutputDesc(0); GeTensorDesc op_desc_in = op_desc_ptr->GetInputDesc(0); auto src_format = op_desc_in.GetFormat(); - auto src_shape = op_desc_in.GetShape().GetDims(); + auto src_shape = op_desc_in.GetShape().GetDims(); auto src_data_type = op_desc_in.GetDataType(); auto data_shape = op_desc.GetShape().GetDims(); auto data_format = op_desc.GetFormat(); auto data_type = op_desc.GetDataType(); GELOGD( - "current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. " - "output format %s, shape %s, data type %s", - op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), - formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), - formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), - TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), - TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); + "current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. 
" + "output format %s, shape %s, data type %s", + op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), + formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), + TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), + formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), + TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), + TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); vector perm_list; if (!AttrUtils::GetListInt(op_desc_ptr, kAttrOrder, perm_list) && diff --git a/ge/host_kernels/permute_kernel.h b/ge/host_kernels/permute_kernel.h index 589ea49e..b022abd7 100755 --- a/ge/host_kernels/permute_kernel.h +++ b/ge/host_kernels/permute_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/range_kernel.cc b/ge/host_kernels/range_kernel.cc index 32a72b47..4ce3725d 100644 --- a/ge/host_kernels/range_kernel.cc +++ b/ge/host_kernels/range_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/range_kernel.h b/ge/host_kernels/range_kernel.h index e58530d0..50b1c232 100755 --- a/ge/host_kernels/range_kernel.h +++ b/ge/host_kernels/range_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/rank_kernel.cc b/ge/host_kernels/rank_kernel.cc index 1de9478c..7fb92039 100755 --- a/ge/host_kernels/rank_kernel.cc +++ b/ge/host_kernels/rank_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,6 @@ #include "framework/common/debug/ge_log.h" #include "inc/kernel_factory.h" #include "omg/omg_inner_types.h" -#include "framework/common/types.h" namespace { const size_t kRankInputSize = 1; diff --git a/ge/host_kernels/rank_kernel.h b/ge/host_kernels/rank_kernel.h index 80c0bb7d..0de4960c 100755 --- a/ge/host_kernels/rank_kernel.h +++ b/ge/host_kernels/rank_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reduce_prod_kernel.cc b/ge/host_kernels/reduce_prod_kernel.cc index 4837a921..0a3fad72 100644 --- a/ge/host_kernels/reduce_prod_kernel.cc +++ b/ge/host_kernels/reduce_prod_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -159,7 +159,7 @@ void ReduceProdKernel::ShapeCal(const ge::OpDescPtr &op_desc_ptr, const std::vec vector data_dims = data_tensor->GetTensorDesc().GetShape().GetDims(); int32_t data_dim_size = static_cast(data_dims.size()); const uint8_t *axis_data = axis_tensor->GetData().GetData(); - GE_CHECK_NOTNULL_EXEC(axis_data, return); + GE_CHECK_NOTNULL_EXEC(axis_data, return ); int32_t axis = *(const_cast(reinterpret_cast(axis_data))); bool keep_dims = false; if (!AttrUtils::GetBool(op_desc_ptr, "keep_dims", keep_dims)) { diff --git a/ge/host_kernels/reduce_prod_kernel.h b/ge/host_kernels/reduce_prod_kernel.h index ccf33668..326dd2f5 100755 --- a/ge/host_kernels/reduce_prod_kernel.h +++ b/ge/host_kernels/reduce_prod_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reformat_kernel.cc b/ge/host_kernels/reformat_kernel.cc index c1942983..c2dd1e17 100644 --- a/ge/host_kernels/reformat_kernel.cc +++ b/ge/host_kernels/reformat_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -85,8 +85,7 @@ Status ReFormatKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorSetData(input.at(0)->GetData()) != GRAPH_SUCCESS, - GELOGW("set data failed"); + GE_IF_BOOL_EXEC(output_ptr->SetData(input.at(0)->GetData()) != GRAPH_SUCCESS, GELOGW("set data failed"); return NOT_CHANGED); v_output.emplace_back(output_ptr); GELOGD("ReFormatKernel success."); diff --git a/ge/host_kernels/reformat_kernel.h b/ge/host_kernels/reformat_kernel.h index 770b90b3..e3d49acf 100755 --- a/ge/host_kernels/reformat_kernel.h +++ b/ge/host_kernels/reformat_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reshape_kernel.cc b/ge/host_kernels/reshape_kernel.cc index 7c4f58f6..dc7e4bb8 100644 --- a/ge/host_kernels/reshape_kernel.cc +++ b/ge/host_kernels/reshape_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/reshape_kernel.h b/ge/host_kernels/reshape_kernel.h index 37b12db9..c0100e51 100755 --- a/ge/host_kernels/reshape_kernel.h +++ b/ge/host_kernels/reshape_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/rsqrt_kernel.cc b/ge/host_kernels/rsqrt_kernel.cc index 74c78787..5184d885 100755 --- a/ge/host_kernels/rsqrt_kernel.cc +++ b/ge/host_kernels/rsqrt_kernel.cc @@ -1,5 +1,5 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "host_kernels/rsqrt_kernel.h" #include @@ -27,7 +28,6 @@ #include "host_kernels/kernel_utils.h" #include "inc/kernel_factory.h" #include "common/math/math_util.h" -#include "framework/common/types.h" namespace ge { namespace { @@ -51,13 +51,13 @@ Status ZeroCheck(T x, const DataType &data_type) { } return SUCCESS; } -#define SET_RSQRT_CASE(DTYPE, TYPE) \ - case (DTYPE): \ - ret = RsqrtKernel::RsqrtCompute(input_ptr, output_ptr); \ +#define SET_RSQRT_CASE(DTYPE, TYPE) \ + case (DTYPE): \ + ret = RsqrtKernel::RsqrtCompute(input_ptr, output_ptr); \ break; } // namespace -template +template Status RsqrtKernel::RsqrtCompute(ConstGeTensorPtr &input_tensor_ptr, GeTensorPtr &output_tensor_ptr) { GE_CHECK_NOTNULL(input_tensor_ptr); GE_CHECK_NOTNULL(output_tensor_ptr); @@ -65,12 +65,12 @@ Status RsqrtKernel::RsqrtCompute(ConstGeTensorPtr &input_tensor_ptr, GeTensorPtr size_t data_count = data_size / sizeof(T); auto data_type = input_tensor_ptr->GetTensorDesc().GetDataType(); if (data_count > 0) { - unique_ptr buf(new(std::nothrow) T[data_count]()); + unique_ptr buf(new (std::nothrow) T[data_count]()); if (buf == nullptr) { GELOGW("New buf failed"); return NOT_CHANGED; } - auto ptr = const_cast(reinterpret_cast(input_tensor_ptr->GetData().data())); + auto ptr = const_cast(reinterpret_cast(input_tensor_ptr->GetData().data())); for (size_t i = 0; i < data_count; i++) { if (ZeroCheck(*(ptr + i), data_type) 
!= SUCCESS) { GELOGW("Rsqrt: The input data can not less than or equal to zero, rsqrt folding failed."); @@ -78,18 +78,18 @@ Status RsqrtKernel::RsqrtCompute(ConstGeTensorPtr &input_tensor_ptr, GeTensorPtr } switch (data_type) { case DT_FLOAT16: { - double val = static_cast(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); + double val = static_cast(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); double drSqrt = 1.0 / std::sqrt(val); buf[i] = drSqrt; break; } - case DT_FLOAT:{ - float denominator = std::sqrt(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); - buf[i] = static_cast(1 / denominator); + case DT_FLOAT: { + float denominator = std::sqrt(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); + buf[i] = static_cast(1 / denominator); break; } case DT_DOUBLE: { - double denominator = std::sqrt(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); + double denominator = std::sqrt(*(reinterpret_cast(input_tensor_ptr->GetData().data()) + i)); buf[i] = static_cast(1 / denominator); break; } @@ -99,7 +99,8 @@ Status RsqrtKernel::RsqrtCompute(ConstGeTensorPtr &input_tensor_ptr, GeTensorPtr } } GE_IF_BOOL_EXEC(output_tensor_ptr->SetData(reinterpret_cast(buf.get()), data_size) != GRAPH_SUCCESS, - GELOGW("Set data failed"); return NOT_CHANGED); + GELOGW("Set data failed"); + return NOT_CHANGED); output_tensor_ptr->MutableTensorDesc().SetDataType(data_type); output_tensor_ptr->MutableTensorDesc().SetShape(input_tensor_ptr->GetTensorDesc().GetShape()); } diff --git a/ge/host_kernels/rsqrt_kernel.h b/ge/host_kernels/rsqrt_kernel.h index e3733521..02b08252 100755 --- a/ge/host_kernels/rsqrt_kernel.h +++ b/ge/host_kernels/rsqrt_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,8 +27,9 @@ class RsqrtKernel : public Kernel { public: Status Compute(const ge::OpDescPtr op_desc_ptr, const std::vector &input, std::vector &v_output) override; + private: - template + template Status RsqrtCompute(ConstGeTensorPtr &input_tensor_ptr, GeTensorPtr &output_tensor_ptr); }; } // namespace ge diff --git a/ge/host_kernels/shape_kernel.cc b/ge/host_kernels/shape_kernel.cc index ecb0e082..2f20fb24 100644 --- a/ge/host_kernels/shape_kernel.cc +++ b/ge/host_kernels/shape_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,6 @@ #include "host_kernels/kernel_utils.h" #include "graph/passes/pass_utils.h" #include "inc/kernel_factory.h" -#include "framework/common/types.h" namespace ge { namespace { diff --git a/ge/host_kernels/shape_kernel.h b/ge/host_kernels/shape_kernel.h index 6ef416bf..8e8791e5 100755 --- a/ge/host_kernels/shape_kernel.h +++ b/ge/host_kernels/shape_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/shape_n_kernel.cc b/ge/host_kernels/shape_n_kernel.cc index 67d2eeff..33b878cf 100644 --- a/ge/host_kernels/shape_n_kernel.cc +++ b/ge/host_kernels/shape_n_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,7 +23,6 @@ #include "host_kernels/kernel_utils.h" #include "graph/passes/pass_utils.h" #include "inc/kernel_factory.h" -#include "framework/common/types.h" namespace ge { Status ShapeNKernel::Compute(const NodePtr &node, std::vector &v_output) { @@ -49,7 +48,7 @@ Status ShapeNKernel::Compute(const NodePtr &node, std::vector &v_ou } vector dims = input_desc->GetShape().GetDims(); Status ret = - PassUtils::ConstructTensorDescWithData(op_desc->GetOutputDesc(static_cast(i)), dims, v_output); + PassUtils::ConstructTensorDescWithData(op_desc->GetOutputDesc(static_cast(i)), dims, v_output); if (ret != SUCCESS) { GELOGE(PARAM_INVALID, "ShapeN kernel construct tensor desc failed, i:%zu", i); return ret; diff --git a/ge/host_kernels/shape_n_kernel.h b/ge/host_kernels/shape_n_kernel.h index 51fd9393..55829a39 100755 --- a/ge/host_kernels/shape_n_kernel.h +++ b/ge/host_kernels/shape_n_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/size_kernel.cc b/ge/host_kernels/size_kernel.cc index caa5febc..65bb21fc 100644 --- a/ge/host_kernels/size_kernel.cc +++ b/ge/host_kernels/size_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/size_kernel.h b/ge/host_kernels/size_kernel.h index 43a00f2f..3a309bc7 100755 --- a/ge/host_kernels/size_kernel.h +++ b/ge/host_kernels/size_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/slice_d_kernel.cc b/ge/host_kernels/slice_d_kernel.cc index b8572290..3b8fd0a0 100644 --- a/ge/host_kernels/slice_d_kernel.cc +++ b/ge/host_kernels/slice_d_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -146,11 +146,11 @@ Status SliceDKernel::Compute(const OpDescPtr op_desc_ptr, const std::vector(tmp_value); - + if (ge::CheckIntMulOverflow(layer_width, layer_height) != SUCCESS) { GELOGW("Failed to get list param."); return PARAM_INVALID; diff --git a/ge/host_kernels/ssd_prior_box_kernel.h b/ge/host_kernels/ssd_prior_box_kernel.h index 0ebf221d..96de2b85 100755 --- a/ge/host_kernels/ssd_prior_box_kernel.h +++ b/ge/host_kernels/ssd_prior_box_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/host_kernels/strided_slice_kernel.cc b/ge/host_kernels/strided_slice_kernel.cc index 7b9c0608..13c61666 100644 --- a/ge/host_kernels/strided_slice_kernel.cc +++ b/ge/host_kernels/strided_slice_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,15 +12,20 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #include "host_kernels/strided_slice_kernel.h" + #include "common/fp16_t.h" +#include "common/ge_inner_error_codes.h" #include "common/math/math_util.h" -#include "framework/common/types.h" +#include "common/op/ge_op_utils.h" +#include "external/graph/types.h" +#include "framework/common/debug/ge_log.h" #include "graph/utils/type_utils.h" #include "host_kernels/kernel_utils.h" #include "inc/kernel_factory.h" +#include namespace ge { namespace { @@ -31,16 +36,16 @@ const size_t kStridedSliceBeginIndex = 1; const size_t kStridedSliceEndIndex = 2; const size_t kStridedSliceStrideIndex = 3; const int32_t kDefaultStrideSize = 1; -const uint32_t kMaskBitLeftUnit = 1; const std::set kIndexNumberType = {DT_INT32, DT_INT64}; -bool IsEllipsisMaskValid(const GeTensorDescPtr &input_desc, const uint32_t ellipsis_mask) { +bool IsEllipsisMaskValid(const GeTensorDescPtr &input_desc, const int ellipsis_mask) { if (ellipsis_mask != 0) { auto ellipsis_num = 0; auto input_shape = input_desc->GetShape(); - for (size_t i = 0; i < input_shape.GetDimNum(); ++i) { - auto i_temp = static_cast(i); - bool ellipsis_mask_flag = (ellipsis_mask) & (kMaskBitLeftUnit << i_temp); + bool ellipsis_mask_flag = false; + for (size_t i = 0; i < input_shape.GetDimNum(); i++) { + uint32_t i_temp = static_cast(i); + ellipsis_mask_flag = 
(static_cast(ellipsis_mask) & (1 << i_temp)); if (ellipsis_mask_flag) { ++ellipsis_num; } @@ -52,35 +57,6 @@ bool IsEllipsisMaskValid(const GeTensorDescPtr &input_desc, const uint32_t ellip } return true; } - -void GetOriginStrideVec(const std::vector &input, vector &orig_begin_vec, - vector &orig_end_vec, vector &orig_stride_vec) { - ConstGeTensorPtr begin_tensor = input[kStridedSliceBeginIndex]; - ConstGeTensorPtr end_tensor = input[kStridedSliceEndIndex]; - ConstGeTensorPtr stride_tensor = input[kStridedSliceStrideIndex]; - - auto data_type = begin_tensor->GetTensorDesc().GetDataType(); - size_t vec_size = begin_tensor->GetData().size() / GetSizeByDataType(data_type); - if (data_type == DT_INT32) { - const int32_t *begin = reinterpret_cast(begin_tensor->GetData().data()); - const int32_t *end = reinterpret_cast(end_tensor->GetData().data()); - const int32_t *stride = reinterpret_cast(stride_tensor->GetData().data()); - for (size_t i = 0; i < vec_size; ++i) { - orig_begin_vec.emplace_back(begin[i]); - orig_end_vec.emplace_back(end[i]); - orig_stride_vec.emplace_back(stride[i]); - } - } else { - const int64_t *begin = reinterpret_cast(begin_tensor->GetData().data()); - const int64_t *end = reinterpret_cast(end_tensor->GetData().data()); - const int64_t *stride = reinterpret_cast(stride_tensor->GetData().data()); - for (size_t i = 0; i < vec_size; ++i) { - orig_begin_vec.emplace_back(begin[i]); - orig_end_vec.emplace_back(end[i]); - orig_stride_vec.emplace_back(stride[i]); - } - } -} } // namespace Status StridedSliceKernel::Compute(const ge::OpDescPtr attr, const std::vector &input, vector &v_output) { @@ -157,7 +133,7 @@ Status StridedSliceKernel::CheckAndGetAttr(const OpDescPtr &attr) { } return SUCCESS; } -Status StridedSliceKernel::CheckInputParam(const std::vector &input) { +Status StridedSliceKernel::CheckInputParam(const std::vector &input) const { if (input.size() != kStridedSliceInputSize) { GELOGE(PARAM_INVALID, "The number of input for strided slice 
must be %zu.", kStridedSliceInputSize); return PARAM_INVALID; @@ -194,9 +170,9 @@ Status StridedSliceKernel::CheckInputParam(const std::vector & return PARAM_INVALID; } size_t weight0_size = weight0->GetData().size() / x_data_size; - size_t begin_data_size = begin_tensor->GetData().size(); - size_t end_data_size = end_tensor->GetData().size(); - size_t stride_data_size = stride_tensor->GetData().size(); + size_t begin_data_size = begin_tensor->GetData().size() / sizeof(int32_t); + size_t end_data_size = end_tensor->GetData().size() / sizeof(int32_t); + size_t stride_data_size = stride_tensor->GetData().size() / sizeof(int32_t); if ((weight0_size == 0) || (begin_data_size == 0) || (end_data_size == 0) || (stride_data_size == 0)) { GELOGW("Data size of inputs is 0."); return PARAM_INVALID; @@ -206,6 +182,7 @@ Status StridedSliceKernel::CheckInputParam(const std::vector & GELOGW("The sizes of begin, end and stride is not supported."); return PARAM_INVALID; } + return SUCCESS; } @@ -214,6 +191,8 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector &output_dims, std::vector &stride_vec) { ConstGeTensorPtr weight0 = input[kStridedSliceInputIndex]; ConstGeTensorPtr begin_tensor = input[kStridedSliceBeginIndex]; + ConstGeTensorPtr end_tensor = input[kStridedSliceEndIndex]; + ConstGeTensorPtr stride_tensor = input[kStridedSliceStrideIndex]; const GeShape x_shape = weight0->GetTensorDesc().GetShape(); auto x_dims = x_shape.GetDims(); @@ -221,13 +200,15 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector orig_begin_vec, orig_end_vec, orig_stride_vec; - GetOriginStrideVec(input, orig_begin_vec, orig_end_vec, orig_stride_vec); - auto begin_dim_num = orig_begin_vec.size(); + const int32_t *begin = reinterpret_cast(begin_tensor->GetData().data()); + const int32_t *end = reinterpret_cast(end_tensor->GetData().data()); + const int32_t *stride = reinterpret_cast(stride_tensor->GetData().data()); + auto begin_dim_num = begin_tensor->GetData().size() / 
sizeof(int32_t); auto min_dim = x_dims_num > begin_dim_num ? begin_dim_num : x_dims_num; for (size_t i = 0; i < x_dims.size(); ++i) { - auto i_temp = static_cast(i); - bool new_axis_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK) & (kMaskBitLeftUnit << i_temp)); + auto i_temp = static_cast(i); + bool new_axis_mask_flag = + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); if (new_axis_mask_flag) { output_dims.push_back(1); input_dims.push_back(1); @@ -240,9 +221,9 @@ Status StridedSliceKernel::InitParamWithAttrs(const std::vector &x_dims) { auto begin_data_type_size = GetSizeByDataType(begin_tensor->GetTensorDesc().GetDataType()); size_t begin_vec_size = begin_tensor->GetData().size() / begin_data_type_size; auto final_dim_num = x_dims_num < begin_vec_size ? begin_vec_size : x_dims_num; for (size_t i = 0; i < final_dim_num; i++) { - auto i_temp = static_cast(i); - bool new_axis_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK) & (kMaskBitLeftUnit << i_temp)); + auto i_temp = static_cast(i); + bool new_axis_mask_flag = + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_NEW_AXIS_MASK)) & (1 << i_temp)); if (new_axis_mask_flag) { x_dims.insert(x_dims.begin() + i, 1); } } } - Status StridedSliceKernel::MaskCal(const size_t i, int64_t &begin_i, int64_t &end_i, int64_t &dim_i) const { - auto i_temp = static_cast(i); - bool begin_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_BEGIN_MASK) & (kMaskBitLeftUnit << i_temp)); - bool end_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_END_MASK) & (kMaskBitLeftUnit << i_temp)); - bool ellipsis_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK) & (kMaskBitLeftUnit << i_temp)); - bool shrink_mask_flag = (attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK) & (kMaskBitLeftUnit << i_temp)); + uint64_t i_temp = static_cast(i); + bool begin_mask_flag = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_BEGIN_MASK)) & (1 << i_temp)); + bool 
end_mask_flag = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_END_MASK)) & (1 << i_temp)); + bool ellipsis_mask_flag = + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_ELLIPSIS_MASK)) & (1 << i_temp)); + bool shrink_mask_flag = + (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK)) & (1 << i_temp)); if (shrink_mask_flag) { begin_i = (begin_i < 0 ? (dim_i + begin_i) : begin_i); FMK_INT32_ADDCHECK(begin_i, kNumOne) @@ -309,9 +291,8 @@ Status StridedSliceKernel::MaskCal(const size_t i, int64_t &begin_i, int64_t &en } return SUCCESS; } - Status StridedSliceKernel::StrideCal(const int64_t x_dims_i, int64_t &begin_i, int64_t &end_i, int64_t &stride_i, - int64_t &dim_final) { + int64_t &dim_final) const { if (stride_i == 0) { stride_i = kDefaultStrideSize; } else if (stride_i < 0) { @@ -331,17 +312,15 @@ Status StridedSliceKernel::StrideCal(const int64_t x_dims_i, int64_t &begin_i, i } return SUCCESS; } - void StridedSliceKernel::GetOutputDims(uint32_t dims_size, const std::vector &output_dims, vector &v_dims) { for (uint32_t k = 0; k < dims_size; k++) { - bool shrink_mask_i = (attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK) & (kMaskBitLeftUnit << k)); + bool shrink_mask_i = (static_cast(attr_value_map_.at(STRIDE_SLICE_ATTR_SHRINK_AXIS_MASK)) & (1 << k)); if (shrink_mask_i) { continue; } v_dims.push_back(output_dims[k]); } } - REGISTER_KERNEL(STRIDEDSLICE, StridedSliceKernel); } // namespace ge diff --git a/ge/host_kernels/strided_slice_kernel.h b/ge/host_kernels/strided_slice_kernel.h index 315391fd..5d130cd7 100755 --- a/ge/host_kernels/strided_slice_kernel.h +++ b/ge/host_kernels/strided_slice_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,13 +28,13 @@ class StridedSliceKernel : public Kernel { private: Status CheckAndGetAttr(const OpDescPtr &attr); - static Status CheckInputParam(const std::vector &input) ; + Status CheckInputParam(const std::vector &input) const; Status InitParamWithAttrs(const std::vector &input, std::vector &input_dims, std::vector &begin_vec, std::vector &output_dims, std::vector &stride_vec); Status MaskCal(const size_t i, int64_t &begin_i, int64_t &end_i, int64_t &dim_i) const; - static Status StrideCal(const int64_t x_dims_i, int64_t &begin_i, int64_t &end_i, int64_t &stride_i, - int64_t &dim_final) ; + Status StrideCal(const int64_t x_dims_i, int64_t &begin_i, int64_t &end_i, int64_t &stride_i, + int64_t &dim_final) const; void ExpandDimsWithNewAxis(const ConstGeTensorPtr &begin_tensor, const size_t x_dims_num, vector &x_dims); void GetOutputDims(uint32_t dims_size, const std::vector &output_dims, vector &v_dims); diff --git a/ge/host_kernels/sub_kernel.cc b/ge/host_kernels/sub_kernel.cc index deb36cb3..70a14c9f 100644 --- a/ge/host_kernels/sub_kernel.cc +++ b/ge/host_kernels/sub_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/sub_kernel.h b/ge/host_kernels/sub_kernel.h index 32ab7084..4143980c 100755 --- a/ge/host_kernels/sub_kernel.h +++ b/ge/host_kernels/sub_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,6 +27,7 @@ class SubKernel : public Kernel { public: Status Compute(const ge::OpDescPtr attr, const std::vector &input, vector &v_output) override; + private: std::vector y_data_int8_t_; std::vector y_data_int16_t_; diff --git a/ge/host_kernels/transdata_kernel.cc b/ge/host_kernels/transdata_kernel.cc index 2b16b075..c5c9da6e 100644 --- a/ge/host_kernels/transdata_kernel.cc +++ b/ge/host_kernels/transdata_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,6 @@ #include "graph/utils/type_utils.h" #include "inc/kernel_factory.h" - namespace ge { namespace { const size_t kTransdataInputSize = 1; @@ -83,15 +82,15 @@ Status TransdataKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorGetFormat(); const auto &data_type = op_desc->GetDataType(); GELOGD( - "current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. " - "output format %s, shape %s, data type %s", - op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), - formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), - formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), - TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), - TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); + "current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. 
" + "output format %s, shape %s, data type %s", + op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), + formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), + TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), + formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), + TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), + TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); const uint8_t *src_data = const_weight_ptr->GetData().data(); const formats::TransArgs trans_args{src_data, src_format, data_format, src_shape, data_shape, src_data_type}; diff --git a/ge/host_kernels/transdata_kernel.h b/ge/host_kernels/transdata_kernel.h index 1d212cf5..e4cf9b39 100755 --- a/ge/host_kernels/transdata_kernel.h +++ b/ge/host_kernels/transdata_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/transpose_kernel.cc b/ge/host_kernels/transpose_kernel.cc index 03d112aa..3f55539e 100755 --- a/ge/host_kernels/transpose_kernel.cc +++ b/ge/host_kernels/transpose_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -82,15 +82,15 @@ Status TransposeKernel::Compute(const OpDescPtr op_desc_ptr, const std::vectorGetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), - formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), - TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), - formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), - TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), - TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), - TypeUtils::DataTypeToSerialString(data_type).c_str()); + "current node %s, format %s, input shape %s, data type %s, weight format %s, shape %s, data type %s. " + "output format %s, shape %s, data type %s", + op_desc_ptr->GetName().c_str(), TypeUtils::FormatToSerialString(src_format).c_str(), + formats::ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(src_data_type).c_str(), + TypeUtils::FormatToSerialString(const_weight_ptr->GetTensorDesc().GetFormat()).c_str(), + formats::ShapeToString(const_weight_ptr->GetTensorDesc().GetShape()).c_str(), + TypeUtils::DataTypeToSerialString(const_weight_ptr->GetTensorDesc().GetDataType()).c_str(), + TypeUtils::FormatToSerialString(data_format).c_str(), formats::ShapeToString(data_shape).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str()); ConstGeTensorPtr tensor_perm_ptr = input[kTransposeInputPerm]; DataType data_dtype = tensor_perm_ptr->GetTensorDesc().GetDataType(); diff --git a/ge/host_kernels/transpose_kernel.h b/ge/host_kernels/transpose_kernel.h index 9e7c54d7..bb073c15 100755 --- a/ge/host_kernels/transpose_kernel.h +++ b/ge/host_kernels/transpose_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this 
file except in compliance with the License. diff --git a/ge/host_kernels/unpack_kernel.cc b/ge/host_kernels/unpack_kernel.cc index 1c28151f..fbfd9e16 100755 --- a/ge/host_kernels/unpack_kernel.cc +++ b/ge/host_kernels/unpack_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -90,4 +90,3 @@ Status UnpackKernel::Compute(const OpDescPtr attr, const std::vector &input, + virtual Status Compute(const ge::OpDescPtr attr, const std::vector &input, std::vector &v_output) override; }; } // namespace ge #endif // GE_GRAPH_PASSES_FOLDING_KERNEL_UNPACK_KERNEL_H_ - diff --git a/ge/host_kernels/unsqueeze_kernel.cc b/ge/host_kernels/unsqueeze_kernel.cc index 4ceaba3f..d66a3e2c 100644 --- a/ge/host_kernels/unsqueeze_kernel.cc +++ b/ge/host_kernels/unsqueeze_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/host_kernels/unsqueeze_kernel.h b/ge/host_kernels/unsqueeze_kernel.h index 510a1ffa..c676586f 100644 --- a/ge/host_kernels/unsqueeze_kernel.h +++ b/ge/host_kernels/unsqueeze_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index bf5af73b..cbb556e2 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -70,8 +70,8 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { buffer = malloc(allocate_size); } else { buffer = MemManager::Instance() - .CachingInstance(RT_MEMORY_HBM) - .Malloc(allocate_size, reinterpret_cast(try_reuse_addr), device_id_); + .CachingInstance(RT_MEMORY_HBM) + .Malloc(allocate_size, reinterpret_cast(try_reuse_addr), device_id_); } if (buffer == nullptr) { GELOGE(MEMALLOC_FAILED, "Failed to malloc memory, device_id = %u, size = %zu", device_id_, allocate_size); @@ -117,4 +117,4 @@ void NpuMemoryAllocator::DestroyAllocator() { allocators_.erase(device_id); } } // namespace hybrid -} // namespace ge +} // namespace ge \ No newline at end of file diff --git a/ge/hybrid/common/npu_memory_allocator.h b/ge/hybrid/common/npu_memory_allocator.h index 55cb13ad..99c01b34 100644 --- a/ge/hybrid/common/npu_memory_allocator.h +++ b/ge/hybrid/common/npu_memory_allocator.h @@ -50,7 +50,7 @@ class NpuMemoryAllocator { static NpuMemoryAllocator *GetAllocator(uint32_t device_id); static NpuMemoryAllocator *GetAllocator(); static void DestroyAllocator(); - static AllocationAttr* AttrWithDefaultPadding() { + static AllocationAttr *AttrWithDefaultPadding() { static AllocationAttr attr(kDefaultPadding, nullptr); return &attr; } @@ -59,6 +59,7 @@ class NpuMemoryAllocator { void Deallocate(void *data, MemStorageType mem_type = HBM); static constexpr int kDefaultPadding = 32; + private: explicit NpuMemoryAllocator(uint32_t device_id); uint32_t device_id_; @@ -68,4 +69,4 @@ class NpuMemoryAllocator { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_COMMON_MEMORY_ALLOCATOR_H_ +#endif // GE_HYBRID_COMMON_MEMORY_ALLOCATOR_H_ diff --git a/ge/hybrid/common/tensor_value.cc b/ge/hybrid/common/tensor_value.cc index 
4f1935b8..11a96d13 100644 --- a/ge/hybrid/common/tensor_value.cc +++ b/ge/hybrid/common/tensor_value.cc @@ -61,11 +61,9 @@ TensorBuffer::~TensorBuffer() { } } -TensorValue::TensorValue(std::shared_ptr buffer) : buffer_(std::move(buffer)) { -} +TensorValue::TensorValue(std::shared_ptr buffer) : buffer_(std::move(buffer)) {} -TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_size_(size) { -} +TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_size_(size) {} TensorValue::~TensorValue() { Destroy(); } diff --git a/ge/hybrid/common/tensor_value.h b/ge/hybrid/common/tensor_value.h index 9f68cf2c..d720e0e0 100644 --- a/ge/hybrid/common/tensor_value.h +++ b/ge/hybrid/common/tensor_value.h @@ -29,23 +29,18 @@ class AllocationAttr; class TensorBuffer { public: - static std::unique_ptr Create(NpuMemoryAllocator *allocator, - size_t size, + static std::unique_ptr Create(NpuMemoryAllocator *allocator, size_t size, AllocationAttr *attr = nullptr); static std::unique_ptr Create(void *buffer, size_t size); TensorBuffer(const TensorBuffer &) = delete; - TensorBuffer &operator = (const TensorBuffer &) = delete; + TensorBuffer &operator=(const TensorBuffer &) = delete; ~TensorBuffer(); - void *GetData() { - return buffer_; - } + void *GetData() { return buffer_; } - size_t GetSize() const { - return size_; - } + size_t GetSize() const { return size_; } private: TensorBuffer(NpuMemoryAllocator *allocator, void *buffer, size_t size, MemStorageType mem_type = HBM); @@ -68,17 +63,13 @@ class TensorValue { void Destroy(); - bool IsEmpty() { - return ref_buffer_ == nullptr && buffer_ == nullptr; - } + bool IsEmpty() { return ref_buffer_ == nullptr && buffer_ == nullptr; } const void *GetData() const; std::string DebugString() const; - void SetName(const std::string &name) { - name_ = name; - } + void SetName(const std::string &name) { name_ = name; } void *MutableData(); diff --git a/ge/hybrid/executor/hybrid_execution_context.h 
b/ge/hybrid/executor/hybrid_execution_context.h index 05ed1157..37822039 100644 --- a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -53,31 +53,32 @@ struct GraphExecutionContext { mutable std::mutex mu; }; -#define RECORD_PROFILING_EVENT(context, evt_type, fmt, category, node_name, ...) \ -do { \ - if ((context != nullptr) && (context)->profiler != nullptr) { \ - if (node_name != nullptr) { \ - context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GetTid(), node_name, category, ##__VA_ARGS__);\ - } else { \ - context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GetTid(), category, ##__VA_ARGS__); \ - }\ - } \ -} while (0) +#define RECORD_PROFILING_EVENT(context, evt_type, fmt, category, node_name, ...) \ + do { \ + if ((context != nullptr) && (context)->profiler != nullptr) { \ + if (node_name != nullptr) { \ + context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GetTid(), node_name, category, \ + ##__VA_ARGS__); \ + } else { \ + context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GetTid(), category, ##__VA_ARGS__); \ + } \ + } \ + } while (0) #define RECORD_MODEL_EXECUTION_EVENT(context, fmt, ...) \ RECORD_PROFILING_EVENT((context), HybridProfiler::GENERAL, fmt, "ModelExecutor", nullptr, ##__VA_ARGS__) #define RECORD_SHAPE_INFERENCE_EVENT(context, name, fmt, ...) \ - RECORD_PROFILING_EVENT((context), HybridProfiler::SHAPE_INFERENCE, fmt, "ShapeInference", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::SHAPE_INFERENCE, fmt, "ShapeInference", name, ##__VA_ARGS__) #define RECORD_COMPILE_EVENT(context, name, fmt, ...) \ - RECORD_PROFILING_EVENT((context), HybridProfiler::COMPILE, fmt, "Compilation", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::COMPILE, fmt, "Compilation", name, ##__VA_ARGS__) #define RECORD_EXECUTION_EVENT(context, name, fmt, ...) 
\ - RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name, ##__VA_ARGS__) #define RECORD_CALLBACK_EVENT(context, name, fmt, ...) \ - RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACK, fmt, "Callback", name, ##__VA_ARGS__) + RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACK, fmt, "Callback", name, ##__VA_ARGS__) } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_ +#endif // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_ diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index d4652a91..7f650017 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -18,7 +18,6 @@ #include "graph/load/new_model_manager/model_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" -#include "graph/ge_context.h" #include "omm/csa_interact.h" namespace ge { @@ -26,9 +25,7 @@ namespace hybrid { namespace { int kDataOutputIndex = 0; } -HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) - : model_(model), run_flag_(false) { -} +HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model) : model_(model), run_flag_(false) {} HybridModelAsyncExecutor::~HybridModelAsyncExecutor() { if (stream_ != nullptr) { @@ -36,13 +33,9 @@ HybridModelAsyncExecutor::~HybridModelAsyncExecutor() { } } -void HybridModelAsyncExecutor::SetDeviceId(uint32_t device_id) { - device_id_ = device_id; -} +void HybridModelAsyncExecutor::SetDeviceId(uint32_t device_id) { device_id_ = device_id; } -void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) { - model_id_ = model_id; -} +void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) { model_id_ = model_id; } Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr &data) { 
GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL, @@ -58,10 +51,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr &lis run_flag_ = true; listener_ = listener; - future_ = std::async([&]() -> Status { - GetContext().SetSessionId(executor_->GetContext()->session_id); - return RunInternal(); - }); + future_ = std::async([&]() -> Status { return RunInternal(); }); GE_CHK_BOOL_RET_STATUS(future_.valid(), INTERNAL_ERROR, "Failed to start."); GELOGD("HybridModelExecutor::Start successfully"); @@ -83,11 +73,11 @@ Status HybridModelAsyncExecutor::Stop() { } Status HybridModelAsyncExecutor::Init() { - data_inputer_ = std::unique_ptr(new(std::nothrow) DataInputer()); + data_inputer_ = std::unique_ptr(new (std::nothrow) DataInputer()); GE_CHECK_NOTNULL(data_inputer_); GE_CHK_RT_RET(rtStreamCreate(&stream_, RT_STREAM_PRIORITY_DEFAULT)); - executor_ = std::unique_ptr(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_)); + executor_ = std::unique_ptr(new (std::nothrow) HybridModelExecutor(model_, device_id_, stream_)); GE_CHECK_NOTNULL(executor_); GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine"); GE_CHK_STATUS_RET(InitInputTensors(), "Failed to init input tensors"); @@ -131,9 +121,9 @@ Status HybridModelAsyncExecutor::RunInternal() { RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] Start", iterator_count_); ret = PreRun(current_data); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); - CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); - continue, "PreRun failed."); // [No need to check value] + ret != SUCCESS, (void)HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); + CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); + continue, "PreRun failed."); // [No need to check 
value] ret = executor_->Execute(args); ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); @@ -152,9 +142,7 @@ Status HybridModelAsyncExecutor::RunInternal() { return SUCCESS; } -Status HybridModelAsyncExecutor::HandleResult(Status exec_ret, - uint32_t data_id, - HybridModelExecutor::ExecuteArgs &args, +Status HybridModelAsyncExecutor::HandleResult(Status exec_ret, uint32_t data_id, HybridModelExecutor::ExecuteArgs &args, OutputData *output_data) { GELOGD("Start to handle result. model id = %u, data index = %u, execution ret = %u", model_id_, data_id, exec_ret); std::vector output_tensor_info_list; @@ -186,11 +174,8 @@ Status HybridModelAsyncExecutor::SyncVarData() { if (global_step_var != nullptr) { std::vector v_step; v_step.push_back(iterator_count_); - GE_CHK_RT_RET(rtMemcpy(global_step_var->MutableData(), - global_step_var->GetSize(), - v_step.data(), - v_step.size() * sizeof(uint64_t), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(global_step_var->MutableData(), global_step_var->GetSize(), v_step.data(), + v_step.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE)); } else { GELOGD("No GLOBAL_STEP variable was found."); } @@ -206,26 +191,21 @@ Status HybridModelAsyncExecutor::CopyInputData(const InputData ¤t_data) { auto data_size = input_tensor.GetSize(); GELOGD("To copy input data for input[%u]", input_index); if (input_index >= blobs.size()) { - GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", - blobs.size(), model_->input_nodes_.size(), input_index, data_size); + GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), + model_->input_nodes_.size(), input_index, data_size); return FAILED; } const DataBuffer &data_buf = blobs[input_index]; auto mem_size = static_cast(data_size); - GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length, - PARAM_INVALID, - "input data size(%u) does not match model required size(%u), ret failed.", - data_buf.length, + 
GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length, PARAM_INVALID, + "input data size(%u) does not match model required size(%u), ret failed.", data_buf.length, mem_size); GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%u]", model_->root_runtime_param_.graph_id, input_index, input_tensor.GetData(), mem_size, data_buf.length); - GE_CHK_RT_RET(rtMemcpy(input_tensor.MutableData(), - mem_size, - data_buf.data, - data_buf.length, - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET( + rtMemcpy(input_tensor.MutableData(), mem_size, data_buf.data, data_buf.length, RT_MEMCPY_HOST_TO_DEVICE)); } return SUCCESS; @@ -240,8 +220,7 @@ Status HybridModelAsyncExecutor::InitInputTensors() { auto output_desc = input_node->op_desc->GetOutputDescPtr(kDataOutputIndex); GE_CHECK_NOTNULL(output_desc); int64_t tensor_size = 0; - GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size), - "Failed to get size from %s", + GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size), "Failed to get size from %s", input_node->NodeName().c_str()); if (tensor_size == 0) { GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str()); @@ -264,24 +243,20 @@ Status HybridModelAsyncExecutor::OnComputeDone(uint32_t data_index, uint32_t res std::vector &outputs) { GELOGD("OnComputeDone. 
model id = %u, data index = %u, execution ret = %u", model_id_, data_index, result_code); if (listener_ != nullptr) { - GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_index, result_code, outputs), - "OnComputeDone failed"); + GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_index, result_code, outputs), "OnComputeDone failed"); } return result_code; } -Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &args, - OutputData *output_data, +Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, std::vector &outputs) { // copy output data from op to designated position std::vector &output_tensor_desc_list = args.output_desc; std::vector &output_tensors = args.outputs; if (output_tensor_desc_list.size() != output_tensors.size()) { - GELOGE(INTERNAL_ERROR, - "Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu", - output_tensor_desc_list.size(), - output_tensors.size()); + GELOGE(INTERNAL_ERROR, "Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu", + output_tensor_desc_list.size(), output_tensors.size()); return INTERNAL_ERROR; } @@ -292,29 +267,23 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a auto &tensor_desc = output_tensor_desc_list.at(i); GE_CHECK_NOTNULL(tensor_desc); int64_t output_size = -1; - GE_CHK_GRAPH_STATUS_RET(TensorUtils::CalcTensorMemSize(tensor_desc->GetShape(), - tensor_desc->GetFormat(), - tensor_desc->GetDataType(), - output_size), - "Failed to calc tensor size for output[%zu]. shape = [%s], type = %s, format = %s", - i, + GE_CHK_GRAPH_STATUS_RET(TensorUtils::CalcTensorMemSize(tensor_desc->GetShape(), tensor_desc->GetFormat(), + tensor_desc->GetDataType(), output_size), + "Failed to calc tensor size for output[%zu]. 
shape = [%s], type = %s, format = %s", i, tensor_desc->GetShape().ToString().c_str(), TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(), TypeUtils::FormatToSerialString(tensor_desc->GetFormat()).c_str()); - GELOGD("Got tensor size for output[%zu] successfully. shape = [%s], type = %s, format = %s, size = %ld", - i, + GELOGD("Got tensor size for output[%zu] successfully. shape = [%s], type = %s, format = %s, size = %ld", i, tensor_desc->GetShape().ToString().c_str(), TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(), - TypeUtils::FormatToSerialString(tensor_desc->GetFormat()).c_str(), - output_size); + TypeUtils::FormatToSerialString(tensor_desc->GetFormat()).c_str(), output_size); GE_CHECK_GE(output_size, 0); GE_CHECK_LE(output_size, UINT32_MAX); if (output_tensor.GetSize() < static_cast(output_size)) { - GELOGE(INTERNAL_ERROR, - "output[%zu] tensor size(%zu) is not enough for output shape [%s]", - i, output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str()); + GELOGE(INTERNAL_ERROR, "output[%zu] tensor size(%zu) is not enough for output shape [%s]", i, + output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str()); return INTERNAL_ERROR; } @@ -323,13 +292,10 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a output.dims = tensor_desc->GetShape().GetDims(); output.length = output_size; if (output_size > 0) { - std::unique_ptr data_buf(new(std::nothrow) uint8_t[output_size]); + std::unique_ptr data_buf(new (std::nothrow) uint8_t[output_size]); GE_CHECK_NOTNULL(data_buf); - GE_CHK_RT_RET(rtMemcpy(data_buf.get(), - output_size, - output_tensor.GetData(), - output_size, - RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET( + rtMemcpy(data_buf.get(), output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST)); output.data = std::move(data_buf); output_data->blobs.emplace_back(data_buf.get(), static_cast(output_size), false); } else { @@ -339,11 +305,9 @@ Status 
HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a } outputs.emplace_back(std::move(output)); - GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", - i, + GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", i, TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(), - tensor_desc->GetShape().ToString().c_str(), - output_size); + tensor_desc->GetShape().ToString().c_str(), output_size); } return SUCCESS; @@ -387,9 +351,7 @@ Status HybridModelAsyncExecutor::Execute(const vector &inputs, vector< } ge_tensor.MutableTensorDesc() = *args.output_desc[out_index]; - GELOGD("Set output[%d], tensor size = %ld, shape = [%s]", - out_index, - out_tensor_info.length, + GELOGD("Set output[%d], tensor size = %ld, shape = [%s]", out_index, out_tensor_info.length, ge_tensor.MutableTensorDesc().MutableShape().ToString().c_str()); ++out_index; } diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 8de2beb6..195f79a9 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -54,13 +54,10 @@ class HybridModelAsyncExecutor { Status SyncVarData(); - Status HandleResult(Status exec_ret, - uint32_t data_id, - HybridModelExecutor::ExecuteArgs &args, + Status HandleResult(Status exec_ret, uint32_t data_id, HybridModelExecutor::ExecuteArgs &args, OutputData *output_data); - Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, - OutputData *output_data, + Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, std::vector &outputs); Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector &outputs); @@ -85,4 +82,4 @@ class HybridModelAsyncExecutor { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_MODEL_HYBRID_MODEL_ASYNC_EXECUTOR_H_ +#endif // GE_HYBRID_EXECUTOR_MODEL_HYBRID_MODEL_ASYNC_EXECUTOR_H_ diff --git 
a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 4af34451..718801b4 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -23,14 +23,13 @@ namespace hybrid { namespace { const int kIntBase = 10; const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; -} // namespace +} // namespace HybridModelExecutor::HybridModelExecutor(HybridModel *model, uint32_t device_id, rtStream_t stream) - : model_(model), device_id_(device_id), stream_(stream) { -} + : model_(model), device_id_(device_id), stream_(stream) {} HybridModelExecutor::~HybridModelExecutor() { if (context_.rt_gen_context != nullptr) { - (void) rtCtxDestroy(context_.rt_gen_context); + (void)rtCtxDestroy(context_.rt_gen_context); } } @@ -62,8 +61,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { return SUCCESS; } -Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, - HybridModelExecutor::ExecuteArgs &args) { +Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, HybridModelExecutor::ExecuteArgs &args) { RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start"); GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); @@ -98,15 +96,15 @@ Status HybridModelExecutor::InitExecutionContext() { GELOGD("session id from model = %lu, from context = %lu", model_->GetSessionId(), context_.session_id); context_.allocator = NpuMemoryAllocator::GetAllocator(device_id_); GE_CHECK_NOTNULL(context_.allocator); - context_.callback_manager = std::unique_ptr(new(std::nothrow)CallbackManager(stream_)); + context_.callback_manager = std::unique_ptr(new (std::nothrow) CallbackManager(stream_)); GE_CHECK_NOTNULL(context_.callback_manager); context_.dump_properties = PropertiesManager::Instance().GetDumpProperties(context_.session_id); const char *profiling_level = 
std::getenv(kEnvProfilingLevel); if (profiling_level != nullptr) { context_.profiling_level = std::strtol(profiling_level, nullptr, kIntBase); - GELOGD("Got profiling level = %ld", context_.profiling_level); + GELOGD("Got profiling level = %d", context_.profiling_level); if (context_.profiling_level > 0) { - context_.profiler.reset(new(std::nothrow)HybridProfiler()); + context_.profiler.reset(new (std::nothrow) HybridProfiler()); GE_CHECK_NOTNULL(context_.profiler); } } diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index 04aef6a5..2d1320a2 100644 --- a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -39,9 +39,7 @@ class HybridModelExecutor { Status Init(); - const GraphExecutionContext* GetContext() const { - return &context_; - } + const GraphExecutionContext *GetContext() const { return &context_; } Status Execute(ExecuteArgs &args); @@ -58,4 +56,4 @@ class HybridModelExecutor { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_ +#endif // GE_HYBRID_EXECUTOR_HYBRID_MODEL_EXECUTOR_H_ diff --git a/ge/hybrid/executor/hybrid_profiler.cc b/ge/hybrid/executor/hybrid_profiler.cc index 7228197f..0150934e 100644 --- a/ge/hybrid/executor/hybrid_profiler.cc +++ b/ge/hybrid/executor/hybrid_profiler.cc @@ -28,11 +28,9 @@ const int kMaxEvents = 10000; const int kEventDescMax = 256; const int kMaxEventTypes = 8; const int kIndent = 8; -} +} // namespace -HybridProfiler::HybridProfiler(): counter_(0) { - Reset(); -} +HybridProfiler::HybridProfiler() : counter_(0) { Reset(); } void HybridProfiler::RecordEvent(EventType event_type, const char *fmt, ...) 
{ va_list args; @@ -76,8 +74,8 @@ void HybridProfiler::Dump(std::ostream &output_stream) { auto end_dump = std::chrono::system_clock::now(); auto elapsed_dump = std::chrono::duration_cast(end_dump - start).count(); auto cost_dump = std::chrono::duration_cast(end_dump - start_dump).count(); - output_stream << std::setw(kIndent) << elapsed_dump << "\t\t" << cost_dump - << "\t\t" << "[Dump profiling]" << std::endl; + output_stream << std::setw(kIndent) << elapsed_dump << "\t\t" << cost_dump << "\t\t" + << "[Dump profiling]" << std::endl; events_.clear(); } diff --git a/ge/hybrid/executor/hybrid_profiler.h b/ge/hybrid/executor/hybrid_profiler.h index 62ef9c73..6f6794f4 100644 --- a/ge/hybrid/executor/hybrid_profiler.h +++ b/ge/hybrid/executor/hybrid_profiler.h @@ -57,4 +57,4 @@ class HybridProfiler { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_HYBRID_PROFILER_H_ +#endif // GE_HYBRID_EXECUTOR_HYBRID_PROFILER_H_ diff --git a/ge/hybrid/executor/node_done_manager.cc b/ge/hybrid/executor/node_done_manager.cc index c0b0b17b..de4ea14e 100644 --- a/ge/hybrid/executor/node_done_manager.cc +++ b/ge/hybrid/executor/node_done_manager.cc @@ -25,8 +25,7 @@ constexpr int kDefaultWaitTimeoutInSec = 60 * 10; } bool NodeDoneManager::Cond::Await() { std::unique_lock lk(cond_mu_); - if (!cv_.wait_for(lk, - std::chrono::seconds(kDefaultWaitTimeoutInSec), + if (!cv_.wait_for(lk, std::chrono::seconds(kDefaultWaitTimeoutInSec), [&]() { return is_released_ || is_cancelled_; })) { GELOGE(INTERNAL_ERROR, "Wait timed out."); return false; diff --git a/ge/hybrid/executor/node_done_manager.h b/ge/hybrid/executor/node_done_manager.h index faf12b46..f1fdfbec 100644 --- a/ge/hybrid/executor/node_done_manager.h +++ b/ge/hybrid/executor/node_done_manager.h @@ -40,6 +40,7 @@ class NodeDoneManager { void Release(); void Cancel(); bool Await(); + private: std::mutex cond_mu_; std::condition_variable cv_; @@ -55,4 +56,4 @@ class NodeDoneManager { } // namespace hybrid } // 
namespace ge -#endif // GE_HYBRID_EXECUTOR_NODE_DONE_COND_MANAGER_H_ +#endif // GE_HYBRID_EXECUTOR_NODE_DONE_COND_MANAGER_H_ diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index 4f1f3fe8..e8e94c0d 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -27,31 +27,23 @@ namespace { // 5s * 120, wait for 10m constexpr auto kWaitInternal = 5; constexpr auto kMaxWaitTimes = 120; -} +} // namespace ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(node_item) { this->num_pending_shapes_ = node_item.num_inputs - node_item.num_static_input_shapes; - GELOGD("[%s] ShapeInferenceState created, pending shape count = %d", - node_item.NodeName().c_str(), + GELOGD("[%s] ShapeInferenceState created, pending shape count = %d", node_item.NodeName().c_str(), this->num_pending_shapes_); } -void ShapeInferenceState::UpdateInputShape(uint32_t idx, - const GeShape &ori_shape, - const GeShape &shape) { +void ShapeInferenceState::UpdateInputShape(uint32_t idx, const GeShape &ori_shape, const GeShape &shape) { if (!node_item.is_dynamic || node_item.is_input_shape_static[idx]) { GELOGD("[%s] Trying to update static shape, idx = %u. 
old shape = [%s], new shape = [%s]", - node_item.NodeName().c_str(), - idx, - node_item.op_desc->MutableInputDesc(idx)->GetShape().ToString().c_str(), + node_item.NodeName().c_str(), idx, node_item.op_desc->MutableInputDesc(idx)->GetShape().ToString().c_str(), shape.ToString().c_str()); return; } - GELOGD("[%s] Update input shape [%u] with Shape: [%s] and OriginalShape: [%s]", - node_item.NodeName().c_str(), - idx, - shape.ToString().c_str(), - ori_shape.ToString().c_str()); + GELOGD("[%s] Update input shape [%u] with Shape: [%s] and OriginalShape: [%s]", node_item.NodeName().c_str(), idx, + shape.ToString().c_str(), ori_shape.ToString().c_str()); std::lock_guard lk(mu_); node_item.op_desc->MutableInputDesc(idx)->SetShape(shape); @@ -109,17 +101,12 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex GeShape shape; GeShape ori_shape; RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); - GE_CHK_STATUS_RET(future.Get(ori_shape, shape), - "[%s] Get shape failed. index = %u", - node_item.NodeName().c_str(), + GE_CHK_STATUS_RET(future.Get(ori_shape, shape), "[%s] Get shape failed. 
index = %u", node_item.NodeName().c_str(), idx); RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); - GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", - node_item.NodeName().c_str(), - idx, - shape.ToString().c_str(), - ori_shape.ToString().c_str()); + GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", node_item.NodeName().c_str(), idx, + shape.ToString().c_str(), ori_shape.ToString().c_str()); node_item.op_desc->MutableInputDesc(idx)->SetShape(std::move(shape)); node_item.op_desc->MutableInputDesc(idx)->SetOriginShape(ori_shape); } @@ -127,11 +114,8 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex return SUCCESS; } -ShapeFuture::ShapeFuture(NodePtr src_node, - uint32_t src_index, - SubgraphContext *subgraph_context) - : src_node_(std::move(src_node)), src_index_(src_index), subgraph_context_(subgraph_context) { -} +ShapeFuture::ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context) + : src_node_(std::move(src_node)), src_index_(src_index), subgraph_context_(subgraph_context) {} NodeState::NodeState(const NodeItem &node_item, SubgraphContext *subgraph_context) : node_item_(&node_item), shape_inference_state_(node_item), subgraph_context_(subgraph_context) { @@ -140,21 +124,16 @@ NodeState::NodeState(const NodeItem &node_item, SubgraphContext *subgraph_contex Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const { for (auto &src_node : node_item_->dependents_for_execution) { - GELOGI("[%s] Start to wait for data dependent node: [%s]", - node_item_->NodeName().c_str(), + GELOGI("[%s] Start to wait for data dependent node: [%s]", node_item_->NodeName().c_str(), src_node->GetName().c_str()); - RECORD_EXECUTION_EVENT(&context, - node_item_->NodeName().c_str(), - "[AwaitNodeDone] [%s] Start", + RECORD_EXECUTION_EVENT(&context, node_item_->NodeName().c_str(), "[AwaitNodeDone] [%s] 
Start", src_node->GetName().c_str()); if (!subgraph_context_->Await(src_node)) { GELOGE(INTERNAL_ERROR, "[%s] Await node [%s] failed.", GetName().c_str(), src_node->GetName().c_str()); return INTERNAL_ERROR; } - RECORD_EXECUTION_EVENT(&context, - node_item_->NodeName().c_str(), - "[AwaitNodeDone] [%s] End", + RECORD_EXECUTION_EVENT(&context, node_item_->NodeName().c_str(), "[AwaitNodeDone] [%s] End", src_node->GetName().c_str()); GELOGI("[%s] Done waiting node.", src_node->GetName().c_str()); } @@ -165,8 +144,7 @@ Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const { Status NodeState::WaitForPrepareDone() { if (prepare_future_.valid()) { GELOGD("[%s] Start to wait for prepare future.", GetName().c_str()); - GE_CHK_STATUS_RET(prepare_future_.get(), - "[%s] PreRun failed.", GetName().c_str()); + GE_CHK_STATUS_RET(prepare_future_.get(), "[%s] PreRun failed.", GetName().c_str()); } return SUCCESS; diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 6ca714bb..73e0f75c 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -66,39 +66,23 @@ struct NodeState { NodeState(const NodeItem &node_item, SubgraphContext *subgraph_context); ~NodeState() = default; - OpDesc *GetOpDesc() const { - return op_desc_.get(); - } + OpDesc *GetOpDesc() const { return op_desc_.get(); } - inline const NodeItem *GetNodeItem() const { - return node_item_; - } + inline const NodeItem *GetNodeItem() const { return node_item_; } - inline const string &GetName() const { - return node_item_->NodeName(); - } + inline const string &GetName() const { return node_item_->NodeName(); } - inline const string &GetType() const { - return node_item_->NodeType(); - } + inline const string &GetType() const { return node_item_->NodeType(); } - ShapeInferenceState &GetShapeInferenceState() { - return shape_inference_state_; - } + ShapeInferenceState &GetShapeInferenceState() { return shape_inference_state_; } - const shared_ptr 
&GetKernelTask() const { - return kernel_task_; - } + const shared_ptr &GetKernelTask() const { return kernel_task_; } - void SetKernelTask(const shared_ptr &kernel_task) { - kernel_task_ = kernel_task; - } + void SetKernelTask(const shared_ptr &kernel_task) { kernel_task_ = kernel_task; } Status WaitForPrepareDone(); - void SetPrepareFuture(std::future &&prepare_future) { - this->prepare_future_ = std::move(prepare_future); - } + void SetPrepareFuture(std::future &&prepare_future) { this->prepare_future_ = std::move(prepare_future); } Status AwaitInputTensors(GraphExecutionContext &context) const; @@ -116,4 +100,4 @@ using NodeStatePtr = std::shared_ptr; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_NODE_STATE_H_ +#endif // GE_HYBRID_EXECUTOR_NODE_STATE_H_ diff --git a/ge/hybrid/executor/rt_callback_manager.cc b/ge/hybrid/executor/rt_callback_manager.cc index 63eb46d5..c1c98f73 100644 --- a/ge/hybrid/executor/rt_callback_manager.cc +++ b/ge/hybrid/executor/rt_callback_manager.cc @@ -21,24 +21,16 @@ namespace ge { namespace hybrid { -CallbackManager::CallbackManager(rtStream_t stream) : stream_(stream) { -} +CallbackManager::CallbackManager(rtStream_t stream) : stream_(stream) {} Status CallbackManager::RegisterCallback(rtCallback_t callback, void *user_data) { GELOGD("To register callback"); rtEvent_t event = nullptr; GE_CHK_RT_RET(rtEventCreate(&event)); - auto rt_ret = rtEventRecord(event, stream_); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Failed to invoke rtEventRecord, error code = %d", rt_ret); - (void) rtEventDestroy(event); - return RT_FAILED; - } - + GE_CHK_RT_RET(rtEventRecord(event, stream_)); auto cb = std::pair(callback, user_data); auto entry = std::pair>(event, std::move(cb)); if (!callback_queue_.Push(entry)) { - (void) rtEventDestroy(event); return INTERNAL_ERROR; } @@ -49,9 +41,7 @@ Status CallbackManager::RegisterCallback(rtCallback_t callback, void *user_data) Status CallbackManager::Init() { rtContext_t ctx 
= nullptr; GE_CHK_RT_RET(rtCtxGetCurrent(&ctx)); - ret_future_ = std::async([&](rtContext_t context) ->Status { - return CallbackProcess(context); - }, ctx); + ret_future_ = std::async([&](rtContext_t context) -> Status { return CallbackProcess(context); }, ctx); if (!ret_future_.valid()) { GELOGE(INTERNAL_ERROR, "Failed to init callback manager."); return INTERNAL_ERROR; @@ -113,7 +103,7 @@ void CallbackManager::RtCallbackFunc(void *data) { } Status CallbackManager::RegisterCallback(const std::function &callback) { - auto func = std::unique_ptr>(new(std::nothrow) std::function(callback)); + auto func = std::unique_ptr>(new (std::nothrow) std::function(callback)); GE_CHECK_NOTNULL(func); GELOGD("Callback registered"); return RegisterCallback(RtCallbackFunc, func.release()); diff --git a/ge/hybrid/executor/rt_callback_manager.h b/ge/hybrid/executor/rt_callback_manager.h index 1d1fa1cc..f102d660 100644 --- a/ge/hybrid/executor/rt_callback_manager.h +++ b/ge/hybrid/executor/rt_callback_manager.h @@ -52,4 +52,4 @@ class CallbackManager { } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_RT_CALLBACK_MANAGER_H_ +#endif // GE_HYBRID_EXECUTOR_RT_CALLBACK_MANAGER_H_ diff --git a/ge/hybrid/executor/subgraph_context.cc b/ge/hybrid/executor/subgraph_context.cc index 923c2aa3..5d94efa2 100644 --- a/ge/hybrid/executor/subgraph_context.cc +++ b/ge/hybrid/executor/subgraph_context.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,16 +20,12 @@ namespace ge { namespace hybrid { -SubgraphContext::SubgraphContext(const GraphItem *graph_item) : graph_item_(graph_item) { - -} +SubgraphContext::SubgraphContext(const GraphItem *graph_item) : graph_item_(graph_item) {} Status SubgraphContext::Init() { GE_CHECK_NOTNULL(graph_item_); - GELOGD("[%s] Start to init subgraph context. total inputs = %d, total outputs = %d", - graph_item_->GetName().c_str(), - graph_item_->TotalInputs(), - graph_item_->TotalOutputs()); + GELOGD("[%s] Start to init subgraph context. total inputs = %d, total outputs = %d", graph_item_->GetName().c_str(), + graph_item_->TotalInputs(), graph_item_->TotalOutputs()); all_inputs_.resize(static_cast(graph_item_->TotalInputs())); all_outputs_.resize(static_cast(graph_item_->TotalOutputs())); @@ -40,7 +36,7 @@ NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) { std::lock_guard lk(mu_); auto &node_state = node_states_[node_item]; if (node_state == nullptr) { - node_state.reset(new(std::nothrow)NodeState(*node_item, this)); + node_state.reset(new (std::nothrow) NodeState(*node_item, this)); } return node_state; @@ -48,9 +44,7 @@ NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) { Status SubgraphContext::SetInput(int index, const TensorValue &tensor) { if (static_cast(index) >= all_inputs_.size()) { - GELOGE(INTERNAL_ERROR, - "output index output range. all input num = %zu, input index = %d", - all_inputs_.size(), + GELOGE(INTERNAL_ERROR, "output index output range. 
all input num = %zu, input index = %d", all_inputs_.size(), index); return INTERNAL_ERROR; } @@ -66,11 +60,8 @@ Status SubgraphContext::SetInput(const NodeItem &node_item, int input_index, con Status SubgraphContext::SetOutput(const NodeItem &node_item, int output_index, const TensorValue &tensor) { auto index = node_item.output_start + output_index; if ((output_index >= node_item.num_outputs) || (static_cast(index) >= all_outputs_.size())) { - GELOGE(INTERNAL_ERROR, - "output index output range. all output num = %zu, node_item = %s, output index = %d", - all_outputs_.size(), - node_item.DebugString().c_str(), - output_index); + GELOGE(INTERNAL_ERROR, "output index output range. all output num = %zu, node_item = %s, output index = %d", + all_outputs_.size(), node_item.DebugString().c_str(), output_index); return INTERNAL_ERROR; } @@ -93,10 +84,8 @@ Status SubgraphContext::GetOutputs(std::vector &outputs) { for (int i = 0; i < output_node->num_inputs; ++i) { TensorValue tensor; GE_CHK_STATUS_RET_NOLOG(GetInput(output_node->input_start + i, tensor)); - GELOGD("[%s] Adding output tensor by input index [%d], tensor = %s", - graph_item_->GetName().c_str(), - output_node->input_start + i, - tensor.DebugString().c_str()); + GELOGD("[%s] Adding output tensor by input index [%d], tensor = %s", graph_item_->GetName().c_str(), + output_node->input_start + i, tensor.DebugString().c_str()); outputs.emplace_back(std::move(tensor)); } } @@ -111,17 +100,13 @@ Status SubgraphContext::GetOutputs(std::vector &outputs) { return SUCCESS; } -bool SubgraphContext::Await(const NodePtr &node) { - return node_done_manager_.Await(node); -} +bool SubgraphContext::Await(const NodePtr &node) { return node_done_manager_.Await(node); } void SubgraphContext::OnError(Status error) { GELOGE(error, "[%s] Error occurred while executing graph.", graph_item_->GetName().c_str()); node_done_manager_.Destroy(); } -void SubgraphContext::NodeDone(const NodePtr &node) { - node_done_manager_.NodeDone(node); -} 
+void SubgraphContext::NodeDone(const NodePtr &node) { node_done_manager_.NodeDone(node); } } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/subgraph_context.h b/ge/hybrid/executor/subgraph_context.h index b86765f7..fd934d80 100644 --- a/ge/hybrid/executor/subgraph_context.h +++ b/ge/hybrid/executor/subgraph_context.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,4 +58,4 @@ class SubgraphContext { } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_ITERATION_CONTEXT_H_ +#endif // GE_HYBRID_EXECUTOR_ITERATION_CONTEXT_H_ diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 573e405e..c76bb209 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -15,7 +15,6 @@ */ #include "hybrid/executor/subgraph_executor.h" -#include "graph/ge_context.h" #include "hybrid/executor/worker/task_compile_engine.h" #include "hybrid/executor/worker/execution_engine.h" #include "hybrid/node_executor/node_executor.h" @@ -25,31 +24,27 @@ namespace hybrid { namespace { constexpr int kDefaultThreadNum = 4; constexpr int kDataInputIndex = 0; -} +} // namespace SubgraphExecutor::SubgraphExecutor(const GraphItem *graph_item, GraphExecutionContext *context, bool force_infer_shape) : graph_item_(graph_item), context_(context), force_infer_shape_(force_infer_shape), - pre_run_pool_(kDefaultThreadNum) { -} + pre_run_pool_(kDefaultThreadNum) {} -SubgraphExecutor::~SubgraphExecutor() { - GELOGD("[%s] SubgraphExecutor destroyed.", graph_item_->GetName().c_str()); -} +SubgraphExecutor::~SubgraphExecutor() { GELOGD("[%s] SubgraphExecutor destroyed.", graph_item_->GetName().c_str()); } Status SubgraphExecutor::Init(const std::vector &inputs, const std::vector 
&input_desc) { - subgraph_context_.reset(new(std::nothrow)SubgraphContext(graph_item_)); + subgraph_context_.reset(new (std::nothrow) SubgraphContext(graph_item_)); GE_CHECK_NOTNULL(subgraph_context_); GE_CHK_STATUS_RET(subgraph_context_->Init(), "[%s] Failed to init subgraph context.", graph_item_->GetName().c_str()); - shape_inference_engine_.reset(new(std::nothrow) ShapeInferenceEngine(context_, subgraph_context_.get())); + shape_inference_engine_.reset(new (std::nothrow) ShapeInferenceEngine(context_, subgraph_context_.get())); GE_CHECK_NOTNULL(shape_inference_engine_); if (graph_item_->IsDynamic()) { - GE_CHK_STATUS_RET(InitInputsForUnknownShape(inputs, input_desc), - "[%s] Failed to set inputs.", + GE_CHK_STATUS_RET(InitInputsForUnknownShape(inputs, input_desc), "[%s] Failed to set inputs.", graph_item_->GetName().c_str()); } else { GE_CHK_STATUS_RET(InitInputsForKnownShape(inputs), @@ -78,16 +73,11 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorGetName().c_str(), - i, - input_node->input_start, - input_tensor.DebugString().c_str()); + GELOGD("[%s] Set input tensor[%zu] to inputs with index = %d, tensor = %s", graph_item_->GetName().c_str(), i, + input_node->input_start, input_tensor.DebugString().c_str()); GE_CHK_STATUS_RET(subgraph_context_->SetInput(*input_node, kDataInputIndex, input_tensor), - "[%s] Failed to set input tensor[%zu]", - graph_item_->GetName().c_str(), - i); + "[%s] Failed to set input tensor[%zu]", graph_item_->GetName().c_str(), i); if (force_infer_shape_ || input_node->is_dynamic) { GELOGD("[%s] Start to update input[%zu] for subgraph data node.", graph_item_->GetName().c_str(), i); @@ -110,20 +100,15 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector if (static_cast(parent_input_index) >= inputs.size()) { GELOGE(INTERNAL_ERROR, "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs", - graph_item_->GetName().c_str(), - inputs.size(), - parent_input_index + 
1); + graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1); return INTERNAL_ERROR; } auto &input_tensor = inputs[parent_input_index]; subgraph_context_->SetInput(static_cast(i), input_tensor); - GELOGD("[%s] Set input tensor[%zu] with inputs with index = %d, tensor = %s", - graph_item_->GetName().c_str(), - i, - parent_input_index, - input_tensor.DebugString().c_str()); + GELOGD("[%s] Set input tensor[%zu] with inputs with index = %d, tensor = %s", graph_item_->GetName().c_str(), i, + parent_input_index, input_tensor.DebugString().c_str()); } return SUCCESS; @@ -146,9 +131,7 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector &inputs, Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector &inputs) { GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str()); if (graph_item_->GetAllNodes().size() != 1) { - GELOGE(INTERNAL_ERROR, - "[%s] Invalid known shape subgraph. node size = %zu", - graph_item_->GetName().c_str(), + GELOGE(INTERNAL_ERROR, "[%s] Invalid known shape subgraph. 
node size = %zu", graph_item_->GetName().c_str(), graph_item_->GetAllNodes().size()); return INTERNAL_ERROR; } @@ -163,8 +146,7 @@ Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vectorGetName().c_str(), + "[%s] Failed to execute node [%s] for known subgraph.", graph_item_->GetName().c_str(), known_shape_task_context_->GetNodeName()); GELOGD("[%s] Done execute non-dynamic subgraph successfully.", graph_item_->GetName().c_str()); @@ -181,19 +163,16 @@ Status SubgraphExecutor::ExecuteAsync(TaskContext &task_context) { input_desc.emplace_back(task_context.GetInputDesc(i)); } - GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc), - "[%s] Failed to execute subgraph.", + GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc), "[%s] Failed to execute subgraph.", graph_item_->GetName().c_str()); - GE_CHK_STATUS_RET(SetOutputsToParentNode(task_context), - "[%s] Failed to set output shapes to parent node.", + GE_CHK_STATUS_RET(SetOutputsToParentNode(task_context), "[%s] Failed to set output shapes to parent node.", graph_item_->GetName().c_str()); return SUCCESS; } Status SubgraphExecutor::PrepareNodes() { - GELOGD("[%s] Start to prepare nodes. force infer shape = %s.", - graph_item_->GetName().c_str(), + GELOGD("[%s] Start to prepare nodes. force infer shape = %s.", graph_item_->GetName().c_str(), force_infer_shape_ ? "true" : "false"); auto &all_nodes = graph_item_->GetAllNodes(); for (auto all_node : all_nodes) { @@ -221,7 +200,6 @@ Status SubgraphExecutor::PrepareNodes() { // only do shape inference and compilation for nodes with dynamic shapes. 
if (node_item.is_dynamic) { auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status { - GetContext().SetSessionId(context_->session_id); GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state)); return PrepareForExecution(context_, *p_node_state); }); @@ -231,8 +209,8 @@ Status SubgraphExecutor::PrepareNodes() { GELOGD("[%s] Skipping shape inference and compilation for node with static shape.", node_item.NodeName().c_str()); if (node_item.kernel_task == nullptr) { GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str()); - GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_), - "[%s] Failed to create task.", p_node_state->GetName().c_str()); + GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_), "[%s] Failed to create task.", + p_node_state->GetName().c_str()); } else { node_state->SetKernelTask(node_item.kernel_task); } @@ -252,18 +230,18 @@ Status SubgraphExecutor::PrepareNodes() { Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) { const auto &node_item = *node_state.GetNodeItem(); - GE_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), - "[%s] Failed to InferShape.", node_state.GetName().c_str()); - GE_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_item), - "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str()); + GE_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), "[%s] Failed to InferShape.", + node_state.GetName().c_str()); + GE_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_item), "[%s] Failed to PropagateOutputShapes.", + node_state.GetName().c_str()); return SUCCESS; } Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state) { auto &node_item = *node_state.GetNodeItem(); if (node_item.kernel_task == nullptr) { - GE_CHK_STATUS_RET(TaskCompileEngine::Compile(node_state, ctx), - "Failed 
to create task for node[%s]", node_state.GetName().c_str()); + GE_CHK_STATUS_RET(TaskCompileEngine::Compile(node_state, ctx), "Failed to create task for node[%s]", + node_state.GetName().c_str()); } else { node_state.SetKernelTask(node_item.kernel_task); } @@ -298,8 +276,7 @@ Status SubgraphExecutor::LaunchTasks() { task_context->SetForceInferShape(force_infer_shape_); auto shared_task_context = std::shared_ptr(task_context.release()); GE_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_), - "[%s] Execute node failed.", - node_state->GetName().c_str()); + "[%s] Execute node failed.", node_state->GetName().c_str()); GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str()); } @@ -308,7 +285,6 @@ Status SubgraphExecutor::LaunchTasks() { Status SubgraphExecutor::ScheduleTasks() { GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str()); auto prepare_future = std::async([&]() -> Status { - GetContext().SetSessionId(context_->session_id); auto ret = PrepareNodes(); ready_queue_.Push(nullptr); return ret; @@ -325,29 +301,22 @@ Status SubgraphExecutor::ScheduleTasks() { return ret; } - GE_CHK_STATUS_RET(prepare_future.get(), - "[%s] Error occurred in task preparation.", - graph_item_->GetName().c_str()); + GE_CHK_STATUS_RET(prepare_future.get(), "[%s] Error occurred in task preparation.", graph_item_->GetName().c_str()); GELOGD("[%s] Done launching all tasks successfully.", graph_item_->GetName().c_str()); return SUCCESS; } -Status SubgraphExecutor::GetOutputs(vector &outputs) { - return subgraph_context_->GetOutputs(outputs); -} +Status SubgraphExecutor::GetOutputs(vector &outputs) { return subgraph_context_->GetOutputs(outputs); } Status SubgraphExecutor::GetOutputs(vector &outputs, std::vector &output_desc) { GE_CHK_STATUS_RET(GetOutputs(outputs), "[%s] Failed to get output tensors.", graph_item_->GetName().c_str()); // copy output data from op to designated position - 
GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc), - "[%s] Failed to get output tensor desc.", + GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc), "[%s] Failed to get output tensor desc.", graph_item_->GetName().c_str()); if (outputs.size() != output_desc.size()) { - GELOGE(INTERNAL_ERROR, - "Number of output tensors(%zu) mismatch number of output tensor desc(%zu).", - outputs.size(), + GELOGE(INTERNAL_ERROR, "Number of output tensors(%zu) mismatch number of output tensor desc(%zu).", outputs.size(), output_desc.size()); return INTERNAL_ERROR; } @@ -365,18 +334,14 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { // get output tensors and tensor desc list std::vector outputs; std::vector output_desc_list; - GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs), - "[%s] Failed to get output tensors.", + GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs), "[%s] Failed to get output tensors.", graph_item_->GetName().c_str()); - GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc_list), - "[%s] Failed to get output tensor desc.", + GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc_list), "[%s] Failed to get output tensor desc.", graph_item_->GetName().c_str()); if (outputs.size() != output_desc_list.size()) { GELOGE(INTERNAL_ERROR, "[%s] num output tensors = %zu, num output tensor desc = %zu", - graph_item_->GetName().c_str(), - outputs.size(), - output_desc_list.size()); + graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size()); return INTERNAL_ERROR; } @@ -385,14 +350,9 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { int parent_output_index = graph_item_->GetParentOutputIndex(i); GE_CHECK_GE(parent_output_index, 0); // update tensor - GELOGD("[%s] Updating output[%zu] to parent output[%d]", - graph_item_->GetName().c_str(), - i, - parent_output_index); - - GELOGD("[%s] Updating output tensor, index = %d, tensor = %s", - 
graph_item_->GetName().c_str(), - parent_output_index, + GELOGD("[%s] Updating output[%zu] to parent output[%d]", graph_item_->GetName().c_str(), i, parent_output_index); + + GELOGD("[%s] Updating output tensor, index = %d, tensor = %s", graph_item_->GetName().c_str(), parent_output_index, outputs[i].DebugString().c_str()); GE_CHK_STATUS_RET(task_context.SetOutput(parent_output_index, outputs[i])); @@ -402,17 +362,12 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { const auto &output_desc = output_desc_list[i]; auto parent_output_desc = task_context.MutableOutputDesc(parent_output_index); GE_CHECK_NOTNULL(parent_output_desc); - GELOGD("[%s] Updating output shape[%d] from [%s] to [%s]", - graph_item_->GetName().c_str(), - parent_output_index, - parent_output_desc->MutableShape().ToString().c_str(), - output_desc->GetShape().ToString().c_str()); + GELOGD("[%s] Updating output shape[%d] from [%s] to [%s]", graph_item_->GetName().c_str(), parent_output_index, + parent_output_desc->MutableShape().ToString().c_str(), output_desc->GetShape().ToString().c_str()); parent_output_desc->SetShape(output_desc->GetShape()); - GELOGD("[%s] Updating output original shape[%d] from [%s] to [%s]", - graph_item_->GetName().c_str(), - parent_output_index, - parent_output_desc->GetOriginShape().ToString().c_str(), + GELOGD("[%s] Updating output original shape[%d] from [%s] to [%s]", graph_item_->GetName().c_str(), + parent_output_index, parent_output_desc->GetOriginShape().ToString().c_str(), output_desc->GetOriginShape().ToString().c_str()); parent_output_desc->SetOriginShape(output_desc->GetOriginShape()); } diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index d1949947..7cdb2070 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -77,8 +77,7 @@ class SubgraphExecutor { private: static Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); 
static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); - Status Init(const std::vector &inputs, - const std::vector &input_desc); + Status Init(const std::vector &inputs, const std::vector &input_desc); Status InitInputsForUnknownShape(const std::vector &inputs, const std::vector &input_desc); Status InitInputsForKnownShape(const std::vector &inputs); @@ -99,4 +98,4 @@ class SubgraphExecutor { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_EXECUTOR_SUBGRAPH_EXECUTOR_H_ +#endif // GE_HYBRID_EXECUTOR_EXECUTOR_SUBGRAPH_EXECUTOR_H_ diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 7dc65433..1eb73e41 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -18,14 +18,10 @@ #include "graph/runtime_inference_context.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/tensor_adapter.h" -#include "graph/debug/ge_attr_define.h" #include "hybrid/node_executor/node_executor.h" #include "common/dump/dump_manager.h" #include "common/dump/dump_op.h" #include "common/types.h" -#include "common/ge_types.h" -#include "common/profiling/profiling_manager.h" -#include "runtime/base.h" namespace ge { namespace hybrid { @@ -38,11 +34,8 @@ Status LogInputs(const NodeItem &node_item, const TaskContext &task_context) { GE_CHECK_NOTNULL(input_tensor); const auto &tensor_desc = node_item.op_desc->MutableInputDesc(i); GE_CHECK_NOTNULL(tensor_desc); - GELOGD("[%s] Print task args. input[%d] = %s, shape = [%s]", - node_item.NodeName().c_str(), - i, - input_tensor->DebugString().c_str(), - tensor_desc->MutableShape().ToString().c_str()); + GELOGD("[%s] Print task args. 
input[%d] = %s, shape = [%s]", node_item.NodeName().c_str(), i, + input_tensor->DebugString().c_str(), tensor_desc->MutableShape().ToString().c_str()); } return SUCCESS; @@ -54,11 +47,8 @@ Status LogOutputs(const NodeItem &node_item, const TaskContext &task_context) { GE_CHECK_NOTNULL(output_tensor); const auto &tensor_desc = node_item.op_desc->MutableOutputDesc(i); GE_CHECK_NOTNULL(tensor_desc); - GELOGD("[%s] Print task args. output[%d] = %s, shape = [%s]", - node_item.NodeName().c_str(), - i, - output_tensor->DebugString().c_str(), - tensor_desc->MutableShape().ToString().c_str()); + GELOGD("[%s] Print task args. output[%d] = %s, shape = [%s]", node_item.NodeName().c_str(), i, + output_tensor->DebugString().c_str(), tensor_desc->MutableShape().ToString().c_str()); } return SUCCESS; @@ -69,28 +59,21 @@ class NodeDoneCallback { NodeDoneCallback(GraphExecutionContext *graph_context, std::shared_ptr task_context); ~NodeDoneCallback() = default; Status OnNodeDone(); + private: Status PrepareConstInputs(const NodeItem &node_item); Status DumpDynamicNode(); - Status ProfilingReport(); - Status GetGraphDescInfo(const NodePtr node, const HybridModel *model, - std::vector &compute_graph_info); - Status GetTaskDescInfo(const NodePtr node, const HybridModel *model, - std::vector &task_desc_info); GraphExecutionContext *graph_context_; std::shared_ptr context_; DumpOp dump_op_; }; -NodeDoneCallback::NodeDoneCallback(GraphExecutionContext *graph_context, - std::shared_ptr task_context) - : graph_context_(graph_context), context_(std::move(task_context)) { -} +NodeDoneCallback::NodeDoneCallback(GraphExecutionContext *graph_context, std::shared_ptr task_context) + : graph_context_(graph_context), context_(std::move(task_context)) {} Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) { for (auto output_idx : node_item.to_const_output_id_list) { - RECORD_CALLBACK_EVENT(graph_context_, node_item.NodeName().c_str(), - "[PrepareConstInputs] [index = %d] Start", + 
RECORD_CALLBACK_EVENT(graph_context_, node_item.NodeName().c_str(), "[PrepareConstInputs] [index = %d] Start", output_idx); auto output_tensor = context_->GetOutput(output_idx); @@ -106,26 +89,18 @@ Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) { "Failed to invoke GetTensorSizeInBytes"); if (output_tensor->GetSize() < static_cast(tensor_size)) { - GELOGE(INTERNAL_ERROR, - "[%s] Tensor size is not enough. output index = %d, required size = %zu, tensor = %s", - node_item.NodeName().c_str(), - output_idx, - tensor_size, - output_tensor->DebugString().c_str()); + GELOGE(INTERNAL_ERROR, "[%s] Tensor size is not enough. output index = %d, required size = %zu, tensor = %s", + node_item.NodeName().c_str(), output_idx, tensor_size, output_tensor->DebugString().c_str()); return INTERNAL_ERROR; } vector host_buffer(static_cast(tensor_size)); - GELOGD("[%s] To cache output[%d] to host, size = %zu", - node_item.NodeName().c_str(), - output_idx, + GELOGD("[%s] To cache output[%d] to host, size = %zu", node_item.NodeName().c_str(), output_idx, output_tensor->GetSize()); - GE_CHK_RT_RET(rtMemcpy(host_buffer.data(), - tensor_size, - output_tensor->GetData(), - tensor_size, - RT_MEMCPY_DEVICE_TO_HOST)); - tensor.SetData(std::move(host_buffer)); + GE_CHK_RT_RET( + rtMemcpy(host_buffer.data(), tensor_size, output_tensor->GetData(), tensor_size, RT_MEMCPY_DEVICE_TO_HOST)); + tensor.SetData(host_buffer); + string session_id = std::to_string(context_->GetSessionId()); RuntimeInferenceContext *runtime_infer_ctx = nullptr; GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(session_id, &runtime_infer_ctx), @@ -133,134 +108,16 @@ Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) { GE_CHK_STATUS_RET(runtime_infer_ctx->SetTensor(node_item.node_id, output_idx, std::move(tensor)), "Failed to SetTensor, node = %s, output_index = %d", node_item.NodeName().c_str(), output_idx); GELOGD("[%s] Output[%d] cached successfully in session: %s. 
node_id = %d, shape = [%s]", - node_item.NodeName().c_str(), - output_idx, - session_id.c_str(), - node_item.node_id, + node_item.NodeName().c_str(), output_idx, session_id.c_str(), node_item.node_id, ge_tensor_desc->GetShape().ToString().c_str()); - RECORD_CALLBACK_EVENT(graph_context_, node_item.NodeName().c_str(), - "[PrepareConstInputs] [index = %d] End", + RECORD_CALLBACK_EVENT(graph_context_, node_item.NodeName().c_str(), "[PrepareConstInputs] [index = %d] End", output_idx); } return SUCCESS; } -Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *model, - std::vector &task_desc_info) { - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(model); - - GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str()); - auto op_desc = node->GetOpDesc(); - std::string op_name = op_desc->GetName(); - std::string dynamic_model_name = model->GetModelName(); - - uint32_t task_id = 0; - uint32_t stream_id = 0; - if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { - GELOGE(PARAM_INVALID, "Get task_id and stream_id failed."); - return PARAM_INVALID; - } - - TaskDescInfo tmp_task_desc_info; - tmp_task_desc_info.model_name = dynamic_model_name; - tmp_task_desc_info.op_name = op_name; - tmp_task_desc_info.block_dim = 0; - auto task_defs = model->GetTaskDefs(node); - if (task_defs != nullptr && (*task_defs).size() > 0) { - const auto &task_def = (*task_defs)[0]; - tmp_task_desc_info.block_dim = task_def.kernel().block_dim(); - } - tmp_task_desc_info.task_id = task_id; - tmp_task_desc_info.stream_id = stream_id; - GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]", - node->GetName().c_str(), task_id, stream_id); - task_desc_info.emplace_back(tmp_task_desc_info); - return SUCCESS; -} - -Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel *model, - std::vector &compute_graph_info) { - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(model); - - GELOGD("GetComputeGraphInfo of node [%s] start.", 
node->GetName().c_str()); - - std::string dynamic_model_name = model->GetModelName(); - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(PARAM_INVALID, "op_desc is nullptr."); - return PARAM_INVALID; - } - - auto op_mode = static_cast(domi::ImplyType::INVALID); - if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) && - op_mode == static_cast(domi::ImplyType::TVM)) { - ComputeGraphDescInfo tmp_compute_graph_info; - tmp_compute_graph_info.model_name = dynamic_model_name; - tmp_compute_graph_info.op_name = op_desc->GetName(); - tmp_compute_graph_info.op_type = op_desc->GetType(); - - for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { - GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); - if (input_desc == nullptr) { - continue; - } - tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); - tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); - tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); - } - - for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { - GeTensorDesc output_desc = op_desc->GetOutputDesc(j); - tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); - tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); - tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); - } - compute_graph_info.emplace_back(tmp_compute_graph_info); - GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str()); - } - return SUCCESS; -} - -Status NodeDoneCallback::ProfilingReport() { - auto node = context_->GetNodeItem().node; - if (node == nullptr) { - GELOGE(PARAM_INVALID, "Get node is nullptr"); - return PARAM_INVALID; - } - - const auto &op_type = node->GetType(); - if (op_type == PARTITIONEDCALL) { - return SUCCESS; - } - - GE_CHECK_NOTNULL(graph_context_); - const HybridModel *model = graph_context_->model; - GE_CHECK_NOTNULL(model); - - 
GELOGD("ProfilingReport of node [%s] model [%s] start.", node->GetName().c_str(), model->GetModelName().c_str()); - std::vector task_desc_info; - TaskDescInfo tmp_task_desc_info; - auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info); - if (profiling_ret != RT_ERROR_NONE) { - GELOGE(profiling_ret, "Get task info of node[%s] failed.", node->GetName().c_str()); - return profiling_ret; - } - - std::vector compute_graph_info; - profiling_ret = GetGraphDescInfo(node, model, compute_graph_info); - if (profiling_ret != RT_ERROR_NONE) { - GELOGE(profiling_ret, "Get graph info of node[%s] failed.", node->GetName().c_str()); - return profiling_ret; - } - - ProfilingManager::Instance().ReportProfilingData(task_desc_info, compute_graph_info); - return SUCCESS; -} - Status NodeDoneCallback::DumpDynamicNode() { auto node = context_->GetNodeItem().node; if (node == nullptr) { @@ -334,11 +191,6 @@ Status NodeDoneCallback::OnNodeDone() { GE_CHK_STATUS_RET(DumpDynamicNode(), "Failed to dump dynamic node"); } - if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { - GE_CHK_STATUS_RET(ProfilingReport(), "Report node[%s] to profiling failed.", - node_item.NodeName().c_str()); - } - // release inputs for (int i = 0; i < context_->NumInputs(); ++i) { context_->ReleaseInput(i); @@ -348,11 +200,10 @@ Status NodeDoneCallback::OnNodeDone() { // PropagateOutputs for type == DEPEND_COMPUTE if (node_item.shape_inference_type == DEPEND_COMPUTE) { if (graph_context_->trace_enabled) { - (void) LogOutputs(node_item, *context_); + (void)LogOutputs(node_item, *context_); } - GE_CHK_STATUS_RET(context_->PropagateOutputs(), - "[%s] Failed to propagate outputs failed", + GE_CHK_STATUS_RET(context_->PropagateOutputs(), "[%s] Failed to propagate outputs failed", node_item.NodeName().c_str()); RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[PropagateOutputs] End"); @@ -368,12 +219,11 @@ Status NodeDoneCallback::OnNodeDone() { return SUCCESS; } -Status 
ExecutionEngine::ExecuteAsync(NodeState &node_state, - const std::shared_ptr &task_context, +Status ExecutionEngine::ExecuteAsync(NodeState &node_state, const std::shared_ptr &task_context, GraphExecutionContext &execution_context) { GELOGI("[%s] Node is ready for execution", task_context->GetNodeName()); RECORD_EXECUTION_EVENT(&execution_context, task_context->GetNodeName(), "Start"); - auto cb = std::shared_ptr(new(std::nothrow) NodeDoneCallback(&execution_context, task_context)); + auto cb = std::shared_ptr(new (std::nothrow) NodeDoneCallback(&execution_context, task_context)); GE_CHECK_NOTNULL(cb); auto callback = [&, cb]() { auto ret = cb->OnNodeDone(); @@ -387,9 +237,7 @@ Status ExecutionEngine::ExecuteAsync(NodeState &node_state, return SUCCESS; } -Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, - TaskContext &task_context, - GraphExecutionContext &context, +Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, TaskContext &task_context, GraphExecutionContext &context, const std::function &callback) { const auto &task = node_state.GetKernelTask(); if (task == nullptr) { @@ -399,16 +247,14 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, // Wait for dependent nodes(DEPEND_COMPUTE), so that the input tensors are valid. 
RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[AwaitDependents] Start"); - GE_CHK_STATUS_RET(node_state.AwaitInputTensors(context), - "[%s] Failed to wait for dependent nodes.", + GE_CHK_STATUS_RET(node_state.AwaitInputTensors(context), "[%s] Failed to wait for dependent nodes.", node_state.GetName().c_str()); const auto &node_item = *node_state.GetNodeItem(); auto executor = node_item.node_executor; GE_CHECK_NOTNULL(executor); RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] Start"); - GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context), - "[%s] Failed to prepare task", + GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context), "[%s] Failed to prepare task", node_state.GetName().c_str()); RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] End"); GELOGD("[%s] Done task preparation successfully.", node_state.GetName().c_str()); @@ -426,13 +272,10 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state, if (context.profiling_level > 0) { auto *ctx = &context; const string &name = node_state.GetName(); - (void)task_context.RegisterCallback([ctx, name]() { - RECORD_CALLBACK_EVENT(ctx, name.c_str(), "[Compute] Start"); - }); + (void)task_context.RegisterCallback([ctx, name]() { RECORD_CALLBACK_EVENT(ctx, name.c_str(), "[Compute] Start"); }); } RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ExecuteTask] Start"); - GE_CHK_STATUS_RET(node_item.node_executor->ExecuteTask(*task, task_context, callback), - "[%s] Failed to execute task", + GE_CHK_STATUS_RET(node_item.node_executor->ExecuteTask(*task, task_context, callback), "[%s] Failed to execute task", node_state.GetName().c_str()); RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ExecuteTask] End"); @@ -456,29 +299,17 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const continue; } - if (input_tensor->GetData() == nullptr) { - GELOGD("[%s] Skipping null input, index = %d", 
task_context.GetNodeName(), i); - continue; - } - int64_t expected_size; GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, expected_size)); GELOGD("[%s] Input[%d] expects [%ld] bytes.", task_context.GetNodeName(), i, expected_size); auto size_diff = expected_size - static_cast(input_tensor->GetSize()); if (size_diff > 0) { if (size_diff <= kMaxPadding) { - GELOGW("[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu", - task_context.GetNodeName(), - i, - expected_size, - input_tensor->GetSize()); + GELOGW("[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu", task_context.GetNodeName(), i, + expected_size, input_tensor->GetSize()); } else { - GELOGE(INTERNAL_ERROR, - "[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu", - task_context.GetNodeName(), - i, - expected_size, - input_tensor->GetSize()); + GELOGE(INTERNAL_ERROR, "[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu", + task_context.GetNodeName(), i, expected_size, input_tensor->GetSize()); return INTERNAL_ERROR; } } @@ -487,12 +318,10 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const return SUCCESS; } -Status ExecutionEngine::PropagateOutputs(const NodeItem &node_item, - TaskContext &task_context, +Status ExecutionEngine::PropagateOutputs(const NodeItem &node_item, TaskContext &task_context, GraphExecutionContext &context) { if (node_item.shape_inference_type != DEPEND_COMPUTE) { - GE_CHK_STATUS_RET(task_context.PropagateOutputs(), - "[%s] Failed to propagate outputs.", + GE_CHK_STATUS_RET(task_context.PropagateOutputs(), "[%s] Failed to propagate outputs.", node_item.NodeName().c_str()); RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PropagateOutputs] End"); GELOGD("[%s] Done propagating outputs successfully.", node_item.NodeName().c_str()); diff --git a/ge/hybrid/executor/worker/execution_engine.h b/ge/hybrid/executor/worker/execution_engine.h index 
ad80d99b..56f1557d 100644 --- a/ge/hybrid/executor/worker/execution_engine.h +++ b/ge/hybrid/executor/worker/execution_engine.h @@ -24,18 +24,15 @@ namespace ge { namespace hybrid { class ExecutionEngine { public: - static Status ExecuteAsync(NodeState &node_state, - const std::shared_ptr &task_context, + static Status ExecuteAsync(NodeState &node_state, const std::shared_ptr &task_context, GraphExecutionContext &execution_context); private: static Status ValidateInputTensors(const NodeState &node_state, const TaskContext &task_context); static Status PropagateOutputs(const NodeItem &node_item, TaskContext &task_context, GraphExecutionContext &context); - static Status DoExecuteAsync(NodeState &node_state, - TaskContext &task_context, - GraphExecutionContext &context, + static Status DoExecuteAsync(NodeState &node_state, TaskContext &task_context, GraphExecutionContext &context, const std::function &callback); }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_EXECUTOR_EXECUTION_ENGINE_H_ +#endif // GE_HYBRID_EXECUTOR_EXECUTOR_EXECUTION_ENGINE_H_ diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index f4dec60a..49a29259 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -22,19 +22,13 @@ namespace ge { namespace hybrid { ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) - : execution_context_(execution_context), - subgraph_context_(subgraph_context) { -} + : execution_context_(execution_context), subgraph_context_(subgraph_context) {} Status ShapeInferenceEngine::InferShape(NodeState &node_state) { // Wait for all input shape become valid GE_CHK_STATUS_RET_NOLOG(node_state.GetShapeInferenceState().AwaitShapesReady(*execution_context_)); auto &node_item = *node_state.GetNodeItem(); - - // Wait for "const input nodes" if node's shape 
inference function requires any. - // Even if output shape is static, there are cases that the const-input will be used in OpTiling and Execution - GE_CHK_STATUS_RET_NOLOG(AwaitDependentNodes(node_state)); if (node_item.is_output_shape_static) { return SUCCESS; } @@ -57,6 +51,9 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { } } + // Wait for "const input nodes" if node's shape inference function requires any. + GE_CHK_STATUS_RET_NOLOG(AwaitDependentNodes(node_state)); + // Do shape inference GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); { @@ -69,17 +66,13 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) { bool is_unknown_shape = false; GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape), - "Failed to get shape status. node = %s", - node_item.NodeName().c_str()); + "Failed to get shape status. node = %s", node_item.NodeName().c_str()); - GE_CHK_BOOL_RET_STATUS(!is_unknown_shape, - INTERNAL_ERROR, - "[%s] Shape is still unknown after shape inference.", + GE_CHK_BOOL_RET_STATUS(!is_unknown_shape, INTERNAL_ERROR, "[%s] Shape is still unknown after shape inference.", node_item.NodeName().c_str()); } - GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", - node_item.NodeName().c_str(), + GELOGD("[%s] [HybridTrace] After shape inference. 
Node = %s", node_item.NodeName().c_str(), node_item.DebugString().c_str()); GELOGD("[%s] InferShapeAndType finished successfully.", node_item.NodeName().c_str()); @@ -89,21 +82,15 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { Status ShapeInferenceEngine::AwaitDependentNodes(NodeState &node_state) { auto &node_item = *node_state.GetNodeItem(); for (auto &src_node : node_item.dependents_for_shape_inference) { - GELOGI("[%s] Start to wait for data dependent node: %s", - node_item.NodeName().c_str(), - src_node->GetName().c_str()); - RECORD_SHAPE_INFERENCE_EVENT(execution_context_, - node_item.NodeName().c_str(), - "[AwaitNodeDone] [%s] Start", + GELOGI("[%s] Start to wait for data dependent node: %s", node_item.NodeName().c_str(), src_node->GetName().c_str()); + RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[AwaitNodeDone] [%s] Start", src_node->GetName().c_str()); if (!subgraph_context_->Await(src_node)) { GELOGE(INTERNAL_ERROR, "[%s] Await node failed.", src_node->GetName().c_str()); return INTERNAL_ERROR; } - RECORD_SHAPE_INFERENCE_EVENT(execution_context_, - node_item.NodeName().c_str(), - "[AwaitNodeDone] [%s] End", + RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[AwaitNodeDone] [%s] End", src_node->GetName().c_str()); GELOGI("[%s] Done waiting node.", src_node->GetName().c_str()); } @@ -118,9 +105,8 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { // output shape will not be valid until compute is done. bool shape_is_future = - node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE; - GELOGD("[%s] Start to propagate output shapes. shape_type = %d", - node_item.NodeName().c_str(), + node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE; + GELOGD("[%s] Start to propagate output shapes. 
shape_type = %d", node_item.NodeName().c_str(), node_item.shape_inference_type); RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[PropagateOutputShapes] Start"); // propagate each output @@ -136,10 +122,8 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { auto dst_node_state = subgraph_context_->GetOrCreateNodeState(dst_node_item); GE_CHECK_NOTNULL(dst_node_state); - GELOGI("[%s] Update dst node [%s], input index = %d", - node_item.NodeName().c_str(), - dst_node_item->NodeName().c_str(), - dst_input_index_and_node.first); + GELOGI("[%s] Update dst node [%s], input index = %d", node_item.NodeName().c_str(), + dst_node_item->NodeName().c_str(), dst_input_index_and_node.first); // in case type 3 and 4, shape will be valid after computing is done if (shape_is_future) { @@ -174,8 +158,7 @@ Status ShapeInferenceEngine::InferShapeForSubgraph(const NodeItem &node_item, co GELOGD("[%s] Start to invoke InferShapeAndType", node->GetName().c_str()); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndType(node)); GELOGD("[%s] Done invoking InferShapeAndType", node->GetName().c_str()); - GE_CHK_STATUS_RET(UpdatePeerNodeShape(*node), - "[%s] Failed to update shapes of peer node.", + GE_CHK_STATUS_RET(UpdatePeerNodeShape(*node), "[%s] Failed to update shapes of peer node.", node->GetName().c_str()); } @@ -213,15 +196,13 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) { } GELOGI("Peer input op desc name is %s, need to flush: shape size is %zu, datatype is %d, original datatype is %d", - peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), - output_tensor->GetShape().GetDimNum(), output_tensor->GetDataType(), - output_tensor->GetOriginDataType()); + peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), output_tensor->GetShape().GetDimNum(), + output_tensor->GetDataType(), output_tensor->GetOriginDataType()); peer_input_desc->SetOriginShape(output_tensor->GetOriginShape()); 
peer_input_desc->SetShape(output_tensor->GetShape()); GELOGI("Peer input op desc name is %s, shape size is %zu, datatype is %d, original datatype is %d", - peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), - peer_input_desc->GetShape().GetDimNum(), peer_input_desc->GetDataType(), - peer_input_desc->GetOriginDataType()); + peer_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_input_desc->GetShape().GetDimNum(), + peer_input_desc->GetDataType(), peer_input_desc->GetOriginDataType()); } } return SUCCESS; diff --git a/ge/hybrid/executor/worker/shape_inference_engine.h b/ge/hybrid/executor/worker/shape_inference_engine.h index 7bb9269c..f8a391e2 100644 --- a/ge/hybrid/executor/worker/shape_inference_engine.h +++ b/ge/hybrid/executor/worker/shape_inference_engine.h @@ -44,4 +44,4 @@ class ShapeInferenceEngine { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_INFERSHAPE_SHAPE_INFERENCE_ENGINE_H_ +#endif // GE_HYBRID_EXECUTOR_INFERSHAPE_SHAPE_INFERENCE_ENGINE_H_ diff --git a/ge/hybrid/executor/worker/task_compile_engine.h b/ge/hybrid/executor/worker/task_compile_engine.h index 0bc66a69..a677cb2e 100644 --- a/ge/hybrid/executor/worker/task_compile_engine.h +++ b/ge/hybrid/executor/worker/task_compile_engine.h @@ -27,4 +27,4 @@ class TaskCompileEngine { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_EXECUTOR_COMPILE_TASK_COMPILE_ENGINE_H_ +#endif // GE_HYBRID_EXECUTOR_COMPILE_TASK_COMPILE_ENGINE_H_ diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc index d696adf9..0454fa72 100755 --- a/ge/hybrid/hybrid_davinci_model.cc +++ b/ge/hybrid/hybrid_davinci_model.cc @@ -24,12 +24,9 @@ namespace ge { namespace hybrid { class HybridDavinciModel::Impl { public: - explicit Impl(GeRootModelPtr ge_model) : model_(std::move(ge_model)), executor_(&model_) { - } + explicit Impl(GeRootModelPtr ge_model) : model_(std::move(ge_model)), executor_(&model_) {} - ~Impl() { - 
NodeExecutorManager::GetInstance().FinalizeExecutors(); - } + ~Impl() { NodeExecutorManager::GetInstance().FinalizeExecutors(); } Status Init() { GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().EnsureInitialized(), "Failed to initialize executors"); @@ -42,21 +39,13 @@ class HybridDavinciModel::Impl { return executor_.Execute(inputs, outputs); } - Status ModelRunStart() { - return executor_.Start(listener_); - } + Status ModelRunStart() { return executor_.Start(listener_); } - Status ModelRunStop() { - return executor_.Stop(); - } + Status ModelRunStop() { return executor_.Stop(); } - Status EnqueueData(const std::shared_ptr &data) { - return executor_.EnqueueData(data); - } + Status EnqueueData(const std::shared_ptr &data) { return executor_.EnqueueData(data); } - void SetListener(const shared_ptr &listener) { - listener_ = listener; - } + void SetListener(const shared_ptr &listener) { listener_ = listener; } void SetModelId(uint32_t model_id) { executor_.SetModelId(model_id); @@ -74,12 +63,10 @@ class HybridDavinciModel::Impl { HybridModelAsyncExecutor executor_; }; -HybridDavinciModel::~HybridDavinciModel() { - delete impl_; -} +HybridDavinciModel::~HybridDavinciModel() { delete impl_; } unique_ptr HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { - auto instance = unique_ptr(new (std::nothrow)HybridDavinciModel()); + auto instance = unique_ptr(new (std::nothrow) HybridDavinciModel()); if (instance != nullptr) { instance->impl_ = new (std::nothrow) HybridDavinciModel::Impl(ge_root_model); if (instance->impl_ != nullptr) { diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h index 00a48c1e..c286a222 100644 --- a/ge/hybrid/hybrid_davinci_model.h +++ b/ge/hybrid/hybrid_davinci_model.h @@ -58,4 +58,4 @@ class HybridDavinciModel { }; } // namespace hybrid } // namespace ge -#endif // HYBRID_HYBRID_DAVINCI_MODEL_H_ +#endif // HYBRID_HYBRID_DAVINCI_MODEL_H_ diff --git a/ge/hybrid/hybrid_davinci_model_stub.cc 
b/ge/hybrid/hybrid_davinci_model_stub.cc index b95b9efc..7bde98a3 100644 --- a/ge/hybrid/hybrid_davinci_model_stub.cc +++ b/ge/hybrid/hybrid_davinci_model_stub.cc @@ -21,36 +21,23 @@ namespace hybrid { HybridDavinciModel::~HybridDavinciModel() {} std::unique_ptr HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) { - return std::unique_ptr(new (std::nothrow)HybridDavinciModel()); + return std::unique_ptr(new (std::nothrow) HybridDavinciModel()); } -Status HybridDavinciModel::Init() { - return UNSUPPORTED; -} +Status HybridDavinciModel::Init() { return UNSUPPORTED; } -Status HybridDavinciModel::Execute(const vector &inputs, vector &outputs) { - return UNSUPPORTED; -} +Status HybridDavinciModel::Execute(const vector &inputs, vector &outputs) { return UNSUPPORTED; } -Status HybridDavinciModel::ModelRunStart() { - return UNSUPPORTED; -} +Status HybridDavinciModel::ModelRunStart() { return UNSUPPORTED; } -Status HybridDavinciModel::ModelRunStop() { - return UNSUPPORTED; -} +Status HybridDavinciModel::ModelRunStop() { return UNSUPPORTED; } -Status HybridDavinciModel::EnqueueData(const shared_ptr &data) { - return UNSUPPORTED; -} +Status HybridDavinciModel::EnqueueData(const shared_ptr &data) { return UNSUPPORTED; } -void HybridDavinciModel::SetListener(const shared_ptr &listener) { -} +void HybridDavinciModel::SetListener(const shared_ptr &listener) {} -void HybridDavinciModel::SetModelId(uint32_t model_id) { -} +void HybridDavinciModel::SetModelId(uint32_t model_id) {} -void HybridDavinciModel::SetDeviceId(uint32_t device_id) { -} +void HybridDavinciModel::SetDeviceId(uint32_t device_id) {} } // namespace hybrid } // namespace ge \ No newline at end of file diff --git a/ge/hybrid/model/graph_item.cc b/ge/hybrid/model/graph_item.cc index b763772e..120865ce 100644 --- a/ge/hybrid/model/graph_item.cc +++ b/ge/hybrid/model/graph_item.cc @@ -22,17 +22,11 @@ namespace hybrid { namespace { constexpr int kInvalidIndex = -1; } // namespace -GraphItem::~GraphItem() { 
- GELOGD("[%s] GraphItem destroyed.", name_.c_str()); -} +GraphItem::~GraphItem() { GELOGD("[%s] GraphItem destroyed.", name_.c_str()); } -const vector &hybrid::GraphItem::GetAllNodes() const { - return node_items_; -} +const vector &hybrid::GraphItem::GetAllNodes() const { return node_items_; } -const vector &GraphItem::GetInputNodes() const { - return input_nodes_; -} +const vector &GraphItem::GetInputNodes() const { return input_nodes_; } Status GraphItem::GetOutputDescList(vector &output_desc_list) const { if (output_node_ == nullptr) { @@ -52,13 +46,9 @@ Status GraphItem::GetOutputDescList(vector &output_desc_li return SUCCESS; } -bool GraphItem::IsDynamic() const { - return is_dynamic_; -} +bool GraphItem::IsDynamic() const { return is_dynamic_; } -const vector &GraphItem::GetInputIndexMapping() const { - return input_index_mapping_; -} +const vector &GraphItem::GetInputIndexMapping() const { return input_index_mapping_; } int GraphItem::GetParentOutputIndex(size_t index) const { if (index >= output_index_mapping_.size()) { @@ -68,8 +58,6 @@ int GraphItem::GetParentOutputIndex(size_t index) const { return output_index_mapping_[index]; } -const NodeItem *GraphItem::GetOutputNode() const { - return output_node_; -} +const NodeItem *GraphItem::GetOutputNode() const { return output_node_; } } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/graph_item.h b/ge/hybrid/model/graph_item.h index 64d809ee..cb0fbbed 100644 --- a/ge/hybrid/model/graph_item.h +++ b/ge/hybrid/model/graph_item.h @@ -30,21 +30,13 @@ class GraphItem { const vector &GetInputNodes() const; Status GetOutputDescList(std::vector &output_desc_list) const; - int TotalInputs() const { - return total_inputs_; - } + int TotalInputs() const { return total_inputs_; } - int TotalOutputs() const { - return total_outputs_; - } + int TotalOutputs() const { return total_outputs_; } - const std::string& GetName() const { - return name_; - } + const std::string &GetName() const { return name_; 
} - void SetName(const string &name) { - name_ = name; - } + void SetName(const string &name) { name_ = name; } const NodeItem *GetOutputNode() const; @@ -69,4 +61,4 @@ class GraphItem { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_MODEL_SUBGRAPH_ITEM_H_ +#endif // GE_HYBRID_MODEL_SUBGRAPH_ITEM_H_ diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc index 64138d4b..18db28cb 100644 --- a/ge/hybrid/model/hybrid_model.cc +++ b/ge/hybrid/model/hybrid_model.cc @@ -16,8 +16,8 @@ #include "hybrid_model.h" #include -#include "graph/debug/ge_attr_define.h" #include "graph/load/new_model_manager/model_utils.h" +#include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" #include "graph/utils/node_utils.h" #include "graph/utils/tensor_utils.h" @@ -27,12 +27,9 @@ namespace ge { namespace hybrid { -HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) { -} +HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) {} -HybridModel::~HybridModel() { - GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); -} +HybridModel::~HybridModel() { GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); } Status HybridModel::Init() { GELOGD("Start to init hybrid model."); @@ -41,7 +38,7 @@ Status HybridModel::Init() { return SUCCESS; } -TensorValue* HybridModel::GetVariable(const string &name) const { +TensorValue *HybridModel::GetVariable(const string &name) const { auto it = variable_tensors_.find(name); if (it == variable_tensors_.end()) { GELOGI("Failed to get variable tensor. 
var name = [%s]", name.c_str()); @@ -53,16 +50,13 @@ TensorValue* HybridModel::GetVariable(const string &name) const { } NodePtr HybridModel::GetVariableNode(const string &name) const { - auto it = device_variable_nodes_.find(name); - if (it != device_variable_nodes_.end()) { - return it->second; - } - auto host_find = host_variable_nodes_.find(name); - if (host_find != host_variable_nodes_.end()) { - return host_find->second; + auto it = variable_nodes_.find(name); + if (it == variable_nodes_.end()) { + GELOGI("Failed to get variable node by name = [%s]", name.c_str()); + return nullptr; } - GELOGI("Failed to get variable node by name = [%s]", name.c_str()); - return nullptr; + + return it->second; } const std::vector *HybridModel::GetTaskDefs(const NodePtr &node) const { @@ -102,9 +96,7 @@ GeModelPtr HybridModel::GetGeModel(const NodePtr &node) const { return it->second; } -const GraphItem* HybridModel::GetRootGraphItem() const { - return root_graph_item_.get(); -} +const GraphItem *HybridModel::GetRootGraphItem() const { return root_graph_item_.get(); } const GraphItem *HybridModel::GetSubgraphItem(const std::string &graph_name) const { GELOGD("To find subgraph item by name = %s", graph_name.c_str()); @@ -127,8 +119,6 @@ const GraphItem *HybridModel::GetSubgraphItem(const ComputeGraphPtr &subgraph) c return GetSubgraphItem(subgraph_name); } -const string &HybridModel::GetModelName() const { - return model_name_; -} +const string &HybridModel::GetModelName() const { return model_name_; } } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h index 11311968..668b5fd7 100644 --- a/ge/hybrid/model/hybrid_model.h +++ b/ge/hybrid/model/hybrid_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -41,39 +41,27 @@ class HybridModel { const NodeItem *GetNodeItem(const NodePtr &node) const; - uint64_t GetSessionId() const { - return root_runtime_param_.session_id; - } + uint64_t GetSessionId() const { return root_runtime_param_.session_id; } GeModelPtr GetGeModel(const NodePtr &node) const; NodeItem *MutableNodeItem(const NodePtr &node); - size_t TotalVarMemSize() const { - return root_runtime_param_.var_size; - } + size_t TotalVarMemSize() const { return root_runtime_param_.var_size; } - const uint8_t* GetVarMemBase() const { - return var_mem_base_; - } + const uint8_t *GetVarMemBase() const { return var_mem_base_; } - void SetDeviceId(uint32_t device_id) { - device_id_ = device_id; - } + void SetDeviceId(uint32_t device_id) { device_id_ = device_id; } - void SetModelId(uint32_t model_id) { - model_id_ = model_id; - } + void SetModelId(uint32_t model_id) { model_id_ = model_id; } - uint32_t GetModelId() const { - return model_id_; - } + uint32_t GetModelId() const { return model_id_; } - TensorValue* GetVariable(const string &name) const; + TensorValue *GetVariable(const string &name) const; NodePtr GetVariableNode(const string &name) const; - const std::vector* GetTaskDefs(const NodePtr &node) const; + const std::vector *GetTaskDefs(const NodePtr &node) const; const GraphItem *GetRootGraphItem() const; @@ -91,8 +79,7 @@ class HybridModel { GeRootModelPtr ge_root_model_; std::map input_nodes_; std::map constant_op_nodes_; - std::map device_variable_nodes_; //lint !e148 - std::map host_variable_nodes_; //lint !e148 + std::map variable_nodes_; std::map> variable_tensors_; std::map> task_defs_; std::map known_shape_sub_models_; @@ -109,4 +96,4 @@ class HybridModel { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_HYBRID_GRAPH_H_ +#endif // GE_HYBRID_HYBRID_GRAPH_H_ diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 812d822f..0671990c 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ 
b/ge/hybrid/model/hybrid_model_builder.cc @@ -17,12 +17,10 @@ #include "hybrid/model/hybrid_model_builder.h" #include "common/math/math_util.h" #include "graph/ge_context.h" -#include "graph/build/memory/var_mem_assign_util.h" #include "graph/utils/node_utils.h" #include "graph/debug/ge_attr_define.h" #include "graph/load/new_model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" -#include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/utils/graph_utils.h" #include "graph/utils/type_utils.h" @@ -41,7 +39,7 @@ int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { int64_t var_size = 0; auto data_type = desc.GetDataType(); if (data_type == DT_STRING) { - (void) TensorUtils::GetSize(desc, var_size); + (void)TensorUtils::GetSize(desc, var_size); } else { var_size = GetSizeByDataType(data_type); if (var_size <= 0) { @@ -93,8 +91,7 @@ Status HybridModelBuilder::ValidateParams() { Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_item) { auto op_desc = node->GetOpDesc(); vector dependencies = node->GetOpDesc()->GetOpInferDepends(); - GE_CHK_STATUS_RET(ParseDependentInputNodes(node_item, dependencies), - "[%s] Failed to parse node dependencies.", + GE_CHK_STATUS_RET(ParseDependentInputNodes(node_item, dependencies), "[%s] Failed to parse node dependencies.", node_item.NodeName().c_str()); node_item.outputs.resize(node_item.num_outputs); @@ -105,7 +102,7 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite return INTERNAL_ERROR; } - for (auto &dst_in_anchor: out_data_anchor->GetPeerInDataAnchors()) { + for (auto &dst_in_anchor : out_data_anchor->GetPeerInDataAnchors()) { auto dst_node = dst_in_anchor->GetOwnerNode(); if (dst_node == nullptr) { GELOGW("dst node is nullptr. 
out anchor = %d", out_data_anchor->GetIdx()); @@ -113,8 +110,7 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite } NodeItem *dst_node_item = nullptr; - GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), - "[%s] Failed to get or create node item.", + GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item), "[%s] Failed to get or create node item.", dst_node->GetName().c_str()); node_item.outputs[i].emplace_back(dst_in_anchor->GetIdx(), dst_node_item); } @@ -127,7 +123,7 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite Status HybridModelBuilder::ResolveRefIo(NodeItem &node_item) { bool is_ref = false; auto &op_desc = *node_item.op_desc; - (void) AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); + (void)AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); if (!is_ref) { return SUCCESS; } @@ -156,7 +152,7 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n return SUCCESS; } - auto new_node = std::unique_ptr(new(std::nothrow) NodeItem(node)); + auto new_node = std::unique_ptr(new (std::nothrow) NodeItem(node)); GE_CHECK_NOTNULL(new_node); GE_CHECK_NOTNULL(new_node->op_desc); GE_CHK_STATUS_RET(new_node->Init(), "Failed to init NodeItem [%s] .", node->GetName().c_str()); @@ -165,8 +161,8 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n // we do not need L2 Buffer const char *const kIsFirstNode = "is_first_node"; const char *const kIsLastNode = "is_last_node"; - (void) AttrUtils::SetBool(new_node->op_desc, kIsFirstNode, false); - (void) AttrUtils::SetBool(new_node->op_desc, kIsLastNode, false); + (void)AttrUtils::SetBool(new_node->op_desc, kIsFirstNode, false); + (void)AttrUtils::SetBool(new_node->op_desc, kIsLastNode, false); if (new_node->is_dynamic && (new_node->IsControlOp() || new_node->NodeType() == PARTITIONEDCALL)) { new_node->shape_inference_type = DEPEND_COMPUTE; @@ -184,8 +180,6 @@ Status 
HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const std::vector &dependencies) { std::set dependent_input_nodes; auto &ge_node = node_item.node; - bool is_hccl_op = - NodeExecutorManager::GetInstance().ResolveExecutorType(*ge_node) == NodeExecutorManager::ExecutorType::HCCL; // The input tensors become valid after computation is done for parent nodes of type DEPEND_COMPUTE. // Wait for these parent nodes before execution. @@ -200,16 +194,9 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s auto src_node_item = MutableNodeItem(src_node); GE_CHECK_NOTNULL(src_node_item); - if (is_hccl_op) { - GELOGD("[%s] Add input data dependent node [%s] due to engine type is HCCL", - node_item.NodeName().c_str(), - src_node_item->NodeName().c_str()); - src_node_item->has_observer = true; - node_item.dependents_for_execution.emplace_back(src_node); - } else if (src_node_item->shape_inference_type == DEPEND_COMPUTE) { + if (src_node_item->shape_inference_type == DEPEND_COMPUTE) { GELOGD("[%s] Add input data dependent node [%s] due to inference type = DEPEND_COMPUTE", - node_item.NodeName().c_str(), - src_node_item->NodeName().c_str()); + node_item.NodeName().c_str(), src_node_item->NodeName().c_str()); src_node_item->has_observer = true; node_item.dependents_for_execution.emplace_back(src_node); @@ -217,8 +204,7 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s if (src_node_item->shape_inference_type == DEPEND_SHAPE_RANGE) { GELOGD("[%s] Add input shape dependent node [%s] due to inference type = DEPEND_SHAPE_RANGE", - node_item.NodeName().c_str(), - src_node_item->NodeName().c_str()); + node_item.NodeName().c_str(), src_node_item->NodeName().c_str()); src_node_item->has_observer = true; dependent_input_nodes.emplace(src_node); } @@ -236,17 +222,14 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem 
&node_item, const s GE_CHECK_NOTNULL(src_node_item); src_node_item->has_observer = true; node_item.dependents_for_execution.emplace_back(src_node); - GELOGD("[%s] Dependent added from %s for control op's cond/branch", - node_item.NodeName().c_str(), + GELOGD("[%s] Dependent added from %s for control op's cond/branch", node_item.NodeName().c_str(), src_node_item->NodeName().c_str()); } for (const auto &input_name : dependencies) { int input_index = node_item.op_desc->GetInputIndexByName(input_name); if (input_index < 0) { - GELOGE(INTERNAL_ERROR, - "[%s] Failed to get input index by name: %s", - node_item.NodeName().c_str(), + GELOGE(INTERNAL_ERROR, "[%s] Failed to get input index by name: %s", node_item.NodeName().c_str(), input_name.c_str()); return INTERNAL_ERROR; } @@ -262,10 +245,8 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s src_node_item->has_observer = true; dependent_input_nodes.emplace(src_node); - GELOGD("[%s] Dependent added from output of [%s:%d]", - node_item.NodeName().c_str(), - src_node_item->NodeName().c_str(), - peer_out_anchor->GetIdx()); + GELOGD("[%s] Dependent added from output of [%s:%d]", node_item.NodeName().c_str(), + src_node_item->NodeName().c_str(), peer_out_anchor->GetIdx()); } for (const auto &dep_node : dependent_input_nodes) { @@ -306,31 +287,21 @@ Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) { Status HybridModelBuilder::DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_anchor, const InDataAnchorPtr &in_data_anchor) { GE_CHK_GRAPH_STATUS_RET(out_data_anchor->Unlink(in_data_anchor), "Failed to unlink %s:%d from %s:%d", - out_data_anchor->GetOwnerNode()->GetName().c_str(), - out_data_anchor->GetIdx(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), - in_data_anchor->GetIdx()); - - GELOGD("Succeeded in unlinking %s:%d from %s:%d", - out_data_anchor->GetOwnerNode()->GetName().c_str(), - out_data_anchor->GetIdx(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), - 
in_data_anchor->GetIdx()); + out_data_anchor->GetOwnerNode()->GetName().c_str(), out_data_anchor->GetIdx(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx()); + + GELOGD("Succeeded in unlinking %s:%d from %s:%d", out_data_anchor->GetOwnerNode()->GetName().c_str(), + out_data_anchor->GetIdx(), in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx()); return SUCCESS; } Status HybridModelBuilder::DoLinkDataAnchors(OutDataAnchorPtr &out_data_anchor, InDataAnchorPtr &in_data_anchor) { GE_CHK_GRAPH_STATUS_RET(out_data_anchor->LinkTo(in_data_anchor), "Failed to link %s:%d to %s:%d", - out_data_anchor->GetOwnerNode()->GetName().c_str(), - out_data_anchor->GetIdx(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), - in_data_anchor->GetIdx()); - - GELOGD("Succeeded in linking %s:%d to %s:%d", - out_data_anchor->GetOwnerNode()->GetName().c_str(), - out_data_anchor->GetIdx(), - in_data_anchor->GetOwnerNode()->GetName().c_str(), - in_data_anchor->GetIdx()); + out_data_anchor->GetOwnerNode()->GetName().c_str(), out_data_anchor->GetIdx(), + in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx()); + + GELOGD("Succeeded in linking %s:%d to %s:%d", out_data_anchor->GetOwnerNode()->GetName().c_str(), + out_data_anchor->GetIdx(), in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx()); return SUCCESS; } @@ -352,9 +323,7 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGE(FAILED, - "[%s] Failed to get attr [%s]", - data_op_desc->GetName().c_str(), + GELOGE(FAILED, "[%s] Failed to get attr [%s]", data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); return FAILED; } @@ -388,7 +357,7 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) { if (in_node_set.count(in_control_node) == 0) { GELOGD("[%s] Restore control edge to [%s]", 
in_control_node->GetName().c_str(), root_node->GetName().c_str()); GE_CHECK_NOTNULL(in_control_node->GetOutControlAnchor()); - (void) in_control_node->GetOutControlAnchor()->LinkTo(root_node->GetInControlAnchor()); + (void)in_control_node->GetOutControlAnchor()->LinkTo(root_node->GetInControlAnchor()); } } } @@ -400,10 +369,7 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) { Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) { const auto &parent_node = graph.GetParentNode(); const NodePtr &net_output_node = graph.FindFirstNodeMatchType(NETOUTPUT); - if (net_output_node == nullptr) { - GELOGD("Graph has no netoutput no need to merge."); - return SUCCESS; - } + GE_CHECK_NOTNULL(net_output_node); const auto &net_output_desc = net_output_node->GetOpDesc(); GE_CHECK_NOTNULL(net_output_desc); @@ -426,8 +392,8 @@ Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGW("SubGraph: %s NetOutput input tensor %d, attr %s not found.", - graph.GetName().c_str(), index, ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGW("SubGraph: %s NetOutput input tensor %d, attr %s not found.", graph.GetName().c_str(), index, + ATTR_NAME_PARENT_NODE_INDEX.c_str()); continue; } @@ -475,17 +441,18 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap continue; } - auto subgraph = NodeUtils::GetSubgraph(*node, kSubgraphIndex); - GE_CHECK_NOTNULL(subgraph); - bool is_unknown_shape = subgraph->GetGraphUnknownFlag(); + bool is_unknown_shape = false; + GE_CHK_GRAPH_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape), + "Failed to invoke GetNodeUnknownShapeStatus."); if (!is_unknown_shape) { merged_graph->AddNode(node); GELOGD("[%s] Known shape partitioned call added to merged graph.", op_desc->GetName().c_str()); continue; } - GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, *merged_graph, 
*subgraph), - "[%s] Failed to merge subgraph.", + auto subgraph = NodeUtils::GetSubgraph(*node, kSubgraphIndex); + GE_CHECK_NOTNULL(subgraph); + GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, *merged_graph, *subgraph), "[%s] Failed to merge subgraph.", subgraph->GetName().c_str()); } @@ -494,25 +461,21 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap for (auto &remained_subgraph : root_graph.GetAllSubgraphs()) { GELOGD("Adding subgraph [%s] to merged-graph.", remained_subgraph->GetName().c_str()); - GE_CHK_GRAPH_STATUS_RET(merged_graph->AddSubgraph(remained_subgraph), - "Failed to add subgraph [%s]", + GE_CHK_GRAPH_STATUS_RET(merged_graph->AddSubgraph(remained_subgraph), "Failed to add subgraph [%s]", remained_subgraph->GetName().c_str()); } return SUCCESS; } -Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, - ComputeGraph &parent_graph, +Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph &parent_graph, ComputeGraph &sub_graph) { auto parent_node = sub_graph.GetParentNode(); GE_CHECK_NOTNULL(parent_node); - GE_CHK_STATUS_RET(MergeInputNodes(sub_graph), - "[%s] Failed to merge data nodes for subgraph", + GE_CHK_STATUS_RET(MergeInputNodes(sub_graph), "[%s] Failed to merge data nodes for subgraph", sub_graph.GetName().c_str()); - GE_CHK_STATUS_RET(MergeNetOutputNode(sub_graph), - "[%s] Failed to merge net output nodes for subgraph", + GE_CHK_STATUS_RET(MergeNetOutputNode(sub_graph), "[%s] Failed to merge net output nodes for subgraph", sub_graph.GetName().c_str()); GELOGD("[%s] Done merging subgraph inputs and outputs successfully.", sub_graph.GetName().c_str()); @@ -521,21 +484,28 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, if (sub_op_type == DATA_TYPE || sub_op_type == NETOUTPUT) { continue; } + + if (sub_op_type == CONSTANT || sub_op_type == VARIABLE) { + GELOGE(INTERNAL_ERROR, "Unexpected node in unknown subgraph. 
type = %s, node = %s::%s", sub_op_type.c_str(), + sub_graph.GetName().c_str(), sub_node->GetName().c_str()); + return INTERNAL_ERROR; + } + if (sub_op_type == PARTITIONEDCALL) { - auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, kSubgraphIndex); - GE_CHECK_NOTNULL(sub_sub_graph); - if (sub_sub_graph->GetGraphUnknownFlag()) { - GE_CHK_STATUS_RET(UnfoldSubgraph(root_graph, parent_graph, *sub_sub_graph), - "[%s] Failed to merge subgraph", + bool is_unknown_shape = false; + GE_CHK_GRAPH_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*sub_node, is_unknown_shape), + "[%s] Failed to invoke GetNodeUnknownShapeStatus.", sub_node->GetName().c_str()); + if (is_unknown_shape) { + auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, kSubgraphIndex); + GE_CHECK_NOTNULL(sub_sub_graph); + GE_CHK_STATUS_RET(UnfoldSubgraph(root_graph, parent_graph, *sub_sub_graph), "[%s] Failed to merge subgraph", sub_sub_graph->GetName().c_str()); continue; } } parent_graph.AddNode(sub_node); - GELOGD("[%s::%s] added to parent graph: [%s].", - sub_graph.GetName().c_str(), - sub_node->GetName().c_str(), + GELOGD("[%s::%s] added to parent graph: [%s].", sub_graph.GetName().c_str(), sub_node->GetName().c_str(), parent_graph.GetName().c_str()); } @@ -544,9 +514,7 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph, return SUCCESS; } -Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, - const NodeItem &node_item, - bool is_root_graph) { +Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, const NodeItem &node_item, bool is_root_graph) { auto output_size = node_item.op_desc->GetAllInputsSize(); GE_CHECK_LE(output_size, UINT32_MAX); graph_item.output_edges_.resize(output_size); @@ -560,11 +528,8 @@ Status HybridModelBuilder::BuildOutputMapping(GraphItem &graph_item, auto src_node_item = GetNodeItem(src_node); GE_CHECK_NOTNULL(src_node_item); auto output_offset = src_node_item->output_start + peer_out_anchor->GetIdx(); - GELOGI("Output[%d], node 
= %s, output_index = %d, output_offset = %d ", - in_data_anchor->GetIdx(), - src_node_item->NodeName().c_str(), - peer_out_anchor->GetIdx(), - output_offset); + GELOGI("Output[%d], node = %s, output_index = %d, output_offset = %d ", in_data_anchor->GetIdx(), + src_node_item->NodeName().c_str(), peer_out_anchor->GetIdx(), output_offset); graph_item.output_edges_[in_data_anchor->GetIdx()] = {src_node_item, peer_out_anchor->GetIdx()}; } @@ -588,13 +553,11 @@ Status HybridModelBuilder::LoadGraph() { auto root_graph = ge_root_model_->GetRootGraph(); if (!GetContext().GetHostExecFlag()) { std::shared_ptr merged_graph; - GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", - root_graph->GetDirectNodesSize(), + GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", root_graph->GetDirectNodesSize(), root_graph->GetAllNodesSize()); GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(*root_graph, merged_graph), "Failed to unfold subgraphs."); root_graph = std::move(merged_graph); - GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", - root_graph->GetDirectNodesSize(), + GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu", root_graph->GetDirectNodesSize(), root_graph->GetAllNodesSize()); GE_DUMP(root_graph, "hybrid_merged_graph"); } @@ -615,19 +578,16 @@ Status HybridModelBuilder::LoadGraph() { } if (sub_graph->GetGraphUnknownFlag()) { - GE_CHK_STATUS_RET(LoadDynamicSubgraph(*sub_graph, false), - "Failed to load subgraph: [%s]", + GE_CHK_STATUS_RET(LoadDynamicSubgraph(*sub_graph, false), "Failed to load subgraph: [%s]", sub_graph->GetName().c_str()); } else { - GE_CHK_STATUS_RET(IdentifyVariableOutputs(*parent_node_item), - "[%s] Failed to identify ref outputs.", + GE_CHK_STATUS_RET(IdentifyVariableOutputs(*parent_node_item), "[%s] Failed to identify ref outputs.", parent_node_item->NodeName().c_str()); // if parent is function control op. 
need add a virtual partitioned call if (parent_node_item->IsControlOp()) { GE_CHK_STATUS_RET(LoadKnownShapedSubgraph(*sub_graph, parent_node_item), - "Failed to load function control op subgraph [%s]", - sub_graph->GetName().c_str()); + "Failed to load function control op subgraph [%s]", sub_graph->GetName().c_str()); } } } @@ -636,21 +596,16 @@ Status HybridModelBuilder::LoadGraph() { return SUCCESS; } -const NodeItem *HybridModelBuilder::GetNodeItem(const NodePtr &node) const { - return hybrid_model_.GetNodeItem(node); -} +const NodeItem *HybridModelBuilder::GetNodeItem(const NodePtr &node) const { return hybrid_model_.GetNodeItem(node); } -NodeItem *HybridModelBuilder::MutableNodeItem(const NodePtr &node) { - return hybrid_model_.MutableNodeItem(node); -} +NodeItem *HybridModelBuilder::MutableNodeItem(const NodePtr &node) { return hybrid_model_.MutableNodeItem(node); } Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_ptr &tensor) { string var_name = var_node->GetName(); auto tensor_desc = var_node->GetOpDesc()->MutableOutputDesc(0); uint8_t *var_logic = nullptr; GE_CHK_STATUS_RET(var_manager_->GetVarAddr(var_name, *tensor_desc, &var_logic), - "Failed to get var addr. var_name = %s, session_id = %ld", - var_name.c_str(), + "Failed to get var addr. 
var_name = %s, session_id = %ld", var_name.c_str(), hybrid_model_.GetSessionId()); uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, RT_MEMORY_HBM); @@ -664,7 +619,7 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_ int64_t var_size = CalcVarSizeInBytes(*tensor_desc); // var size is only for checking, will not allocate any memory by it - tensor.reset(new(std::nothrow)TensorValue(dev_mem, static_cast(var_size))); + tensor.reset(new (std::nothrow) TensorValue(dev_mem, static_cast(var_size))); GE_CHECK_NOTNULL(tensor); return SUCCESS; } @@ -687,8 +642,7 @@ Status HybridModelBuilder::HandleDtString(const GeTensor &tensor, void *var_addr GE_CHK_BOOL_RET_STATUS(ge::CheckInt64Uint32MulOverflow(elem_num, kBytes) == SUCCESS, FAILED, "Shape size is invalid"); auto offset = static_cast(elem_num * kBytes); - auto hbm_raw_data_base_addr = - reinterpret_cast(reinterpret_cast(var_addr) + offset); + auto hbm_raw_data_base_addr = reinterpret_cast(reinterpret_cast(var_addr) + offset); for (int64_t i = elem_num - 1; i >= 0; --i) { buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); } @@ -714,19 +668,6 @@ Status HybridModelBuilder::AssignUninitializedConstantOps() { } } - for (auto &it : hybrid_model_.device_variable_nodes_) { - const string &var_name = it.first; - const NodePtr &var_node = it.second; - auto tensor_desc = var_node->GetOpDesc()->MutableOutputDesc(0); - if (!var_manager_->IsVarExist(var_name, *tensor_desc)) { - // allocate constant - GELOGD("[%s] Constant not allocated during graph building. 
now allocate it.", var_name.c_str()); - GE_CHK_STATUS_RET(var_manager_->AssignVarMem(var_name, *tensor_desc, RT_MEMORY_HBM)); - GE_CHK_STATUS_RET(VarMemAssignUtil::AssignData2Fp32Var(var_node, runtime_param_.session_id)) - GE_CHK_STATUS_RET(var_manager_->SetAllocatedGraphId(var_name, runtime_param_.graph_id)); - } - } - return SUCCESS; } @@ -734,32 +675,28 @@ Status HybridModelBuilder::InitConstantOps() { for (auto &it : hybrid_model_.constant_op_nodes_) { const string &var_name = it.first; const NodePtr &var_node = it.second; + std::unique_ptr var_tensor; + + GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); + GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); + var_tensor->SetName("ConstOp_" + var_name); + auto op_desc = var_node->GetOpDesc(); auto v_weights = ModelUtils::GetWeights(op_desc); - auto *ge_tensor = const_cast(v_weights[0].get()); + auto v_output_size = var_tensor->GetSize(); + auto v_output_addr = var_tensor->MutableData(); - std::unique_ptr var_tensor; - if (GetContext().GetHostExecFlag()) { - auto buffer = ge_tensor->MutableData(); - GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize()); - var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize())); + auto *ge_tensor = const_cast(v_weights[0].get()); + if (ge_tensor->GetData().size() > 0) { + GE_CHK_STATUS_RET_NOLOG(HandleDtString(*ge_tensor, v_output_addr)); + + GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%zu] datasize[%zu]", + runtime_param_.graph_id, op_desc->GetName().c_str(), 0, v_output_addr, v_output_size, + ge_tensor->GetData().size()); + GE_CHK_RT_RET(rtMemcpy(v_output_addr, v_output_size, ge_tensor->GetData().data(), ge_tensor->GetData().size(), + RT_MEMCPY_HOST_TO_DEVICE)); } else { - GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); - GELOGD("Init const op tensor. 
name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); - var_tensor->SetName("ConstOp_" + var_name); - auto v_output_size = var_tensor->GetSize(); - auto v_output_addr = var_tensor->MutableData(); - if (ge_tensor->GetData().size() > 0) { - GE_CHK_STATUS_RET_NOLOG(HandleDtString(*ge_tensor, v_output_addr)); - - GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%zu] datasize[%zu]", - runtime_param_.graph_id, op_desc->GetName().c_str(), 0, v_output_addr, v_output_size, - ge_tensor->GetData().size()); - GE_CHK_RT_RET(rtMemcpy(v_output_addr, v_output_size, ge_tensor->GetData().data(), ge_tensor->GetData().size(), - RT_MEMCPY_HOST_TO_DEVICE)); - } else { - GELOGI("[%s] Const op has no weight data.", op_desc->GetName().c_str()); - } + GELOGI("[%s] Const op has no weight data.", op_desc->GetName().c_str()); } hybrid_model_.variable_tensors_.emplace(var_name, std::move(var_tensor)); @@ -769,40 +706,17 @@ Status HybridModelBuilder::InitConstantOps() { } Status HybridModelBuilder::InitVariableTensors() { - for (auto &it : hybrid_model_.device_variable_nodes_) { + for (auto &it : hybrid_model_.variable_nodes_) { string var_name = it.first; NodePtr &var_node = it.second; std::unique_ptr tensor; GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, tensor)); - GELOGD("Init variable tensor. name = %s, size = %ld, addr = %p", - var_name.c_str(), - tensor->GetSize(), + GELOGD("Init variable tensor. 
name = %s, size = %ld, addr = %p", var_name.c_str(), tensor->GetSize(), tensor->GetData()); tensor->SetName("Var_" + var_name); hybrid_model_.variable_tensors_.emplace(var_name, std::move(tensor)); } - for (const auto &it : hybrid_model_.host_variable_nodes_) { - auto op_desc = it.second->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - GeTensorDesc output_tensor = op_desc->GetOutputDesc(0); - int64_t tensor_size = 0; - if (TensorUtils::CalcTensorMemSize(output_tensor.GetShape(), output_tensor.GetFormat(), output_tensor.GetDataType(), - tensor_size) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Calculate variable size failed, node name:%s", it.first.c_str()); - return INTERNAL_ERROR; - } - SharedMemInfo mem_info(it.first, tensor_size); - if (HostMemManager::Instance().MallocSharedMemory(mem_info) != SUCCESS) { - GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); - return GE_GRAPH_MALLOC_FAILED; - } - GELOGD("Host variable [%s] malloc success.", it.first.c_str()); - - std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); - hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); - } - return SUCCESS; } @@ -820,9 +734,7 @@ Status HybridModelBuilder::LoadTasks() { } GELOGD("[%s] Start to build kernel task", node_ptr->GetName().c_str()); - auto load_ret = node_item->node_executor->LoadTask(hybrid_model_, - node_ptr, - node_item->kernel_task); + auto load_ret = node_item->node_executor->LoadTask(hybrid_model_, node_ptr, node_item->kernel_task); if (load_ret != UNSUPPORTED && load_ret != SUCCESS) { GELOGE(load_ret, "[%s] Failed to load task", node_ptr->GetName().c_str()); return load_ret; @@ -839,13 +751,11 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr GE_CHECK_NOTNULL(parent_node); auto op_type = parent_node->GetType(); if (op_type == IF || op_type == CASE || op_type == WHILE) { - GELOGD("Set ge_model for control op subgraph: [%s], task_size = %d", - 
sub_graph.GetName().c_str(), + GELOGD("Set ge_model for control op subgraph: [%s], task_size = %d", sub_graph.GetName().c_str(), ge_model->GetModelTaskDefPtr()->task_size()); subgraph_models_.emplace(sub_graph.GetName(), ge_model); } else { - GELOGD("Set ge_model for subgraph: [%s], task_size = %d", - sub_graph.GetName().c_str(), + GELOGD("Set ge_model for subgraph: [%s], task_size = %d", sub_graph.GetName().c_str(), ge_model->GetModelTaskDefPtr()->task_size()); hybrid_model_.known_shape_sub_models_.emplace(sub_graph.GetParentNode(), ge_model); } @@ -927,22 +837,14 @@ Status HybridModelBuilder::IndexSpecialNodes() { auto op_type = node->GetType(); GELOGD("node name = %s, node type = %s", node->GetName().c_str(), node->GetType().c_str()); if (op_type == VARIABLE) { - string placement; - (void) AttrUtils::GetStr(node->GetOpDesc(), ATTR_VARIABLE_PLACEMENT, placement); - if (placement == "host") { - hybrid_model_.host_variable_nodes_.emplace(node->GetName(), node); - } else { - hybrid_model_.device_variable_nodes_.emplace(node->GetName(), node); - } + hybrid_model_.variable_nodes_.emplace(node->GetName(), node); } else if (op_type == CONSTANTOP) { hybrid_model_.constant_op_nodes_.emplace(node->GetName(), node); } else if (op_type == DATA && node->GetOwnerComputeGraph() != root_graph) { NodePtr src_node; int peer_out_index = -1; GE_CHK_STATUS_RET_NOLOG(GetPeerNodeAcrossSubGraphs(node, src_node, peer_out_index)); - GELOGD("Got peer node for data node %s, peer node = %s(%s)", - node->GetName().c_str(), - src_node->GetName().c_str(), + GELOGD("Got peer node for data node %s, peer node = %s(%s)", node->GetName().c_str(), src_node->GetName().c_str(), src_node->GetType().c_str()); auto src_op_type = src_node->GetType(); @@ -955,11 +857,11 @@ Status HybridModelBuilder::IndexSpecialNodes() { } } } + return SUCCESS; } -Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, - NodePtr &peer_node, +Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const 
NodePtr &data_node, NodePtr &peer_node, int &peer_out_index) { auto sub_graph = data_node->GetOwnerComputeGraph(); GE_CHECK_NOTNULL(sub_graph); @@ -972,9 +874,7 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, auto data_op_desc = data_node->GetOpDesc(); uint32_t parent_index = 0; if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGE(INTERNAL_ERROR, - "[%s] Failed to get attr [%s]", - data_op_desc->GetName().c_str(), + GELOGE(INTERNAL_ERROR, "[%s] Failed to get attr [%s]", data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); return INTERNAL_ERROR; } @@ -997,8 +897,7 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, if (src_node_type != PARTITIONEDCALL) { peer_node = src_wrapped_node; peer_out_index = kVarOutputIndex; - GELOGD("[%s] Node is connected to root graph's node: %s", - data_node->GetName().c_str(), + GELOGD("[%s] Node is connected to root graph's node: %s", data_node->GetName().c_str(), peer_node->GetName().c_str()); return SUCCESS; } @@ -1006,10 +905,8 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, auto src_graph = NodeUtils::GetSubgraph(*src_wrapped_node, kSubgraphIndex); GE_CHECK_NOTNULL(src_graph); auto src_net_output_node = src_graph->FindFirstNodeMatchType(NETOUTPUT); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(src_net_output_node == nullptr, - return INTERNAL_ERROR, - "Failed to find NetOutput in subgraph: %s", - src_graph->GetName().c_str()); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(src_net_output_node == nullptr, return INTERNAL_ERROR, + "Failed to find NetOutput in subgraph: %s", src_graph->GetName().c_str()); auto net_output_desc = src_net_output_node->GetOpDesc(); GE_CHECK_NOTNULL(net_output_desc); @@ -1022,8 +919,8 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, for (uint32_t i = 0; i < static_cast(input_size); ++i) { uint32_t p_index = 0; if 
(!AttrUtils::GetInt(net_output_desc->GetInputDesc(i), ATTR_NAME_PARENT_NODE_INDEX, p_index)) { - GELOGW("SubGraph: %s input tensor %u attr %s not found.", - src_graph->GetName().c_str(), i, ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGW("SubGraph: %s input tensor %u attr %s not found.", src_graph->GetName().c_str(), i, + ATTR_NAME_PARENT_NODE_INDEX.c_str()); continue; } @@ -1036,19 +933,13 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, peer_node = peer_out_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(peer_node); peer_out_index = peer_out_anchor->GetIdx(); - GELOGD("Found peer node of Data node: %s::%s is %s::%s", - sub_graph->GetName().c_str(), - data_node->GetName().c_str(), - src_graph->GetName().c_str(), - peer_node->GetName().c_str()); + GELOGD("Found peer node of Data node: %s::%s is %s::%s", sub_graph->GetName().c_str(), + data_node->GetName().c_str(), src_graph->GetName().c_str(), peer_node->GetName().c_str()); return SUCCESS; } } - GELOGE(FAILED, - "Failed to find peer node for %s::%s", - sub_graph->GetName().c_str(), - data_node->GetName().c_str()); + GELOGE(FAILED, "Failed to find peer node for %s::%s", sub_graph->GetName().c_str(), data_node->GetName().c_str()); return FAILED; } Status HybridModelBuilder::InitRuntimeParams() { @@ -1068,15 +959,15 @@ Status HybridModelBuilder::InitRuntimeParams() { runtime_param_.graph_id = ge_root_model_->GetRootGraph()->GetGraphID(); value = 0; for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) { - (void) ge::AttrUtils::GetInt(it.second, ATTR_MODEL_VAR_SIZE, value); + (void)ge::AttrUtils::GetInt(it.second, ATTR_MODEL_VAR_SIZE, value); if (value > 0) { runtime_param_.var_size = static_cast(value); break; } } - GELOGI("InitRuntimeParams(), session_id:%lu, var_size:%lu. graph_id = %u", - runtime_param_.session_id, runtime_param_.var_size, runtime_param_.graph_id); + GELOGI("InitRuntimeParams(), session_id:%lu, var_size:%lu. 
graph_id = %u", runtime_param_.session_id, + runtime_param_.var_size, runtime_param_.graph_id); var_manager_ = VarManager::Instance(runtime_param_.session_id); GE_CHECK_NOTNULL(var_manager_); @@ -1100,11 +991,8 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { auto src_node = GetPeerNode(in_data_anchor); GE_CHECK_NOTNULL(src_node); auto src_op_type = src_node->GetType(); - GELOGD("Node %s, output %d, src node = %s, src node type = %s", - node_item.NodeName().c_str(), - in_data_anchor->GetIdx(), - src_node->GetName().c_str(), - src_op_type.c_str()); + GELOGD("Node %s, output %d, src node = %s, src node type = %s", node_item.NodeName().c_str(), + in_data_anchor->GetIdx(), src_node->GetName().c_str(), src_op_type.c_str()); if (src_op_type != CONSTANTOP && src_op_type != VARIABLE) { continue; @@ -1124,7 +1012,7 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) { } string ref_var_name; - (void) AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_name); + (void)AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_name); if (ref_var_name.empty()) { continue; } @@ -1157,8 +1045,7 @@ Status HybridModelBuilder::GetParentNodeOutputIndex(const OpDesc &op_desc, int i auto input_desc = op_desc.MutableInputDesc(index); GE_CHECK_NOTNULL(input_desc); if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, out_index)) { - GELOGE(INTERNAL_ERROR, "NetOutput input tensor %d, attr %s not found.", - index, ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(INTERNAL_ERROR, "NetOutput input tensor %d, attr %s not found.", index, ATTR_NAME_PARENT_NODE_INDEX.c_str()); return INTERNAL_ERROR; } return SUCCESS; @@ -1173,8 +1060,7 @@ Status HybridModelBuilder::InitModelMem() { } if (total_var_size > 0 && hybrid_model_.var_mem_base_ == nullptr) { - GE_CHK_STATUS_RET(var_manager_->MallocVarMemory(total_var_size), - "Malloc Var Memory Fail."); + GE_CHK_STATUS_RET(var_manager_->MallocVarMemory(total_var_size), 
"Malloc Var Memory Fail."); hybrid_model_.var_mem_base_ = var_manager_->GetVarMemoryBase(RT_MEMORY_HBM); } @@ -1192,33 +1078,30 @@ Status HybridModelBuilder::TransAllVarData() { } std::vector variable_node_list; - for (auto &it : hybrid_model_.device_variable_nodes_) { + for (auto &it : hybrid_model_.variable_nodes_) { variable_node_list.emplace_back(it.second); GELOGD("[%s] added for trans var data", it.first.c_str()); } - GE_CHK_STATUS_RET(TransVarDataUtils::TransAllVarData(variable_node_list, - runtime_param_.session_id, - ctx, - runtime_param_.graph_id), - "TransAllVarData failed."); + GE_CHK_STATUS_RET( + TransVarDataUtils::TransAllVarData(variable_node_list, runtime_param_.session_id, ctx, runtime_param_.graph_id), + "TransAllVarData failed."); GELOGI("TransAllVarData success."); return SUCCESS; } Status HybridModelBuilder::CopyVarData() { - GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(ge_root_model_->GetRootGraph(), - runtime_param_.session_id, - hybrid_model_.device_id_), - "CopyVarData failed."); + GE_CHK_STATUS_RET( + TransVarDataUtils::CopyVarData(ge_root_model_->GetRootGraph(), runtime_param_.session_id, hybrid_model_.device_id_), + "CopyVarData failed."); GELOGI("CopyVarData success."); return SUCCESS; } Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item) { GELOGD("Start to load known shaped subgraph [%s]", graph.GetName().c_str()); - auto graph_item = std::unique_ptr(new(std::nothrow)GraphItem()); + auto graph_item = std::unique_ptr(new (std::nothrow) GraphItem()); GE_CHECK_NOTNULL(graph_item); graph_item->is_dynamic_ = false; auto subgraph_name = graph.GetName(); @@ -1234,14 +1117,11 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem if (op_type == DATA) { int32_t data_index = 0; if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) { - GELOGE(FAILED, - "[%s] Failed to get attr [%s]", - node->GetName().c_str(), - 
ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(FAILED, "[%s] Failed to get attr [%s]", node->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); return FAILED; } - (void) wrapper_op_desc->AddInputDesc(op_desc->GetInputDesc(0)); + (void)wrapper_op_desc->AddInputDesc(op_desc->GetInputDesc(0)); graph_item->input_index_mapping_.emplace_back(data_index); } else if (op_type == NETOUTPUT) { int output_index = 0; @@ -1252,8 +1132,7 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem } GE_CHK_GRAPH_STATUS_RET(wrapper_op_desc->AddOutputDesc(*output_desc), - "[%s] Failed to add output desc. output index = %d", - graph.GetName().c_str(), + "[%s] Failed to add output desc. output index = %d", graph.GetName().c_str(), output_index); graph_item->output_index_mapping_.emplace_back(data_index); @@ -1278,8 +1157,7 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem graph_item->total_inputs_ = node_item->num_inputs; graph_item->total_outputs_ = node_item->num_outputs; - GELOGD("NodeItem create for known shape subgraph [%s], NodeItem = %s", - graph.GetName().c_str(), + GELOGD("NodeItem create for known shape subgraph [%s], NodeItem = %s", graph.GetName().c_str(), node_item->DebugString().c_str()); GELOGD("Done parse known shape subgraph successfully. 
graph = [%s]", graph.GetName().c_str()); @@ -1292,7 +1170,7 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root_graph) { GELOGD("Start to load subgraph [%s]", graph.GetName().c_str()); // for known partitioned call, load all nodes - auto graph_item = std::unique_ptr(new(std::nothrow)GraphItem()); + auto graph_item = std::unique_ptr(new (std::nothrow) GraphItem()); GE_CHECK_NOTNULL(graph_item); graph_item->is_dynamic_ = true; @@ -1308,7 +1186,7 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root NodeItem *node_item = nullptr; GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node, &node_item)); GE_CHK_STATUS_RET_NOLOG(BuildNodeItem(node, *node_item)); - GE_CHK_STATUS_RET_NOLOG(UpdateAnchorStatus(node)); // needed by FE generate task + GE_CHK_STATUS_RET_NOLOG(UpdateAnchorStatus(node)); // needed by FE generate task node_item->input_start = input_start; node_item->output_start = output_start; @@ -1348,7 +1226,7 @@ Status HybridModelBuilder::ParseVarOutputs(NodeItem &node_item) { for (int i = 0; i < node_item.num_outputs; ++i) { auto output_tensor_desc = node_item.op_desc->GetOutputDesc(i); std::string var_name; - (void) AttrUtils::GetStr(output_tensor_desc, ASSIGN_VAR_NAME, var_name); + (void)AttrUtils::GetStr(output_tensor_desc, ASSIGN_VAR_NAME, var_name); if (!var_name.empty()) { auto var_node = hybrid_model_.GetVariableNode(var_name); GE_CHECK_NOTNULL(var_node); @@ -1358,8 +1236,7 @@ Status HybridModelBuilder::ParseVarOutputs(NodeItem &node_item) { return SUCCESS; } -Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item, - vector &data_nodes, +Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item, vector &data_nodes, bool is_root_graph) { uint32_t data_op_index = 0; for (auto &node_item : data_nodes) { @@ -1372,10 +1249,7 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item, 
data_op_index++; } else { if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) { - GELOGE(FAILED, - "[%s] Failed to get attr [%s]", - node->GetName().c_str(), - ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(FAILED, "[%s] Failed to get attr [%s]", node->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); return FAILED; } } diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index d522939e..ecd327ff 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -49,9 +49,7 @@ class HybridModelBuilder { static Status UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGraphPtr &merged_graph); static Status UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph &parent_graph, ComputeGraph &sub_graph); static Status InitWeights(); - static Status BuildInputMapping(GraphItem &graph_item, - std::vector &data_nodes, - bool is_root_graph); + static Status BuildInputMapping(GraphItem &graph_item, std::vector &data_nodes, bool is_root_graph); static Status ResolveRefIo(NodeItem &node_item); Status BuildOutputMapping(GraphItem &partitioned_call, const NodeItem &node_item, bool is_root_graph); Status ValidateParams(); @@ -76,9 +74,7 @@ class HybridModelBuilder { Status ParseVarOutputs(NodeItem &node_item); Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item); - const char* GetGraphName() const { - return hybrid_model_.model_name_.c_str(); - } + const char *GetGraphName() const { return hybrid_model_.model_name_.c_str(); } const NodeItem *GetNodeItem(const NodePtr &node) const; NodeItem *MutableNodeItem(const NodePtr &node); @@ -95,4 +91,4 @@ class HybridModelBuilder { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_MODEL_HYBRID_MODEL_BUILDER_H_ +#endif // GE_HYBRID_MODEL_HYBRID_MODEL_BUILDER_H_ diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index 4a019487..7ec8d946 100644 --- 
a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -26,16 +26,13 @@ namespace ge { namespace hybrid { namespace { -const char * const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; -const char * const kNodeTypeRetVal = "_RetVal"; +const char *const kAttrNameOriginalFusionGraph = "_original_fusion_graph"; +const char *const kNodeTypeRetVal = "_RetVal"; Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgraph) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGE(FAILED, - "[%s] Failed to get attr [%s]", - op_desc.GetName().c_str(), - ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(FAILED, "[%s] Failed to get attr [%s]", op_desc.GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); return FAILED; } @@ -54,10 +51,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr Status ParseOutputMapping(OpDescPtr op_desc, FusedSubgraph &fused_subgraph) { uint32_t parent_index = 0; if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGE(FAILED, - "[%s] Failed to get attr [%s]", - op_desc->GetName().c_str(), - ATTR_NAME_PARENT_NODE_INDEX.c_str()); + GELOGE(FAILED, "[%s] Failed to get attr [%s]", op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str()); return FAILED; } @@ -71,11 +65,11 @@ Status ParseFusedSubgraph(NodeItem &node_item) { } GELOGI("[%s] Start to parse fused subgraph.", node_item.node_name.c_str()); - auto fused_subgraph = std::unique_ptr(new (std::nothrow)FusedSubgraph()); + auto fused_subgraph = std::unique_ptr(new (std::nothrow) FusedSubgraph()); GE_CHECK_NOTNULL(fused_subgraph); ComputeGraphPtr fused_graph; - (void) AttrUtils::GetGraph(*node_item.op_desc, kAttrNameOriginalFusionGraph, fused_graph); + (void)AttrUtils::GetGraph(*node_item.op_desc, kAttrNameOriginalFusionGraph, fused_graph); GE_CHECK_NOTNULL(fused_graph); fused_graph->SetGraphUnknownFlag(true); @@ -102,7 +96,7 @@ 
Status ParseFusedSubgraph(NodeItem &node_item) { return SUCCESS; } } // namespace -NodeItem::NodeItem(NodePtr node): node(std::move(node)) { +NodeItem::NodeItem(NodePtr node) : node(std::move(node)) { this->op_desc = this->node->GetOpDesc().get(); this->node_id = this->op_desc->GetId(); this->num_inputs = this->op_desc->GetInputsSize(); @@ -113,17 +107,11 @@ NodeItem::NodeItem(NodePtr node): node(std::move(node)) { Status NodeItem::Init() { int32_t unknown_shape_type_val = 0; - (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); + (void)AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); shape_inference_type = static_cast(unknown_shape_type_val); - (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); - GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); - if (!is_dynamic) { - GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), - "[%s] Failed to get shape status.", - node->GetName().c_str()); - } - + GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), "[%s] Failed to get shape status.", + node->GetName().c_str()); GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str()); if (is_dynamic) { for (int i = 0; i < num_inputs; ++i) { @@ -134,8 +122,8 @@ Status NodeItem::Init() { } else { num_static_input_shapes++; is_input_shape_static.push_back(true); - GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", - NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); + GELOGD("[%s] The shape of input[%d] is static. 
shape = [%s]", NodeName().c_str(), i, + input_desc->MutableShape().ToString().c_str()); } } @@ -179,7 +167,7 @@ std::string NodeItem::DebugString() const { for (auto &items : outputs) { ss << ", output[" << index++ << "]: "; for (auto &item : items) { - ss << "(" << item.second->NodeName() << ":" <NodeName() << ":" << item.first << "), "; } } diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h index c10cf13e..53cdeca6 100644 --- a/ge/hybrid/model/node_item.h +++ b/ge/hybrid/model/node_item.h @@ -43,13 +43,9 @@ struct NodeItem { Status Init(); - const std::string &NodeName() const { - return node_name; - } + const std::string &NodeName() const { return node_name; } - const std::string &NodeType() const { - return node_type; - } + const std::string &NodeType() const { return node_type; } bool IsControlOp() const; @@ -91,4 +87,4 @@ struct NodeItem { } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_MODEL_NODE_ITEM_H_ +#endif // GE_HYBRID_MODEL_NODE_ITEM_H_ diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 09c516fb..942d6d9e 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -24,8 +24,7 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICORE, AiCoreNodeExecutor); -AiCoreNodeTask::AiCoreNodeTask(std::vector> &&tasks) : tasks_(std::move(tasks)) { -} +AiCoreNodeTask::AiCoreNodeTask(std::vector> &&tasks) : tasks_(std::move(tasks)) {} Status AiCoreNodeExecutor::Initialize() { auto ge_lib = GELib::GetInstance(); @@ -39,7 +38,7 @@ Status AiCoreNodeExecutor::Initialize() { auto aic_ops_store = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine"); GE_CHECK_NOTNULL(aic_ops_store); - compiler_.reset(new(std::nothrow)AiCoreTaskCompiler(aic_ops_store)); + compiler_.reset(new (std::nothrow) AiCoreTaskCompiler(aic_ops_store)); 
GE_CHECK_NOTNULL(compiler_); return SUCCESS; } @@ -85,7 +84,7 @@ Status AiCoreNodeExecutor::GenNodeKey(const NodePtr &node, std::string &node_key auto num_dims = shape.GetDimNum(); if (num_dims == 0) { continue; - } // scalar + } // scalar for (std::size_t i = 0; i < num_dims - 1; i++) { node_key.append(std::to_string(shape.GetDim(i))); node_key.push_back('_'); @@ -113,8 +112,8 @@ std::shared_ptr AiCoreNodeTaskRegistry::GetTask(const std::string &nod return (iter != reg_node_tasks_.end()) ? iter->second : nullptr; } -Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, - const NodePtr &node, shared_ptr &task) const { +Status AiCoreNodeExecutor::CompileTask(const HybridModel &model, const NodePtr &node, + shared_ptr &task) const { GE_CHECK_NOTNULL(node); auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -159,13 +158,9 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function auto op_desc = context.GetNodeItem().op_desc; GE_CHECK_NOTNULL(op_desc); GELOGI("[%s] ExecuteAsync Start.", op_desc->GetName().c_str()); - for (auto it = tasks_.begin(); it != tasks_.end(); ++it) { - // AtomicAddrClean has 2 tasks - if (tasks_.size() == 2 && it == tasks_.begin() && !(*(tasks_.rbegin()))->GetClearAtomic()) { - continue; - } + for (auto &task : tasks_) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start"); - GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream())); + GE_CHK_STATUS_RET_NOLOG(task->LaunchKernel(context.GetStream())); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); } @@ -185,12 +180,8 @@ Status AiCoreNodeTask::UpdateArgs(TaskContext &context) { auto op_desc = context.GetNodeItem().op_desc; GE_CHECK_NOTNULL(op_desc); GELOGI("[%s] AiCoreNodeTask UpdateArgs Start.", 
op_desc->GetName().c_str()); - for (auto it = tasks_.rbegin(); it != tasks_.rend(); ++it) { - GE_CHK_STATUS_RET_NOLOG((*it)->UpdateArgs(context)); - // AtomicAddrClean has 2 tasks - if (tasks_.size() == 2 && it == tasks_.rbegin() && !(*it)->GetClearAtomic()) { - break; - } + for (auto &task : tasks_) { + GE_CHK_STATUS_RET_NOLOG(task->UpdateArgs(context)); } GELOGI("[%s] AiCoreNodeTask UpdateArgs End.", op_desc->GetName().c_str()); return SUCCESS; @@ -198,12 +189,8 @@ Status AiCoreNodeTask::UpdateArgs(TaskContext &context) { Status AiCoreNodeTask::UpdateTilingData(TaskContext &context) { GELOGD("[%s] PrepareWithShape started", context.GetNodeName()); - for (auto it = tasks_.rbegin(); it != tasks_.rend(); ++it) { - GE_CHK_STATUS_RET_NOLOG((*it)->PrepareWithShape(context)); - // AtomicAddrClean has 2 tasks - if (tasks_.size() == 2 && it == tasks_.rbegin() && !(*it)->GetClearAtomic()) { - break; - } + for (auto &task : tasks_) { + GE_CHK_STATUS_RET_NOLOG(task->PrepareWithShape(context)); } GELOGD("[%s] Done PrepareWithShape successfully.", context.GetNodeName()); return SUCCESS; diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.h b/ge/hybrid/node_executor/aicore/aicore_node_executor.h index b4afc34c..506202fa 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.h +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.h @@ -36,6 +36,7 @@ class AiCoreNodeTaskRegistry { std::shared_ptr GetTask(const std::string &node_key); bool AddTask(const std::string &node_key, const std::shared_ptr task); + private: AiCoreNodeTaskRegistry() = default; std::map> reg_node_tasks_; @@ -51,6 +52,7 @@ class AiCoreNodeTask : public NodeTask { Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; + private: std::vector> tasks_; }; @@ -59,8 +61,7 @@ class AiCoreNodeExecutor : public NodeExecutor { public: Status Initialize() override; Status LoadTask(const HybridModel &model, const 
NodePtr &node, shared_ptr &task) const override; - Status CompileTask(const HybridModel &model, const NodePtr &node, - std::shared_ptr &task) const override; + Status CompileTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const override; private: static Status GenNodeKey(const NodePtr &node, std::string &node_key); @@ -68,4 +69,4 @@ class AiCoreNodeExecutor : public NodeExecutor { }; } // namespace hybrid } // namespace ge -#endif //GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ +#endif // GE_HYBRID_KERNEL_AICORE_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index fd6387e6..9ec0cc22 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -37,9 +37,7 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) } Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { - GE_CHK_STATUS_RET(ValidateTaskDef(task_def), - "[%s] Failed to validate task def: [%s]", - op_desc.GetName().c_str(), + GE_CHK_STATUS_RET(ValidateTaskDef(task_def), "[%s] Failed to validate task def: [%s]", op_desc.GetName().c_str(), task_def.DebugString().c_str()); const domi::KernelDef &kernel_def = task_def.kernel(); @@ -50,7 +48,7 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef block_dim_ = kernel_def.block_dim(); // malloc args memory - args_.reset(new(std::nothrow) uint8_t[args_size_]); + args_.reset(new (std::nothrow) uint8_t[args_size_]); GE_CHECK_NOTNULL(args_); errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); if (err != EOK) { @@ -66,10 +64,7 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef const auto *args_offset_buffer = reinterpret_cast(context.args_offset().data()); uint32_t offset = *args_offset_buffer; if (offset > args_size_) { - GELOGE(INTERNAL_ERROR, - 
"[%s] Arg offset out of range. offset = %u, arg size = %u", - GetName().c_str(), - offset, + GELOGE(INTERNAL_ERROR, "[%s] Arg offset out of range. offset = %u, arg size = %u", GetName().c_str(), offset, args_size_); return INTERNAL_ERROR; } @@ -77,11 +72,7 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef arg_base_ = reinterpret_cast(args_.get() + offset); max_arg_count_ = (args_size_ - offset) / sizeof(void *); GELOGD("[%s] Done setting kernel args successfully. stub_func = %s, block_dim = %d, arg base = %p, arg size = %u", - op_desc.GetName().c_str(), - stub_name_.c_str(), - block_dim_, - arg_base_, - args_size_); + op_desc.GetName().c_str(), stub_name_.c_str(), block_dim_, arg_base_, args_size_); return SUCCESS; } @@ -120,8 +111,7 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { GELOGD("[%s] Start to update tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); OpRunInfo tiling_info; - tiling_info.block_dim = -1; // codex: Using uninitialized value - tiling_info.clear_atomic = true; + tiling_info.block_dim = -1; // codex: Using uninitialized value auto execution_context = context.GetExecutionContext(); RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] Start"); @@ -131,7 +121,6 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { // update op args by tiling info block_dim_ = static_cast(tiling_info.block_dim); op_desc->SetWorkspaceBytes(tiling_info.workspaces); - clear_atomic_ = tiling_info.clear_atomic; tiling_data_ = tiling_info.tiling_data.str(); if (tiling_data_.empty()) { @@ -146,9 +135,8 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { } RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CopyTilingInfo] Start"); - GE_CHK_RT_RET(rtMemcpy(tiling_buffer_->GetData(), tiling_buffer_->GetSize(), - tiling_data_.c_str(), tiling_data_.size(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(tiling_buffer_->GetData(), 
tiling_buffer_->GetSize(), tiling_data_.c_str(), + tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE)); RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CopyTilingInfo] End"); GELOGD("[%s] Done updating tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); @@ -157,8 +145,7 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { GELOGD("[%s] Start to invoke OpParaCalculate.", node->GetName().c_str()); - GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), - "Failed calc tiling data of node %s.", + GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info), "Failed calc tiling data of node %s.", node->GetName().c_str()); GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str()); return SUCCESS; @@ -170,11 +157,8 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { ++expected_arg_count; } if (expected_arg_count > max_arg_count_) { - GELOGE(INTERNAL_ERROR, - "[%s] Invalid arg memory, max arg count = %u, but expect = %zu", - GetName().c_str(), - max_arg_count_, - expected_arg_count); + GELOGE(INTERNAL_ERROR, "[%s] Invalid arg memory, max arg count = %u, but expect = %zu", GetName().c_str(), + max_arg_count_, expected_arg_count); return INTERNAL_ERROR; } @@ -220,7 +204,7 @@ Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { bool dynamic_supported = false; - (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported); + (void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported); if (!dynamic_supported) { GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str()); return SUCCESS; @@ -228,7 +212,7 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { GELOGD("Start alloc tiling data of node %s.", op_desc.GetName().c_str()); int64_t max_size = -1; - (void) AttrUtils::GetInt(op_desc, 
GetKeyForOpParamSize(), max_size); + (void)AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size); GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); if (max_size <= 0) { GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size); @@ -244,17 +228,11 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { return SUCCESS; } -bool AiCoreOpTask::IsDynamicShapeSupported() { - return tiling_buffer_ != nullptr; -} +bool AiCoreOpTask::IsDynamicShapeSupported() { return tiling_buffer_ != nullptr; } -const std::string &AiCoreOpTask::GetName() const { - return stub_name_; -} +const std::string &AiCoreOpTask::GetName() const { return stub_name_; } -std::string AiCoreOpTask::GetKeyForOpParamSize() const { - return kAttrOpParamSize; -} +std::string AiCoreOpTask::GetKeyForOpParamSize() const { return kAttrOpParamSize; } Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::Init(op_desc, task_def)); @@ -264,12 +242,11 @@ Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &t Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices(const OpDesc &op_desc) { GELOGD("[%s] Start to setup AtomicAddrClean task.", op_desc.GetName().c_str()); std::vector atomic_output_indices; - (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices); - map> workspace_info; // op_name, ws_index, ws_offset + (void)ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices); + map> workspace_info; // op_name, ws_index, ws_offset workspace_info = op_desc.TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, workspace_info); if (atomic_output_indices.empty() && workspace_info.empty()) { - GELOGE(INTERNAL_ERROR, - "[%s] Neither ATOMIC_ATTR_OUTPUT_INDEX nor EXT_ATTR_ATOMIC_WORKSPACE_INFO is empty.", + GELOGE(INTERNAL_ERROR, "[%s] Neither ATOMIC_ATTR_OUTPUT_INDEX nor 
EXT_ATTR_ATOMIC_WORKSPACE_INFO is empty.", op_desc.GetName().c_str()); return INTERNAL_ERROR; } @@ -297,25 +274,19 @@ Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices(const OpDesc &op_desc) } if (arg_count > max_arg_count_) { - GELOGE(INTERNAL_ERROR, - "[%s] Invalid arg memory, max arg count = %u, but expect = %zu", - GetName().c_str(), - max_arg_count_, - arg_count); + GELOGE(INTERNAL_ERROR, "[%s] Invalid arg memory, max arg count = %u, but expect = %zu", GetName().c_str(), + max_arg_count_, arg_count); return INTERNAL_ERROR; } return SUCCESS; } -std::string AtomicAddrCleanOpTask::GetKeyForOpParamSize() const { - return kAttrAtomicOpParamSize; -} +std::string AtomicAddrCleanOpTask::GetKeyForOpParamSize() const { return kAttrAtomicOpParamSize; } Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) { GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str()); - GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), - "Failed calc tiling data of node %s.", + GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info), "Failed calc tiling data of node %s.", node->GetName().c_str()); GELOGD("[%s] Done invoking OpAtomicCalculate successfully.", node->GetName().c_str()); return SUCCESS; diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 0447ade7..41ab0d79 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -44,9 +44,7 @@ class AiCoreOpTask { Status LaunchKernel(rtStream_t stream); - const std::string& GetName() const; - - bool GetClearAtomic() const {return clear_atomic_;} + const std::string &GetName() const; protected: Status UpdateTilingInfo(TaskContext &context); @@ -68,7 +66,6 @@ class AiCoreOpTask { std::unique_ptr args_ = nullptr; uint32_t args_size_ = 0; uint32_t block_dim_ = 1; - bool clear_atomic_ = true; }; class AtomicAddrCleanOpTask : public AiCoreOpTask { @@ -87,4 +84,4 
@@ class AtomicAddrCleanOpTask : public AiCoreOpTask { }; } // namespace hybrid } // namespace ge -#endif //GE_HYBRID_KERNEL_AICORE_OP_TASK_H_ +#endif // GE_HYBRID_KERNEL_AICORE_OP_TASK_H_ diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc index b2996435..bad91806 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_builder.cc +++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.cc @@ -34,15 +34,12 @@ const char *AiCoreKernelRegistry::GetUnique(const string &stub_key) { } AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector &task_defs) - : op_desc_(op_desc), task_defs_(task_defs) { -} + : op_desc_(op_desc), task_defs_(task_defs) {} Status AiCoreTaskBuilder::BuildTask(std::unique_ptr &node_task, bool ignore_failure_on_atomic) { GE_CHECK_NOTNULL(op_desc_); if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) { - GELOGE(INTERNAL_ERROR, - "[%s] At most 2 task was supported, but got %zu", - op_desc_->GetName().c_str(), + GELOGE(INTERNAL_ERROR, "[%s] At most 2 task was supported, but got %zu", op_desc_->GetName().c_str(), task_defs_.size()); return INTERNAL_ERROR; } @@ -51,38 +48,32 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr &node_task, bool i if (ExpectAtomicAddrCleanTask()) { if (task_defs_.size() != kNumTaskWithAtomicAddrCleanTask) { if (ignore_failure_on_atomic) { - GELOGI("[%s] AtomicAddrClean task was expected, but got %zu task_defs", - op_desc_->GetName().c_str(), + GELOGI("[%s] AtomicAddrClean task was expected, but got %zu task_defs", op_desc_->GetName().c_str(), task_defs_.size()); return SUCCESS; } else { - GELOGE(INTERNAL_ERROR, - "[%s] AtomicAddrClean task was expected, but got %zu task_defs", - op_desc_->GetName().c_str(), - task_defs_.size()); + GELOGE(INTERNAL_ERROR, "[%s] AtomicAddrClean task was expected, but got %zu task_defs", + op_desc_->GetName().c_str(), task_defs_.size()); return INTERNAL_ERROR; } } GELOGD("[%s] Build 
AtomicAddrClean task.", op_desc_->GetName().c_str()); - auto atomic_task = - std::unique_ptr(new(std::nothrow)AtomicAddrCleanOpTask()); + auto atomic_task = std::unique_ptr(new (std::nothrow) AtomicAddrCleanOpTask()); GE_CHECK_NOTNULL(atomic_task); - GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), - "[%s] Failed to init task for AtomicAddrClean", + GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), "[%s] Failed to init task for AtomicAddrClean", op_desc_->GetName().c_str()); op_tasks.emplace_back(std::move(atomic_task)); } // build aicore task - auto aicore_task = std::unique_ptr(new(std::nothrow)AiCoreOpTask()); + auto aicore_task = std::unique_ptr(new (std::nothrow) AiCoreOpTask()); GE_CHECK_NOTNULL(aicore_task); - GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), - "[%s] Failed to init task for AtomicAddrClean", + GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), "[%s] Failed to init task for AtomicAddrClean", op_desc_->GetName().c_str()); op_tasks.emplace_back(std::move(aicore_task)); - node_task.reset(new(std::nothrow)AiCoreNodeTask(std::move(op_tasks))); + node_task.reset(new (std::nothrow) AiCoreNodeTask(std::move(op_tasks))); GE_CHECK_NOTNULL(node_task); return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicore/aicore_task_builder.h b/ge/hybrid/node_executor/aicore/aicore_task_builder.h index 92db809d..4610e57a 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_builder.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_builder.h @@ -57,4 +57,4 @@ class AiCoreTaskBuilder { }; } // namespace hybrid } // namespace ge -#endif //GE_HYBRID_KERNEL_AICORE_TASK_BUILDER_H_ +#endif // GE_HYBRID_KERNEL_AICORE_TASK_BUILDER_H_ diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc b/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc index ed92ada7..588f179d 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc +++ 
b/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc @@ -17,7 +17,6 @@ #include "aicore_task_compiler.h" #include "framework/common/debug/log.h" #include "graph/debug/ge_attr_define.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" namespace ge { namespace hybrid { @@ -25,32 +24,29 @@ namespace { uintptr_t kWeightBase = 0x10000000; uintptr_t kMemBase = 0x20000000; uint64_t kFakeSize = 0x10000000UL; -} +} // namespace std::mutex AiCoreTaskCompiler::mu_; AiCoreTaskCompiler::AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store) : aic_kernel_store_(std::move(aic_kernel_store)) {} -Status AiCoreTaskCompiler::DoCompileOp(const NodePtr &node) const { +Status AiCoreTaskCompiler::DoCompileOp(OpsKernelInfoStore &ops_store, const NodePtr &node) { GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(aic_kernel_store_); vector node_vec; node_vec.emplace_back(node); - GE_CHK_STATUS_RET(aic_kernel_store_->CompileOpRun(node_vec), - "Failed to execute CompileOp, node = %s", + GE_CHK_STATUS_RET(ops_store.CompileOpRun(node_vec), "Failed to execute CompileOp, node = %s", node->GetName().c_str()); - GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().CalcOpRunningParam(*node), - "Failed to execute CalcOpRunningParam, node = %s", + GE_CHK_STATUS_RET(ops_store.CalcOpRunningParam(*node), "Failed to execute CalcOpRunningParam, node = %s", node->GetName().c_str()); return SUCCESS; } -Status AiCoreTaskCompiler::CompileOp(const NodePtr &node, std::vector &tasks) { +Status AiCoreTaskCompiler::CompileOp(const NodePtr &node, std::vector &tasks) const { GE_CHECK_NOTNULL(node); GELOGI("AiCoreTaskCompiler(%s) CompileOp Start.", node->GetName().c_str()); + GE_CHECK_NOTNULL(aic_kernel_store_); - - GE_CHK_STATUS_RET_NOLOG(DoCompileOp(node)); + GE_CHK_STATUS_RET_NOLOG(DoCompileOp(*aic_kernel_store_, node)); GELOGD("successfully compiled op: %s", node->GetName().c_str()); auto op_desc = node->GetOpDesc(); @@ -60,13 +56,13 @@ Status AiCoreTaskCompiler::CompileOp(const NodePtr &node, 
std::vectorSetOutputOffset(output_offsets); std::vector workspaces(op_desc->GetWorkspaceBytes().size(), kMemBase); op_desc->SetWorkspace(std::move(workspaces)); - GE_CHK_STATUS_RET_NOLOG(DoGenerateTask(*node, tasks)); + GE_CHK_STATUS_RET_NOLOG(DoGenerateTask(*aic_kernel_store_, *node, tasks)); GELOGD("successfully generated task: %s", node->GetName().c_str()); GELOGI("AiCoreTaskCompiler(%s) CompileOp End.", node->GetName().c_str()); return SUCCESS; } -Status AiCoreTaskCompiler::DoGenerateTask(const Node &node, +Status AiCoreTaskCompiler::DoGenerateTask(OpsKernelInfoStore &store, const Node &node, std::vector &tasks) { rtModel_t rt_model_ = nullptr; GE_CHK_RT_RET(rtModelCreate(&rt_model_, 0)); @@ -87,7 +83,7 @@ Status AiCoreTaskCompiler::DoGenerateTask(const Node &node, Status ret; { std::lock_guard lk(mu_); - ret = OpsKernelBuilderManager::Instance().GenerateTask(node, context, tasks); + ret = store.GenerateTask(node, context, tasks); } GE_CHK_STATUS(ret, "Failed to execute GenerateTask, node = %s", node.GetName().c_str()); diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h index 38ed458f..39673188 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h @@ -27,13 +27,14 @@ class AiCoreTaskCompiler { explicit AiCoreTaskCompiler(OpsKernelInfoStorePtr aic_kernel_store); ~AiCoreTaskCompiler() = default; - Status CompileOp(const NodePtr &node, std::vector &tasks); + Status CompileOp(const NodePtr &node, std::vector &tasks) const; + private: - Status DoCompileOp(const NodePtr &node) const; - Status DoGenerateTask(const Node &node, std::vector &tasks); + static Status DoCompileOp(OpsKernelInfoStore &store, const NodePtr &node); + static Status DoGenerateTask(OpsKernelInfoStore &store, const Node &node, std::vector &tasks); OpsKernelInfoStorePtr aic_kernel_store_; static std::mutex mu_; }; } // namespace hybrid } // namespace ge 
-#endif //GE_HYBRID_KERNEL_AICORE_TASK_COMPILER_H_ +#endif // GE_HYBRID_KERNEL_AICORE_TASK_COMPILER_H_ diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc index 3974e29b..1d6c464f 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc @@ -24,7 +24,7 @@ namespace hybrid { namespace { // if dim count is not reach kMaxShapeDims(8), use INT64_MIN to mark dim end. constexpr int64_t kDimEndFlag = INT64_MIN; -} +} // namespace Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { GELOGI("Node[%s] parse ext info start.", node_name_.c_str()); @@ -34,10 +34,10 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { } ext_info_len_ = ext_info.size(); - ext_info_.reset(new(std::nothrow)uint8_t[ext_info_len_]); + ext_info_.reset(new (std::nothrow) uint8_t[ext_info_len_]); GE_CHECK_NOTNULL(ext_info_); - (void) memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()); + (void)memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()); input_shape_and_type_.clear(); output_shape_and_type_.clear(); @@ -58,8 +58,8 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { GE_CHK_STATUS_RET(ParseExtOutputShape(aicpu_ext_info), "Parse ext output shape failed."); break; default: - GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", - node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); + GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoType, + aicpu_ext_info->infoLen); break; } offset += sizeof(AicpuExtInfo); @@ -75,14 +75,14 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), PARAM_INVALID, - "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", - node_name_.c_str(), 
sizeof(int32_t), aicpu_ext_info->infoLen); + "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", node_name_.c_str(), + sizeof(int32_t), aicpu_ext_info->infoLen); auto type = reinterpret_cast(aicpu_ext_info->infoMsg); GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, PARAM_INVALID, - "Node[%s] parse ext shape type failed as need %d but %d.", - node_name_.c_str(), unknown_type_, *type); + "Node[%s] parse ext shape type failed as need %d but %d.", node_name_.c_str(), unknown_type_, + *type); GELOGI("Node[%s] parse ext shape type success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); return SUCCESS; } @@ -105,8 +105,8 @@ Status AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) { Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) { if (unknown_type_ == DEPEND_COMPUTE) { - GELOGD("Node[%s] is depend compute type no need ext output shape, ignore it, infoLen=%u.", - node_name_.c_str(), aicpu_ext_info->infoLen); + GELOGD("Node[%s] is depend compute type no need ext output shape, ignore it, infoLen=%u.", node_name_.c_str(), + aicpu_ext_info->infoLen); return SUCCESS; } auto need_len = output_num_ * sizeof(AicpuShapeAndType); @@ -128,8 +128,7 @@ Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const const auto &shape = input_desc.GetShape(); GE_CHK_STATUS_RET(UpdateShapeAndType(shape, input_desc.GetDataType(), input_shape_and_type_[input_index]), - "Node[%s] input[%u] update input shape and type failed.", - node_name_.c_str(), input_index); + "Node[%s] input[%u] update input shape and type failed.", node_name_.c_str(), input_index); return SUCCESS; } @@ -145,12 +144,12 @@ Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, cons std::vector> range; auto range_ret = output_desc.GetShapeRange(range); GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, INTERNAL_ERROR, - "Node[%s] is shape range type but get GetShapeRange failed, ret=%u.", - 
node_name_.c_str(), range_ret); + "Node[%s] is shape range type but get GetShapeRange failed, ret=%u.", node_name_.c_str(), + range_ret); for (size_t k = 0; k < range.size(); ++k) { if (shape.GetDim(k) < 0 && k < range.size()) { - GELOGD("Node[%s] output[%u] update dim[%zu] from %ld to range max %ld.", - node_name_.c_str(), output_index, k, shape.GetDim(k), range[k].second); + GELOGD("Node[%s] output[%u] update dim[%zu] from %ld to range max %ld.", node_name_.c_str(), output_index, k, + shape.GetDim(k), range[k].second); shape.SetDim(k, range[k].second); } } @@ -171,8 +170,8 @@ Status AicpuExtInfoHandler::UpdateShapeAndType(const GeShape &shape, DataType da AicpuShapeAndType *shape_and_type) { auto dim_num = shape.GetDimNum(); if (dim_num > aicpu::FWKAdapter::kMaxShapeDims) { - GELOGE(PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.", - dim_num, aicpu::FWKAdapter::kMaxShapeDims); + GELOGE(PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.", dim_num, + aicpu::FWKAdapter::kMaxShapeDims); return PARAM_INVALID; } size_t index = 0; @@ -187,8 +186,7 @@ Status AicpuExtInfoHandler::UpdateShapeAndType(const GeShape &shape, DataType da return SUCCESS; } -void AicpuExtInfoHandler::GetShapeAndType(const AicpuShapeAndType *shape_and_type, - GeShape &shape, +void AicpuExtInfoHandler::GetShapeAndType(const AicpuShapeAndType *shape_and_type, GeShape &shape, DataType &data_type) { std::vector dims; for (uint32_t index = 0; index < aicpu::FWKAdapter::kMaxShapeDims; ++index) { diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h index 9c867cdc..a42678b1 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h @@ -30,20 +30,12 @@ using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo; class AicpuExtInfoHandler { public: AicpuExtInfoHandler(std::string node_name, uint32_t input_num, uint32_t output_num, 
UnknowShapeOpType unknown_type) - : node_name_(std::move(node_name)), - input_num_(input_num), - output_num_(output_num), - unknown_type_(unknown_type) { - } + : node_name_(std::move(node_name)), input_num_(input_num), output_num_(output_num), unknown_type_(unknown_type) {} ~AicpuExtInfoHandler() = default; - uint8_t *GetExtInfo() const { - return ext_info_.get(); - } - size_t GetExtInfoLen() const { - return ext_info_len_; - } + uint8_t *GetExtInfo() const { return ext_info_.get(); } + size_t GetExtInfoLen() const { return ext_info_len_; } Status Parse(const std::string &ext_info); @@ -54,18 +46,13 @@ class AicpuExtInfoHandler { Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); private: - Status ParseExtShapeType(AicpuExtInfo *aicpu_ext_info); Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); - static Status UpdateShapeAndType(const GeShape &shape, - DataType data_type, - AicpuShapeAndType *shape_and_type); + static Status UpdateShapeAndType(const GeShape &shape, DataType data_type, AicpuShapeAndType *shape_and_type); - static void GetShapeAndType(const AicpuShapeAndType *shape_and_type, - GeShape &shape, - DataType &data_type); + static void GetShapeAndType(const AicpuShapeAndType *shape_and_type, GeShape &shape, DataType &data_type); private: const std::string node_name_; @@ -80,4 +67,4 @@ class AicpuExtInfoHandler { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_AICPU_EXT_INFO_H_ \ No newline at end of file +#endif // GE_HYBRID_AICPU_EXT_INFO_H_ \ No newline at end of file diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index fa379ed6..871f1db4 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -20,14 +20,14 @@ #include "graph/load/new_model_manager/model_manager.h" #include 
"hybrid/executor/hybrid_execution_context.h" #include "hybrid/model/hybrid_model.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" +#include "init/gelib.h" namespace ge { namespace hybrid { namespace { // mem need release constexpr uint64_t kReleaseFlag = 1; -} +} // namespace REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_TF, AiCpuNodeExecutor); REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_CUSTOM, AiCpuNodeExecutor); @@ -43,25 +43,24 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info) { if (node_item_->is_dynamic) { // dynamic node must have ext info GE_CHK_STATUS_RET(aicpu_ext_handle_.Parse(kernel_ext_info), - "Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.", - node_name_.c_str(), kernel_ext_info.size()); + "Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.", node_name_.c_str(), + kernel_ext_info.size()); } // if no ext info no need copy to device. if (kernel_ext_info.empty()) { - GELOGI("Node[%s] kernel_ext_info is empty, no need copy to device, is_dynamic=%s.", - node_name_.c_str(), node_item_->is_dynamic ? "true" : "false"); + GELOGI("Node[%s] kernel_ext_info is empty, no need copy to device, is_dynamic=%s.", node_name_.c_str(), + node_item_->is_dynamic ? 
"true" : "false"); return SUCCESS; } // copy task args buf GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_ext_info.size(), ext_info_addr_dev_), - "Node[%s] alloc kernel_ext_info buf failed, size=%zu", - node_name_.c_str(), kernel_ext_info.size()); + "Node[%s] alloc kernel_ext_info buf failed, size=%zu", node_name_.c_str(), kernel_ext_info.size()); // copy default ext info to device - GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), - kernel_ext_info.data(), kernel_ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), kernel_ext_info.data(), + kernel_ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } @@ -72,11 +71,8 @@ Status AicpuNodeTaskBase::UpdateOutputShapeFromExtInfo() { return SUCCESS; } // copy to host buf - GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_.GetExtInfo(), - aicpu_ext_handle_.GetExtInfoLen(), - ext_info_addr_dev_->GetData(), - ext_info_addr_dev_->GetSize(), - RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_.GetExtInfo(), aicpu_ext_handle_.GetExtInfoLen(), + ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); for (auto i = 0; i < node_item_->num_outputs; ++i) { GeShape shape; @@ -85,19 +81,18 @@ Status AicpuNodeTaskBase::UpdateOutputShapeFromExtInfo() { aicpu_ext_handle_.GetOutputShapeAndType(i, shape, data_type); auto output_desc = node_item_->op_desc->MutableOutputDesc(i); GE_CHECK_NOTNULL(output_desc); - GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, i, output_desc), - "Update node %s [%d]th output shape failed.", + GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, i, output_desc), "Update node %s [%d]th output shape failed.", node_name_.c_str(), i); } return SUCCESS; } -Status AicpuNodeTaskBase::UpdateShapeToOutputDesc(const GeShape &shape_new, - int32_t output_index, GeTensorDescPtr &output_desc) { +Status AicpuNodeTaskBase::UpdateShapeToOutputDesc(const GeShape 
&shape_new, int32_t output_index, + GeTensorDescPtr &output_desc) { auto shape_old = output_desc->GetShape(); output_desc->SetShape(shape_new); - GELOGI("Update node[%s] out[%d] shape from %s to %s.", node_name_.c_str(), output_index, - shape_old.ToString().c_str(), shape_new.ToString().c_str()); + GELOGI("Update node[%s] out[%d] shape from %s to %s.", node_name_.c_str(), output_index, shape_old.ToString().c_str(), + shape_new.ToString().c_str()); auto origin_shape_old = output_desc->GetOriginShape(); auto origin_format = output_desc->GetOriginFormat(); @@ -108,16 +103,15 @@ Status AicpuNodeTaskBase::UpdateShapeToOutputDesc(const GeShape &shape_new, } // if format is not same need convert shape std::vector origin_dims_new; - auto trans_ret = formats::TransShape(format, shape_new.GetDims(), - output_desc->GetDataType(), origin_format, origin_dims_new); + auto trans_ret = + formats::TransShape(format, shape_new.GetDims(), output_desc->GetDataType(), origin_format, origin_dims_new); GE_CHK_STATUS_RET(trans_ret, "Node[%s] out[%d] originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.", node_name_.c_str(), output_index, origin_format, format, shape_new.ToString().c_str()); auto origin_shape_new = GeShape(origin_dims_new); output_desc->SetOriginShape(origin_shape_new); - GELOGI("Node[%s] out[%d] originFormat[%d] is not same as format[%d], need update from %s ro %s.", - node_name_.c_str(), output_index, origin_format, format, - origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); + GELOGI("Node[%s] out[%d] originFormat[%d] is not same as format[%d], need update from %s ro %s.", node_name_.c_str(), + output_index, origin_format, format, origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); return SUCCESS; } @@ -132,8 +126,7 @@ Status AicpuNodeTaskBase::UpdateExtInfo() { auto input_desc = node_item_->op_desc->MutableInputDesc(i); GE_CHECK_NOTNULL(input_desc); 
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateInputShapeAndType(i, *input_desc), - "Node[%s] input[%d] update input shape failed.", - node_name_.c_str(), i); + "Node[%s] input[%d] update input shape failed.", node_name_.c_str(), i); } if (unknown_type_ != DEPEND_COMPUTE) { @@ -142,25 +135,21 @@ Status AicpuNodeTaskBase::UpdateExtInfo() { GE_CHECK_NOTNULL(output_desc); GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateOutputShapeAndType(j, *output_desc), - "Node[%s] output[%d] UpdateOutputShapeAndType failed.", - node_name_.c_str(), j); + "Node[%s] output[%d] UpdateOutputShapeAndType failed.", node_name_.c_str(), j); } } // copy input and output shapes to device - GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), - ext_info_addr_dev_->GetSize(), - aicpu_ext_handle_.GetExtInfo(), - aicpu_ext_handle_.GetExtInfoLen(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), aicpu_ext_handle_.GetExtInfo(), + aicpu_ext_handle_.GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE)); GELOGI("Node[%s] update ext info end.", node_name_.c_str()); return SUCCESS; } Status AicpuNodeTaskBase::UpdateArgs(TaskContext &context) { - GELOGI("Node[%s] update args begin. is_dynamic=%s, unknown_type=%d", - node_name_.c_str(), node_item_->is_dynamic ? "true" : "false", unknown_type_); + GELOGI("Node[%s] update args begin. is_dynamic=%s, unknown_type=%d", node_name_.c_str(), + node_item_->is_dynamic ? 
"true" : "false", unknown_type_); if (node_item_->num_inputs == 0 && node_item_->num_outputs == 0) { GELOGI("Node[%s] has no input and output, no need update args.", node_name_.c_str()); return SUCCESS; @@ -205,8 +194,8 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::functionnum_outputs == 0)) { - GELOGI("Node[%s] type[%s] unknown_type is %d, output num is %d.", - node_name_.c_str(), node_item_->node_type.c_str(), unknown_type_, node_item_->num_outputs); + GELOGI("Node[%s] type[%s] unknown_type is %d, output num is %d.", node_name_.c_str(), node_item_->node_type.c_str(), + unknown_type_, node_item_->num_outputs); return SUCCESS; } @@ -214,8 +203,8 @@ Status AicpuTfNodeTask::InitForDependComputeTask() { constexpr auto result_summary_size = sizeof(aicpu::FWKAdapter::ResultSummary); for (auto i = 0; i < node_item_->num_outputs; ++i) { GE_CHK_STATUS_RET(AllocTensorBuffer(result_summary_size, output_summary_[i]), - "Node[%s] alloc buffer for result summary info failed, size=%zu.", - node_name_.c_str(), result_summary_size); + "Node[%s] alloc buffer for result summary info failed, size=%zu.", node_name_.c_str(), + result_summary_size); } output_summary_host_.resize(node_item_->num_outputs); @@ -223,22 +212,20 @@ Status AicpuTfNodeTask::InitForDependComputeTask() { // copy task need copy output_data and output_shape, max len is 2 * output_num const size_t copy_input_buf_len = node_item_->num_outputs * 2 * sizeof(uint64_t); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_release_flag_dev_), - "Node[%s] alloc copy task input release_flag failed, size=%zu", - node_name_.c_str(), copy_input_buf_len); + "Node[%s] alloc copy task input release_flag failed, size=%zu", node_name_.c_str(), + copy_input_buf_len); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_data_size_dev_), - "Node[%s] alloc copy task input data_size failed, size=%zu", - node_name_.c_str(), copy_input_buf_len); + "Node[%s] alloc copy task input data_size 
failed, size=%zu", node_name_.c_str(), + copy_input_buf_len); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_src_dev_), - "Node[%s] alloc copy task input src failed, size=%zu", - node_name_.c_str(), copy_input_buf_len); + "Node[%s] alloc copy task input src failed, size=%zu", node_name_.c_str(), copy_input_buf_len); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_dst_dev_), - "Node[%s] alloc copy task input dst failed, size=%zu", - node_name_.c_str(), copy_input_buf_len); + "Node[%s] alloc copy task input dst failed, size=%zu", node_name_.c_str(), copy_input_buf_len); // copy task args buf GE_CHK_STATUS_RET(AllocTensorBuffer(sizeof(STR_FWK_OP_KERNEL), copy_task_args_buf_), - "Node[%s] alloc copy task args buf failed, size=%zu", - node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL)); + "Node[%s] alloc copy task args buf failed, size=%zu", node_name_.c_str(), + sizeof(STR_FWK_OP_KERNEL)); std::vector copy_io_addr; copy_io_addr.emplace_back(reinterpret_cast(copy_input_release_flag_dev_->GetData())); @@ -251,42 +238,38 @@ Status AicpuTfNodeTask::InitForDependComputeTask() { // can alloc in init, it can reuse GE_CHK_STATUS_RET(AllocTensorBuffer(copy_io_addr_size, copy_ioaddr_dev_), - "Node[%s] alloc copy task io buf failed, size=%zu", - node_name_.c_str(), copy_io_addr_size); + "Node[%s] alloc copy task io buf failed, size=%zu", node_name_.c_str(), copy_io_addr_size); - GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_->GetData(), copy_io_addr_size, - ©_io_addr[0], copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_->GetData(), copy_io_addr_size, ©_io_addr[0], copy_io_addr_size, + RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } Status AicpuTfNodeTask::Init(const HybridModel &model) { GELOGI("Node[%s] init start.", node_name_.c_str()); - GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel_ex(), FAILED, - "Node[%s] is tf node but task def does not has kernel ex.", + GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel_ex(), FAILED, 
"Node[%s] is tf node but task def does not has kernel ex.", node_name_.c_str()); auto &kernel_ex_def = task_def_.kernel_ex(); auto kernel_workspace_size = kernel_ex_def.task_info().size(); GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_workspace_size, kernel_workspace_), - "Node[%s] alloc buffer for kernel workspace failed, size=%zu.", - node_name_.c_str(), kernel_workspace_size); + "Node[%s] alloc buffer for kernel workspace failed, size=%zu.", node_name_.c_str(), + kernel_workspace_size); - GE_CHK_RT_RET(rtMemcpy(kernel_workspace_->GetData(), kernel_workspace_size, - kernel_ex_def.task_info().data(), kernel_workspace_size, - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(kernel_workspace_->GetData(), kernel_workspace_size, kernel_ex_def.task_info().data(), + kernel_workspace_size, RT_MEMCPY_HOST_TO_DEVICE)); auto input_output_size = (node_item_->num_inputs + node_item_->num_outputs) * sizeof(uint64_t); // alloc input output addr buf, allow alloc size 0 GE_CHK_STATUS_RET(AllocTensorBuffer(input_output_size, input_output_addr_), - "Node[%s] alloc buffer for io addr failed, size=%zu.", - node_name_.c_str(), input_output_size); + "Node[%s] alloc buffer for io addr failed, size=%zu.", node_name_.c_str(), input_output_size); auto &kernel_ext_info = kernel_ex_def.kernel_ext_info(); auto kernel_ext_info_size = kernel_ex_def.kernel_ext_info_size(); GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, - "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", - node_name_.c_str(), kernel_ext_info.size(), kernel_ext_info_size); + "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", node_name_.c_str(), + kernel_ext_info.size(), kernel_ext_info_size); // init ext info GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name_.c_str()); @@ -294,14 +277,14 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { // build fwk_op_kernel. 
GE_CHK_BOOL_RET_STATUS(sizeof(STR_FWK_OP_KERNEL) >= kernel_ex_def.args_size(), FAILED, - "Node[%s] sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u", - node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args_size()); + "Node[%s] sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u", node_name_.c_str(), + sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args_size()); STR_FWK_OP_KERNEL fwk_op_kernel = {0}; - errno_t sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), - kernel_ex_def.args().data(), kernel_ex_def.args_size()); - GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR, - "Node[%s] memcpy fwk_op_kernel failed, ret: %d.", node_name_.c_str(), sec_ret); + errno_t sec_ret = + memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), kernel_ex_def.args_size()); + GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR, "Node[%s] memcpy fwk_op_kernel failed, ret: %d.", + node_name_.c_str(), sec_ret); fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_->GetData()); fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(input_output_addr_->GetData()); @@ -315,16 +298,15 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = GetStepIdAddr(model); auto session_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID; - GE_CHK_STATUS_RET(EnsureSessionCreated(session_id), "Node[%s] create session id %lu failed.", - node_name_.c_str(), session_id); + GE_CHK_STATUS_RET(EnsureSessionCreated(session_id), "Node[%s] create session id %lu failed.", node_name_.c_str(), + session_id); // alloc kernel_buf_ and copy to device. 
GE_CHK_STATUS_RET(AllocTensorBuffer(sizeof(STR_FWK_OP_KERNEL), kernel_buf_), - "Node[%s] alloc buffer for kernel buf failed, size=%zu.", - node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL)); + "Node[%s] alloc buffer for kernel buf failed, size=%zu.", node_name_.c_str(), + sizeof(STR_FWK_OP_KERNEL)); - GE_CHK_RT_RET(rtMemcpy(kernel_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), - &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), + GE_CHK_RT_RET(rtMemcpy(kernel_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); GELOGI("Node[%s] init end.", node_name_.c_str()); @@ -344,8 +326,7 @@ uint64_t AicpuTfNodeTask::GetStepIdAddr(const HybridModel &model) { Status AicpuTfNodeTask::EnsureSessionCreated(uint64_t session_id) { auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - GE_CHK_STATUS_RET(model_manager->CreateAicpuSession(session_id), - "Create aicpu session %lu failed", session_id); + GE_CHK_STATUS_RET(model_manager->CreateAicpuSession(session_id), "Create aicpu session %lu failed", session_id); return SUCCESS; } @@ -353,23 +334,22 @@ Status AicpuTfNodeTask::ReadResultSummaryAndPrepareMemory(TaskContext &context, std::vector> &out_shape_hbm) { for (auto i = 0; i < node_item_->num_outputs; ++i) { auto &result_summary = output_summary_host_[i]; - GE_CHK_RT_RET(rtMemcpy(&result_summary, sizeof(aicpu::FWKAdapter::ResultSummary), - output_summary_[i]->GetData(), output_summary_[i]->GetSize(), - RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(&result_summary, sizeof(aicpu::FWKAdapter::ResultSummary), output_summary_[i]->GetData(), + output_summary_[i]->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); auto raw_data_size = result_summary.raw_data_size; std::unique_ptr tensor_buffer; GE_CHK_STATUS_RET(AllocTensorBuffer(raw_data_size, tensor_buffer), - "Node[%s] out[%d] alloc tensor buffer failed, raw_data_size=%lu", - node_name_.c_str(), i, raw_data_size); + "Node[%s] out[%d] alloc tensor buffer failed, 
raw_data_size=%lu", node_name_.c_str(), i, + raw_data_size); auto status = context.SetOutput(i, TensorValue(std::shared_ptr(tensor_buffer.release()))); GE_CHK_STATUS_RET(status, "Node[%s] set output %d failed.", node_name_.c_str(), i); auto shape_data_size = result_summary.shape_data_size; std::unique_ptr shape_buffer; GE_CHK_STATUS_RET(AllocTensorBuffer(shape_data_size, shape_buffer), - "Node[%s] out[%d] alloc shape buffer failed, shape_data_size=%lu", - node_name_.c_str(), i, shape_data_size); + "Node[%s] out[%d] alloc shape buffer failed, shape_data_size=%lu", node_name_.c_str(), i, + shape_data_size); out_shape_hbm.emplace_back(std::move(shape_buffer)); } return SUCCESS; @@ -377,41 +357,37 @@ Status AicpuTfNodeTask::ReadResultSummaryAndPrepareMemory(TaskContext &context, Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, const std::vector> &out_shape_hbm) { - GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), - INTERNAL_ERROR, - "Node[%s] has %d outputs but out shape is %zu.", - node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size()); + GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), INTERNAL_ERROR, + "Node[%s] has %d outputs but out shape is %zu.", node_name_.c_str(), node_item_->num_outputs, + out_shape_hbm.size()); uint64_t copy_num = 0; GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm, copy_num)); STR_FWK_OP_KERNEL aicpu_task = {0}; std::string task_info; - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), - "[GenMemCopyTask] Start"); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[GenMemCopyTask] Start"); GE_CHK_STATUS_RET_NOLOG(GenMemCopyTask(copy_num, aicpu_task, task_info)); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), - "[GenMemCopyTask] End"); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[GenMemCopyTask] End"); std::unique_ptr 
kernel_workspace_buf; GE_CHK_STATUS_RET(AllocTensorBuffer(task_info.size(), kernel_workspace_buf), - "Node[%s] alloc copy task workspace buf failed, size=%zu.", - node_name_.c_str(), task_info.size()); + "Node[%s] alloc copy task workspace buf failed, size=%zu.", node_name_.c_str(), task_info.size()); - GE_CHK_RT_RET(rtMemcpy(kernel_workspace_buf->GetData(), task_info.size(), - task_info.data(), task_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(kernel_workspace_buf->GetData(), task_info.size(), task_info.data(), task_info.size(), + RT_MEMCPY_HOST_TO_DEVICE)); aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData()); aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_buf->GetData()); aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; - GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), - &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), &aicpu_task, + sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); - GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), - RT_KERNEL_DEFAULT, context.GetStream())); + GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, + context.GetStream())); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End"); GE_CHK_RT_RET(rtStreamSynchronize(context.GetStream())); @@ -430,9 +406,8 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, for (auto i = 0; i < node_item_->num_outputs; ++i) { const auto &summary = output_summary_host_[i]; GELOGI("Node[%s] out[%d] summary, shape data=0x%lx, shape data size=%lu, raw 
data=0x%lx, raw data size=%lu.", - node_name_.c_str(), i, - summary.shape_data_ptr, summary.shape_data_size, - summary.raw_data_ptr, summary.raw_data_size); + node_name_.c_str(), i, summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, + summary.raw_data_size); if (summary.raw_data_size > 0) { auto output = context.GetOutput(i); GE_CHECK_NOTNULL(output); @@ -456,8 +431,7 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, copy_num = copy_input_release_flag.size(); - GE_CHK_BOOL_RET_STATUS(copy_num > 0, INTERNAL_ERROR, - "Node[%s] need copy num is 0", node_name_.c_str()); + GE_CHK_BOOL_RET_STATUS(copy_num > 0, INTERNAL_ERROR, "Node[%s] need copy num is 0", node_name_.c_str()); // copy task need copy output and output shape const size_t copy_input_buf_len = copy_num * sizeof(uint64_t); @@ -466,28 +440,31 @@ Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, ©_input_release_flag[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT_RET(rtMemcpy(copy_input_data_size_dev_->GetData(), copy_input_data_size_dev_->GetSize(), ©_input_data_size[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT_RET(rtMemcpy(copy_input_src_dev_->GetData(), copy_input_src_dev_->GetSize(), - ©_input_src[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT_RET(rtMemcpy(copy_input_dst_dev_->GetData(), copy_input_dst_dev_->GetSize(), - ©_input_dst[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_src_dev_->GetData(), copy_input_src_dev_->GetSize(), ©_input_src[0], + copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_dst_dev_->GetData(), copy_input_dst_dev_->GetSize(), ©_input_dst[0], + copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } Status AicpuTfNodeTask::GenMemCopyTask(uint64_t copy_num, STR_FWK_OP_KERNEL &task, std::string &task_info) { - static constexpr const char *const kKernelLibName = "aicpu_tf_kernel"; - auto 
kernel_builder = OpsKernelBuilderManager::Instance().GetOpsKernelBuilder(kKernelLibName); - GE_CHK_BOOL_RET_STATUS(kernel_builder != nullptr, FAILED, "Get op kernel info store[%s] failed", kKernelLibName); - auto ret = kernel_builder->GenMemCopyTask(copy_num, task, task_info); + auto instance_ptr = ge::GELib::GetInstance(); + GE_CHK_BOOL_RET_STATUS(instance_ptr != nullptr && instance_ptr->InitFlag(), GE_CLI_GE_NOT_INITIALIZED, + "GE is not initialized"); + + static constexpr const char *const kKernelLibName = "aicpu_kernel"; + OpsKernelInfoStorePtr kernel_info = instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(kKernelLibName); + GE_CHK_BOOL_RET_STATUS(kernel_info != nullptr, FAILED, "Get op kernel info store[%s] failed", kKernelLibName); + auto ret = kernel_info->GenMemCopyTask(copy_num, task, task_info); GE_CHK_STATUS_RET(ret, "Call aicpu GenMemCopyTask failed, copy_num=%lu, ret=%u", copy_num, ret); return SUCCESS; } Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context, const std::vector> &out_shape_hbm) { - GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), - INTERNAL_ERROR, - "Node[%s] has %d outputs but out shape is %zu", - node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size()); + GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), INTERNAL_ERROR, + "Node[%s] has %d outputs but out shape is %zu", node_name_.c_str(), node_item_->num_outputs, + out_shape_hbm.size()); for (auto i = 0; i < node_item_->num_outputs; ++i) { const auto &result_summary = output_summary_host_[i]; auto output_desc = node_item_->op_desc->MutableOutputDesc(i); @@ -499,18 +476,17 @@ Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context, node_name_.c_str(), i, result_summary.shape_data_size); uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); GELOGI("Node[%s] [%d]th output dim num=%u.", node_name_.c_str(), i, dim_num); - std::unique_ptr 
shape_addr(new(std::nothrow) int64_t[dim_num]()); + std::unique_ptr shape_addr(new (std::nothrow) int64_t[dim_num]()); GE_CHECK_NOTNULL(shape_addr); - GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, - shape_hbm->GetData(), shape_hbm->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm->GetData(), + shape_hbm->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { shape_dims.emplace_back(shape_addr[dim_idx]); GELOGD("Node[%s] [%d]th output dim[%u]=%ld.", node_name_.c_str(), i, dim_idx, shape_addr[dim_idx]); } } GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), i, output_desc), - "Node[%s] update [%d]th output shape failed.", - node_name_.c_str(), i); + "Node[%s] update [%d]th output shape failed.", node_name_.c_str(), i); } return SUCCESS; } @@ -520,20 +496,15 @@ Status AicpuTfNodeTask::UpdateShapeAndDataByResultSummary(TaskContext &context) std::vector> out_shape_hbm; GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(context, out_shape_hbm), - "Node[%s] read ResultSummary and update output shape failed.", - node_name_.c_str()); + "Node[%s] read ResultSummary and update output shape failed.", node_name_.c_str()); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), - "[ReadResultSummaryAndPrepareMemory] End"); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[ReadResultSummaryAndPrepareMemory] End"); - GE_CHK_STATUS_RET(CopyDataToHbm(context, out_shape_hbm), - "Node[%s] copy data to output failed.", - node_name_.c_str()); + GE_CHK_STATUS_RET(CopyDataToHbm(context, out_shape_hbm), "Node[%s] copy data to output failed.", node_name_.c_str()); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[CopyDataToHbm] End"); - GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(context, out_shape_hbm), - "Node[%s] update shape by hbm buffer failed.", + 
GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(context, out_shape_hbm), "Node[%s] update shape by hbm buffer failed.", node_name_.c_str()); GELOGI("Node[%s] update shape and data by result summary end.", node_name_.c_str()); @@ -546,8 +517,8 @@ Status AicpuTfNodeTask::UpdateIoAddr(TaskContext &context) { for (auto i = 0; i < node_item_->num_inputs; ++i) { auto inputData = context.GetInput(i); GE_CHECK_NOTNULL(inputData); - GELOGD("Node[%s] input[%d] addr = %p, size = %zu", node_name_.c_str(), i, - inputData->GetData(), inputData->GetSize()); + GELOGD("Node[%s] input[%d] addr = %p, size = %zu", node_name_.c_str(), i, inputData->GetData(), + inputData->GetSize()); io_addrs.emplace_back(reinterpret_cast(inputData->GetData())); } @@ -559,17 +530,16 @@ Status AicpuTfNodeTask::UpdateIoAddr(TaskContext &context) { auto outputData = context.GetOutput(j); GE_CHECK_NOTNULL(outputData); - GELOGD("Node[%s] output[%d] addr = %p, size = %zu", - node_name_.c_str(), j, outputData->GetData(), outputData->GetSize()); + GELOGD("Node[%s] output[%d] addr = %p, size = %zu", node_name_.c_str(), j, outputData->GetData(), + outputData->GetSize()); io_addrs.emplace_back(reinterpret_cast(outputData->GetData())); } } else { // unknown type 4 use result summary update ioaddr. 
GELOGI("Node[%s] is depend compute node, use result summary as out addr.", node_name_.c_str()); - GE_CHK_BOOL_RET_STATUS(output_summary_.size() == static_cast(node_item_->num_outputs), - INTERNAL_ERROR, - "Node[%s] has %d output but %zu output summary.", - node_name_.c_str(), node_item_->num_outputs, output_summary_.size()); + GE_CHK_BOOL_RET_STATUS(output_summary_.size() == static_cast(node_item_->num_outputs), INTERNAL_ERROR, + "Node[%s] has %d output but %zu output summary.", node_name_.c_str(), + node_item_->num_outputs, output_summary_.size()); for (auto j = 0; j < node_item_->num_outputs; ++j) { void *summary_addr = output_summary_[j]->GetData(); @@ -580,11 +550,8 @@ Status AicpuTfNodeTask::UpdateIoAddr(TaskContext &context) { // if has input and output, need copy to ioaddr if (!io_addrs.empty()) { // copy input and output to device - GE_CHK_RT_RET(rtMemcpy(input_output_addr_->GetData(), - input_output_addr_->GetSize(), - &io_addrs[0], - sizeof(uint64_t) * io_addrs.size(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(input_output_addr_->GetData(), input_output_addr_->GetSize(), &io_addrs[0], + sizeof(uint64_t) * io_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE)); } return SUCCESS; } @@ -600,8 +567,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { } Status AicpuTfNodeTask::TaskCallback(TaskContext &context) { - GELOGI("Node[%s] task callback start. is_dynamic=%s, unknown_type=%d.", - node_name_.c_str(), node_item_->is_dynamic ? "true" : "false", unknown_type_); + GELOGI("Node[%s] task callback start. is_dynamic=%s, unknown_type=%d.", node_name_.c_str(), + node_item_->is_dynamic ? "true" : "false", unknown_type_); Status callback_ret = SUCCESS; if (node_item_->is_dynamic) { // check need update shape, call update shape. 
@@ -621,8 +588,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) { GELOGI("Node[%s] init start.", node_name.c_str()); GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, - "Node[%s] unknown type[%d] is depend compute, it's not supported now.", - node_name.c_str(), unknown_type_); + "Node[%s] unknown type[%d] is depend compute, it's not supported now.", node_name.c_str(), + unknown_type_); GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel(), FAILED, "Node[%s] task def does not has kernel.", node_name.c_str()); auto &kernel_def = task_def_.kernel(); @@ -630,43 +597,40 @@ Status AicpuNodeTask::Init(const HybridModel &model) { auto &args = kernel_def.args(); args_size_ = kernel_def.args_size(); - GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, - "Node[%s] task def args.size=%zu, but args_size=%u.", + GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, "Node[%s] task def args.size=%zu, but args_size=%u.", node_name.c_str(), args.size(), args_size_); GE_CHK_BOOL_RET_STATUS(args_size_ >= sizeof(aicpu::AicpuParamHead), FAILED, - "Node[%s] task def args_size=%u is less than aicpu param head len=%zu.", - node_name.c_str(), args_size_, sizeof(aicpu::AicpuParamHead)); + "Node[%s] task def args_size=%u is less than aicpu param head len=%zu.", node_name.c_str(), + args_size_, sizeof(aicpu::AicpuParamHead)); - args_.reset(new(std::nothrow) uint8_t[args_size_]()); - GE_CHK_BOOL_RET_STATUS(args_ != nullptr, FAILED, - "Node[%s] malloc args mem failed, args_size_=%u.", - node_name.c_str(), args_size_); + args_.reset(new (std::nothrow) uint8_t[args_size_]()); + GE_CHK_BOOL_RET_STATUS(args_ != nullptr, FAILED, "Node[%s] malloc args mem failed, args_size_=%u.", node_name.c_str(), + args_size_); errno_t sec_ret = memcpy_s(args_.get(), args_size_, args.c_str(), args.size()); - GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR, - "Node[%s] copy args failed, ret: %d", node_name_.c_str(), sec_ret); + GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR, 
"Node[%s] copy args failed, ret: %d", node_name_.c_str(), + sec_ret); auto aicpu_param_head = reinterpret_cast(args_.get()); auto io_num = node_item_->num_inputs + node_item_->num_outputs; // check AicpuParamHead ioAddrNum is right. GE_CHK_BOOL_RET_STATUS((aicpu_param_head->ioAddrNum == static_cast(io_num)), PARAM_INVALID, - "Node[%s] param head ioAddrNum=%u, but node has %d inputs and %d outputs.", - node_name.c_str(), aicpu_param_head->ioAddrNum, - node_item_->num_inputs, node_item_->num_outputs); + "Node[%s] param head ioAddrNum=%u, but node has %d inputs and %d outputs.", node_name.c_str(), + aicpu_param_head->ioAddrNum, node_item_->num_inputs, node_item_->num_outputs); auto mini_len = sizeof(aicpu::AicpuParamHead) + io_num * sizeof(uint64_t); // check args len must over mini len. GE_CHK_BOOL_RET_STATUS((mini_len <= aicpu_param_head->length), PARAM_INVALID, - "Node[%s] param head length=%u, but min len need %zu.", - node_name.c_str(), aicpu_param_head->length, mini_len); + "Node[%s] param head length=%u, but min len need %zu.", node_name.c_str(), + aicpu_param_head->length, mini_len); auto &kernel_ext_info = kernel_def.kernel_ext_info(); auto kernel_ext_info_size = kernel_def.kernel_ext_info_size(); GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, - "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", - node_name.c_str(), kernel_ext_info.size(), kernel_ext_info_size); + "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", node_name.c_str(), + kernel_ext_info.size(), kernel_ext_info_size); GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name.c_str()); @@ -697,15 +661,15 @@ Status AicpuNodeTask::UpdateIoAddr(TaskContext &context) { for (auto j = 0; j < node_item_->num_outputs; ++j) { auto outputData = context.GetOutput(j); GE_CHECK_NOTNULL(outputData); - GELOGD("Node[%s] output[%d] addr = %p, size = %zu", node_name_.c_str(), j, - 
outputData->GetData(), outputData->GetSize()); + GELOGD("Node[%s] output[%d] addr = %p, size = %zu", node_name_.c_str(), j, outputData->GetData(), + outputData->GetSize()); io_addrs.emplace_back(reinterpret_cast(outputData->GetData())); } auto io_addr = args_.get() + sizeof(aicpu::AicpuParamHead); // if has input and output, need copy to ioaddr - error_t cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), - &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); + error_t cpy_ret = + memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); GE_CHK_BOOL_RET_STATUS(cpy_ret == EOK, INTERNAL_ERROR, "Node[%s] memcpy io addr to AicpuParamHead failed, ret=%d, args_size=%u, io nums=%zu.", node_name_.c_str(), cpy_ret, args_size_, io_addrs.size()); @@ -719,17 +683,16 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { uint32_t flag = RT_KERNEL_DEFAULT; auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name.c_str()), reinterpret_cast(kernel_name.c_str()), - 1, // default core dim is 1 - args_.get(), args_size_, - nullptr, context.GetStream(), flag); + 1, // default core dim is 1 + args_.get(), args_size_, nullptr, context.GetStream(), flag); GE_CHK_RT_RET(rt_ret); GELOGI("Node[%s] launch task end.", node_name_.c_str()); return SUCCESS; } Status AicpuNodeTask::TaskCallback(TaskContext &context) { - GELOGI("Node[%s] task callback start, is_dynamic = %s, unknown_type=%d.", - node_name_.c_str(), node_item_->is_dynamic ? "true" : "false", unknown_type_); + GELOGI("Node[%s] task callback start, is_dynamic = %s, unknown_type=%d.", node_name_.c_str(), + node_item_->is_dynamic ? "true" : "false", unknown_type_); Status callback_ret = SUCCESS; // check need update shape, call update shape. 
@@ -737,8 +700,7 @@ Status AicpuNodeTask::TaskCallback(TaskContext &context) { // check result callback_ret = UpdateOutputShapeFromExtInfo(); } else { - GELOGI("Node[%s] unknown shape type is %d no need update output shape.", - node_name_.c_str(), unknown_type_); + GELOGI("Node[%s] unknown shape type is %d no need update output shape.", node_name_.c_str(), unknown_type_); } GELOGI("Node[%s] task callback end.", node_name_.c_str()); return callback_ret; @@ -752,8 +714,7 @@ Status AiCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) cons return status; } -Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, - const NodePtr &node, +Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const { GE_CHECK_NOTNULL(node); GELOGI("Node[%s] load task start.", node->GetName().c_str()); @@ -762,13 +723,13 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, auto task_defs = model.GetTaskDefs(node); GE_CHECK_NOTNULL(task_defs); if (node_item->shape_inference_type != DEPEND_COMPUTE) { - GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, - "Node[%s] task_def num[%zu] != 1", node->GetName().c_str(), (*task_defs).size()); + GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", + node->GetName().c_str(), (*task_defs).size()); } else { // The number of tasks of the fourth type operator may be 2 GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1 || (*task_defs).size() == 2, PARAM_INVALID, - "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2", - node->GetName().c_str(), (*task_defs).size()); + "Node[%s] DEPEND_COMPUTE task_def num[%zu] != 1 or 2", node->GetName().c_str(), + (*task_defs).size()); } const auto &task_def = (*task_defs)[0]; std::shared_ptr aicpu_task; @@ -779,13 +740,13 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, GELOGI("Node[%s] task type=%u is AicpuNodeTask.", node->GetName().c_str(), task_def.type()); 
aicpu_task = MakeShared(node_item, task_def); } else { - GELOGE(UNSUPPORTED, "Node[%s] task type=%u is not supported by aicpu node executor.", - node->GetName().c_str(), task_def.type()); + GELOGE(UNSUPPORTED, "Node[%s] task type=%u is not supported by aicpu node executor.", node->GetName().c_str(), + task_def.type()); return UNSUPPORTED; } - GE_CHK_BOOL_RET_STATUS(aicpu_task != nullptr, MEMALLOC_FAILED, - "Load task for node %s failed.", node->GetName().c_str()); + GE_CHK_BOOL_RET_STATUS(aicpu_task != nullptr, MEMALLOC_FAILED, "Load task for node %s failed.", + node->GetName().c_str()); GE_CHK_STATUS_RET(aicpu_task->Init(model), "Node[%s] task init failed.", node->GetName().c_str()); diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index 7caabd66..8aca6ff7 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -27,12 +27,12 @@ namespace hybrid { class AicpuNodeTaskBase : public NodeTask { public: AicpuNodeTaskBase(const NodeItem *node_item, const domi::TaskDef &task_def) - : node_item_(node_item), task_def_(task_def), - node_name_(node_item->node_name), node_type_(node_item->node_type), + : node_item_(node_item), + task_def_(task_def), + node_name_(node_item->node_name), + node_type_(node_item->node_type), unknown_type_(node_item->shape_inference_type), - aicpu_ext_handle_(node_item->node_name, - node_item->num_inputs, - node_item->num_outputs, + aicpu_ext_handle_(node_item->node_name, node_item->num_inputs, node_item->num_outputs, node_item->shape_inference_type) {} ~AicpuNodeTaskBase() override = default; @@ -42,6 +42,7 @@ class AicpuNodeTaskBase : public NodeTask { Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; + protected: virtual Status InitExtInfo(const std::string &kernel_ext_info); @@ -80,15 +81,13 @@ class AicpuNodeTaskBase : public 
NodeTask { class AicpuTfNodeTask : public AicpuNodeTaskBase { public: - AicpuTfNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) - : AicpuNodeTaskBase(node_item, task_def) {} + AicpuTfNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) : AicpuNodeTaskBase(node_item, task_def) {} ~AicpuTfNodeTask() override = default; Status Init(const HybridModel &model) override; protected: - Status LaunchTask(TaskContext &context) override; Status TaskCallback(TaskContext &context) override; @@ -108,19 +107,17 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { /// Status ReadResultSummaryAndPrepareMemory(TaskContext &context, std::vector> &out_shape_hbm); - Status CopyDataToHbm(TaskContext &context, - const std::vector> &out_shape_hbm); + Status CopyDataToHbm(TaskContext &context, const std::vector> &out_shape_hbm); - Status UpdateShapeByHbmBuffer(TaskContext &context, - const std::vector> &out_shape_hbm); + Status UpdateShapeByHbmBuffer(TaskContext &context, const std::vector> &out_shape_hbm); - Status PrepareCopyInputs(const TaskContext &context, - const std::vector> &out_shape_hbm, + Status PrepareCopyInputs(const TaskContext &context, const std::vector> &out_shape_hbm, uint64_t ©_num); static Status EnsureSessionCreated(uint64_t session_id); static Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, std::string &task_info); static uint64_t GetStepIdAddr(const HybridModel &model); + private: // kernel buf, device mem std::unique_ptr kernel_buf_; @@ -146,15 +143,13 @@ class AicpuTfNodeTask : public AicpuNodeTaskBase { class AicpuNodeTask : public AicpuNodeTaskBase { public: - AicpuNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) - : AicpuNodeTaskBase(node_item, task_def) {} + AicpuNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) : AicpuNodeTaskBase(node_item, task_def) {} ~AicpuNodeTask() override = default; Status Init(const HybridModel &model) override; protected: - Status LaunchTask(TaskContext 
&context) override; Status TaskCallback(TaskContext &context) override; @@ -171,12 +166,10 @@ class AicpuNodeTask : public AicpuNodeTaskBase { class AiCpuNodeExecutor : public NodeExecutor { public: - Status LoadTask(const HybridModel &model, - const NodePtr &node, - std::shared_ptr &task) const override; + Status LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const override; Status PrepareTask(NodeTask &task, TaskContext &context) const override; }; -} -} -#endif //GE_HYBRID_KERNEL_AICPU_NODE_EXECUTOR_H_ +} // namespace hybrid +} // namespace ge +#endif // GE_HYBRID_KERNEL_AICPU_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 3c4065ea..122af0f5 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,23 +29,19 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::COMPILED_SUBGRAPH, KnownNodeExecutor); -Status KnownNodeTask:: ExecuteAsync(TaskContext &context, std::function done_callback) { +Status KnownNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTaskExecuteAsync] Start"); GELOGI("[%s] KnownNodeTask::ExecuteAsync in.", context.GetNodeName()); if (davinci_model_->GetTaskList().size() == 0) { GELOGW("KnownNodeExecutor::ExecuteAsync davinci moel has no taskinfo."); // todo if data is connected to netoutput, forward address ? copy data? 
- if (context.NumInputs() == context.NumOutputs()){ - GELOGW("[%s] KnownNodeExecutor::ExecuteAsync davinci moel has no taskinfo.", - context.GetNodeName()); + if (context.NumInputs() == context.NumOutputs()) { + GELOGW("[%s] KnownNodeExecutor::ExecuteAsync davinci moel has no taskinfo.", context.GetNodeName()); for (int i = 0; i < context.NumInputs(); ++i) { auto tensor = context.MutableInput(i); GE_CHECK_NOTNULL(tensor); - GE_CHK_STATUS_RET(context.SetOutput(i, *tensor), - "[%s] Failed to set output[%d]", - context.GetNodeName(), - i); + GE_CHK_STATUS_RET(context.SetOutput(i, *tensor), "[%s] Failed to set output[%d]", context.GetNodeName(), i); } } @@ -58,7 +54,8 @@ Status KnownNodeTask:: ExecuteAsync(TaskContext &context, std::functionGetRtModelHandle(), context.GetStream(), 0); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - GELOGE(rt_ret, "rtModelExecute error, ret: hybrid_model_executorOx%X", rt_ret); return FAILED;); + GELOGE(rt_ret, "rtModelExecute error, ret: hybrid_model_executorOx%X", rt_ret); + return FAILED;); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodertModelExecute] End"); GELOGI("rtModelExecute end"); @@ -112,8 +109,8 @@ Status KnownNodeTask::Init(TaskContext &context) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTask_AllocateWorkspace] Start"); GE_CHK_STATUS_RET( - context.AllocateWorkspace(davinci_model_->TotalMemSize(), &buffer, davinci_model_->GetRuntimeParam().mem_base), - "known node task allocate workspace failed."); + context.AllocateWorkspace(davinci_model_->TotalMemSize(), &buffer, davinci_model_->GetRuntimeParam().mem_base), + "known node task allocate workspace failed."); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeTask_AllocateWorkspace] End, size %zu", davinci_model_->TotalMemSize()); bool addr_not_changed = false; @@ -123,15 +120,16 @@ Status KnownNodeTask::Init(TaskContext &context) { 
davinci_model_->SetKnownNodeAddrNotChanged(addr_not_changed); // update mem base davinci_model_->UpdateMemBase(static_cast(buffer)); - GELOGI("KnownNodeTask::Init mem base is %p, size %u.", - davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size); + GELOGI("KnownNodeTask::Init mem base is %p, size %u.", davinci_model_->GetRuntimeParam().mem_base, + davinci_model_->GetRuntimeParam().mem_size); } if (!load_flag_) { GE_CHK_STATUS_RET(davinci_model_->Init(), "KnownNodeExecutor::InitDavinciModel failed."); load_flag_ = true; } else { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), - davinci_model_->Id()), "KnownNodeTask::Init destroy aicpu kernel failed."); + GE_CHK_STATUS_RET( + ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(), davinci_model_->Id()), + "KnownNodeTask::Init destroy aicpu kernel failed."); } GELOGI("[%s] KnownNodeExecutor::Init success.", context.GetNodeName()); return SUCCESS; @@ -152,8 +150,7 @@ Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) cons return SUCCESS; } -Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, - shared_ptr &task) const { +Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { GELOGI("[%s] KnownNodeExecutor::LoadTask in.", node->GetName().c_str()); GE_CHECK_NOTNULL(node); @@ -180,8 +177,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node Status KnownNodeExecutor::ExecuteTask(NodeTask &task, TaskContext &context, const std::function &callback) const { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorExecuteTask] Start"); - GE_CHK_STATUS_RET(task.ExecuteAsync(context, callback), - "Failed to execute task. node = %s", + GE_CHK_STATUS_RET(task.ExecuteAsync(context, callback), "Failed to execute task. 
node = %s", context.GetNodeItem().NodeName().c_str()); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorExecuteTask] End"); return SUCCESS; diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h index fb1966b4..5847c833 100644 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,15 +27,14 @@ class HybridModel; class KnownNodeTask : public NodeTask { public: - KnownNodeTask(std::shared_ptr davinci_model) - : davinci_model_(davinci_model) - {} + KnownNodeTask(std::shared_ptr davinci_model) : davinci_model_(davinci_model) {} ~KnownNodeTask() {} Status UpdateArgs(TaskContext &context) override; Status ExecuteAsync(TaskContext &context, std::function done_callback) override; Status Init(TaskContext &context) override; + private: std::shared_ptr davinci_model_ = nullptr; bool load_flag_ = false; @@ -47,10 +46,11 @@ class KnownNodeExecutor : public NodeExecutor { Status PrepareTask(NodeTask &task, TaskContext &context) const; Status ExecuteTask(NodeTask &task, TaskContext &context, const std::function &callback) const; ~KnownNodeExecutor() {} + private: std::shared_ptr davinci_model_ = nullptr; }; } // namespace hybrid } // namespace ge -#endif //HYBRID_KNOWN_NODE_EXECUTOR_H_ +#endif // HYBRID_KNOWN_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.cc b/ge/hybrid/node_executor/controlop/control_op_executor.cc index 5f9dde2a..2bf7407c 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.cc +++ b/ge/hybrid/node_executor/controlop/control_op_executor.cc @@ 
-13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "control_op_executor.h" #include "graph/utils/node_utils.h" #include "graph/utils/type_utils.h" @@ -23,27 +24,21 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::CONTROL_OP, ControlOpNodeExecutor); namespace { -template +template Status CopyScalarValueToHost(const TensorValue &tensor, T &value) { GE_CHECK_GE(tensor.GetSize(), sizeof(value)); - GE_CHK_RT_RET(rtMemcpy(&value, - sizeof(value), - tensor.GetData(), - sizeof(value), - RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(&value, sizeof(value), tensor.GetData(), sizeof(value), RT_MEMCPY_DEVICE_TO_HOST)); return SUCCESS; } -} +} // namespace -Status ControlOpNodeTask::ExecuteSubgraph(const GraphItem *subgraph, - TaskContext &task_context, +Status ControlOpNodeTask::ExecuteSubgraph(const GraphItem *subgraph, TaskContext &task_context, const std::function &done_callback) { GELOGD("[%s] Start to execute subgraph.", subgraph->GetName().c_str()); auto execution_context = const_cast(task_context.GetExecutionContext()); auto executor = MakeShared(subgraph, execution_context); GE_CHECK_NOTNULL(executor); - GE_CHK_STATUS_RET(executor->ExecuteAsync(task_context), - "[%s] Failed to execute partitioned call.", + GE_CHK_STATUS_RET(executor->ExecuteAsync(task_context), "[%s] Failed to execute partitioned call.", subgraph->GetName().c_str()); auto callback = [executor, done_callback]() mutable { @@ -61,12 +56,12 @@ Status ControlOpNodeTask::ExecuteSubgraph(const GraphItem *subgraph, Status ControlOpNodeTask::ToBool(const TensorValue &tensor, DataType data_type, bool &value) { switch (data_type) { -#define CASE(DT, T) \ - case (DT): { \ - T val{}; \ - GE_CHK_STATUS_RET(CopyScalarValueToHost(tensor, val)); \ - value = val != 0; \ - break; \ +#define CASE(DT, T) \ + case (DT): { \ + T val{}; \ + GE_CHK_STATUS_RET(CopyScalarValueToHost(tensor, 
val)); \ + value = val != 0; \ + break; \ } // DT_STRING was handled in CondPass CASE(DT_FLOAT, float) @@ -125,24 +120,19 @@ Status IfOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::functi if (shape.IsScalar()) { auto cond_tensor = task_context.GetInput(kIfCondIndex); GE_CHECK_NOTNULL(cond_tensor); - GE_CHK_STATUS_RET(ToBool(*cond_tensor, data_type, cond_val), - "[%s] Failed to get cond value.", + GE_CHK_STATUS_RET(ToBool(*cond_tensor, data_type, cond_val), "[%s] Failed to get cond value.", task_context.GetNodeName()); } else { // true if num elements is non-zero cond_val = shape.GetShapeSize() != 0; - GELOGD("[%s] Cond tensor shape = [%s], cond value = %d", - task_context.GetNodeName(), - shape.ToString().c_str(), + GELOGD("[%s] Cond tensor shape = [%s], cond value = %d", task_context.GetNodeName(), shape.ToString().c_str(), cond_val); } auto subgraph = cond_val ? then_ : else_; GELOGD("[%s] Taking subgraph [%s] by cond = [%d]", task_context.GetNodeName(), subgraph->GetName().c_str(), cond_val); GE_CHK_STATUS_RET(ExecuteSubgraph(subgraph, task_context, done_callback), - "[%s] Failed to execute subgraph. cond = %d", - task_context.GetNodeName(), - cond_val); + "[%s] Failed to execute subgraph. cond = %d", task_context.GetNodeName(), cond_val); GELOGD("[%s] Done executing with cond = %d successfully.", task_context.GetNodeName(), cond_val); return SUCCESS; @@ -171,8 +161,7 @@ Status CaseOpNodeTask::Init(const NodePtr &node, const HybridModel &model) { const GraphItem *CaseOpNodeTask::SelectBranch(int32_t branch_index) const { // subgraphs_ is non-empty. checked int Init if (branch_index < 0 || static_cast(branch_index) >= subgraphs_.size()) { - GELOGI("Branch index out of range. index = %d, num_subgraphs = %zu, will taking last branch.", - branch_index, + GELOGI("Branch index out of range. 
index = %d, num_subgraphs = %zu, will taking last branch.", branch_index, subgraphs_.size()); branch_index = subgraphs_.size() - 1; } @@ -186,9 +175,7 @@ Status CaseOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::func int32_t branch_index = 0; GE_CHK_STATUS_RET(CopyScalarValueToHost(*branch_tensor, branch_index)); const GraphItem *subgraph = SelectBranch(branch_index); - GELOGI("[%s] Taking subgraph [%s] by branch = [%d]", - task_context.GetNodeName(), - subgraph->GetName().c_str(), + GELOGI("[%s] Taking subgraph [%s] by branch = [%d]", task_context.GetNodeName(), subgraph->GetName().c_str(), branch_index); std::vector inputs; @@ -199,8 +186,7 @@ Status CaseOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::func inputs.emplace_back(*input_tensor); } - GE_CHK_STATUS_RET(ExecuteSubgraph(subgraph, task_context, done_callback), - "[%s] Failed to execute else-subgraph.", + GE_CHK_STATUS_RET(ExecuteSubgraph(subgraph, task_context, done_callback), "[%s] Failed to execute else-subgraph.", task_context.GetNodeName()); GELOGD("[%s] Done executing subgraph[%d] successfully.", task_context.GetNodeName(), branch_index); @@ -227,17 +213,13 @@ Status WhileOpNodeTask::Init(const NodePtr &node, const HybridModel &model) { Status WhileOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::function &done_callback) const { if (task_context.NumInputs() != task_context.NumOutputs()) { - GELOGE(INTERNAL_ERROR, - "[%s] Invalid while args. num_inputs = %d, num_outputs = %d", - task_context.GetNodeName(), - task_context.NumInputs(), - task_context.NumOutputs()); + GELOGE(INTERNAL_ERROR, "[%s] Invalid while args. 
num_inputs = %d, num_outputs = %d", task_context.GetNodeName(), + task_context.NumInputs(), task_context.NumOutputs()); return INTERNAL_ERROR; } bool is_continue = false; - GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), - "[%s] Failed to execute iteration 0.", + GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), "[%s] Failed to execute iteration 0.", task_context.GetNodeName()); if (!is_continue) { for (int i = 0; i < task_context.NumInputs(); ++i) { @@ -268,10 +250,8 @@ Status WhileOpNodeTask::DoExecuteAsync(TaskContext &task_context, const std::fun int iteration = 1; while (true) { GELOGD("[%s] Start to execute, iteration = %d", task_context.GetNodeName(), iteration); - GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), - "[%s] Failed to execute iteration %d.", - task_context.GetNodeName(), - iteration); + GE_CHK_STATUS_RET(ExecuteOneLoop(task_context, is_continue), "[%s] Failed to execute iteration %d.", + task_context.GetNodeName(), iteration); if (!is_continue) { GELOGD("[%s] Quit from loop. 
current iteration = %d", task_context.GetNodeName(), iteration); @@ -314,21 +294,16 @@ Status WhileOpNodeTask::ExecuteCond(TaskContext &task_context, bool &is_continue GELOGD("[%s] Start to execute cond-subgraph.", task_context.GetNodeName()); GE_CHK_STATUS_RET(executor->ExecuteAsync(inputs, input_desc), "Failed to execute partitioned call."); GELOGD("[%s] Done executing cond-subgraph successfully.", cond_->GetName().c_str()); - GE_CHK_STATUS_RET_NOLOG(task_context.RegisterCallback([executor]() mutable { - executor.reset(); - })); + GE_CHK_STATUS_RET_NOLOG(task_context.RegisterCallback([executor]() mutable { executor.reset(); })); // get cond output GE_CHK_STATUS_RET(executor->Synchronize(), "[%s] Failed to sync cond-subgraph result.", cond_->GetName().c_str()); std::vector cond_outputs; std::vector cond_output_desc_list; - GE_CHK_STATUS_RET(executor->GetOutputs(cond_outputs, cond_output_desc_list), - "[%s] Failed to get cond-output.", + GE_CHK_STATUS_RET(executor->GetOutputs(cond_outputs, cond_output_desc_list), "[%s] Failed to get cond-output.", cond_->GetName().c_str()); if (cond_outputs.size() != kCondOutputSize || cond_output_desc_list.size() != kCondOutputSize) { - GELOGE(INTERNAL_ERROR, - "[%s] Number of cond outputs is invalid. number = %zu", - task_context.GetNodeName(), + GELOGE(INTERNAL_ERROR, "[%s] Number of cond outputs is invalid. 
number = %zu", task_context.GetNodeName(), cond_outputs.size()); return INTERNAL_ERROR; } @@ -337,15 +312,12 @@ Status WhileOpNodeTask::ExecuteCond(TaskContext &task_context, bool &is_continue const auto &shape = cond_tensor_desc->GetShape(); if (shape.IsScalar()) { auto data_type = cond_tensor_desc->GetDataType(); - GE_CHK_STATUS_RET(ToBool(cond_outputs[0], data_type, is_continue), - "[%s] Failed to get cond value.", + GE_CHK_STATUS_RET(ToBool(cond_outputs[0], data_type, is_continue), "[%s] Failed to get cond value.", task_context.GetNodeName()); } else { // true if num elements is non-zero is_continue = shape.GetShapeSize() > 0; - GELOGD("[%s] Cond tensor shape = [%s], is_continue = %d", - task_context.GetNodeName(), - shape.ToString().c_str(), + GELOGD("[%s] Cond tensor shape = [%s], is_continue = %d", task_context.GetNodeName(), shape.ToString().c_str(), is_continue); } @@ -364,9 +336,7 @@ Status WhileOpNodeTask::MoveOutputs2Inputs(TaskContext &task_context) { auto output_tensor_desc = task_context.MutableOutputDesc(i); GE_CHECK_NOTNULL(output_tensor_desc); - GELOGD("[%s] To update input shape[%d] by output shape. from [%s] to [%s]", - task_context.GetNodeName(), - i, + GELOGD("[%s] To update input shape[%d] by output shape. 
from [%s] to [%s]", task_context.GetNodeName(), i, task_context.MutableInputDesc(i)->GetShape().ToString().c_str(), output_tensor_desc->GetShape().ToString().c_str()); *task_context.MutableInputDesc(i) = *output_tensor_desc; @@ -376,28 +346,25 @@ Status WhileOpNodeTask::MoveOutputs2Inputs(TaskContext &task_context) { } Status WhileOpNodeTask::ExecuteOneLoop(TaskContext &task_context, bool &is_continue) const { - GE_CHK_STATUS_RET(ExecuteCond(task_context, is_continue), - "[%s] Failed to execute cond-subgraph", + GE_CHK_STATUS_RET(ExecuteCond(task_context, is_continue), "[%s] Failed to execute cond-subgraph", task_context.GetNodeName()); if (!is_continue) { return SUCCESS; } GELOGD("[%s] Start to execute body-subgraph.", task_context.GetNodeName()); - GE_CHK_STATUS_RET(ExecuteSubgraph(body_, task_context, nullptr), - "[%s] Failed to execute cond-subgraph", task_context.GetNodeName()); + GE_CHK_STATUS_RET(ExecuteSubgraph(body_, task_context, nullptr), "[%s] Failed to execute cond-subgraph", + task_context.GetNodeName()); GELOGD("[%s] Done executing body-subgraph successfully.", task_context.GetNodeName()); // set outputs to inputs for next iteration - GE_CHK_STATUS_RET(MoveOutputs2Inputs(task_context), - "[%s] Failed to move outputs to inputs", + GE_CHK_STATUS_RET(MoveOutputs2Inputs(task_context), "[%s] Failed to move outputs to inputs", task_context.GetNodeName()); return SUCCESS; } -Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, - const NodePtr &node, +Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr &task) const { auto node_item = model.GetNodeItem(node); GE_CHECK_NOTNULL(node_item); @@ -405,11 +372,11 @@ Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, unique_ptr node_task; auto node_type = node->GetType(); if (node_type == IF) { - node_task.reset(new(std::nothrow) IfOpNodeTask()); + node_task.reset(new (std::nothrow) IfOpNodeTask()); } else if (node_type == CASE) { - 
node_task.reset(new(std::nothrow) CaseOpNodeTask()); + node_task.reset(new (std::nothrow) CaseOpNodeTask()); } else if (node_type == WHILE) { - node_task.reset(new(std::nothrow) WhileOpNodeTask()); + node_task.reset(new (std::nothrow) WhileOpNodeTask()); } else { GELOGE(PARAM_INVALID, "[%s] Unsupported type: %s", node->GetName().c_str(), node_type.c_str()); return PARAM_INVALID; @@ -422,8 +389,6 @@ Status ControlOpNodeExecutor::LoadTask(const HybridModel &model, return SUCCESS; } -Status ControlOpNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { - return SUCCESS; -} +Status ControlOpNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { return SUCCESS; } } // namespace hybrid } // namespace ge \ No newline at end of file diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.h b/ge/hybrid/node_executor/controlop/control_op_executor.h index 7520afd1..68db7e91 100644 --- a/ge/hybrid/node_executor/controlop/control_op_executor.h +++ b/ge/hybrid/node_executor/controlop/control_op_executor.h @@ -33,8 +33,7 @@ class ControlOpNodeTask : public NodeTask { protected: virtual Status DoExecuteAsync(TaskContext &task_context, const std::function &done_callback) const = 0; static Status ToBool(const TensorValue &tensor_value, DataType data_type, bool &value); - static Status ExecuteSubgraph(const GraphItem *subgraph, - TaskContext &task_context, + static Status ExecuteSubgraph(const GraphItem *subgraph, TaskContext &task_context, const std::function &done_callback); }; @@ -59,7 +58,7 @@ class CaseOpNodeTask : public ControlOpNodeTask { Status Init(const NodePtr &node, const HybridModel &model) override; protected: - const GraphItem* SelectBranch(int32_t branch_index) const; + const GraphItem *SelectBranch(int32_t branch_index) const; Status DoExecuteAsync(TaskContext &task_context, const std::function &done_callback) const override; private: @@ -98,4 +97,4 @@ class ControlOpNodeExecutor : public NodeExecutor { }; } // namespace 
hybrid } // namespace ge -#endif // GE_HYBRID_CONTROLOP_CONTROL_OP_EXECUTOR_H_ +#endif // GE_HYBRID_CONTROLOP_CONTROL_OP_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc index ee45964c..cc140b08 100755 --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc @@ -27,14 +27,8 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor); -const std::unordered_map> - RefInputTask::out_ref_input_index_ = {{DATA, {}}, - {AIPPDATA, {}}, - {RESHAPE, {}}, - {EXPANDDIMS, {}}, - {SQUEEZE, {}}, - {BROADCASTGRADIENTARGS, {}} - }; +const std::unordered_map> RefInputTask::out_ref_input_index_ = { + {DATA, {}}, {AIPPDATA, {}}, {RESHAPE, {}}, {EXPANDDIMS, {}}, {SQUEEZE, {}}, {BROADCASTGRADIENTARGS, {}}}; const std::unordered_set DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE}; @@ -46,8 +40,7 @@ Status RefInputTask::UpdateArgs(TaskContext &) { Status RefInputTask::Execute(TaskContext &context) { auto iter = out_ref_input_index_.find(node_type_); if (iter == out_ref_input_index_.end()) { - GELOGE(UNSUPPORTED, "node %s type %s can not use RefInputTask.", - node_name_.c_str(), node_type_.c_str()); + GELOGE(UNSUPPORTED, "node %s type %s can not use RefInputTask.", node_name_.c_str(), node_type_.c_str()); return UNSUPPORTED; } @@ -72,8 +65,8 @@ Status RefInputTask::RefOneByOne(TaskContext &context) { auto input = context.GetInput(out_index); GE_CHECK_NOTNULL(input); GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); - GELOGD("node %s type %s output[%u] ref input[%u] addr=%p.", - node_name_.c_str(), node_type_.c_str(), out_index, out_index, input->GetData()); + GELOGD("node %s type %s output[%u] ref input[%u] addr=%p.", node_name_.c_str(), node_type_.c_str(), out_index, + out_index, input->GetData()); } GELOGI("node 
%s type %s ref input one by one end.", node_name_.c_str(), node_type_.c_str()); return SUCCESS; @@ -83,8 +76,8 @@ Status RefInputTask::RefByOrder(const std::vector &ref_order, TaskCont GELOGI("node %s type %s ref input by order begin.", node_name_.c_str(), node_type_.c_str()); int32_t output_num = context.NumOutputs(); if (ref_order.size() != static_cast(output_num)) { - GELOGE(INTERNAL_ERROR, "node %s type %s has %d outputs but only has %zu out ref index.", - node_name_.c_str(), node_type_.c_str(), output_num, ref_order.size()); + GELOGE(INTERNAL_ERROR, "node %s type %s has %d outputs but only has %zu out ref index.", node_name_.c_str(), + node_type_.c_str(), output_num, ref_order.size()); return INTERNAL_ERROR; } for (auto out_index = 0; out_index < output_num; ++out_index) { @@ -92,8 +85,8 @@ Status RefInputTask::RefByOrder(const std::vector &ref_order, TaskCont auto input = context.GetInput(ref_input_index); GE_CHECK_NOTNULL(input); GE_CHK_STATUS_RET(context.SetOutput(out_index, *input)); - GELOGD("node %s type %s output[%d] ref input[%u] addr=%p.", - node_name_.c_str(), node_type_.c_str(), out_index, ref_input_index, input->GetData()); + GELOGD("node %s type %s output[%d] ref input[%u] addr=%p.", node_name_.c_str(), node_type_.c_str(), out_index, + ref_input_index, input->GetData()); } GELOGI("node %s type %s ref input by order end.", node_name_.c_str(), node_type_.c_str()); return SUCCESS; @@ -101,8 +94,8 @@ Status RefInputTask::RefByOrder(const std::vector &ref_order, TaskCont Status RefInputTask::ExecuteAsync(TaskContext &context, std::function done_callback) { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[RefInputTaskExecuteAsync] Start"); - GE_CHK_STATUS_RET(Execute(context), "node:%s type:%s ref input task execute failed", - node_name_.c_str(), node_type_.c_str()); + GE_CHK_STATUS_RET(Execute(context), "node:%s type:%s ref input task execute failed", node_name_.c_str(), + node_type_.c_str()); if (done_callback != nullptr) 
{ // host cpu no need register callback, call it directly. GE_CHK_STATUS_RET(context.TryExecuteCallback(done_callback)); @@ -111,9 +104,7 @@ Status RefInputTask::ExecuteAsync(TaskContext &context, std::function do return SUCCESS; } -bool RefInputTask::IsBelong(const std::string &op_type) { - return out_ref_input_index_.count(op_type) > 0; -} +bool RefInputTask::IsBelong(const std::string &op_type) { return out_ref_input_index_.count(op_type) > 0; } Status DependInputShapeTask::UpdateArgs(TaskContext &) { // no need update args @@ -125,15 +116,14 @@ Status DependInputShapeTask::Execute(TaskContext &context) { std::string node_type = node_->GetType(); auto kernel = factory.Create(node_type); if (kernel == nullptr) { - GELOGE(UNSUPPORTED, "node %s type %s is not supported by host kernel.", - node_->GetName().c_str(), node_type.c_str()); + GELOGE(UNSUPPORTED, "node %s type %s is not supported by host kernel.", node_->GetName().c_str(), + node_type.c_str()); return UNSUPPORTED; } std::vector outputs; Status compute_ret = kernel->Compute(node_, outputs); if (compute_ret != SUCCESS) { - GELOGE(compute_ret, "node %s type %s compute failed or not imply.", - node_->GetName().c_str(), node_type.c_str()); + GELOGE(compute_ret, "node %s type %s compute failed or not imply.", node_->GetName().c_str(), node_type.c_str()); return compute_ret; } int32_t output_num = context.NumOutputs(); @@ -159,19 +149,15 @@ Status DependInputShapeTask::Execute(TaskContext &context) { return INTERNAL_ERROR; } - GELOGI("node:%s type:%s [%d]th output data=%p, out size=%zu, data size=%zu.", - node_->GetName().c_str(), node_type.c_str(), i, - tensor_value->GetData(), tensor_value->GetSize(), tensor_data.GetSize()); + GELOGI("node:%s type:%s [%d]th output data=%p, out size=%zu, data size=%zu.", node_->GetName().c_str(), + node_type.c_str(), i, tensor_value->GetData(), tensor_value->GetSize(), tensor_data.GetSize()); if (tensor_data.GetSize() > 0) { - GE_CHK_RT_RET(rtMemcpy(tensor_value->MutableData(), 
- tensor_value->GetSize(), - tensor_data.GetData(), - tensor_data.GetSize(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(tensor_value->MutableData(), tensor_value->GetSize(), tensor_data.GetData(), + tensor_data.GetSize(), RT_MEMCPY_HOST_TO_DEVICE)); } - GELOGI("node:%s type:%s [%d]th set data success, data size=%zu.", - node_->GetName().c_str(), node_type.c_str(), i, tensor_data.GetSize()); + GELOGI("node:%s type:%s [%d]th set data success, data size=%zu.", node_->GetName().c_str(), node_type.c_str(), i, + tensor_data.GetSize()); } return SUCCESS; } @@ -190,9 +176,7 @@ Status DependInputShapeTask::ExecuteAsync(TaskContext &context, std::function 0; -} +bool DependInputShapeTask::IsBelong(const std::string &op_type) { return depend_input_shape_ops_.count(op_type) > 0; } Status GeLocalNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), @@ -202,26 +186,24 @@ Status GeLocalNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) co return status; } -Status GeLocalNodeExecutor::LoadTask(const HybridModel &model, - const NodePtr &node, +Status GeLocalNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const { GE_CHECK_NOTNULL(node); std::string node_type = node->GetType(); if (RefInputTask::IsBelong(node_type)) { - GELOGI("node %s type %s is ref input task, use RefInputTask.", - node->GetName().c_str(), node_type.c_str()); + GELOGI("node %s type %s is ref input task, use RefInputTask.", node->GetName().c_str(), node_type.c_str()); task = MakeShared(node); if (task == nullptr) { GELOGE(MEMALLOC_FAILED, "create RefInputTask for node %s failed.", node->GetName().c_str()); return MEMALLOC_FAILED; } } else if (DependInputShapeTask::IsBelong(node_type)) { - GELOGI("node %s type %s is depend input shape task, use DependInputShapeTask.", - node->GetName().c_str(), node_type.c_str()); + GELOGI("node %s type %s is depend input 
shape task, use DependInputShapeTask.", node->GetName().c_str(), + node_type.c_str()); task = MakeShared(node); if (task == nullptr) { - GELOGE(MEMALLOC_FAILED, "create DependInputShapeTask for node %s type %s failed.", - node->GetName().c_str(), node_type.c_str()); + GELOGE(MEMALLOC_FAILED, "create DependInputShapeTask for node %s type %s failed.", node->GetName().c_str(), + node_type.c_str()); return MEMALLOC_FAILED; } } else if (node_type == CONSTANTOP || node_type == VARIABLE) { @@ -235,8 +217,8 @@ Status GeLocalNodeExecutor::LoadTask(const HybridModel &model, task = MakeShared(tensor); GE_CHECK_NOTNULL(task); } else { - GELOGE(UNSUPPORTED, "node %s type %s is not support in GeLocalNodeExecutor now.", - node->GetName().c_str(), node_type.c_str()); + GELOGE(UNSUPPORTED, "node %s type %s is not support in GeLocalNodeExecutor now.", node->GetName().c_str(), + node_type.c_str()); return UNSUPPORTED; } return SUCCESS; @@ -244,9 +226,7 @@ Status GeLocalNodeExecutor::LoadTask(const HybridModel &model, ConstantNodeTask::ConstantNodeTask(const TensorValue *tensor) : tensor_(tensor) {} -Status ConstantNodeTask::UpdateArgs(TaskContext &context) { - return SUCCESS; -} +Status ConstantNodeTask::UpdateArgs(TaskContext &context) { return SUCCESS; } Status ConstantNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { GELOGD("[%s] Start execute.", context.GetNodeName()); diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h index 9de8d0f9..0195e76c 100644 --- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h +++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.h @@ -25,16 +25,14 @@ namespace ge { namespace hybrid { class RefInputTask : public NodeTask { public: - explicit RefInputTask(const NodePtr &node) - : node_name_(node->GetName()), - node_type_(node->GetType()) { - } + explicit RefInputTask(const NodePtr &node) : node_name_(node->GetName()), 
node_type_(node->GetType()) {} ~RefInputTask() = default; virtual Status UpdateArgs(TaskContext &context) override; virtual Status ExecuteAsync(TaskContext &context, std::function done_callback) override; static bool IsBelong(const std::string &op_type); + private: Status Execute(TaskContext &context); Status RefOneByOne(TaskContext &context); @@ -51,16 +49,17 @@ class RefInputTask : public NodeTask { class DependInputShapeTask : public NodeTask { public: - explicit DependInputShapeTask(const NodePtr &node) : node_(node) { - } + explicit DependInputShapeTask(const NodePtr &node) : node_(node) {} ~DependInputShapeTask() = default; virtual Status UpdateArgs(TaskContext &context) override; virtual Status ExecuteAsync(TaskContext &context, std::function done_callback) override; static bool IsBelong(const std::string &op_type); + private: Status Execute(TaskContext &context); + private: const NodePtr node_; @@ -82,13 +81,11 @@ class ConstantNodeTask : public NodeTask { class GeLocalNodeExecutor : public NodeExecutor { public: - Status PrepareTask(NodeTask &task, TaskContext &context) const override; - virtual Status LoadTask(const HybridModel &model, - const NodePtr &node, + virtual Status LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const override; }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_KERNEL_GE_LOCAL_NODE_EXECUTOR_H_ +#endif // GE_HYBRID_KERNEL_GE_LOCAL_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 0d6f52e8..f2cd1888 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,7 +17,6 @@ #include "hybrid/node_executor/hccl/hccl_node_executor.h" #include "common/ge/ge_util.h" #include "common/ge/plugin_manager.h" -#include "common/math/math_util.h" #include "framework/common/debug/ge_log.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" @@ -42,8 +41,8 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do GELOGE(FAILED, "hccl handle is nullptr! "); return FAILED; } - auto EnqueueHcomOpertion = (HcclResult(*)(HcomOpertion, std::function))dlsym( - context.handle_, "EnqueueHcomOpertion"); + auto EnqueueHcomOpertion = + (HcclResult(*)(HcomOpertion, std::function))dlsym(context.handle_, "EnqueueHcomOpertion"); if (EnqueueHcomOpertion == nullptr) { GELOGE(FAILED, "Failed to invoke EnqueueHcomOpertion hcom unknown node function."); if (dlclose(context.handle_) != 0) { @@ -163,13 +162,12 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector(reinterpret_cast(tv->MutableData())); - addr_infos.resize(dims.front()); for (auto idx = 0; idx < dims.front(); ++idx) { - FMK_INT64_MULCHECK(idx, kVarTableRowCnt); - auto line_idx = idx * kVarTableRowCnt; - addr_infos[idx] = {static_cast(data[line_idx]), data[line_idx + kVarTableIdxAddr], local_addr, - data[line_idx + kVarTableIdxLen]}; - local_addr += data[line_idx + kVarTableIdxLen]; + addr_infos.push_back({static_cast(data[idx * kVarTableRowCnt]), + data[idx * kVarTableRowCnt + kVarTableIdxAddr], local_addr, + data[idx * kVarTableRowCnt + kVarTableIdxLen]}); + local_addr += data[idx * kVarTableRowCnt + kVarTableIdxLen]; } return SUCCESS; @@ -204,8 +200,8 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector done_callback) { GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName()); auto EnqueueRemoteAccess = - (HcclResult(*)(const string &, const vector &, - std::function))dlsym(context.handle_, "EnqueueRemoteAccess"); + (HcclResult(*)(const string &, const vector &, + std::function))dlsym(context.handle_, 
"EnqueueRemoteAccess"); if (EnqueueRemoteAccess == nullptr) { GELOGE(FAILED, "Failed to invoke EnqueueRemoteAccess hcom unknown node function."); if (dlclose(context.handle_) != 0) { diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index 07dd848b..ddf6eb3a 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,6 @@ #ifndef HYBRID_HCCL_NODE_EXECUTOR_H_ #define HYBRID_HCCL_NODE_EXECUTOR_H_ -#include "common/opskernel/ge_task_info.h" #include "graph/op_desc.h" #include "hybrid/model/hybrid_model.h" #include "hybrid/node_executor/node_executor.h" diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc index 49fc3de4..1c98abee 100755 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -31,8 +31,8 @@ Status HostNodeTaskBase::UpdateArgs(TaskContext &) { Status HostNodeTaskBase::ExecuteAsync(TaskContext &context, std::function done_callback) { GELOGD("[%s] Start execute.", context.GetNodeName()); - GE_CHK_STATUS_RET(Execute(context), "node:%s type:%s, task execute failed.", - node_->GetName().c_str(), node_->GetType().c_str()) + GE_CHK_STATUS_RET(Execute(context), "node:%s type:%s, task execute failed.", node_->GetName().c_str(), + node_->GetType().c_str()) if (done_callback) { GELOGD("[%s] Start invoke callback.", context.GetNodeName()); done_callback(); @@ -49,8 +49,7 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { for (int32_t i = 0; i < context.NumInputs(); ++i) { const auto &input_desc = 
op_desc->GetInputDesc(i); GE_CHECK_NOTNULL(context.GetInput(i)); - auto in_tensor = MakeShared(input_desc, - reinterpret_cast(context.GetInput(i)->GetData()), + auto in_tensor = MakeShared(input_desc, reinterpret_cast(context.GetInput(i)->GetData()), context.GetInput(i)->GetSize()); GE_CHECK_NOTNULL(in_tensor); in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); @@ -71,9 +70,8 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { } auto tensor = context.GetOutput(i); GE_CHECK_NOTNULL(tensor); - auto out_tensor = MakeShared(output_desc, - reinterpret_cast(tensor->GetData()), - tensor->GetSize()); + auto out_tensor = + MakeShared(output_desc, reinterpret_cast(tensor->GetData()), tensor->GetSize()); GE_CHECK_NOTNULL(out_tensor); out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); @@ -89,24 +87,22 @@ Status HostCpuNodeTask::Execute(TaskContext &context) { RunContext run_context; auto host_kernel = hybrid::host_cpu::KernelFactory::Instance().CreateKernel(node_); if (host_kernel == nullptr) { - GELOGE(UNSUPPORTED, "node %s type %s is not supported by host kernel.", - node_->GetName().c_str(), node_->GetType().c_str()); + GELOGE(UNSUPPORTED, "node %s type %s is not supported by host kernel.", node_->GetName().c_str(), + node_->GetType().c_str()); return UNSUPPORTED; } Status compute_ret = host_kernel->Compute(context); if (compute_ret != SUCCESS) { - GELOGE(compute_ret, "node %s type %s compute failed or not imply.", - node_->GetName().c_str(), node_->GetType().c_str()); + GELOGE(compute_ret, "node %s type %s compute failed or not imply.", node_->GetName().c_str(), + node_->GetType().c_str()); return compute_ret; } return SUCCESS; } -Status HostCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { - return task.UpdateArgs(context); -} +Status HostCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { return 
task.UpdateArgs(context); } Status HostCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const { @@ -114,9 +110,7 @@ Status HostCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &no auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); auto mem_type = static_cast(HOST_DDR); - for (size_t i = 0; i < op_desc->GetOutputsSize(); i++) { - (void)AttrUtils::SetInt(op_desc->MutableOutputDesc(i), ATTR_OUTPUT_MEMORY_TYPE, mem_type); - } + (void)AttrUtils::SetInt(op_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type); const std::string &name = node->GetName(); const std::string &type = node->GetType(); if (HostCpuEngine::GetInstance().CheckSupported(type)) { @@ -134,4 +128,4 @@ Status HostCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &no return SUCCESS; } } // namespace hybrid -} // namespace ge +} // namespace ge \ No newline at end of file diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.h b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.h index 10657379..036a0c60 100644 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.h +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.h @@ -58,10 +58,8 @@ class HostCpuNodeExecutor : public NodeExecutor { public: Status PrepareTask(NodeTask &task, TaskContext &context) const override; - Status LoadTask(const HybridModel &model, - const NodePtr &node, - std::shared_ptr &task) const override; + Status LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const override; }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_KERNEL_HOST_CPU_NODE_EXECUTOR_H_ +#endif // GE_HYBRID_KERNEL_HOST_CPU_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc index 3bf71013..3655fcdb 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc +++ 
b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ const size_t kAssignInputNum = 2; const size_t kAssignRefInputIndex = 0; const size_t kAssignValueInputIndex = 1; const size_t kAssignRefOutputIndex = 0; -} +} // namespace namespace ge { namespace hybrid { @@ -37,19 +37,19 @@ Status AssignKernel::Compute(TaskContext& context) { const auto value_tensor = context.GetInput(kAssignValueInputIndex); GE_CHECK_NOTNULL(value_tensor); if (value_tensor->GetSize() > ref_tensor->GetSize()) { - GELOGE(INTERNAL_ERROR, "[%s] value_input_size=%zu, but ref_input_size=%zu.", - node_->GetName().c_str(), value_tensor->GetSize(), ref_tensor->GetSize()); + GELOGE(INTERNAL_ERROR, "[%s] value_input_size=%zu, but ref_input_size=%zu.", node_->GetName().c_str(), + value_tensor->GetSize(), ref_tensor->GetSize()); return INTERNAL_ERROR; } - GELOGI("[%s] value_input_data=%p, ref_input_size=%zu, value_input_size=%zu.", - node_->GetName().c_str(), ref_tensor->GetData(), ref_tensor->GetSize(), value_tensor->GetSize()); + GELOGI("[%s] value_input_data=%p, ref_input_size=%zu, value_input_size=%zu.", node_->GetName().c_str(), + ref_tensor->GetData(), ref_tensor->GetSize(), value_tensor->GetSize()); if (value_tensor->GetSize() > 0) { GE_CHK_RT_RET(rtMemcpy(ref_tensor->MutableData(), ref_tensor->GetSize(), value_tensor->GetData(), value_tensor->GetSize(), RT_MEMCPY_HOST_TO_HOST)); } - GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor), - "[%s] Failed to set output.", context.GetNodeName()); + GE_CHK_STATUS_RET(context.SetOutput(kAssignRefOutputIndex, *ref_tensor), "[%s] Failed to set output.", + context.GetNodeName()); GELOGI("[%s] compute success.", node_->GetName().c_str()); return SUCCESS; diff --git 
a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h index bfa24325..c3b4862b 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ class AssignKernel : public Kernel { * @brief compute for node_task. * @return result */ - Status Compute(TaskContext& context) override; + Status Compute(TaskContext &context) override; }; } // namespace host_cpu } // namespace hybrid diff --git a/ge/hybrid/node_executor/host_cpu/kernel/kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/kernel.h index 0a9f32b7..4fe8f8a3 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,7 +31,7 @@ class Kernel { public: Kernel(const NodePtr &node) : node_(node) {} virtual ~Kernel() = default; - virtual Status Compute(TaskContext& context) = 0; + virtual Status Compute(TaskContext &context) = 0; protected: const NodePtr &node_; diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc index ff5a7c6d..47e6e534 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h index 6677ce4a..302a7e16 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/no_op_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ class NoOpKernel : public Kernel { * @brief compute for node_task. 
* @return result */ - Status Compute(TaskContext& context) override; + Status Compute(TaskContext &context) override; }; } // namespace host_cpu } // namespace hybrid diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc index 37b07e37..7e87c114 100755 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ const char *const kAttrDtype = "dtype"; namespace ge { namespace hybrid { namespace host_cpu { -Status RandomUniformKernel::Compute(TaskContext& context) { +Status RandomUniformKernel::Compute(TaskContext &context) { GELOGI("[%s] compute begin.", node_->GetName().c_str()); int64_t seed = 0; @@ -72,7 +72,7 @@ Status RandomUniformKernel::Compute(TaskContext& context) { template Status RandomUniformKernel::Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, - TaskContext& context) { + TaskContext &context) { GE_CHECK_NOTNULL(op_desc_ptr); // RandomUniformOp has and only has one output int64_t data_num = op_desc_ptr->GetOutputDesc(0).GetShape().GetShapeSize(); @@ -80,10 +80,8 @@ Status RandomUniformKernel::Generate(const ge::OpDescPtr &op_desc_ptr, int64_t s attr.SetMemType(HOST_DDR); auto tensor_size = data_num * sizeof(T); TensorValue tensor; - GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr), - "[%s] Failed to allocate output of size %zu", - context.GetNodeName(), - tensor_size); + GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr), "[%s] Failed to allocate output of size %zu", + context.GetNodeName(), tensor_size); auto *buf = 
reinterpret_cast(tensor.MutableData()); int64_t final_seed; @@ -108,7 +106,7 @@ Status RandomUniformKernel::Generate(const ge::OpDescPtr &op_desc_ptr, int64_t s } Status RandomUniformKernel::GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, - TaskContext& context) { + TaskContext &context) { GE_CHECK_NOTNULL(op_desc_ptr); // RandomUniformOp has and only has one output int64_t data_num = op_desc_ptr->GetOutputDesc(0).GetShape().GetShapeSize(); @@ -116,10 +114,8 @@ Status RandomUniformKernel::GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64 attr.SetMemType(HOST_DDR); auto tensor_size = data_num * sizeof(fp16_t); TensorValue tensor; - GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr), - "[%s] Failed to allocate output of size %zu", - context.GetNodeName(), - tensor_size); + GE_CHK_STATUS_RET(context.AllocateTensor(tensor_size, tensor, &attr), "[%s] Failed to allocate output of size %zu", + context.GetNodeName(), tensor_size); auto *buf = reinterpret_cast(tensor.MutableData()); int64_t final_seed; diff --git a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h index 30557064..7024b103 100755 --- a/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,13 +33,13 @@ class RandomUniformKernel : public Kernel { * @brief compute for node_task. 
* @return result */ - Status Compute(TaskContext& context) override; + Status Compute(TaskContext &context) override; private: template - Status Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext& context); + Status Generate(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext &context); - static Status GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext& context); + static Status GenerateFP16(const ge::OpDescPtr &op_desc_ptr, int64_t seed, int64_t seed2, TaskContext &context); }; } // namespace host_cpu } // namespace hybrid diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc index 2a836458..db5c0f9c 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h index f20d6221..1625e49e 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h +++ b/ge/hybrid/node_executor/host_cpu/kernel/variable_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ class VariableKernel : public Kernel { * @brief compute for node_task. 
* @return result */ - Status Compute(TaskContext& context) override; + Status Compute(TaskContext &context) override; }; } // namespace host_cpu } // namespace hybrid diff --git a/ge/hybrid/node_executor/host_cpu/kernel_factory.cc b/ge/hybrid/node_executor/host_cpu/kernel_factory.cc index aabae999..83899fa6 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel_factory.cc +++ b/ge/hybrid/node_executor/host_cpu/kernel_factory.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/ge/hybrid/node_executor/host_cpu/kernel_factory.h b/ge/hybrid/node_executor/host_cpu/kernel_factory.h index d03f12fc..4923756b 100644 --- a/ge/hybrid/node_executor/host_cpu/kernel_factory.h +++ b/ge/hybrid/node_executor/host_cpu/kernel_factory.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -76,10 +76,8 @@ class KernelRegistrar { KernelRegistrar &operator=(KernelRegistrar &&) = delete; }; -#define REGISTER_KERNEL_CREATOR(type, clazz) \ - std::shared_ptr Creator_##type##Kernel(const NodePtr &node) { \ - return MakeShared(node); \ - } \ +#define REGISTER_KERNEL_CREATOR(type, clazz) \ + std::shared_ptr Creator_##type##Kernel(const NodePtr &node) { return MakeShared(node); } \ KernelRegistrar g_##type##Kernel_creator(#type, Creator_##type##Kernel) } // namespace host_cpu } // namespace hybrid diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index fdfdfb51..8de15ea0 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -19,31 +19,27 @@ #include "graph/utils/node_utils.h" #include "init/gelib.h" #include "hybrid/model/hybrid_model.h" -#include "graph/debug/ge_attr_define.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" namespace ge { namespace hybrid { namespace { const char *const kEngineNameAiCore = "AIcoreEngine"; const char *const kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE"; -const char *const kEngineNameAiCpu = "aicpu_ascend_kernel"; -const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel"; +const char *const kEngineNameAiCpu = "aicpu_kernel"; const char *const kEngineNameHccl = "ops_kernel_info_hccl"; const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE"; const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; -} +} // namespace Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); - GE_CHK_STATUS_RET_NOLOG(task.UpdateTilingData(context)); // update op_desc before alloc ws + GE_CHK_STATUS_RET_NOLOG(task.UpdateTilingData(context)); // update op_desc before alloc ws GE_CHK_STATUS_RET_NOLOG(context.AllocateWorkspaces()); GE_CHK_STATUS_RET_NOLOG(task.UpdateArgs(context)); return SUCCESS; } Status NodeExecutor::ExecuteTask(NodeTask &task, TaskContext 
&context, const std::function &callback) const { - GE_CHK_STATUS_RET(task.ExecuteAsync(context, callback), - "Failed to execute task. node = %s", + GE_CHK_STATUS_RET(task.ExecuteAsync(context, callback), "Failed to execute task. node = %s", context.GetNodeItem().NodeName().c_str()); return SUCCESS; } @@ -65,12 +61,23 @@ Status NodeExecutorManager::EnsureInitialized() { engine_mapping_.emplace(kEngineNameAiCore, NodeExecutorManager::ExecutorType::AICORE); engine_mapping_.emplace(kEngineNameGeLocal, NodeExecutorManager::ExecutorType::GE_LOCAL); - engine_mapping_.emplace(kEngineNameAiCpuTf, NodeExecutorManager::ExecutorType::AICPU_TF); engine_mapping_.emplace(kEngineNameAiCpu, NodeExecutorManager::ExecutorType::AICPU_TF); engine_mapping_.emplace(kEngineNameHccl, NodeExecutorManager::ExecutorType::HCCL); engine_mapping_.emplace(kEngineNameRts, NodeExecutorManager::ExecutorType::RTS); engine_mapping_.emplace(kEngineNameHostCpu, NodeExecutorManager::ExecutorType::HOST_CPU); + std::shared_ptr instance_ptr = GELib::GetInstance(); + if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) { + GELOGW("GELib not initialized"); + return FAILED; + } + + OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj(); + for (auto &it : ops_kernel_manager.GetAllOpsKernelInfoStores()) { + GELOGD("add kernel store: %s", it.first.c_str()); + kernel_stores_.emplace(it.first, it.second.get()); + } + initialized_ = true; GELOGI("Initializing NodeExecutors successfully"); return SUCCESS; @@ -79,13 +86,8 @@ Status NodeExecutorManager::EnsureInitialized() { NodeExecutorManager::ExecutorType NodeExecutorManager::ResolveExecutorType(Node &node) const { auto op_type = node.GetType(); if (op_type == PARTITIONEDCALL) { - const auto &subgraph = NodeUtils::GetSubgraph(node, 0); - if (subgraph != nullptr && subgraph->GetGraphUnknownFlag()) { - GELOGD("node %s was marked as unknown shape in node_executor.", node.GetName().c_str()); - return ExecutorType::DYNAMIC_SUBGRAPH; - } bool 
is_dynamic = false; - (void) NodeUtils::GetNodeUnknownShapeStatus(node, is_dynamic); + (void)NodeUtils::GetNodeUnknownShapeStatus(node, is_dynamic); if (is_dynamic) { return ExecutorType::DYNAMIC_SUBGRAPH; } @@ -101,7 +103,7 @@ NodeExecutorManager::ExecutorType NodeExecutorManager::ResolveExecutorType(Node return ExecutorType::CONTROL_OP; } - auto op_desc = node.GetOpDesc(); // checked before + auto op_desc = node.GetOpDesc(); // checked before const auto &lib_name = op_desc->GetOpKernelLibName(); auto it = engine_mapping_.find(lib_name); if (it == engine_mapping_.end()) { @@ -142,6 +144,13 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { TensorUtils::SetSize(*(output_tensor.get()), 0); } + auto it = kernel_stores_.find(op_desc->GetOpKernelLibName()); + if (it == kernel_stores_.end()) { + GELOGE(INTERNAL_ERROR, "Failed to get OpKernelStore. libName = %s, node = %s", + op_desc->GetOpKernelLibName().c_str(), op_desc->GetName().c_str()); + return INTERNAL_ERROR; + } + // calc hccl output size independent, hccl ops kernel manager should GetSize for // input which is the output size of input-op, but sometimes return error // when multi-thread @@ -154,8 +163,8 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { int64_t output_mem_size = 0; GE_CHK_STATUS_RET(TensorUtils::CalcTensorMemSize(output_shape, format, data_type, output_mem_size), "hccl calc tensor mem size failed."); - output_mem_size = ((output_mem_size + - MEMORY_ALIGN_RATIO * MEMORY_ALIGN_SIZE - 1) / MEMORY_ALIGN_SIZE) * MEMORY_ALIGN_SIZE; + output_mem_size = + ((output_mem_size + MEMORY_ALIGN_RATIO * MEMORY_ALIGN_SIZE - 1) / MEMORY_ALIGN_SIZE) * MEMORY_ALIGN_SIZE; TensorUtils::SetSize(output_tensor, output_mem_size); GE_CHK_STATUS_RET(op_desc->UpdateOutputDesc(static_cast(i), output_tensor), "hccl update output size failed."); @@ -164,8 +173,7 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { } return SUCCESS; } - - return 
OpsKernelBuilderManager::Instance().CalcOpRunningParam(node); + return it->second->CalcOpRunningParam(node); } Status NodeExecutorManager::InitializeExecutors() { diff --git a/ge/hybrid/node_executor/node_executor.h b/ge/hybrid/node_executor/node_executor.h index c2d32250..79726b09 100644 --- a/ge/hybrid/node_executor/node_executor.h +++ b/ge/hybrid/node_executor/node_executor.h @@ -18,7 +18,7 @@ #define GE_HYBRID_NODE_EXECUTOR_NODE_EXECUTOR_H_ #include "external/ge/ge_api_error_codes.h" -#include "common/opskernel/ops_kernel_builder.h" +#include "common/opskernel/ops_kernel_info_store.h" #include "graph/node.h" #include "task_context.h" @@ -38,26 +38,20 @@ class NodeTask { * @param context instance of TaskContext * @return SUCCESS on success, error code otherwise */ - virtual Status UpdateTilingData(TaskContext &context) { - return SUCCESS; - } + virtual Status UpdateTilingData(TaskContext &context) { return SUCCESS; } /** * Init * @param context instance of TaskContext * @return SUCCESS on success, error code otherwise */ - virtual Status Init(TaskContext &context) { - return SUCCESS; - } + virtual Status Init(TaskContext &context) { return SUCCESS; } /** * Whether this task supports dynamic shape * @return true if this task supports dynamic shape, false otherwise */ - virtual bool IsSupportDynamicShape() { - return true; - } + virtual bool IsSupportDynamicShape() { return true; } /** * Update args for execution @@ -85,17 +79,13 @@ class NodeExecutor { * Initialize node executor * @return SUCCESS on success, error code otherwise */ - virtual Status Initialize() { - return SUCCESS; - } + virtual Status Initialize() { return SUCCESS; } /** * Finalize node executor * @return SUCCESS on success, error code otherwise */ - virtual Status Finalize() { - return SUCCESS; - } + virtual Status Finalize() { return SUCCESS; } /** * Load task in load stage @@ -104,9 +94,7 @@ class NodeExecutor { * @param task generated node task * @return SUCCESS on success, error code 
otherwise */ - virtual Status LoadTask(const HybridModel &model, - const NodePtr &node, - std::shared_ptr &task) const; + virtual Status LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const; /** * Compile task in run stage @@ -115,9 +103,7 @@ class NodeExecutor { * @param task generated node task * @return SUCCESS on success, error code otherwise */ - virtual Status CompileTask(const HybridModel &model, - const NodePtr &node, - std::shared_ptr &task) const; + virtual Status CompileTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const; /** * Preparation actions before execution @@ -200,6 +186,7 @@ class NodeExecutorManager { private: std::map> executors_; std::map> builders_; + std::map kernel_stores_; std::map engine_mapping_; std::mutex mu_; bool initialized_ = false; @@ -209,24 +196,21 @@ class NodeExecutorManager { class NodeExecutorRegistrar { public: - NodeExecutorRegistrar(NodeExecutorManager::ExecutorType executor_type, - NodeExecutor *(*builder)()); + NodeExecutorRegistrar(NodeExecutorManager::ExecutorType executor_type, NodeExecutor *(*builder)()); ~NodeExecutorRegistrar() = default; }; } // namespace hybrid } // namespace ge #define REGISTER_NODE_EXECUTOR_BUILDER(engine_type, executor) \ - REGISTER_NODE_EXECUTOR_BUILDER_UNIQ_HELPER(__COUNTER__, engine_type, executor) + REGISTER_NODE_EXECUTOR_BUILDER_UNIQ_HELPER(__COUNTER__, engine_type, executor) #define REGISTER_NODE_EXECUTOR_BUILDER_UNIQ_HELPER(ctr, engine_type, executor) \ - REGISTER_NODE_EXECUTOR_BUILDER_UNIQ(ctr, engine_type, executor) + REGISTER_NODE_EXECUTOR_BUILDER_UNIQ(ctr, engine_type, executor) -#define REGISTER_NODE_EXECUTOR_BUILDER_UNIQ(ctr, engine_type, executor) \ - static ::ge::hybrid::NodeExecutorRegistrar register_##ctr \ - __attribute__((unused)) = \ - ::ge::hybrid::NodeExecutorRegistrar(engine_type, []()->::ge::hybrid::NodeExecutor* { \ - return new (std::nothrow) executor(); \ - }) +#define 
REGISTER_NODE_EXECUTOR_BUILDER_UNIQ(ctr, engine_type, executor) \ + static ::ge::hybrid::NodeExecutorRegistrar register_##ctr __attribute__((unused)) = \ + ::ge::hybrid::NodeExecutorRegistrar( \ + engine_type, []() -> ::ge::hybrid::NodeExecutor * { return new (std::nothrow) executor(); }) -#endif // GE_HYBRID_NODE_EXECUTOR_NODE_EXECUTOR_H_ +#endif // GE_HYBRID_NODE_EXECUTOR_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc index f01cb21e..4c9cf7bf 100755 --- a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc +++ b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc @@ -21,9 +21,7 @@ namespace ge { namespace hybrid { REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::DYNAMIC_SUBGRAPH, PartitionedCallNodeExecutor); -PartitionedCallNodeTask::PartitionedCallNodeTask(const GraphItem *graph_item) - : graph_item_(graph_item) { -} +PartitionedCallNodeTask::PartitionedCallNodeTask(const GraphItem *graph_item) : graph_item_(graph_item) {} PartitionedCallNodeTask::~PartitionedCallNodeTask() { GELOGD("[%s] PartitionedCallNodeTask destroyed.", graph_item_->GetName().c_str()); @@ -31,21 +29,18 @@ PartitionedCallNodeTask::~PartitionedCallNodeTask() { Status PartitionedCallNodeTask::Init(TaskContext &context) { auto execution_context = const_cast(context.GetExecutionContext()); - subgraph_executor_.reset(new(std::nothrow)SubgraphExecutor(graph_item_, execution_context)); + subgraph_executor_.reset(new (std::nothrow) SubgraphExecutor(graph_item_, execution_context)); GE_CHECK_NOTNULL(subgraph_executor_); return SUCCESS; } Status PartitionedCallNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { - GE_CHK_STATUS_RET(subgraph_executor_->ExecuteAsync(context), - "[%s] Failed to set inputs", graph_item_->GetName().c_str()); + 
GE_CHK_STATUS_RET(subgraph_executor_->ExecuteAsync(context), "[%s] Failed to set inputs", + graph_item_->GetName().c_str()); - auto callback = [=]() { - Callback(done_callback); - }; + auto callback = [=]() { Callback(done_callback); }; - GE_CHK_STATUS_RET(context.RegisterCallback(callback), - "[%s] Failed to register callback", + GE_CHK_STATUS_RET(context.RegisterCallback(callback), "[%s] Failed to register callback", graph_item_->GetName().c_str()); GELOGD("[%s] Done executing subgraph successfully.", graph_item_->GetName().c_str()); return SUCCESS; @@ -63,19 +58,16 @@ Status PartitionedCallNodeTask::Callback(const std::function &done_callb return SUCCESS; } -Status PartitionedCallNodeTask::UpdateArgs(TaskContext &context) { - return SUCCESS; -} +Status PartitionedCallNodeTask::UpdateArgs(TaskContext &context) { return SUCCESS; } -Status PartitionedCallNodeExecutor::LoadTask(const ge::hybrid::HybridModel &model, - const ge::NodePtr &node, +Status PartitionedCallNodeExecutor::LoadTask(const ge::hybrid::HybridModel &model, const ge::NodePtr &node, std::shared_ptr &task) const { GELOGD("Load dynamic partitioned call: [%s]", node->GetName().c_str()); auto subgraph = NodeUtils::GetSubgraph(*node, 0); GE_CHECK_NOTNULL(subgraph); auto partitioned_call = model.GetSubgraphItem(subgraph); GE_CHECK_NOTNULL(partitioned_call); - task.reset(new(std::nothrow) PartitionedCallNodeTask(partitioned_call)); + task.reset(new (std::nothrow) PartitionedCallNodeTask(partitioned_call)); GE_CHECK_NOTNULL(task); GELOGD("Done loading dynamic partitioned call: [%s]", node->GetName().c_str()); return SUCCESS; diff --git a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h index 9ea544a1..fd87d6c1 100644 --- a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h +++ b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h @@ -51,4 +51,4 @@ class 
PartitionedCallNodeExecutor : public NodeExecutor { }; } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_NODE_EXECUTOR_SUBGRAPH_SUBGRAPH_EXECUTOR_H_ +#endif // GE_HYBRID_NODE_EXECUTOR_SUBGRAPH_SUBGRAPH_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.cc b/ge/hybrid/node_executor/rts/rts_node_executor.cc index 18b875fd..51241e55 100644 --- a/ge/hybrid/node_executor/rts/rts_node_executor.cc +++ b/ge/hybrid/node_executor/rts/rts_node_executor.cc @@ -36,12 +36,8 @@ Status IdentityNodeTask::DoCopyTensor(TaskContext &context, int index) { auto output = context.MutableOutput(index); GE_CHECK_NOTNULL(input); GE_CHECK_NOTNULL(output); - GE_CHK_RT_RET(rtMemcpyAsync(output->MutableData(), - output->GetSize(), - input->GetData(), - copy_size, - RT_MEMCPY_DEVICE_TO_DEVICE, - context.GetStream())); + GE_CHK_RT_RET(rtMemcpyAsync(output->MutableData(), output->GetSize(), input->GetData(), copy_size, + RT_MEMCPY_DEVICE_TO_DEVICE, context.GetStream())); } else { GELOGW("[%s] index = %d, copy size = 0", context.GetNodeName(), index); } @@ -61,9 +57,7 @@ Status IdentityNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { GELOGD("[%s] Start to execute.", context.GetNodeName()); diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.h b/ge/hybrid/node_executor/rts/rts_node_executor.h index 2576b73b..9da28966 100644 --- a/ge/hybrid/node_executor/rts/rts_node_executor.h +++ b/ge/hybrid/node_executor/rts/rts_node_executor.h @@ -42,4 +42,4 @@ class RtsNodeExecutor : public NodeExecutor { } // namespace hybrid } // namespace ge -#endif // GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_ +#endif // GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_ diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 29fc777b..e49a2b43 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -24,11 +24,9 @@ namespace ge { namespace hybrid { 
-TaskContext::TaskContext(GraphExecutionContext *execution_context, - const NodeItem *node_item, +TaskContext::TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item, SubgraphContext *subgraph_context) - : node_item_(node_item), execution_context_(execution_context), subgraph_context_(subgraph_context) { -} + : node_item_(node_item), execution_context_(execution_context), subgraph_context_(subgraph_context) {} TaskContext::~TaskContext() { GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str()); @@ -45,25 +43,19 @@ TaskContext::~TaskContext() { } } -std::unique_ptr TaskContext::Create(const NodeItem &node_item, - GraphExecutionContext *execution_context, +std::unique_ptr TaskContext::Create(const NodeItem &node_item, GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) { GELOGI("[%s] To create task context, input start = %d, num_inputs = %d, output start = %d, num_outputs = %d.", - node_item.NodeName().c_str(), - node_item.input_start, - node_item.num_inputs, - node_item.output_start, + node_item.NodeName().c_str(), node_item.input_start, node_item.num_inputs, node_item.output_start, node_item.num_outputs); if (node_item.input_start < 0 || node_item.output_start < 0) { - GELOGE(INTERNAL_ERROR, - "NodeItem not property initialized. input_start = %d, output_start = %d", - node_item.input_start, - node_item.output_start); + GELOGE(INTERNAL_ERROR, "NodeItem not property initialized. 
input_start = %d, output_start = %d", + node_item.input_start, node_item.output_start); return nullptr; } - auto task_context = std::unique_ptr( - new(std::nothrow)TaskContext(execution_context, &node_item, subgraph_context)); + auto task_context = + std::unique_ptr(new (std::nothrow) TaskContext(execution_context, &node_item, subgraph_context)); if (task_context == nullptr) { GELOGE(MEMALLOC_FAILED, "[%s] Failed to create instance of TaskContext.", node_item.NodeName().c_str()); return nullptr; @@ -76,13 +68,9 @@ std::unique_ptr TaskContext::Create(const NodeItem &node_item, return task_context; } -int TaskContext::NumInputs() const { - return node_item_->num_inputs; -} +int TaskContext::NumInputs() const { return node_item_->num_inputs; } -int TaskContext::NumOutputs() const { - return node_item_->num_outputs; -} +int TaskContext::NumOutputs() const { return node_item_->num_outputs; } TensorValue *TaskContext::MutableInput(int index) { if (index < 0 || index >= node_item_->num_inputs) { @@ -111,9 +99,7 @@ TensorValue *TaskContext::MutableOutput(int index) { return outputs_start_ + index; } -std::size_t TaskContext::NumWorkspaces() const { - return workspaces_.size(); -} +std::size_t TaskContext::NumWorkspaces() const { return workspaces_.size(); } void *TaskContext::MutableWorkspace(int index) { if (index < 0 || static_cast(index) >= workspaces_.size()) { @@ -189,13 +175,9 @@ Status TaskContext::AllocateTensor(const GeTensorDesc &tensor_desc, TensorValue return SUCCESS; } -Status TaskContext::AllocateOutput(int index, - const GeTensorDesc &tensor_desc, - TensorValue **tensor, +Status TaskContext::AllocateOutput(int index, const GeTensorDesc &tensor_desc, TensorValue **tensor, AllocationAttr *attr) { - GELOGI("To allocate output for node: %s. index = %d, tensor desc = %s", - node_item_->NodeName().c_str(), - index, + GELOGI("To allocate output for node: %s. 
index = %d, tensor desc = %s", node_item_->NodeName().c_str(), index, TensorDesc2String(tensor_desc).c_str()); if (index < 0 || index >= node_item_->num_outputs) { @@ -211,11 +193,8 @@ Status TaskContext::AllocateOutput(int index, auto it = node_item_->ref_outputs.find(index); if (it != node_item_->ref_outputs.end()) { auto &ref_node = it->second; - GELOGD("source node of %s:%d = %s, op_type = %s", - node_item_->NodeName().c_str(), - index, - ref_node->GetName().c_str(), - ref_node->GetType().c_str()); + GELOGD("source node of %s:%d = %s, op_type = %s", node_item_->NodeName().c_str(), index, + ref_node->GetName().c_str(), ref_node->GetType().c_str()); TensorValue *ref_tensor = execution_context_->model->GetVariable(ref_node->GetName()); GE_CHECK_NOTNULL(ref_tensor); @@ -227,9 +206,7 @@ Status TaskContext::AllocateOutput(int index, outputs_start_[index] = inputs_start_[reuse_input->second]; } else { GE_CHK_STATUS_RET_NOLOG(AllocateTensor(tensor_desc, outputs_start_[index], attr)); - GELOGD("Allocating output successfully. node: %s. index = %d, size = %zu", - node_item_->NodeName().c_str(), - index, + GELOGD("Allocating output successfully. node: %s. 
index = %d, size = %zu", node_item_->NodeName().c_str(), index, outputs_start_[index].GetSize()); } } @@ -250,7 +227,7 @@ Status TaskContext::AllocateOutputs(AllocationAttr *attr) { const auto &output_desc = node_item_->op_desc->MutableOutputDesc(i); GE_CHECK_NOTNULL(output_desc); uint32_t mem_type = 0; - (void)AttrUtils::GetInt(output_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type); + (void)AttrUtils::GetInt(node_item_->op_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type); if (attr == nullptr) { auto tmp_attr = AllocationAttr(0, nullptr, static_cast(mem_type)); GE_CHK_STATUS_RET_NOLOG(AllocateOutput(i, *output_desc, nullptr, &tmp_attr)); @@ -274,9 +251,7 @@ Status TaskContext::AllocateTensor(size_t size, TensorValue &tensor, AllocationA return SUCCESS; } -const NodeItem &TaskContext::GetNodeItem() const { - return *node_item_; -} +const NodeItem &TaskContext::GetNodeItem() const { return *node_item_; } Status TaskContext::SetOutput(int index, const TensorValue &tensor) { if (index < 0 || index >= node_item_->num_outputs) { @@ -284,25 +259,16 @@ Status TaskContext::SetOutput(int index, const TensorValue &tensor) { return PARAM_INVALID; } - GELOGD("Set %s:%d with tensor: %s", - node_item_->NodeName().c_str(), - index, - tensor.DebugString().c_str()); + GELOGD("Set %s:%d with tensor: %s", node_item_->NodeName().c_str(), index, tensor.DebugString().c_str()); outputs_start_[index] = tensor; return SUCCESS; } -rtStream_t TaskContext::GetStream() { - return execution_context_->stream; -} +rtStream_t TaskContext::GetStream() { return execution_context_->stream; } -int64_t TaskContext::GetSessionId() const { - return execution_context_->session_id; -} +int64_t TaskContext::GetSessionId() const { return execution_context_->session_id; } -Status TaskContext::GetStatus() const { - return status_; -} +Status TaskContext::GetStatus() const { return status_; } void TaskContext::SetStatus(Status status) { status_ = status; @@ -344,27 +310,20 @@ Status TaskContext::PropagateOutputs() { auto 
dst_node_item = dst_input_index_and_node.second; auto input_offset = dst_node_item->input_start + dst_input_idx; GELOGI( - "Propagate output of node %s, output index = %d, dst node = %s, " - "dst_input_index = %d, dst_input_offset = %d.", - node_item_->NodeName().c_str(), - i, - dst_node_item->NodeName().c_str(), - dst_input_idx, - input_offset); + "Propagate output of node %s, output index = %d, dst node = %s, " + "dst_input_index = %d, dst_input_offset = %d.", + node_item_->NodeName().c_str(), i, dst_node_item->NodeName().c_str(), dst_input_idx, input_offset); if (subgraph_context_->all_inputs_.size() <= static_cast(input_offset)) { - GELOGE(INTERNAL_ERROR, - "[%s] input index out of range. index = %d, total input num = %zu", - GetNodeName(), - input_offset, - subgraph_context_->all_inputs_.size()); + GELOGE(INTERNAL_ERROR, "[%s] input index out of range. index = %d, total input num = %zu", GetNodeName(), + input_offset, subgraph_context_->all_inputs_.size()); return INTERNAL_ERROR; } subgraph_context_->all_inputs_[input_offset] = *tensor; if (execution_context_->trace_enabled) { - subgraph_context_->all_inputs_[input_offset].SetName( - node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx)); + subgraph_context_->all_inputs_[input_offset].SetName(node_item_->NodeName() + "_in_" + + std::to_string(dst_input_idx)); } } } @@ -372,13 +331,9 @@ Status TaskContext::PropagateOutputs() { return SUCCESS; } -const void *TaskContext::GetVarBaseAddr() { - return execution_context_->model->GetVarMemBase(); -} +const void *TaskContext::GetVarBaseAddr() { return execution_context_->model->GetVarMemBase(); } -const char *TaskContext::GetNodeName() const { - return node_item_->NodeName().c_str(); -} +const char *TaskContext::GetNodeName() const { return node_item_->NodeName().c_str(); } void TaskContext::ReleaseInput(int index) { auto input_tensor = MutableInput(index); @@ -404,38 +359,24 @@ GeTensorDescPtr TaskContext::MutableOutputDesc(int index) { return 
node_item_->op_desc->MutableOutputDesc(static_cast(index)); } -bool TaskContext::IsForceInferShape() const { - return force_infer_shape_; -} +bool TaskContext::IsForceInferShape() const { return force_infer_shape_; } -void TaskContext::SetForceInferShape(bool force_infer_shape) { - force_infer_shape_ = force_infer_shape; -} +void TaskContext::SetForceInferShape(bool force_infer_shape) { force_infer_shape_ = force_infer_shape; } -void TaskContext::NodeDone() { - subgraph_context_->NodeDone(node_item_->node); -} +void TaskContext::NodeDone() { subgraph_context_->NodeDone(node_item_->node); } void TaskContext::OnError(Status error) { subgraph_context_->OnError(error); execution_context_->SetErrorCode(error); } -bool TaskContext::IsTraceEnabled() const { - return execution_context_->trace_enabled; -} +bool TaskContext::IsTraceEnabled() const { return execution_context_->trace_enabled; } -TensorValue *TaskContext::GetVariable(const std::string &name) { - return execution_context_->model->GetVariable(name); -} +TensorValue *TaskContext::GetVariable(const std::string &name) { return execution_context_->model->GetVariable(name); } -uint64_t TaskContext::GetIterationNumber() const { - return iteration_; -} +uint64_t TaskContext::GetIterationNumber() const { return iteration_; } -bool TaskContext::IsDumpEnabled() const { - return execution_context_->dump_enabled; -} +bool TaskContext::IsDumpEnabled() const { return execution_context_->dump_enabled; } Status TaskContext::TryExecuteCallback(const function &callback_fun) const { if (!callback_fun) { @@ -449,8 +390,6 @@ Status TaskContext::TryExecuteCallback(const function &callback_fun) con callback_fun(); return SUCCESS; } -const DumpProperties &TaskContext::GetDumpProperties() const { - return execution_context_->dump_properties; -} +const DumpProperties &TaskContext::GetDumpProperties() const { return execution_context_->dump_properties; } } // namespace hybrid } // namespace ge diff --git 
a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index d52ab0be..ed45116d 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -34,8 +34,7 @@ class SubgraphContext; class TaskContext { public: - static std::unique_ptr Create(const NodeItem &node_item, - GraphExecutionContext *execution_context, + static std::unique_ptr Create(const NodeItem &node_item, GraphExecutionContext *execution_context, SubgraphContext *subgraph_context); ~TaskContext(); @@ -63,9 +62,7 @@ class TaskContext { void OnError(Status error); Status SetOutput(int index, const TensorValue &tensor); - Status AllocateOutput(int index, - const GeTensorDesc &tensor_desc, - TensorValue **tensor, + Status AllocateOutput(int index, const GeTensorDesc &tensor_desc, TensorValue **tensor, AllocationAttr *attr = nullptr); Status AllocateOutputs(AllocationAttr *attr = nullptr); Status AllocateWorkspaces(); @@ -75,11 +72,9 @@ class TaskContext { bool IsDumpEnabled() const; - const DumpProperties& GetDumpProperties() const; + const DumpProperties &GetDumpProperties() const; - const GraphExecutionContext *GetExecutionContext() { - return execution_context_; - } + const GraphExecutionContext *GetExecutionContext() { return execution_context_; } Status AllocateTensor(size_t size, TensorValue &tensor, AllocationAttr *attr = nullptr); void *MutableWorkspace(int index); @@ -99,9 +94,7 @@ class TaskContext { void *handle_ = nullptr; private: - TaskContext(GraphExecutionContext *execution_context, - const NodeItem *node_item, - SubgraphContext *subgraph_context); + TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item, SubgraphContext *subgraph_context); static string TensorDesc2String(const GeTensorDesc &desc); Status AllocateTensor(const GeTensorDesc &tensor_desc, TensorValue &tensor, AllocationAttr *attr); @@ -118,4 +111,4 @@ class TaskContext { }; } // namespace hybrid } // namespace ge -#endif // 
GE_HYBRID_KERNEL_TASK_CONTEXT_H_ +#endif // GE_HYBRID_KERNEL_TASK_CONTEXT_H_ diff --git a/ge/inc/graph_pass.h b/ge/inc/graph_pass.h index 642b94ea..d4abdd2f 100644 --- a/ge/inc/graph_pass.h +++ b/ge/inc/graph_pass.h @@ -22,7 +22,6 @@ #include "common/op/attr_value_util.h" #include "common/op/ge_op_utils.h" -#include "common/types.h" #include "framework/common/debug/ge_log.h" #include "graph/compute_graph.h" #include "graph/utils/attr_utils.h" @@ -79,8 +78,8 @@ class GraphPass : public Pass { return true; } else if (node->GetOpDesc()->GetType() == FRAMEWORKOP) { string type; - GE_CHK_BOOL_EXEC(ge::AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type), - return false, "Get original_type for op %s fail!", node->GetName().c_str()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, type), return false, + "Get original_type for op %s fail!", node->GetName().c_str()); GE_IF_BOOL_EXEC(type == CONSTANT, GELOGI("Is const op"); return true); return false; } else { diff --git a/ge/inc/kernel.h b/ge/inc/kernel.h index 84af5234..9f7e1308 100644 --- a/ge/inc/kernel.h +++ b/ge/inc/kernel.h @@ -24,9 +24,9 @@ #include "graph/graph.h" #include "graph/op_desc.h" -using std::vector; -using std::unique_ptr; using std::shared_ptr; +using std::unique_ptr; +using std::vector; namespace ge { /// diff --git a/ge/init/gelib.cc b/ge/init/gelib.cc index 85a742b2..e00268ea 100755 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,11 +39,9 @@ #include "graph/ge_global_options.h" #include "graph/load/new_model_manager/model_manager.h" #include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/host_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "omm/csa_interact.h" #include "runtime/kernel.h" -#include "opskernel_manager/ops_kernel_builder_manager.h" using Json = nlohmann::json; @@ -61,8 +59,6 @@ static std::shared_ptr instancePtr_ = nullptr; // Initial each module of GE, if one failed, release all Status GELib::Initialize(const map &options) { - - GELOGI("initial start"); GEEVENT("[GEPERFTRACE] GE Init Start"); // Multiple initializations are not allowed @@ -78,7 +74,6 @@ Status GELib::Initialize(const map &options) { GELOGE(ret, "GeLib initial failed."); return ret; } - instancePtr_->SetDefaultPrecisionMode(new_options); GetMutableGlobalOptions().insert(new_options.begin(), new_options.end()); GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); GE_TIMESTAMP_START(Init); @@ -129,16 +124,6 @@ Status GELib::InnerInitialize(const map &options) { return initOpsStatus; } - GELOGI("opsBuilderManager initial."); - GE_TIMESTAMP_START(OpsKernelBuilderManagerInitialize); - Status initOpsBuilderStatus = OpsKernelBuilderManager::Instance().Initialize(options); - GE_TIMESTAMP_END(OpsKernelBuilderManagerInitialize, "InnerInitialize::OpsKernelBuilderManager"); - if (initOpsBuilderStatus != SUCCESS) { - GELOGE(initOpsBuilderStatus); - RollbackInit(); - return initOpsBuilderStatus; - } - GELOGI("sessionManager initial."); GE_TIMESTAMP_START(SessionManagerInitialize); Status initSmStatus = sessionManager_.Initialize(options); @@ -210,26 +195,6 @@ void GELib::InitProfiling(Options &options) { } } -void GELib::SetDefaultPrecisionMode(map &new_options) { - auto iter = new_options.find(PRECISION_MODE); - if (iter != new_options.end()) { - GELOGI("Find precision_mode in options, value is %s", iter->second.c_str()); - return; - } - iter = 
new_options.find(OPTION_GRAPH_RUN_MODE); - if (iter != new_options.end()) { - if (GraphRunMode(std::strtol(iter->second.c_str(), nullptr, kDecimal)) >= TRAIN) { - // only train mode need to be set allow_fp32_to_fp16. - GELOGI("This is train mode, precision_mode need to be set allow_fp32_to_fp16"); - new_options.insert(std::make_pair(PRECISION_MODE, "allow_fp32_to_fp16")); - return; - } - } - GELOGI("This is not train mode, precision_mode need to be set force_fp16"); - new_options.insert(std::make_pair(PRECISION_MODE, "force_fp16")); - return; -} - Status GELib::SetRTSocVersion(const map &options, map &new_options) { GELOGI("Start to set SOC_VERSION"); new_options.insert(options.begin(), options.end()); @@ -316,14 +281,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt std::vector mem_type; mem_type.push_back(RT_MEMORY_HBM); - mem_type.push_back(RT_MEMORY_P2P_DDR); Status initMmStatus = MemManager::Instance().Initialize(mem_type); if (initMmStatus != SUCCESS) { GELOGE(initMmStatus, "[Initialize] MemoryAllocatorManager initialize failed."); return initMmStatus; } - GE_CHK_STATUS_RET(HostMemManager::Instance().Initialize()); // Update CSA file CsaInteract::GetInstance().Init(options.device_id, GetContext().TraceId()); Status ret = CsaInteract::GetInstance().WriteJobState(JOBSTATE_RUNNING, JOBSUBSTATE_ENV_INIT); @@ -371,13 +334,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithout std::vector mem_type; mem_type.push_back(RT_MEMORY_HBM); - mem_type.push_back(RT_MEMORY_P2P_DDR); Status initMmStatus = MemManager::Instance().Initialize(mem_type); if (initMmStatus != SUCCESS) { GELOGE(initMmStatus, "[Initialize] MemoryAllocatorManager initialize failed."); return initMmStatus; } - GE_CHK_STATUS_RET(HostMemManager::Instance().Initialize()); static bool is_inited = false; if (is_inited) { @@ -418,12 +379,6 @@ Status GELib::Finalize() { final_state = mid_state; } - GELOGI("opsBuilderManager finalization."); - 
mid_state = OpsKernelBuilderManager::Instance().Finalize(); - if (mid_state != SUCCESS) { - GELOGW("opsBuilderManager finalize failed"); - final_state = mid_state; - } GELOGI("opsManager finalization."); mid_state = opsManager_.Finalize(); if (mid_state != SUCCESS) { @@ -437,9 +392,6 @@ Status GELib::Finalize() { GELOGI("MemManager finalization."); MemManager::Instance().Finalize(); - GELOGI("HostMemManager finalization."); - HostMemManager::Instance().Finalize(); - GELOGI("HostCpuEngine finalization."); HostCpuEngine::GetInstance().Finalize(); @@ -501,7 +453,6 @@ void GELib::RollbackInit() { (void)sessionManager_.Finalize(); } MemManager::Instance().Finalize(); - HostMemManager::Instance().Finalize(); VarManagerPool::Instance().Destory(); } } // namespace ge diff --git a/ge/init/gelib.h b/ge/init/gelib.h index e52b8dd6..b5621dfd 100644 --- a/ge/init/gelib.h +++ b/ge/init/gelib.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,8 +26,8 @@ #include "common/ge_inner_error_codes.h" #include "common/ge_types.h" -using std::string; using std::map; +using std::string; using std::vector; namespace ge { @@ -81,7 +81,6 @@ class GELib { Status InnerInitialize(const map &options); Status SystemInitialize(const map &options); Status SetRTSocVersion(const map &options, map &new_options); - void SetDefaultPrecisionMode(map &new_options); void RollbackInit(); void InitOptions(const map &options); void SetDumpModelOptions(const map &options); diff --git a/ge/ir_build/atc_ir_common.cc b/ge/ir_build/atc_ir_common.cc index f0450f0f..1f8abf37 100755 --- a/ge/ir_build/atc_ir_common.cc +++ b/ge/ir_build/atc_ir_common.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "atc_ir_common.h" #include "common/util/error_manager/error_manager.h" #include "external/ge/ge_api_types.h" @@ -33,7 +34,7 @@ const size_t kMaxNDDimNum = 4; const size_t kMinNDDimNum = 1; // datatype/formats from user to GE, Unified to util interface file later const std::map kOutputTypeSupportDatatype = { - {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; const char *const kOutputTypeSupport = "only support FP32, FP16, UINT8"; const std::set kBufferOptimizeSupportOption = {"l1_optimize", "l2_optimize", "off_optimize", "l1_and_l2_optimize"}; @@ -67,18 +68,21 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map> int32_t size = 0; for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) { vector shape = iter->second; - if (shape.empty()) { + if (shape.size() < 1) { ErrorManager::GetInstance().ATCReportErrMessage("E10012"); GELOGE(ge::PARAM_INVALID, "--input_shape's shape size can not be less than 1 when set --dynamic_batch_size."); return false; } - - if (std::count(shape.begin(), shape.end(), kDynamicInputDim) == 0) { - continue; - } - - bool ret = multibatch::CheckDynamicBatchShape(shape, iter->first); - if (ret) { + if (shape[0] == kDynamicInputDim) { + for (size_t i = 1; i < shape.size(); ++i) { + if (shape[i] < 1) { + ErrorManager::GetInstance().ATCReportErrMessage("E10018", {"index", "shape"}, + {std::to_string(i), std::to_string(shape[i])}); + GELOGE(ge::PARAM_INVALID, "Only batch N can be -1 when set --dynamic_batch_size, current shape[%zu] is %ld", + i, shape[i]); + return false; + } + } size++; } } @@ -91,8 +95,8 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map> for (char c : dynamic_batch_size) { if (!isdigit(c) && (c != ',') && (c != ' ')) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E10033", {"value", "reason"}, {dynamic_batch_size, kDynamicBatchSizeError}); + 
ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"value", "reason"}, + {dynamic_batch_size, kDynamicBatchSizeError}); GELOGE(ge::PARAM_INVALID, "Input parameter[--dynamic_batch_size]'s value[%s] is invalid. reason: %s", dynamic_batch_size.c_str(), kDynamicBatchSizeError); return false; @@ -107,7 +111,7 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map> bool CheckDynamicImagesizeInputShapeValid(unordered_map> shape_map, const std::string input_format, std::string &dynamic_image_size) { int32_t size = 0; - for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) { + for (unordered_map>::iterator iter = shape_map.begin(); iter != shape_map.end(); ++iter) { vector shape = iter->second; // only support four dim if (shape.size() != DIM_DEFAULT_SIZE) { @@ -120,14 +124,28 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map> continue; } - if (std::count(shape.begin(), shape.end(), kDynamicInputDim) == 0) { - continue; + int64_t height = 0; + int64_t width = 0; + if (input_format == "NCHW") { + height = shape[NCHW_DIM_H]; + width = shape[NCHW_DIM_W]; } - auto ret = multibatch::CheckDynamicImageSizeShape(shape, iter->first, input_format); - if (ret) { + + if (input_format == "NHWC") { + height = shape[NHWC_DIM_H]; + width = shape[NHWC_DIM_W]; + } + + if (height == kDynamicInputDim && width == kDynamicInputDim && + std::count(shape.begin(), shape.end(), kDynamicInputDim) == kDynamicImageSizeNum) { size++; + } else if (std::count(shape.begin(), shape.end(), kDynamicInputDim) == 0) { + continue; } else { - return ret; + ErrorManager::GetInstance().ATCReportErrMessage("E10019"); + GELOGE(ge::PARAM_INVALID, + "--input_shape's shape is invalid, only height and width can be -1 when set --dynamic_image_size."); + return false; } } if (size == 0) { @@ -158,12 +176,12 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map> return true; } -bool CheckDynamicDimsInputShapeValid(const unordered_map> &shape_map, - string input_format, string 
&dynamic_dims) { +bool CheckDynamicDimsInputShapeValid(const unordered_map> &shape_map, string input_format, + string &dynamic_dims) { if (input_format != "ND") { ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--input_format", input_format.c_str(), "input_format must be ND when set dynamic_dims"}); + "E10001", {"parameter", "value", "reason"}, + {"--input_format", input_format.c_str(), "input_format must be ND when set dynamic_dims"}); GELOGE(ge::PARAM_INVALID, "input_format must be ND when set dynamic_dims."); return false; } @@ -173,8 +191,8 @@ bool CheckDynamicDimsInputShapeValid(const unordered_map auto &shapes = info_shapes.second; if (shapes.size() > kMaxNDDimNum || shapes.size() < kMinNDDimNum) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--input_shape's dim", std::to_string(shapes.size()), "Dim num must within [1, 4] when set dynamic_dims"}); + "E10001", {"parameter", "value", "reason"}, + {"--input_shape's dim", std::to_string(shapes.size()), "Dim num must within [1, 4] when set dynamic_dims"}); GELOGE(ge::PARAM_INVALID, "Dim num must within [%zu, %zu] when set dynamic_dims.", kMinNDDimNum, kMaxNDDimNum); return false; } @@ -182,8 +200,8 @@ bool CheckDynamicDimsInputShapeValid(const unordered_map } if (dynamic_dim == 0) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--input_shape's dynamic dim num", "0", "at least one dim should be -1 when set dynamic_dims"}); + "E10001", {"parameter", "value", "reason"}, + {"--input_shape's dynamic dim num", "0", "at least one dim should be -1 when set dynamic_dims"}); GELOGE(ge::PARAM_INVALID, "input_shape's shape is invalid, at least one dim should be -1 when set dynamic_dims."); return false; } @@ -200,8 +218,8 @@ bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims EraseEndSemicolon(dynamic_dims); if (dynamic_dims.empty()) { 
ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--dynamic_dims", dynamic_dims.c_str(), "dynamic_dims can not be empty"}); + "E10001", {"parameter", "value", "reason"}, + {"--dynamic_dims", dynamic_dims.c_str(), "dynamic_dims can not be empty"}); GELOGE(ge::PARAM_INVALID, "dynamic_dims can not be empty."); return false; } @@ -209,7 +227,7 @@ bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims vector split_set = StringUtils::Split(dynamic_dims, ';'); if (split_set.size() > kMaxDynamicDimNum) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10042", {"parameter", "reason"}, {"dynamic_dims", "dynamic_dims's num of parameter set can not exceed 100"}); + "E10042", {"parameter", "reason"}, {"dynamic_dims", "dynamic_dims's num of parameter set can not exceed 100"}); GELOGE(ge::PARAM_INVALID, "dynamic_dims's num of parameter set can not exceed %zu.", kMaxDynamicDimNum); return false; } @@ -217,18 +235,19 @@ bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims vector one_set = StringUtils::Split(split_dim, ','); if (one_set.size() != static_cast(dynamic_dim_num)) { ErrorManager::GetInstance().ATCReportErrMessage( - "E10042", {"parameter", "reason"}, - {"dynamic_dims", "Each gear setting needs to be consistent with the number of -1 in the inputshape"}); - GELOGE(ge::PARAM_INVALID, "Input parameter --dynamic_dims parse failed, " - "reason: Each gear setting needs to be consistent with the number of -1 in the inputshape."); + "E10042", {"parameter", "reason"}, + {"dynamic_dims", "Each gear setting needs to be consistent with the number of -1 in the inputshape"}); + GELOGE(ge::PARAM_INVALID, + "Input parameter --dynamic_dims parse failed, " + "reason: Each gear setting needs to be consistent with the number of -1 in the inputshape."); return false; } for (auto dim : one_set) { for (auto c : dim) { if (!isdigit(c)) { ErrorManager::GetInstance().ATCReportErrMessage( - 
"E10001", {"parameter", "value", "reason"}, - {"--dynamic_dims's parameter", dim.c_str(), "must be positive integer"}); + "E10001", {"parameter", "value", "reason"}, + {"--dynamic_dims's parameter", dim.c_str(), "must be positive integer"}); GELOGE(ge::PARAM_INVALID, "dynamic_dims's parameter must be positive integer."); return false; } @@ -360,9 +379,9 @@ bool ParseInputShape(const string &input_shape, unordered_map caffe_support_input_format = {"NCHW", "ND"}; @@ -37,15 +37,9 @@ static const char *const kTFFormatSupport = "only support NCHW, NHWC, ND, NCDHW, static const char *const kONNXFormatSupport = "only support NCHW, ND in ONNX model"; static std::map input_format_str_to_geformat = { - {"ND", domi::DOMI_TENSOR_ND}, - {"NCHW", domi::DOMI_TENSOR_NCHW}, - {"NHWC", domi::DOMI_TENSOR_NHWC}, - {"CHWN", domi::DOMI_TENSOR_CHWN}, - {"NC1HWC0", domi::DOMI_TENSOR_NC1HWC0}, - {"NHWC1C0", domi::DOMI_TENSOR_NHWC1C0}, - {"NCDHW", domi::DOMI_TENSOR_NCDHW}, - {"NDHWC", domi::DOMI_TENSOR_NDHWC} -}; + {"ND", domi::DOMI_TENSOR_ND}, {"NCHW", domi::DOMI_TENSOR_NCHW}, {"NHWC", domi::DOMI_TENSOR_NHWC}, + {"CHWN", domi::DOMI_TENSOR_CHWN}, {"NC1HWC0", domi::DOMI_TENSOR_NC1HWC0}, {"NHWC1C0", domi::DOMI_TENSOR_NHWC1C0}, + {"NCDHW", domi::DOMI_TENSOR_NCDHW}, {"NDHWC", domi::DOMI_TENSOR_NDHWC}}; static const std::string kEnableCompressWeightTrue = "1"; static const std::string kEnableCompressWeightFalse = "0"; @@ -77,5 +71,5 @@ Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream) Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode); void PrintOptionMap(std::map &options, std::string tips); void EraseEndSemicolon(std::string ¶m); -} +} // namespace ge #endif // FRAMEWORK_DOMI_ATC_IR_COMMON_H_ diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 0fd613ed..86b304c1 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei 
Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "external/ge/ge_ir_build.h" #include @@ -34,7 +35,6 @@ #include "init/gelib.h" #include "ir_build/atc_ir_common.h" #include "model/ge_model.h" -#include "graph/shape_refiner.h" using std::string; using namespace std; @@ -52,58 +52,54 @@ const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false"; static graphStatus CheckGlobalOptions(std::map &global_options) { // check param disable_reuse_memory - std::string disable_reuse_memory = global_options.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY) == - global_options.end() - ? IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT - : global_options[ge::ir_option::EXEC_DISABLE_REUSED_MEMORY]; + std::string disable_reuse_memory = + global_options.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY) == global_options.end() + ? IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT + : global_options[ge::ir_option::EXEC_DISABLE_REUSED_MEMORY]; GE_CHK_BOOL_EXEC(ge::CheckDisableReuseMemoryParamValid(disable_reuse_memory) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check disable_reuse_memory failed!"); + return ge::GRAPH_PARAM_INVALID, "check disable_reuse_memory failed!"); global_options[ge::ir_option::EXEC_DISABLE_REUSED_MEMORY] = disable_reuse_memory; // check buffer_optimize std::string buffer_optimize = global_options.find(ge::ir_option::BUFFER_OPTIMIZE) == global_options.end() - ? IR_OPTION_BUFFER_OPTIMIZE_DEFAULT - : global_options[ge::ir_option::BUFFER_OPTIMIZE]; - GE_CHK_BOOL_EXEC(ge::CheckBufferOptimizeParamValid(buffer_optimize) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check buffer optimize failed!"); + ? 
IR_OPTION_BUFFER_OPTIMIZE_DEFAULT + : global_options[ge::ir_option::BUFFER_OPTIMIZE]; + GE_CHK_BOOL_EXEC(ge::CheckBufferOptimizeParamValid(buffer_optimize) == ge::SUCCESS, return ge::GRAPH_PARAM_INVALID, + "check buffer optimize failed!"); global_options[ge::ir_option::BUFFER_OPTIMIZE] = buffer_optimize; // check enable_single_stream std::string enable_single_stream = global_options.find(ge::ir_option::ENABLE_SINGLE_STREAM) == global_options.end() - ? "" - : global_options[ge::ir_option::ENABLE_SINGLE_STREAM]; + ? "" + : global_options[ge::ir_option::ENABLE_SINGLE_STREAM]; GE_CHK_BOOL_EXEC(ge::CheckEnableSingleStreamParamValid(enable_single_stream) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check enable single stream failed!"); + return ge::GRAPH_PARAM_INVALID, "check enable single stream failed!"); // check compress_weight - std::string enable_compress_weight = global_options.find(ge::ir_option::ENABLE_COMPRESS_WEIGHT) == - global_options.end() - ? IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT - : global_options[ge::ir_option::ENABLE_COMPRESS_WEIGHT]; + std::string enable_compress_weight = + global_options.find(ge::ir_option::ENABLE_COMPRESS_WEIGHT) == global_options.end() + ? IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT + : global_options[ge::ir_option::ENABLE_COMPRESS_WEIGHT]; std::string compress_weight_conf = global_options.find(ge::ir_option::COMPRESS_WEIGHT_CONF) == global_options.end() - ? "" - : global_options[ge::ir_option::COMPRESS_WEIGHT_CONF]; + ? "" + : global_options[ge::ir_option::COMPRESS_WEIGHT_CONF]; GE_CHK_BOOL_EXEC(ge::CheckCompressWeightParamValid(enable_compress_weight, compress_weight_conf) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check compress weight failed!"); - global_options[ge::ir_option::ENABLE_COMPRESS_WEIGHT] = (enable_compress_weight == "true") ? 
- ge::kEnableCompressWeightTrue : - ge::kEnableCompressWeightFalse; + return ge::GRAPH_PARAM_INVALID, "check compress weight failed!"); + global_options[ge::ir_option::ENABLE_COMPRESS_WEIGHT] = + (enable_compress_weight == "true") ? ge::kEnableCompressWeightTrue : ge::kEnableCompressWeightFalse; // check optypelist_for_implmode and op_select_implmode - std::string optypelist_for_implmode = global_options.find(ge::ir_option::OPTYPELIST_FOR_IMPLMODE) == - global_options.end() - ? "" - : global_options[ge::ir_option::OPTYPELIST_FOR_IMPLMODE]; - std::string op_select_implmode = global_options.find(ge::ir_option::OP_SELECT_IMPL_MODE) == - global_options.end() - ? "" - : global_options[ge::ir_option::OP_SELECT_IMPL_MODE]; - GE_CHK_BOOL_EXEC( - ge::CheckImplmodeParamValid(optypelist_for_implmode, op_select_implmode) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!"); + std::string optypelist_for_implmode = + global_options.find(ge::ir_option::OPTYPELIST_FOR_IMPLMODE) == global_options.end() + ? "" + : global_options[ge::ir_option::OPTYPELIST_FOR_IMPLMODE]; + std::string op_select_implmode = global_options.find(ge::ir_option::OP_SELECT_IMPL_MODE) == global_options.end() + ? "" + : global_options[ge::ir_option::OP_SELECT_IMPL_MODE]; + GE_CHK_BOOL_EXEC(ge::CheckImplmodeParamValid(optypelist_for_implmode, op_select_implmode) == ge::SUCCESS, + return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!"); global_options[ge::ir_option::OP_SELECT_IMPL_MODE] = op_select_implmode; // set precision mode default value - std::string precision_mode = global_options.find(ge::ir_option::PRECISION_MODE) == - global_options.end() - ? "force_fp16" - : global_options[ge::ir_option::PRECISION_MODE]; + std::string precision_mode = global_options.find(ge::ir_option::PRECISION_MODE) == global_options.end() + ? 
"force_fp16" + : global_options[ge::ir_option::PRECISION_MODE]; global_options[ge::ir_option::PRECISION_MODE] = precision_mode; return GRAPH_SUCCESS; @@ -171,7 +167,7 @@ class Impl { graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, bool is_dynamic_input); void SetRtSocVersion(); - void UpdateThreadContext(); + public: ge::GeGenerator generator_; std::map options_; @@ -185,8 +181,7 @@ graphStatus Impl::CheckOptions(const std::map &options if (it == ge::ir_option::ir_builder_suppported_options.end()) { auto it_lx_fusion = ir_builder_supported_options_for_lx_fusion.find(ele.first); if (it_lx_fusion == ir_builder_supported_options_for_lx_fusion.end()) { - GELOGE(GRAPH_PARAM_INVALID, "input options include unsupported option(%s).Please check!", - ele.first.c_str()); + GELOGE(GRAPH_PARAM_INVALID, "input options include unsupported option(%s).Please check!", ele.first.c_str()); return GRAPH_PARAM_INVALID; } } @@ -225,13 +220,15 @@ graphStatus Impl::Init(const std::map &options) { return ret; } + GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); + GetThreadLocalContext().SetGraphOption(options_); std::string build_mode = (options_.find(BUILD_MODE) == options_.end() || options_[BUILD_MODE] == BUILD_MODE_NORMAL) - ? "" : options_[BUILD_MODE]; + ? "" + : options_[BUILD_MODE]; options_[BUILD_MODE] = build_mode; // set log level - std::string log = options_.find(ge::ir_option::LOG_LEVEL) == options_.end() - ? IR_OPTION_LOG_LEVEL_DEFAULT - : options_[ge::ir_option::LOG_LEVEL]; + std::string log = options_.find(ge::ir_option::LOG_LEVEL) == options_.end() ? 
IR_OPTION_LOG_LEVEL_DEFAULT + : options_[ge::ir_option::LOG_LEVEL]; GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID); options_[ge::ir_option::LOG_LEVEL] = log; @@ -239,13 +236,13 @@ graphStatus Impl::Init(const std::map &options) { string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"]; string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"]; string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end() - ? "" - : options_[ge::ir_option::DYNAMIC_BATCH_SIZE]; + ? "" + : options_[ge::ir_option::DYNAMIC_BATCH_SIZE]; string dynamic_image_size = options_.find(ge::ir_option::DYNAMIC_IMAGE_SIZE) == options_.end() - ? "" - : options_[ge::ir_option::DYNAMIC_IMAGE_SIZE]; + ? "" + : options_[ge::ir_option::DYNAMIC_IMAGE_SIZE]; string dynamic_dims = - options_.find(ge::ir_option::DYNAMIC_DIMS) == options_.end() ? "" : options_[ge::ir_option::DYNAMIC_DIMS]; + options_.find(ge::ir_option::DYNAMIC_DIMS) == options_.end() ? "" : options_[ge::ir_option::DYNAMIC_DIMS]; auto status = CheckDynamicInputParamValid(dynamic_batch_size, dynamic_image_size, dynamic_dims, input_shape, input_format, is_dynamic_input_); @@ -259,20 +256,15 @@ graphStatus Impl::Init(const std::map &options) { omg_context_.dynamic_image_size = dynamic_image_size; omg_context_.dynamic_dims = dynamic_dims; // check output_type - std::string output_type = options_.find(ge::ir_option::OUTPUT_TYPE) == options_.end() - ? "" - : options_[ge::ir_option::OUTPUT_TYPE]; - GE_CHK_BOOL_EXEC(ge::CheckOutputTypeParamValid(output_type) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check output type failed!"); + std::string output_type = + options_.find(ge::ir_option::OUTPUT_TYPE) == options_.end() ? 
"" : options_[ge::ir_option::OUTPUT_TYPE]; + GE_CHK_BOOL_EXEC(ge::CheckOutputTypeParamValid(output_type) == ge::SUCCESS, return ge::GRAPH_PARAM_INVALID, + "check output type failed!"); // check insert_op_conf - std::string insert_op_conf = options_.find(ge::ir_option::INSERT_OP_FILE) == options_.end() - ? "" - : options_[ge::ir_option::INSERT_OP_FILE]; + std::string insert_op_conf = + options_.find(ge::ir_option::INSERT_OP_FILE) == options_.end() ? "" : options_[ge::ir_option::INSERT_OP_FILE]; GE_CHK_BOOL_EXEC(ge::CheckInsertOpConfParamValid(std::string(insert_op_conf)) == ge::SUCCESS, - return ge::GRAPH_PARAM_INVALID, "check insert op conf failed!"); - - GE_CHK_BOOL_EXEC(insert_op_conf.empty() || dynamic_dims.empty(), - return ge::GRAPH_PARAM_INVALID, "dynamic dims function does not support aipp"); + return ge::GRAPH_PARAM_INVALID, "check insert op conf failed!"); // for IR builder.Only support om mode, so here fixed; options_.insert(std::pair(string(IR_OPTION_MODE), to_string(0))); @@ -284,7 +276,7 @@ graphStatus Impl::Init(const std::map &options) { ge::PrintOptionMap(options_, "ge option"); SetRtSocVersion(); - UpdateThreadContext(); + // 3. 
init generator with options_ ret = generator_.Initialize(options_, omg_context_); if (ret != GRAPH_SUCCESS) { @@ -296,7 +288,7 @@ graphStatus Impl::Init(const std::map &options) { } void Impl::SetRtSocVersion() { - const auto &global_options = GetMutableGlobalOptions(); + auto &global_options = GetMutableGlobalOptions(); auto it = global_options.find(ge::SOC_VERSION); if (it != global_options.end()) { const char *soc_version = it->second.c_str(); @@ -308,11 +300,6 @@ void Impl::SetRtSocVersion() { } } -void Impl::UpdateThreadContext() { - GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); - GetThreadLocalContext().SetGraphOption(options_); -} - graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs) { auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); @@ -336,15 +323,13 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector(model.data.get()), + return FileSaver::SaveToFile((output_file + ".om"), reinterpret_cast(model.data.get()), static_cast(model.length)); } @@ -437,77 +422,4 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat *patch_version = IR_PATCH_VERSION; return GRAPH_SUCCESS; } - -graphStatus aclgrphInferShapeAndType(ge::Graph &graph) { - auto compute_graph = GraphUtils::GetComputeGraph(graph); - GE_CHECK_NOTNULL(compute_graph); - - for (auto &node: compute_graph->GetAllNodes()) { - graphStatus ret = ShapeRefiner::InferShapeAndType(node); - if (ret == GRAPH_PARAM_INVALID) { - GELOGW("Can not find infershape func."); - continue; - } else if (ret != GRAPH_SUCCESS) { - GELOGE(ret, "Acl infershape failed."); - return ret; - } - } - - return GRAPH_SUCCESS; -} - -graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len) { - GE_CHECK_NOTNULL(file); - - if (len > PATH_MAX || len != strlen(file) || strlen(file) == 0) { - GELOGE(GRAPH_PARAM_INVALID, "File path invalid."); - return GRAPH_PARAM_INVALID; 
- } - - auto compute_graph = GraphUtils::GetComputeGraph(graph); - GE_CHECK_NOTNULL(compute_graph); - - string full_path(file, len); - for (size_t i = 0; i < len; i++) { - if (full_path[i] == '\\') { - full_path.replace(i, 1, "/"); - } - } - - string suffix; - string file_path; - int pos = full_path.rfind("/"); - if (pos != -1) { - suffix = full_path.substr(pos + 1, -1); - file_path = full_path.substr(0, pos); - } else { - suffix = full_path; - file_path = "./"; - } - - if (suffix.empty()) { - suffix = compute_graph->GetName(); - if (suffix.empty()) { - suffix = "graph"; - } - } - - char path[PATH_MAX] = {0}; - if (realpath(file_path.c_str(), path) == nullptr) { - GELOGE(GRAPH_PARAM_INVALID, "Dump file path:%s is invalid.", file); - return GRAPH_PARAM_INVALID; - } - - GraphUtils::DumpGEGrph(compute_graph, string(path), suffix); - GraphUtils::DumpGrphToOnnx(*compute_graph, string(path), suffix); - uint64_t i = 0; - for (const auto &sub_graph_func : compute_graph->GetAllSubgraphs()) { - auto sub_graph_func_name = suffix + std::string("_sub_graph_") + std::to_string(i++); - GraphUtils::DumpGEGrph(sub_graph_func, string(path), sub_graph_func_name); - GraphUtils::DumpGrphToOnnx(*sub_graph_func, string(path), sub_graph_func_name); - } - - return GRAPH_SUCCESS; -} - } // namespace ge diff --git a/ge/model/ge_model.cc b/ge/model/ge_model.cc index acaeff0d..70251876 100755 --- a/ge/model/ge_model.cc +++ b/ge/model/ge_model.cc @@ -23,7 +23,6 @@ namespace ge { void GeModel::Init() { (void)AttrUtils::SetInt(this, ATTR_MODEL_MEMORY_SIZE, 0); - (void)AttrUtils::SetInt(this, ATTR_MODEL_P2P_MEMORY_SIZE, 0); (void)AttrUtils::SetInt(this, ATTR_MODEL_STREAM_NUM, 0); (void)AttrUtils::SetInt(this, ATTR_MODEL_EVENT_NUM, 0); (void)AttrUtils::SetInt(this, ATTR_MODEL_LABEL_NUM, 0); @@ -60,9 +59,7 @@ void GeModel::SetGraph(const Graph &graph) { this->graph_ = graph; } void GeModel::SetModelTaskDef(const std::shared_ptr &task) { this->task_ = task; } -void GeModel::SetTBEKernelStore(const 
TBEKernelStore &tbe_kernal_store) { - this->tbe_kernal_store_ = tbe_kernal_store; -} +void GeModel::SetTBEKernelStore(const TBEKernelStore &tbe_kernal_store) { this->tbe_kernal_store_ = tbe_kernal_store; } void GeModel::SetCustAICPUKernelStore(const CustAICPUKernelStore &cust_aicpu_kernal_store) { this->cust_aicpu_kernal_store_ = cust_aicpu_kernal_store; diff --git a/ge/model/ge_model.h b/ge/model/ge_model.h index 5676c3b6..288b834f 100755 --- a/ge/model/ge_model.h +++ b/ge/model/ge_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,9 +64,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeModel : public AttrHolder ProtoAttrMapHelper MutableAttrMap() override; - using AttrHolder::SetAttr; - using AttrHolder::GetAllAttrs; using AttrHolder::GetAllAttrNames; + using AttrHolder::GetAllAttrs; + using AttrHolder::SetAttr; void SetModelId(uint32_t model_id) { model_id_ = model_id; } uint32_t GetModelId() const { return model_id_; } diff --git a/ge/model/ge_root_model.cc b/ge/model/ge_root_model.cc index 68f868dd..aee119fa 100644 --- a/ge/model/ge_root_model.cc +++ b/ge/model/ge_root_model.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/ge/model/ge_root_model.h b/ge/model/ge_root_model.h index 53174064..2b73c868 100755 --- a/ge/model/ge_root_model.h +++ b/ge/model/ge_root_model.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include #include "graph/compute_graph.h" #include "model/ge_model.h" @@ -23,7 +24,7 @@ namespace ge { class GeRootModel { public: - explicit GeRootModel(ComputeGraphPtr &root_graph) : root_graph_(root_graph), model_id_(INVALID_MODEL_ID) {}; + explicit GeRootModel(ComputeGraphPtr &root_graph) : root_graph_(root_graph), model_id_(INVALID_MODEL_ID){}; ~GeRootModel() = default; void SetSubgraphInstanceNameToModel(string instance_name, GeModelPtr ge_model); diff --git a/ge/offline/main.cc b/ge/offline/main.cc index 4eee1b89..9fa2cfba 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -1074,6 +1074,8 @@ domi::Status GenerateOmModel() { SaveCustomCaffeProtoPath(); + ret = ge::CheckCustomAiCpuOpLib(); + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!"); #endif diff --git a/ge/offline/module.mk b/ge/offline/module.mk index 21fe3009..42b217db 100755 --- a/ge/offline/module.mk +++ b/ge/offline/module.mk @@ -23,7 +23,6 @@ LOCAL_C_INCLUDES := \ $(TOPDIR)inc/framework/domi \ $(TOPDIR)libc_sec/include \ $(TOPDIR)inc/common/util \ - $(TOPDIR)parser \ third_party/json/include \ third_party/gflags/include \ third_party/protobuf/include \ diff --git a/ge/offline/proto/ge_ir.proto b/ge/offline/proto/ge_ir.proto index e7bfe0cb..f60a0f89 100644 --- a/ge/offline/proto/ge_ir.proto +++ b/ge/offline/proto/ge_ir.proto @@ -1,190 +1 @@ -syntax = "proto3"; - -package ge.proto; - -enum DataType -{ - 
DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set. - DT_FLOAT = 1; // float type - DT_FLOAT16 = 2; // fp16 type - DT_INT8 = 3; // int8 type - DT_UINT8 = 4; // uint8 type - DT_INT16 = 5; // int16 type - DT_UINT16 = 6; // uint16 type - DT_INT32 = 7; // - DT_INT64 = 8; // int64 type - DT_UINT32 = 9; // unsigned int32 - DT_UINT64 = 10; // unsigned int64 - DT_BOOL = 11; // bool type - DT_DOUBLE = 12; // double type - DT_STRING = 13; // string type - DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */ - DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */ - DT_COMPLEX64 = 16; // complex64 type - DT_COMPLEX128 = 17; // complex128 type - DT_QINT8 = 18; // qint8 type - DT_QINT16 = 19; // qint16 type - DT_QINT32 = 20; // qint32 type - DT_QUINT8 = 21; // quint8 type - DT_QUINT16 = 22; // quint16 type - DT_RESOURCE = 23; // resource type - DT_STRING_REF = 24; // string_ref type - DT_DUAL = 25; /**< dual output type */ -} - -message AttrDef -{ - message ListValue - { - enum ListValueType{ - VT_LIST_NONE = 0; - VT_LIST_STRING = 1; - VT_LIST_INT = 2; - VT_LIST_FLOAT = 3; - VT_LIST_BOOL = 4; - VT_LIST_BYTES = 5; - VT_LIST_TENSOR_DESC = 6; - VT_LIST_TENSOR = 7; - VT_LIST_GRAPH = 8; - VT_LIST_NAMED_ATTRS = 9; - VT_LIST_DATA_TYPE = 10; - } - repeated bytes s = 2; // "list(string)" - repeated int64 i = 3; // "list(int)" - repeated float f = 4; // "list(float)" - repeated bool b = 5; // "list(bool)" - repeated bytes bt = 7; - repeated TensorDescriptor td = 8; - repeated TensorDef t = 9; - repeated GraphDef g = 10; - repeated NamedAttrs na = 11; - repeated int64 dt = 12; // list ge::DataType - - ListValueType val_type = 20; - } - - message ListListInt{ - message ListInt{ - repeated int64 list_i = 1; // list int - } - repeated ListInt list_list_i = 1; // list list int - } - - oneof value - { - bytes s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - bytes bt = 7; - ListValue list = 1; // any "list(...)" - NamedAttrs 
func = 10; // Used to support attr nesting - TensorDescriptor td = 11; // GeTensorDesc type - TensorDef t = 12; // GeTensor type - GraphDef g = 13; // Graph type - ListListInt list_list_int = 14; // List List Int type - int64 dt = 15; // ge::DataType - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. -message NamedAttrs -{ - string name = 1; - map attr = 2; -} - -// Shape / dimension description, using row-major order -message ShapeDef -{ - repeated int64 dim = 1; // Size of each dimension -} - -// Multidimensional data description -message TensorDescriptor -{ - string name = 1; // Optional parameter, tensor name - - DataType dtype = 2; // tensor datatype - ShapeDef shape = 3; // Shape / dimension - string layout = 4; // Tensor format, eg: "NCHW", "NHWC", "CHW", "ND" - - bool has_out_attr = 9; - int64 size = 10; - int64 weight_size = 11; - bool reuse_input = 12; - bool output_tensor = 13; - string device_type = 14; - bool input_tensor =15; - int64 real_dim_cnt = 16; - int64 reuse_input_index = 17; - int64 data_offset = 18; - int64 cmps_size = 19; - string cmps_tab = 20; - int64 cmps_tab_offset = 21; - - map attr = 5; // Set of extra parameter fields -} - -// GeTensor definition -message TensorDef -{ - TensorDescriptor desc = 1; // Tensor description - bytes data = 2; // Tensor data -} - - -// Operator description -message OpDef -{ - string name = 1; // name - string type = 2; // type - - repeated string input = 5; // input original op name + outgoing index. 
op_name:index - - map attr = 10; // Set of operator parameter fields - - bool has_out_attr = 20; - int64 id = 21; - int64 stream_id =22; - repeated string input_name = 23; - repeated string src_name = 24; - repeated int64 src_index = 25; - repeated string dst_name = 26; - repeated int64 dst_index = 27; - repeated int64 input_i = 28; - repeated int64 output_i = 29; - repeated int64 workspace = 30; - repeated int64 workspace_bytes = 31; - repeated bool is_input_const = 32; - repeated TensorDescriptor input_desc = 33; - repeated TensorDescriptor output_desc = 34; - repeated string subgraph_name = 35; -} - -// Graph definition -message GraphDef -{ - string name = 1; // name - - repeated string input = 4; // Graph input - repeated string output = 5; // Graph output - - repeated OpDef op = 6; // List of operators - - map attr = 11; // Extended field -} - -// model definition -message ModelDef -{ - string name = 1; // name - uint32 version = 2; // IR Proto verion - string custom_version = 3; // User model version number, passed in by user - - repeated GraphDef graph = 7; // Graph definition,graph[0] represents the main diagram in modeldef - - map attr = 11; // Extended field -} - +../../../../inc/common/proto/ge_ir.proto \ No newline at end of file diff --git a/ge/offline/proto/insert_op.proto b/ge/offline/proto/insert_op.proto index bf918b20..27b233e5 100644 --- a/ge/offline/proto/insert_op.proto +++ b/ge/offline/proto/insert_op.proto @@ -1,139 +1 @@ -syntax = "proto3"; - -package domi; - -message InsertNewOps { - repeated AippOpParams aipp_op = 1; - repeated MultiShapeOpParams multi_shape_op = 2; -} - -message AippOpParams { - enum InputFormat { - UNDEFINED = 0; - YUV420SP_U8 = 1; - XRGB8888_U8 = 2; - RGB888_U8 = 3; - YUV400_U8 = 4; - NC1HWC0DI_FP16 = 5; - NC1HWC0DI_S8 = 6; - ARGB8888_U8 = 7; - YUYV_U8 = 8; - YUV422SP_U8 = 9; - AYUV444_U8 = 10; - RAW10 = 11; - RAW12 = 12; - RAW16 = 13; - RAW24 = 14; - RGB16 = 15; - RGB20 = 16; - RGB24 = 17; - RGB8_IR = 18; - RGB16_IR = 
19; - RGB24_IR = 20; - } - - enum AippMode { - undefined = 0; - static = 1; - dynamic = 2; - } - - // AIPPģʽ£¬Çø·Ö¾²Ì¬AIPPºÍ¶¯Ì¬AIPP - AippMode aipp_mode = 1; - - // related_input_rank²ÎÊýΪ±ØÌÀàÐÍΪÕûÐÍ£¬ÅäÖ÷¶Î§>=0, <=ÊäÈëDataËã×ӵĸöÊý£¬Ä¬ÈÏֵΪ0¡£ - // ±êʶ¶ÔÄ£Ð͵ĵڼ¸¸öÊäÈë×öAIPP´¦Àí£¬ÀýÈçÄ£ÐÍÓÐÁ½¸öÊäÈ룬ÐèÒª¶ÔµÚ2¸öÊäÈë×öAIPP£¬ÔòÅäÖÃrelated_input_rankΪ1¡£ - uint32 related_input_rank = 2; - - // related_input_name is optional and the top name of data node which inserts aipp - string related_input_name = 6; - - // input_edge_idx²ÎÊýΪ¿ÉÑ¡£¬ÀàÐÍΪÕûÐÍ£¬ÅäÖ÷¶Î§Îª>=0¡£ - // ÅäÖøòÎÊýµÄ×÷Óã¬ÔÚÓÚ¶ÔDataËã×Ó²»Í¬µÄÊä³ö×ö²»Í¬µÄAIPP´¦Àí£¬Èç¹û¸Ã²ÎÊýûÓÐÅäÖã¬Ä¬È϶Ôrelated_input_rankÖ¸¶¨µÄÄ£ÐÍÊäÈëµÄËùÓÐÊä³ö±ß×öAIPP¡£ - // ÅäÖÃÖµ <= DataËã×ÓÊä³ö±ßµÄ¸öÊý¡£ - repeated uint32 input_edge_idx = 3; - - // [Begin] ¶¯Ì¬AIPP²ÎÊý£¬ÅäÖþ²Ì¬AIPPʱÎÞЧ - uint32 max_src_image_size = 4; - - // ÊÇ·ñÖ§³ÖÐýת¡£Ä¬Èϲ»Ö§³Ö£¬¿ªÆôÖ§³ÖÐýתʱ£¬»áÓжîÍâµÄ¿Õ¼äºÍÐÔÄÜËðʧ - bool support_rotation = 5; - - // [End] ¶¯Ì¬AIPP²ÎÊý - - - // [Begin] ¾²Ì¬AIPP²ÎÊý£¬ÅäÖö¯Ì¬AIPPʱÎÞЧ - InputFormat input_format = 51; - bool csc_switch = 52; - float cpadding_value = 53; - bool rbuv_swap_switch = 54; - bool ax_swap_switch = 55; - bool single_line_mode = 56; - - int32 src_image_size_w = 57; - int32 src_image_size_h = 58; - - bool crop = 59; - int32 load_start_pos_w = 60; - int32 load_start_pos_h = 61; - int32 crop_size_w = 62; - int32 crop_size_h = 63; - - bool resize = 64; - int32 resize_output_w = 65; - int32 resize_output_h = 66; - - bool padding = 67; - int32 left_padding_size = 68; - int32 right_padding_size = 69; - int32 top_padding_size = 70; - int32 bottom_padding_size = 71; - - int32 mean_chn_0 = 10; - int32 mean_chn_1 = 11; - int32 mean_chn_2 = 12; - int32 mean_chn_3 = 19; - float min_chn_0 = 13; - float min_chn_1 = 14; - float min_chn_2 = 15; - float min_chn_3 = 20; - repeated float var_reci_chn_0 = 16; - repeated float var_reci_chn_1 = 17; - repeated float var_reci_chn_2 = 18; - repeated float var_reci_chn_3 = 21; - - repeated int32 
matrix_r0c0 = 30; - repeated int32 matrix_r0c1 = 31; - repeated int32 matrix_r0c2 = 32; - repeated int32 matrix_r1c0 = 33; - repeated int32 matrix_r1c1 = 34; - repeated int32 matrix_r1c2 = 35; - repeated int32 matrix_r2c0 = 36; - repeated int32 matrix_r2c1 = 37; - repeated int32 matrix_r2c2 = 38; - repeated int32 output_bias_0 = 39; - repeated int32 output_bias_1 = 40; - repeated int32 output_bias_2 = 41; - repeated int32 input_bias_0 = 42; - repeated int32 input_bias_1 = 43; - repeated int32 input_bias_2 = 44; - - // [End] ¾²Ì¬AIPP²ÎÊý - - // The n number that is used for raw/rgbir data into f16 transformation. - // The transformation equation is x/(2^n). If set to 0, no transform is performed. - uint32 raw_rgbir_to_f16_n = 45; -} - -message MultiShapeOpParams { - enum MultiShapeMode { - batch = 0; //¶¯Ì¬batch - resolution = 1; //¶¯Ì¬·Ö±æÂÊ£¬À©Õ¹Óà - } - - MultiShapeMode mode = 1; //Ëã×Óģʽ - uint32 related_input_rank = 2; //ÐÂÔöËã×Ó²åÈëµ½ÄĸöÊäÈë - - - repeated uint32 batch_list = 11; //batch_listÖµ£¬batch_listµÄ¸öÊýÊÇ2µ½8Ö®¼ä -} +../../../../inc/common/proto/insert_op.proto \ No newline at end of file diff --git a/ge/offline/proto/om.proto b/ge/offline/proto/om.proto index e15e5f80..91c581bb 100644 --- a/ge/offline/proto/om.proto +++ b/ge/offline/proto/om.proto @@ -1,396 +1 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -enum TargetType -{ - MINI = 0; - TINY = 1; - LITE = 2; -} - -// offline model -message ModelDef { - string name = 1; - uint32 version = 2; - - uint64 memory_size = 10; - uint32 stream_num = 11; - uint32 event_num = 12; - uint64 weight_size = 13; - uint32 label_num = 15; - repeated OpDef op = 20; - TargetType target_type = 23; - - map attr = 30; -}; - -// operator define -message OpDef { - string name = 1; - string type = 2; - - uint32 id = 3; - uint32 stream_id = 4; - - repeated string input_name = 5; - - repeated string src_name = 8; - repeated int32 src_index = 9; - repeated int64 input = 10; - repeated int64 output = 11; - repeated TensorDescriptor input_desc = 12; - repeated TensorDescriptor output_desc = 13; - repeated WeightDef weights = 14; - repeated string dst_name = 15; - repeated int32 dst_index = 16; - - repeated int64 workspace = 20; - repeated uint32 workspace_bytes = 21; - - repeated string weight_name = 22; - repeated bool is_input_const = 23; - - map attr = 30; - - QuantizeFactorParams quantize_factor = 31; - - oneof op_params { - // start at 100 here - SendOpParams sender_param = 100; - RecvOpParams receiver_param = 200; - ConvolutionOpParams convolution_param = 300; - PoolingOpParams pooling_param = 400; - EltwiseOpParams eltwise_param = 500; - BatchNormOpParams batchnorm_param = 600; - ScaleOpParams scale_param = 700; - FullConnectionOpParams full_connection_param = 800; - SoftmaxOpParams softmax_param = 900; - ActivationOpParams activation_param = 1000; - ReshapeOpParams reshape_param = 1100; - } -}; - -message SendOpParams { - uint32 event_id = 1; -}; - -message RecvOpParams { - uint32 event_id = 1; -}; - -enum QuantizeScaleType -{ - VECTOR_SCALE = 0; - SCALAR_SCALE = 1; -} - -enum QuantizeScaleMode -{ - NORMAL_MODE = 0; - SQRT_MODE = 1; -} - -enum QuantizeAlgorithm -{ - NON_OFFSET_ALGO = 0; - 
HALF_OFFSET_ALGO = 1; - ALL_OFFSET_ALGO = 2; -} -message QuantizeFactor -{ - QuantizeScaleMode scale_mode = 1; - bytes scale_value = 2; - int64 scale_offset = 3; - bytes offset_data_value = 4; - int64 offset_data_offset = 5; - bytes offset_weight_value = 6; - int64 offset_weight_offset = 7; - bytes offset_pad_value = 8; - int64 offset_pad_offset = 9; -}; - -message QuantizeCalcFactor -{ - bytes offsetw = 1; - int64 offsetw_offset = 2; - bytes offsetd = 3; - int64 offsetd_offset = 4; - bytes scalereq = 5; - int64 scaledreq_offset = 6; - bytes offsetdnext = 7; - int64 offsetdnext_offset = 8; -} - -message QuantizeFactorParams -{ - QuantizeAlgorithm quantize_algo = 1; - QuantizeScaleType scale_type = 2; - QuantizeFactor quantize_param = 3; - QuantizeFactor dequantize_param = 4; - QuantizeFactor requantize_param = 5; - QuantizeCalcFactor quantizecalc_param = 6; -}; - -message ConvolutionOpParams { - int32 mode = 1; - int32 algo = 2; - int32 pad_mode = 3; - uint32 group = 4; - uint32 num_output = 5; - - repeated uint32 pad = 10; - repeated uint32 stride = 11; - repeated uint32 dilation = 12; - repeated uint32 kernel = 13; - - float alpha = 20; - float beta = 21; - - WeightDef filter = 40; - WeightDef bias = 41; - - bool relu_flag = 62; - repeated uint32 adj = 70; - repeated uint32 target_shape = 71; - repeated uint32 before_pad = 72; -}; - -message PoolingOpParams { - int32 mode = 1; - int32 nan_opt = 2; - int32 pad_mode = 3; - bool global_pooling = 4; - - repeated uint32 window = 10; - repeated uint32 pad = 11; - repeated uint32 stride = 12; - bool ceil_mode = 13; - int32 data_mode = 14; - - float alpha = 20; - float beta = 21; - repeated uint32 before_pad = 22; -}; - -message EltwiseOpParams { - int32 mode = 1; - repeated float coeff = 2; - float alpha = 3; - float beta = 4; - repeated WeightDef weight = 5; - bool relu_flag = 6; -}; - -message ActivationOpParams { - int32 mode = 1; - float coef = 2; - float alpha = 3; - float beta = 4; -}; - -message BatchNormOpParams 
{ - int32 mode = 1; - - float alpha = 2; - float beta = 3; - double epsilon = 4;//optinal,[default = 1e-5] - bool use_global_stats = 5; //optinal,by default true,testing mode - float moving_average_fraction = 6; //optinal,[default = .999]; - - WeightDef estimated_mean = 7; - WeightDef estimated_variance = 8; - - WeightDef scale = 9; - WeightDef bias = 10; -}; - -message ScaleOpParams { - WeightDef scale = 1; - WeightDef bias = 2; -}; - -message ReshapeOpParams { - float alpha = 1; - float beta = 2; - ShapeDef shape = 3; - int32 axis = 4; - int32 num_axes = 5; - int32 format = 6; -}; - -message SoftmaxOpParams { - int32 algo = 1; - int32 mode = 2; - float alpha = 3; - float beta = 4; -}; - -message FullConnectionOpParams { - WeightDef filter = 1; - WeightDef bias = 2; - uint32 num_output = 3; - bool relu_flag = 12; -}; - -message FlattenOpParams { - float alpha = 1; - float beta = 2; - int32 start_axis = 3; - int32 end_axis = 4; -} - -message AddLimitedOpParams { - float alpha = 1; - float beta = 2; - int32 axis = 3; - bool broadcast = 4; - - repeated WeightDef weight = 10; -}; - -message MulLimitedOpParams { - float alpha = 1; - float beta = 2; - int32 axis = 3; - bool broadcast = 4; - - repeated WeightDef weight = 10; -}; - -message AddOpParams { - float alpha = 1; - float beta = 2; - - repeated WeightDef weight = 10; -}; - -message MulOpParams { - float alpha = 1; - float beta = 2; - - repeated WeightDef weight = 10; -}; - -message SubOpParams { - float alpha = 1; - float beta = 2; - - repeated WeightDef weight = 10; -}; - -message BiasAddOpParams { - float alpha = 1; - float beta = 2; - - WeightDef bias = 10; -}; - -message MatMulOpParams { - float alpha = 1; - float beta = 2; - bool transposeX = 3; - bool transposeW = 4; - - WeightDef filter = 10; - WeightDef bias = 12; -}; - -message RsqrtOpParams { - float alpha = 1; - float beta = 2; -}; - - -message WeightDef { - int32 format = 1; - int32 data_type = 2; - ShapeDef shape = 3; - bytes data = 4; - int64 
data_offset = 5; - uint32 cmps_size = 6; - bytes cmps_tab = 7; - int64 cmps_tab_offset = 10; - CompressInfo cmps_info = 8; - AllOffsetQuantizeInfo alloffset_quantize_info = 11; -} - -message ShapeDef { - repeated int64 dim = 1; -} - -enum DeviceType { - NPU = 0; // In default, we will use NPU. - CPU = 1; // CPU -} - -message AllOffsetQuantizeInfo { - float scale = 1; - int32 offset = 2; -} - -message TensorDescriptor { - int32 format = 1; - int32 data_type = 2; - repeated int64 dim = 3; - uint32 size = 4; - bool reuse_input = 5; - bool output_tensor = 7; - DeviceType device_type = 8; - bool input_tensor = 9; - uint32 real_dim_cnt = 10; - uint32 reuse_input_index = 11; - AllOffsetQuantizeInfo alloffset_quantize_info = 12; -} - -message CompressInfo { - int32 blockRow = 1; // block row - int32 blockCol = 2; // block col - int32 fractalK = 3; // fractal K - int32 fractalN = 4; // fractal N - int32 lastFractalK = 5; // K of last fractal - int32 lastFractalN = 6; // N of last fractal - int32 cubeSize = 7; // cube's length - int32 loadDir = 8; // data load directtiono 0:col load 1:row load -} - -message AttrDef { - message ListValue { - repeated string s = 2; // "list(string)" - repeated int64 i = 3 [packed = true]; // "list(int)" - repeated float f = 4 [packed = true]; // "list(float)" - repeated bool b = 5 [packed = true]; // "list(bool)" - repeated uint32 u = 6 [packed = true]; // "list(uint)" - repeated bytes bt = 7; - } - - oneof value { - string s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - uint32 u = 6; // "uint32" - bytes bt = 7; - ListValue list = 1; // any "list(...)" - NamedAttrs func = 10; - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. 
-message NamedAttrs { - string name = 1; - map attr = 2; -} - +../../../../inc/common/proto/om.proto \ No newline at end of file diff --git a/ge/offline/proto/task.proto b/ge/offline/proto/task.proto index d0c09840..36ae4847 100644 --- a/ge/offline/proto/task.proto +++ b/ge/offline/proto/task.proto @@ -1,165 +1 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -message ModelTaskDef { - string version = 1; - - map attr = 9; // Extended field - repeated TaskDef task = 10; - - uint64 memory_size = 11; - uint32 stream_num = 12; - uint32 event_num = 13; - uint64 weight_size = 14; - - repeated bytes op = 15; // input/output opdef in bytes - - uint64 base_addr = 16; // base addr - uint64 weight_addr = 17; // weight addr - uint32 batch_num = 18; -} - - -message TaskDef { - uint32 id = 1; - uint32 type = 2; - - uint32 stream_id = 10; - uint32 event_id = 11; - - KernelDef kernel = 20; - KernelExDef kernel_ex = 21; - KernelHcclDef kernel_hccl = 25; - EventExDef event_ex = 26; - LogTimeStampDef log_timestamp = 28; - - uint32 label_id = 30; - - MemcpyAsyncDef memcpy_async = 31; - StreamSwitchDef stream_switch = 32; - StreamActiveDef stream_active = 33; - bytes private_def = 34; - uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future - StreamSwitchNDef stream_switch_n = 36; - - LabelSetDef label_set = 37; - LabelGotoExDef label_goto_ex = 38; - LabelSwitchByIndexDef label_switch_by_index = 39; -} - 
-message KernelDef { - KernelContext context = 1; - - string stub_func = 10; - uint32 block_dim = 11; - uint32 args_size = 12; - bytes args = 13; - bytes sm_desc = 14; - bytes flowtable = 15; - string so_name = 16; - string kernel_name = 17; - bytes kernel_ext_info = 18; - uint32 kernel_ext_info_size = 19; -} - -message KernelContext { - uint32 kernel_type = 1; - uint32 op_id = 2; // OP type in CCE - uint32 kernel_func_id = 3; - uint32 op_index = 4; // TE/Custom operator - bool is_flowtable = 5; // Identify whether args is a flowtable structure - bytes args_offset = 6; // args offset information - uint32 args_count = 7; // args count - repeated uint32 origin_op_index = 8; -} - - -message KernelExDef { - uint32 flags = 1; - - uint32 op_index = 4; - uint32 args_size = 12; - bytes args = 13; - bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput - uint32 task_info_size = 15; - bytes kernel_ext_info = 16; - uint32 kernel_ext_info_size = 17; -} - - -message KernelHcclDef { - uint32 op_index = 8; - string hccl_type = 9; -} - - -message EventExDef { - uint32 op_index = 1; - uint32 event_type = 2; -} - -message LogTimeStampDef { - uint64 logid = 1; - bool notify = 2; - uint32 flat = 3; -} - -message MemcpyAsyncDef { - uint64 dst = 1; - uint64 dst_max = 2; - uint64 src = 3; - uint64 count = 4; - uint32 kind = 5; - uint32 op_index = 6; -} - -message StreamSwitchDef { - uint32 op_index = 1; - uint32 true_stream_id = 2; - int64 value = 3; - uint64 value_ptr = 4; - uint32 data_type = 5; -} - -message StreamActiveDef { - uint32 op_index = 1; - uint32 active_stream_id = 2; -} - -message StreamSwitchNDef { - uint32 op_index = 1; - uint32 size = 2; - repeated int64 target_value = 3; - repeated uint32 true_stream_id = 4; - uint32 element_size = 5; - uint32 data_type = 6; -} - -message LabelSetDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelGotoExDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - 
-message LabelSwitchByIndexDef { - uint32 op_index = 1; - uint32 label_max = 2; -} +../../proto/task.proto \ No newline at end of file diff --git a/ge/offline/single_op_parser.cc b/ge/offline/single_op_parser.cc index bc1b39f8..34ac7d5f 100644 --- a/ge/offline/single_op_parser.cc +++ b/ge/offline/single_op_parser.cc @@ -227,11 +227,15 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) { int index = 0; for (auto &tensor_desc : op_desc.input_desc) { - if ((tensor_desc.type == DT_UNDEFINED && tensor_desc.format != FORMAT_RESERVED) || - (tensor_desc.type != DT_UNDEFINED && tensor_desc.format == FORMAT_RESERVED)){ - ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, - {"intput", "datatype or format", std::to_string(index)}); - GELOGE(PARAM_INVALID, "Input's dataType or format is invalid when the index is %d", index); + if (tensor_desc.type == DT_UNDEFINED) { + ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "index"}, {"input", std::to_string(index)}); + GELOGE(false, "Input's dataType is invalid when the index is %d", index); + return false; + } + + if (tensor_desc.format == FORMAT_RESERVED) { + ErrorManager::GetInstance().ATCReportErrMessage("E10028", {"input", "index"}, {"input", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Input's format is invalid when the index is %d", index); return false; } ++index; @@ -240,15 +244,13 @@ bool SingleOpParser::Validate(const SingleOpDesc &op_desc) { index = 0; for (auto &tensor_desc : op_desc.output_desc) { if (tensor_desc.type == DT_UNDEFINED) { - ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, - {"output", "datatype", std::to_string(index)}); + ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "index"}, {"output", std::to_string(index)}); GELOGE(PARAM_INVALID, "Output's dataType is invalid when the index is %d", index); return false; } if (tensor_desc.format == FORMAT_RESERVED) { - 
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "type", "index"}, - {"output", "format", std::to_string(index)}); + ErrorManager::GetInstance().ATCReportErrMessage("E10028", {"input", "index"}, {"output", std::to_string(index)}); GELOGE(PARAM_INVALID, "Output's format is invalid when the index is %d", index); return false; } @@ -459,39 +461,40 @@ Status SingleOpParser::SetShapeRange(const std::string &op_name, } Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector &op_list) { - int index = 0; - try { - Json single_op_list_json; - auto ret = ReadJsonFile(file, single_op_list_json); - if (ret != SUCCESS) { - return ret; - } + Json single_op_list_json; + auto ret = ReadJsonFile(file, single_op_list_json); + if (ret != SUCCESS) { + return ret; + } - for (const Json &single_op_json : single_op_list_json) { - SingleOpDesc single_op_desc; + int index = 0; + for (const Json &single_op_json : single_op_list_json) { + SingleOpDesc single_op_desc; + try { GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str()); single_op_desc = single_op_json; - if (!Validate(single_op_desc)) { - GELOGE(PARAM_INVALID, "Validate the index[%d] of op failed when read json file[%s].", index, file.c_str()); - return PARAM_INVALID; - } + } catch (const nlohmann::json::exception &e) { + ErrorManager::GetInstance().ATCReportErrMessage("E10032", {"index", "jsonfile", "exception"}, + {std::to_string(index), file, e.what()}); + GELOGE(PARAM_INVALID, "Parse the index[%d] of op failed when read json file[%s], exception %s", + index, file.c_str(), e.what()); + return PARAM_INVALID; + } - SingleOpBuildParam param; - ret = ConvertToBuildParam(index, single_op_desc, param); - if (ret != SUCCESS) { - return ret; - } + if (!Validate(single_op_desc)) { + GELOGE(PARAM_INVALID, "Validate the index[%d] of op failed when read json file[%s].", index, file.c_str()); + return PARAM_INVALID; + } - op_list.emplace_back(param); - 
GELOGI("Parse the index[%d] of op success", index); - index += 1; + SingleOpBuildParam param; + ret = ConvertToBuildParam(index, single_op_desc, param); + if (ret != SUCCESS) { + return ret; } - } catch (const nlohmann::json::exception &e) { - ErrorManager::GetInstance().ATCReportErrMessage("E10032", {"index", "jsonfile", "exception"}, - {std::to_string(index), file, e.what()}); - GELOGE(PARAM_INVALID, "Parse the index[%d] of op failed when read json file[%s], exception %s", - index, file.c_str(), e.what()); - return PARAM_INVALID; + + op_list.emplace_back(param); + GELOGI("Parse the index[%d] of op success", index); + index += 1; } return SUCCESS; diff --git a/ge/offline/single_op_parser.h b/ge/offline/single_op_parser.h index c679f0be..9a1bd962 100644 --- a/ge/offline/single_op_parser.h +++ b/ge/offline/single_op_parser.h @@ -71,9 +71,7 @@ class SingleOpParser { static std::unique_ptr CreateOpDesc(const std::string &op_type); static Status ConvertToBuildParam(int index, const SingleOpDesc &single_op_desc, SingleOpBuildParam &build_param); static Status VerifyOpInputOutputSizeByIr(const OpDesc ¤t_op_desc); - static Status SetShapeRange(const std::string &op_name, - const SingleOpTensorDesc &tensor_desc, - GeTensorDesc &ge_tensor_desc); + static Status SetShapeRange(const std::string &op_name, const SingleOpTensorDesc &tensor_desc, GeTensorDesc &ge_tensor_desc); }; } // namespace ge diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc deleted file mode 100644 index 8d9a48bb..00000000 --- a/ge/opskernel_manager/ops_kernel_builder_manager.cc +++ /dev/null @@ -1,169 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "init/gelib.h" -#include "ops_kernel_builder_manager.h" -#include "register/ops_kernel_builder_registry.h" - -namespace ge { -namespace { -const std::vector kBasicBuilderLibs = { - "libge_local_opskernel_builder.so", - "libhost_cpu_opskernel_builder.so", - "librts_kernel_builder.so", - "libaicpu_ascend_builder.so", - "libaicpu_tf_builder.so" -}; - -const std::vector kHcclBuilderLibs = { - "libhcom_opskernel_builder.so", - "libhvd_opskernel_builder.so" -}; -} // namespace -OpsKernelBuilderManager::~OpsKernelBuilderManager() { - // it's OK to call Finalize multiply times - (void) Finalize(); -} - -OpsKernelBuilderManager &OpsKernelBuilderManager::Instance() { - static OpsKernelBuilderManager instance; - return instance; -} - -Status OpsKernelBuilderManager::Initialize(const map &options, bool is_train) { - if (is_train) { - std::string lib_paths; - GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths)); - plugin_manager_.reset(new (std::nothrow)PluginManager()); - GE_CHECK_NOTNULL(plugin_manager_); - GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs"); - } - - auto &kernel_builders = OpsKernelBuilderRegistry::GetInstance().GetAll(); - GELOGI("Number of OpBuild = %zu", kernel_builders.size()); - - for (const auto &it : kernel_builders) { - const std::string &kernel_lib_name = it.first; - GELOGI("Initialize ops kernel util for %s", kernel_lib_name.c_str()); - GE_CHECK_NOTNULL(it.second); - GE_CHK_STATUS_RET(it.second->Initialize(options), - "Failed to invoke Initialize, kernel lib name = %s", - 
kernel_lib_name.c_str()); - - ops_kernel_builders_.emplace(kernel_lib_name, it.second); - } - - return SUCCESS; -} - -Status OpsKernelBuilderManager::Finalize() { - for (const auto &it : ops_kernel_builders_) { - const std::string &kernel_lib_name = it.first; - GELOGI("Finalize ops kernel util for %s", kernel_lib_name.c_str()); - auto ret = it.second->Finalize(); - if (ret != SUCCESS) { - GELOGW("Failed to invoke Finalize, kernel lib name = %s", - kernel_lib_name.c_str()); - } - } - - ops_kernel_builders_.clear(); - plugin_manager_.reset(); - return SUCCESS; -} - -const map &OpsKernelBuilderManager::GetAllOpsKernelBuilders() const { - return ops_kernel_builders_; -} - -OpsKernelBuilderPtr OpsKernelBuilderManager::GetOpsKernelBuilder(const string &name) const { - auto it = ops_kernel_builders_.find(name); - if (it != ops_kernel_builders_.end()) { - return it->second; - } - - GELOGW("Failed to get opsKernelInfoStore object by name. OpKernelLibName is %s", name.c_str()); - return nullptr; -} - -Status OpsKernelBuilderManager::GetLibPaths(const std::map &options, std::string &lib_paths) { - GELOGD("Start to execute GetLibPaths"); - std::string path_base = PluginManager::GetPath(); - std::string so_path = "plugin/opskernel/"; - std::string path = path_base + so_path; - std::string all_lib_paths; - for (const auto &lib_name : kBasicBuilderLibs) { - all_lib_paths += (path + lib_name + ":"); - } - - auto iter = options.find(OPTION_EXEC_HCCL_FLAG); - if (iter == options.end() || iter->second != "0") { - for (const auto &lib_name : kHcclBuilderLibs) { - all_lib_paths += (path + lib_name + ":"); - } - } - - lib_paths = std::move(all_lib_paths); - GELOGI("Get lib paths by default. 
paths = %s", lib_paths.c_str()); - return SUCCESS; -} - -Status OpsKernelBuilderManager::CalcOpRunningParam(Node &node) const { - auto op_desc = node.GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - const std::string &lib_name = op_desc->GetOpKernelLibName(); - auto it = ops_kernel_builders_.find(lib_name); - if (it == ops_kernel_builders_.end()) { - GELOGE(INTERNAL_ERROR, - "Failed to get OpKernelStore. libName = %s, node = %s", - lib_name.c_str(), - op_desc->GetName().c_str()); - return INTERNAL_ERROR; - } - - GELOGD("To invoke CalcOpRunningParam, node = %s, lib name = %s", op_desc->GetName().c_str(), lib_name.c_str()); - GE_CHK_STATUS_RET(it->second->CalcOpRunningParam(node), - "Failed to invoke CalcOpRunningParam, libName = %s, node = %s", - lib_name.c_str(), - op_desc->GetName().c_str()); - GELOGD("Done invoking CalcOpRunningParam successfully"); - return SUCCESS; -} - -Status OpsKernelBuilderManager::GenerateTask(const Node &node, - RunContext &context, - std::vector &tasks) const { - auto op_desc = node.GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - const std::string &lib_name = op_desc->GetOpKernelLibName(); - auto it = ops_kernel_builders_.find(lib_name); - if (it == ops_kernel_builders_.end()) { - GELOGE(INTERNAL_ERROR, - "Failed to get OpKernelStore. 
libName = %s, node = %s", - lib_name.c_str(), - op_desc->GetName().c_str()); - return INTERNAL_ERROR; - } - - GELOGD("To invoke GenerateTask, node = %s, lib name = %s", op_desc->GetName().c_str(), lib_name.c_str()); - GE_CHK_STATUS_RET(it->second->GenerateTask(node, context, tasks), - "Failed to invoke GenerateTask, libName = %s, node = %s", - lib_name.c_str(), - op_desc->GetName().c_str()); - GELOGD("Done invoking GenerateTask successfully"); - return SUCCESS; -} -} // namespace ge \ No newline at end of file diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h deleted file mode 100644 index 7a95ddfa..00000000 --- a/ge/opskernel_manager/ops_kernel_builder_manager.h +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_OPSKERNEL_MANAGER_OPS_KERNEL_BUILDER_MANAGER_H_ -#define GE_OPSKERNEL_MANAGER_OPS_KERNEL_BUILDER_MANAGER_H_ - -#include "common/ge/plugin_manager.h" -#include "common/opskernel/ops_kernel_builder.h" -#include "external/ge/ge_api_error_codes.h" - -namespace ge { -using OpsKernelBuilderPtr = std::shared_ptr; -class OpsKernelBuilderManager { - public: - ~OpsKernelBuilderManager(); - - static OpsKernelBuilderManager& Instance(); - - // opsKernelManager initialize, load all opsKernelInfoStore and graph_optimizer - Status Initialize(const std::map &options, bool is_train = true); - - // opsKernelManager finalize, unload all opsKernelInfoStore and graph_optimizer - Status Finalize(); - - // get opsKernelIBuilder by name - OpsKernelBuilderPtr GetOpsKernelBuilder(const std::string &name) const; - - // get all opsKernelBuilders - const std::map &GetAllOpsKernelBuilders() const; - - Status CalcOpRunningParam(Node &node) const; - - Status GenerateTask(const Node &node, RunContext &context, - std::vector &tasks) const; - - private: - OpsKernelBuilderManager() = default; - static Status GetLibPaths(const std::map &options, std::string &lib_paths); - - std::unique_ptr plugin_manager_; - std::map ops_kernel_builders_{}; -}; -} // namespace ge -#endif // GE_OPSKERNEL_MANAGER_OPS_KERNEL_BUILDER_MANAGER_H_ diff --git a/ge/opskernel_manager/ops_kernel_manager.cc b/ge/opskernel_manager/ops_kernel_manager.cc index 12894292..11eb3061 100644 --- a/ge/opskernel_manager/ops_kernel_manager.cc +++ b/ge/opskernel_manager/ops_kernel_manager.cc @@ -89,12 +89,12 @@ Status OpsKernelManager::Initialize(const map &options_const) { return GE_OPS_GET_NO_VALID_SO; } Status rst1 = - plugin_manager_.InvokeAll &>(kGetOpsKernelInfoStores, ops_kernel_store_); + plugin_manager_.InvokeAll &>(kGetOpsKernelInfoStores, ops_kernel_store_); if (rst1 != SUCCESS) { GELOGW("Initialize OpsKernelInfo failed."); } Status rst2 = - plugin_manager_.InvokeAll &>(kGetGraphOptimizerObjs, graph_optimizers_); 
+ plugin_manager_.InvokeAll &>(kGetGraphOptimizerObjs, graph_optimizers_); if (rst2 != SUCCESS) { GELOGW("Initialize GraphOptimizerObjs failed."); } @@ -125,7 +125,7 @@ Status OpsKernelManager::Initialize(const map &options_const) { } } -void OpsKernelManager::GetExternalEnginePath(std::string &extern_engine_path, const std::map& options) { +void OpsKernelManager::GetExternalEnginePath(std::string &extern_engine_path, const std::map &options) { GELOGI("Enter get external engine so path schedule"); const char *path_env = std::getenv("ASCEND_ENGINE_PATH"); if (path_env != nullptr) { @@ -137,8 +137,8 @@ void OpsKernelManager::GetExternalEnginePath(std::string &extern_engine_path, co std::string so_path = "plugin/opskernel/"; std::string path = path_base + so_path; extern_engine_path = (path + "libfe.so" + ":") + (path + "libge_local_engine.so" + ":") + - (path + "librts_engine.so" + ":") + (path + "libaicpu_ascend_engine.so" + ":") + - (path + "libhost_cpu_engine.so" + ":") + (path + "libaicpu_tf_engine.so" + ":"); + (path + "librts_engine.so" + ":") + (path + "libaicpu_engine.so" + ":") + + (path + "libhost_cpu_engine.so" + ":"); auto iter = options.find(OPTION_EXEC_HCCL_FLAG); if (iter == options.end() || iter->second != "0") { extern_engine_path += (path_base + "libhcom_graph_adaptor.so"); @@ -175,8 +175,8 @@ Status OpsKernelManager::ParsePluginOptions(const map &options, } else if (flag == 1) { enable_flag = true; } else { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), - iter->second.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", + plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } catch (std::invalid_argument &) { @@ -188,8 +188,8 @@ Status OpsKernelManager::ParsePluginOptions(const map &options, iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (...) 
{ - GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), - iter->second.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "option_key:%s, its value %s is invalid, it must be 0 or 1.", + plugin_name.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } else { diff --git a/ge/opskernel_manager/ops_kernel_manager.h b/ge/opskernel_manager/ops_kernel_manager.h index b34c483e..a5d4d85c 100644 --- a/ge/opskernel_manager/ops_kernel_manager.h +++ b/ge/opskernel_manager/ops_kernel_manager.h @@ -34,8 +34,8 @@ #include "ge/ge_api_types.h" #include "runtime/base.h" -using std::string; using std::map; +using std::string; using std::vector; namespace ge { @@ -89,7 +89,7 @@ class OpsKernelManager { Status CheckPluginPtr() const; - void GetExternalEnginePath(std::string &path, const std::map& options); + void GetExternalEnginePath(std::string &path, const std::map &options); void InitOpsKernelInfo(); @@ -99,7 +99,7 @@ class OpsKernelManager { Status ParsePluginOptions(const map &options, const string &plugin_name, bool &enable_flag); - Status LoadGEGraphOptimizer(map& graphOptimizer); + Status LoadGEGraphOptimizer(map &graphOptimizer); Status InitGraphOptimizerPriority(); diff --git a/ge/opskernel_manager/optimizer_priority.pbtxt b/ge/opskernel_manager/optimizer_priority.pbtxt index a53184a1..9f8a03fb 100755 --- a/ge/opskernel_manager/optimizer_priority.pbtxt +++ b/ge/opskernel_manager/optimizer_priority.pbtxt @@ -1 +1 @@ -optimizer:["aicpu_tf_optimizer","aicpu_ascend_optimizer","AIcoreEngine","VectorEngine","hccl_graph_optimizer", "hvd_graph_optimizer", "DNN_VM_RTS_GRAPH_OPTIMIZER_STORE"] +optimizer:["aicpu_original_optimizer","AIcoreEngine","VectorEngine","aicpu_optimizer","hccl_graph_optimizer", "hvd_graph_optimizer", "DNN_VM_RTS_GRAPH_OPTIMIZER_STORE"] \ No newline at end of file diff --git a/ge/plugin/engine/dnnengines.cc b/ge/plugin/engine/dnnengines.cc index cf6b7517..d85d1668 100755 --- 
a/ge/plugin/engine/dnnengines.cc +++ b/ge/plugin/engine/dnnengines.cc @@ -55,13 +55,13 @@ void VectorCoreDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs AICpuDNNEngine::AICpuDNNEngine(const std::string &engine_name) { engine_attribute_.engine_name = engine_name; - engine_attribute_.compute_cost = COST_3; + engine_attribute_.compute_cost = COST_2; engine_attribute_.runtime_type = DEVICE; engine_attribute_.engine_input_format = FORMAT_RESERVED; engine_attribute_.engine_output_format = FORMAT_RESERVED; } -AICpuDNNEngine::AICpuDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } +AICpuDNNEngine::AICpuDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } Status AICpuDNNEngine::Initialize(const std::map &options) { return SUCCESS; } @@ -69,22 +69,6 @@ Status AICpuDNNEngine::Finalize() { return SUCCESS; } void AICpuDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } -AICpuTFDNNEngine::AICpuTFDNNEngine(const std::string &engine_name) { - engine_attribute_.engine_name = engine_name; - engine_attribute_.compute_cost = COST_2; - engine_attribute_.runtime_type = DEVICE; - engine_attribute_.engine_input_format = FORMAT_RESERVED; - engine_attribute_.engine_output_format = FORMAT_RESERVED; -} - -AICpuTFDNNEngine::AICpuTFDNNEngine(const DNNEngineAttribute &attrs) { engine_attribute_ = attrs; } - -Status AICpuTFDNNEngine::Initialize(const std::map &options) { return SUCCESS; } - -Status AICpuTFDNNEngine::Finalize() { return SUCCESS; } - -void AICpuTFDNNEngine::GetAttributes(DNNEngineAttribute &attrs) const { attrs = engine_attribute_; } - GeLocalDNNEngine::GeLocalDNNEngine(const std::string &engine_name) { engine_attribute_.engine_name = engine_name; engine_attribute_.engine_input_format = FORMAT_RESERVED; diff --git a/ge/plugin/engine/dnnengines.h b/ge/plugin/engine/dnnengines.h index 4a2a9df5..d776c2b9 100644 --- a/ge/plugin/engine/dnnengines.h +++ b/ge/plugin/engine/dnnengines.h @@ 
-55,7 +55,6 @@ class VectorCoreDNNEngine : public DNNEngine { DNNEngineAttribute engine_attribute_; }; - class AICpuDNNEngine : public DNNEngine { public: AICpuDNNEngine() = default; @@ -71,21 +70,6 @@ class AICpuDNNEngine : public DNNEngine { DNNEngineAttribute engine_attribute_; }; -class AICpuTFDNNEngine : public DNNEngine { - public: - AICpuTFDNNEngine() = default; - explicit AICpuTFDNNEngine(const std::string &engine_name); - explicit AICpuTFDNNEngine(const DNNEngineAttribute &attrs); - ~AICpuTFDNNEngine() = default; - - Status Initialize(const std::map &options); - Status Finalize(); - void GetAttributes(DNNEngineAttribute &attr) const; - - private: - DNNEngineAttribute engine_attribute_; -}; - class GeLocalDNNEngine : public DNNEngine { public: GeLocalDNNEngine() = default; @@ -102,7 +86,7 @@ class GeLocalDNNEngine : public DNNEngine { }; class HostCpuDNNEngine : public DNNEngine { -public: + public: HostCpuDNNEngine() = default; explicit HostCpuDNNEngine(const std::string &engine_name); explicit HostCpuDNNEngine(const DNNEngineAttribute &attrs); @@ -112,7 +96,7 @@ public: Status Finalize(); void GetAttributes(DNNEngineAttribute &attr) const; -private: + private: DNNEngineAttribute engine_attribute_; }; diff --git a/ge/plugin/engine/engine_manage.cc b/ge/plugin/engine/engine_manage.cc index a14c92ea..82cd90ee 100644 --- a/ge/plugin/engine/engine_manage.cc +++ b/ge/plugin/engine/engine_manage.cc @@ -89,10 +89,10 @@ void RegisterVectorEngine() { } void RegisterAiCpuEngine() { - const std::string vm_aicpu = "DNN_VM_AICPU_ASCEND"; + const std::string vm_aicpu = "DNN_VM_AICPU"; std::vector mem_type_aicpu; mem_type_aicpu.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); - DNNEngineAttribute attr_aicpu = {vm_aicpu, mem_type_aicpu, COST_3, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; + DNNEngineAttribute attr_aicpu = {vm_aicpu, mem_type_aicpu, COST_2, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; DNNEnginePtr vm_engine_ptr = MakeShared(attr_aicpu); if (vm_engine_ptr == nullptr) 
{ GELOGE(ge::FAILED, "make vm_engine_ptr failed"); @@ -103,21 +103,6 @@ void RegisterAiCpuEngine() { } } -void RegisterAiCpuTFEngine() { - const std::string vm_aicpu_tf = "DNN_VM_AICPU"; - std::vector mem_type_aicpu_tf; - mem_type_aicpu_tf.emplace_back(GE_ENGINE_ATTR_MEM_TYPE_HBM); - DNNEngineAttribute attr_aicpu_tf = {vm_aicpu_tf, mem_type_aicpu_tf, COST_2, DEVICE, FORMAT_RESERVED, FORMAT_RESERVED}; - DNNEnginePtr vm_engine_ptr = MakeShared(attr_aicpu_tf); - if (vm_engine_ptr == nullptr) { - GELOGE(ge::FAILED, "make vm_engine_ptr failed"); - return; - } - if (EngineManager::RegisterEngine(vm_aicpu_tf, vm_engine_ptr) != SUCCESS) { - GELOGW("register vmAicpuTFEngine failed"); - } -} - void RegisterGeLocalEngine() { const std::string vm_ge_local = "DNN_VM_GE_LOCAL"; std::vector mem_type_ge_local; @@ -183,7 +168,6 @@ void RegisterHcclEngine() { void GetDNNEngineObjs(std::map &engines) { RegisterAiCoreEngine(); RegisterVectorEngine(); - RegisterAiCpuTFEngine(); RegisterAiCpuEngine(); RegisterGeLocalEngine(); RegisterHostCpuEngine(); diff --git a/ge/proto/dump_task.proto b/ge/proto/dump_task.proto index b1e346cd..ecdf4792 100644 --- a/ge/proto/dump_task.proto +++ b/ge/proto/dump_task.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + syntax = "proto3"; package toolkit.dumpdata; diff --git a/ge/proto/fusion_model.proto b/ge/proto/fusion_model.proto index c92c5581..6220963c 100755 --- a/ge/proto/fusion_model.proto +++ b/ge/proto/fusion_model.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + syntax = "proto3"; import "om.proto"; diff --git a/ge/proto/fwk_adapter.proto b/ge/proto/fwk_adapter.proto index 9335c926..99333d2e 100644 --- a/ge/proto/fwk_adapter.proto +++ b/ge/proto/fwk_adapter.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + syntax = "proto3"; package aicpu.FWKAdapter; diff --git a/ge/proto/ge_api.proto b/ge/proto/ge_api.proto index 331c5aea..ac5b3b3a 100755 --- a/ge/proto/ge_api.proto +++ b/ge/proto/ge_api.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + syntax = "proto3"; package ge.api_pb; diff --git a/ge/proto/ge_ir.proto b/ge/proto/ge_ir.proto index e7bfe0cb..87886c84 100644 --- a/ge/proto/ge_ir.proto +++ b/ge/proto/ge_ir.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package ge.proto; diff --git a/ge/proto/insert_op.proto b/ge/proto/insert_op.proto index bf918b20..a059e122 100644 --- a/ge/proto/insert_op.proto +++ b/ge/proto/insert_op.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + syntax = "proto3"; package domi; @@ -45,9 +61,6 @@ message AippOpParams { // ±êʶ¶ÔÄ£Ð͵ĵڼ¸¸öÊäÈë×öAIPP´¦Àí£¬ÀýÈçÄ£ÐÍÓÐÁ½¸öÊäÈ룬ÐèÒª¶ÔµÚ2¸öÊäÈë×öAIPP£¬ÔòÅäÖÃrelated_input_rankΪ1¡£ uint32 related_input_rank = 2; - // related_input_name is optional and the top name of data node which inserts aipp - string related_input_name = 6; - // input_edge_idx²ÎÊýΪ¿ÉÑ¡£¬ÀàÐÍΪÕûÐÍ£¬ÅäÖ÷¶Î§Îª>=0¡£ // ÅäÖøòÎÊýµÄ×÷Óã¬ÔÚÓÚ¶ÔDataËã×Ó²»Í¬µÄÊä³ö×ö²»Í¬µÄAIPP´¦Àí£¬Èç¹û¸Ã²ÎÊýûÓÐÅäÖã¬Ä¬È϶Ôrelated_input_rankÖ¸¶¨µÄÄ£ÐÍÊäÈëµÄËùÓÐÊä³ö±ß×öAIPP¡£ // ÅäÖÃÖµ <= DataËã×ÓÊä³ö±ßµÄ¸öÊý¡£ diff --git a/ge/proto/om.proto b/ge/proto/om.proto index e15e5f80..dd992191 100644 --- a/ge/proto/om.proto +++ b/ge/proto/om.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ + syntax = "proto3"; package domi; diff --git a/ge/proto/op_mapping_info.proto b/ge/proto/op_mapping_info.proto index e23b7ebe..7b84a115 100644 --- a/ge/proto/op_mapping_info.proto +++ b/ge/proto/op_mapping_info.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package aicpu.dump; diff --git a/ge/proto/optimizer_priority.proto b/ge/proto/optimizer_priority.proto index 769619cf..3327be8a 100644 --- a/ge/proto/optimizer_priority.proto +++ b/ge/proto/optimizer_priority.proto @@ -1,3 +1,19 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + syntax = "proto3"; package ge.optimizers; diff --git a/ge/proto/task.proto b/ge/proto/task.proto index d0c09840..50ea061b 100644 --- a/ge/proto/task.proto +++ b/ge/proto/task.proto @@ -1,14 +1,19 @@ -/* Copyright (C) 2018. 
Huawei Technologies Co., Ltd. All rights reserved. +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + syntax = "proto3"; package domi; diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index 22a3ed86..44c29460 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -1,26 +1,23 @@ /** -* Copyright 2020 Huawei Technologies Co., Ltd -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include "session/inner_session.h" - #include #include #include - -#include "analyzer/analyzer.h" #include "adx_datadump_server.h" #include "common/dump/dump_properties.h" #include "common/util.h" @@ -28,7 +25,6 @@ #include "graph/ge_context.h" #include "graph/ge_global_options.h" #include "graph/ge_local_context.h" -#include "graph/common/local_context.h" #include "graph/load/new_model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/utils/tensor_adapter.h" @@ -39,312 +35,295 @@ namespace { const int32_t kDumpStatus = 0; Status CheckReuseMemoryOption(const std::map &options) { -auto iter = options.find(OPTION_EXEC_DISABLE_REUSED_MEMORY); -if (iter != options.end()) { -if (iter->second == "0") { -GELOGD("%s=0, reuse memory is open", OPTION_EXEC_DISABLE_REUSED_MEMORY); -} else if (iter->second == "1") { -GELOGD("%s=1, reuse memory is close", OPTION_EXEC_DISABLE_REUSED_MEMORY); -} else { -GELOGE(PARAM_INVALID, "option %s=%s is invalid", OPTION_EXEC_DISABLE_REUSED_MEMORY, iter->second.c_str()); -return FAILED; -} -} -return SUCCESS; -} + auto iter = options.find(OPTION_EXEC_DISABLE_REUSED_MEMORY); + if (iter != options.end()) { + if (iter->second == "0") { + GELOGD("%s=0, reuse memory is open", OPTION_EXEC_DISABLE_REUSED_MEMORY); + } else if (iter->second == "1") { + GELOGD("%s=1, reuse memory is close", 
OPTION_EXEC_DISABLE_REUSED_MEMORY); + } else { + GELOGE(PARAM_INVALID, "option %s=%s is invalid", OPTION_EXEC_DISABLE_REUSED_MEMORY, iter->second.c_str()); + return FAILED; + } + } + return SUCCESS; } +} // namespace static std::mutex mutex_; // BuildGraph and RunGraph use bool InnerSession::is_dump_server_inited_ = false; InnerSession::InnerSession(uint64_t session_id, const std::map &options) -: init_flag_(false), session_id_(session_id), options_(options) {} + : init_flag_(false), session_id_(session_id), options_(options), graph_manager_(domi::GetContext()) {} Status InnerSession::Initialize() { -if (init_flag_) { -GELOGW("[InnerSession:%lu] session already initialize.", session_id_); -return SUCCESS; -} + if (init_flag_) { + GELOGW("[InnerSession:%lu] session already initialize.", session_id_); + return SUCCESS; + } -// If the global options and the session options are duplicated, the session options is preferred. -auto all_options = options_; -all_options.insert(GetMutableGlobalOptions().begin(), GetMutableGlobalOptions().end()); + // If the global options and the session options are duplicated, the session options is preferred. 
+ auto all_options = options_; + all_options.insert(GetMutableGlobalOptions().begin(), GetMutableGlobalOptions().end()); -Status ret = CheckReuseMemoryOption(all_options); -if (ret != SUCCESS) { -GELOGE(ret, "[InnerSession:%lu] check reuse memory option failed.", session_id_); -return ret; -} + Status ret = CheckReuseMemoryOption(all_options); + if (ret != SUCCESS) { + GELOGE(ret, "[InnerSession:%lu] check reuse memory option failed.", session_id_); + return ret; + } -UpdateThreadContext(std::map{}); + UpdateThreadContext(std::map{}); -GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); + GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); -DumpProperties dump_properties; -dump_properties.InitByOptions(); -GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "Add dump properties failed"); + DumpProperties dump_properties; + dump_properties.InitByOptions(); + GE_CHK_STATUS_RET(AddDumpProperties(dump_properties), "Add dump properties failed"); -ret = graph_manager_.Initialize(options_); -if (ret != SUCCESS) { -GELOGE(ret, "[InnerSession:%lu] initialize failed.", session_id_); -GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); -return ret; -} + ret = graph_manager_.Initialize(options_); + if (ret != SUCCESS) { + GELOGE(ret, "[InnerSession:%lu] initialize failed.", session_id_); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); + return ret; + } -ret = VarManager::Instance(session_id_)->SetMemoryMallocSize(all_options); -if (ret != SUCCESS) { -GELOGE(ret, "failed to set malloc size"); -(void)graph_manager_.Finalize(); -GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); -GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); -return ret; -} + ret = VarManager::Instance(session_id_)->SetMemoryMallocSize(all_options); + if (ret != SUCCESS) { + GELOGE(ret, "failed to set malloc size"); + (void)graph_manager_.Finalize(); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); + 
GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); + return ret; + } -int32_t version = static_cast(SessionVersion::ClOUD_VERSION); -const int DEFAULT_DEVICE_ID = 0; -const int DEFAULT_JOB_ID = 0; -ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID); -if (ret != SUCCESS) { -GELOGE(ret, "failed to init session instance"); -GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); -} -init_flag_ = true; -return SUCCESS; + int32_t version = static_cast(SessionVersion::ClOUD_VERSION); + const int DEFAULT_DEVICE_ID = 0; + const int DEFAULT_JOB_ID = 0; + ret = VarManager::Instance(session_id_)->Init(version, session_id_, DEFAULT_DEVICE_ID, DEFAULT_JOB_ID); + if (ret != SUCCESS) { + GELOGE(ret, "failed to init session instance"); + GE_CHK_STATUS(RemoveDumpProperties(), "Remove dump properties failed"); + } + init_flag_ = true; + return SUCCESS; } Status InnerSession::Finalize() { -std::lock_guard lock(resource_mutex_); -if (!init_flag_) { -GELOGW("[InnerSession:%lu] session does not initialize.", session_id_); -return SUCCESS; -} -UpdateThreadContext(std::map{}); -Status ret = graph_manager_.Finalize(); -if (ret != SUCCESS) { -// Subsequent code execution is required, so no return is required -GELOGE(ret, "[InnerSession:%lu] finalize failed.", session_id_); -} + std::lock_guard lock(resource_mutex_); + if (!init_flag_) { + GELOGW("[InnerSession:%lu] session does not initialize.", session_id_); + return SUCCESS; + } + UpdateThreadContext(std::map{}); + Status ret = graph_manager_.Finalize(); + if (ret != SUCCESS) { + // Subsequent code execution is required, so no return is required + GELOGE(ret, "[InnerSession:%lu] finalize failed.", session_id_); + } -ModelManager::GetInstance()->DestroyAicpuSession(session_id_); -init_flag_ = false; -// release var memory -GELOGI("VarManager free var memory."); -(void)VarManager::Instance(session_id_)->FreeVarMemory(); -// release analyzer saved info(Session 
Level) -Analyzer::GetInstance()->DestroySessionJsonObject(session_id_); + ModelManager::GetInstance()->DestroyAicpuSession(session_id_); + init_flag_ = false; + // release var memory + GELOGI("VarManager free var memory."); + (void)VarManager::Instance(session_id_)->FreeVarMemory(); -GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); -GE_CHK_STATUS_RET(RemoveDumpProperties(), "Remove dump properties failed"); + GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); + GE_CHK_STATUS_RET(RemoveDumpProperties(), "Remove dump properties failed"); -return ret; + return ret; } Status InnerSession::GetVariable(const std::string &name, Tensor &val) { -UpdateThreadContext(std::map{}); -return graph_manager_.GetVariable(name, val); + UpdateThreadContext(std::map{}); + return graph_manager_.GetVariable(name, val); } Status InnerSession::AddGraph(uint32_t graph_id, const Graph &graph) { -std::map options; -return AddGraph(graph_id, graph, options); + std::map options; + return AddGraph(graph_id, graph, options); } Status InnerSession::AddGraph(uint32_t graph_id, const Graph &graph, - const std::map &options) { -std::lock_guard lock(resource_mutex_); -if (!init_flag_) { -GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); -return GE_SESS_INIT_FAILED; -} -UpdateThreadContext(options); -Status ret = graph_manager_.AddGraph(graph_id, graph, options, domi::GetContext()); -if (ret != SUCCESS) { -GELOGE(ret, "[InnerSession:%lu] add graph %u failed.", session_id_, graph_id); -return ret; -} + const std::map &options) { + std::lock_guard lock(resource_mutex_); + if (!init_flag_) { + GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); + return GE_SESS_INIT_FAILED; + } + UpdateThreadContext(options); + Status ret = graph_manager_.AddGraph(graph_id, graph, options); + if (ret != SUCCESS) { + GELOGE(ret, "[InnerSession:%lu] add graph %u failed.", session_id_, graph_id); + return ret; + } 
-GELOGI("[InnerSession:%lu] add graph success, graph_id=%u.", session_id_, graph_id); -return SUCCESS; + GELOGI("[InnerSession:%lu] add graph success, graph_id=%u.", session_id_, graph_id); + return SUCCESS; } Status InnerSession::RunGraph(uint32_t graph_id, const std::vector &inputs, std::vector &outputs) { -GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id); -if (mutex_.try_lock()) { -std::lock_guard lock(mutex_, std::adopt_lock); -if (!init_flag_) { -GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); -return GE_SESS_INIT_FAILED; -} -UpdateThreadContext(graph_id); -vector geInputs; -for (auto &item : inputs) { -geInputs.push_back(TensorAdapter::AsGeTensor(item)); -} -vector geOutputs; -Status ret = graph_manager_.RunGraph(graph_id, geInputs, geOutputs, session_id_); -domi::GetContext().out_nodes_map.clear(); -domi::GetContext().user_out_nodes.clear(); -if (ret != SUCCESS) { -GELOGE(ret, "[InnerSession:%lu] run graph failed, graph_id=%u.", session_id_, graph_id); -return ret; -} -outputs.clear(); -for (auto &item : geOutputs) { -outputs.push_back(TensorAdapter::AsTensor(item)); -} + GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id); + if (mutex_.try_lock()) { + std::lock_guard lock(mutex_, std::adopt_lock); + if (!init_flag_) { + GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); + return GE_SESS_INIT_FAILED; + } + UpdateThreadContext(graph_id); + vector geInputs; + for (auto &item : inputs) { + geInputs.push_back(TensorAdapter::AsGeTensor(item)); + } + vector geOutputs; + Status ret = graph_manager_.RunGraph(graph_id, geInputs, geOutputs, session_id_); + domi::GetContext().out_nodes_map.clear(); + domi::GetContext().user_out_nodes.clear(); + if (ret != SUCCESS) { + GELOGE(ret, "[InnerSession:%lu] run graph failed, graph_id=%u.", session_id_, graph_id); + return ret; + } + outputs.clear(); + for (auto &item : geOutputs) { + 
outputs.push_back(TensorAdapter::AsTensor(item)); + } -GELOGI("[InnerSession:%lu] run graph success, graph_id=%u.", session_id_, graph_id); -return SUCCESS; -} else { -GELOGE(GE_SESS_ALREADY_RUNNING, "[InnerSession:%lu] run graph failed, graph_id=%u.", session_id_, graph_id); -return GE_SESS_ALREADY_RUNNING; -} + GELOGI("[InnerSession:%lu] run graph success, graph_id=%u.", session_id_, graph_id); + return SUCCESS; + } else { + GELOGE(GE_SESS_ALREADY_RUNNING, "[InnerSession:%lu] run graph failed, graph_id=%u.", session_id_, graph_id); + return GE_SESS_ALREADY_RUNNING; + } } Status InnerSession::RemoveGraph(uint32_t graph_id) { -std::lock_guard lock(resource_mutex_); -if (!init_flag_) { -GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); -return GE_SESS_INIT_FAILED; -} -UpdateThreadContext(graph_id); -Status ret = graph_manager_.RemoveGraph(graph_id); -if (ret != SUCCESS) { -GELOGE(ret, "[InnerSession:%lu] remove graph failed, graph_id=%u.", session_id_, graph_id); -return ret; -} + std::lock_guard lock(resource_mutex_); + if (!init_flag_) { + GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); + return GE_SESS_INIT_FAILED; + } + UpdateThreadContext(graph_id); + Status ret = graph_manager_.RemoveGraph(graph_id); + if (ret != SUCCESS) { + GELOGE(ret, "[InnerSession:%lu] remove graph failed, graph_id=%u.", session_id_, graph_id); + return ret; + } -GELOGI("[InnerSession:%lu] remove graph success, graph_id=%u.", session_id_, graph_id); -return SUCCESS; + GELOGI("[InnerSession:%lu] remove graph success, graph_id=%u.", session_id_, graph_id); + return SUCCESS; } Status InnerSession::RegisterCallBackFunc( -const std::string &key, -const std::function &)> &callback) { -std::lock_guard lock(resource_mutex_); -if (!init_flag_) { -GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); -return GE_SESS_INIT_FAILED; -} -UpdateThreadContext(std::map{}); -Status ret = 
graph_manager_.RegisterCallBackFunc(key, callback); -if (ret != SUCCESS) { -GELOGE(ret, "[InnerSession:%lu] register %s callback function failed.", session_id_, key.c_str()); -return ret; -} + const std::string &key, const std::function &)> &callback) { + std::lock_guard lock(resource_mutex_); + if (!init_flag_) { + GELOGE(GE_SESS_INIT_FAILED, "[InnerSession:%lu] initialize failed.", session_id_); + return GE_SESS_INIT_FAILED; + } + UpdateThreadContext(std::map{}); + Status ret = graph_manager_.RegisterCallBackFunc(key, callback); + if (ret != SUCCESS) { + GELOGE(ret, "[InnerSession:%lu] register %s callback function failed.", session_id_, key.c_str()); + return ret; + } -GELOGI("[InnerSession:%lu] register %s callback function success.", session_id_, key.c_str()); -return SUCCESS; + GELOGI("[InnerSession:%lu] register %s callback function success.", session_id_, key.c_str()); + return SUCCESS; } Status InnerSession::BuildGraph(uint32_t graph_id, const std::vector &inputs) { -UpdateThreadContext(graph_id); -GELOGI("[InnerSession:%lu] build graph on session, graph_id=%u.", session_id_, graph_id); -std::vector ge_inputs; -for (auto const &input : inputs) { -std::vector input_dims; -std::transform(input.dims.begin(), input.dims.end(), std::back_inserter(input_dims), - [](int64_t x) -> int64_t { return x; }); -GeShape input_shape(input_dims); -GeTensorDesc input_tensor_desc; -input_tensor_desc.SetShape(input_shape); -input_tensor_desc.SetDataType(static_cast(input.data_type)); -ge_inputs.emplace_back(input_tensor_desc); -} -GeRootModelPtr ge_root_model = nullptr; -Status ret = graph_manager_.BuildGraph(graph_id, ge_inputs, ge_root_model, session_id_, true); -if (ret != SUCCESS) { -GELOGE(ret, "[InnerSession:%lu] build graph failed, graph_id=%u.", session_id_, graph_id); -return ret; -} -GELOGI("[InnerSession:%lu] build graph success, graph_id=%u.", session_id_, graph_id); -return ret; + UpdateThreadContext(graph_id); + GELOGI("[InnerSession:%lu] build graph on session, 
graph_id=%u.", session_id_, graph_id); + std::vector ge_inputs; + for (auto const &input : inputs) { + std::vector input_dims; + std::transform(input.dims.begin(), input.dims.end(), std::back_inserter(input_dims), + [](int64_t x) -> int64_t { return x; }); + GeShape input_shape(input_dims); + GeTensorDesc input_tensor_desc; + input_tensor_desc.SetShape(input_shape); + input_tensor_desc.SetDataType(static_cast(input.data_type)); + ge_inputs.emplace_back(input_tensor_desc); + } + GeRootModelPtr ge_root_model = nullptr; + Status ret = graph_manager_.BuildGraph(graph_id, ge_inputs, ge_root_model, session_id_, true); + if (ret != SUCCESS) { + GELOGE(ret, "[InnerSession:%lu] build graph failed, graph_id=%u.", session_id_, graph_id); + return ret; + } + GELOGI("[InnerSession:%lu] build graph success, graph_id=%u.", session_id_, graph_id); + return ret; } Status InnerSession::RunGraphAsync(uint32_t graph_id, const std::vector &inputs, - RunAsyncCallback callback) { -UpdateThreadContext(graph_id); -GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id); -Status ret = graph_manager_.RunGraphAsync(graph_id, inputs, session_id_, callback); -if (ret != SUCCESS) { -GELOGE(ret, "[InnerSession:%lu] run graph failed, graph_id=%u.", session_id_, graph_id); -return ret; -} -GELOGI("[InnerSession:%lu] run graph success, graph_id=%u.", session_id_, graph_id); -return ret; + RunAsyncCallback callback) { + UpdateThreadContext(graph_id); + GELOGI("[InnerSession:%lu] run graph on session, graph_id=%u.", session_id_, graph_id); + Status ret = graph_manager_.RunGraphAsync(graph_id, inputs, session_id_, callback); + if (ret != SUCCESS) { + GELOGE(ret, "[InnerSession:%lu] run graph failed, graph_id=%u.", session_id_, graph_id); + return ret; + } + GELOGI("[InnerSession:%lu] run graph success, graph_id=%u.", session_id_, graph_id); + return ret; } const GraphManager &InnerSession::getGraphManagerObj() const { return graph_manager_; } void 
InnerSession::UpdateThreadContext(const std::map &options) { -GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); -GetThreadLocalContext().SetSessionOption(options_); -GetThreadLocalContext().SetGraphOption(options); -GetContext().SetSessionId(session_id_); -SetRtSocVersion(); + GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); + GetThreadLocalContext().SetSessionOption(options_); + GetThreadLocalContext().SetGraphOption(options); + GetContext().SetSessionId(session_id_); } void InnerSession::UpdateThreadContext(uint32_t graph_id) { -auto options = graph_manager_.GetGraphOptions(graph_id); -if (options == nullptr) { -GELOGW("graph level options is null."); -UpdateThreadContext(std::map{}); -} else { -UpdateThreadContext(*options); -} + auto options = graph_manager_.GetGraphOptions(graph_id); + if (options == nullptr) { + GELOGW("graph level options is null."); + UpdateThreadContext(std::map{}); + } else { + UpdateThreadContext(*options); + } } bool InnerSession::IsGraphNeedRebuild(uint32_t graph_id) { -UpdateThreadContext(graph_id); -return graph_manager_.IsGraphNeedRebuild(graph_id); + UpdateThreadContext(graph_id); + return graph_manager_.IsGraphNeedRebuild(graph_id); } Status InnerSession::GetAllVariables(std::map &all_variables) { -return VarManager::Instance(session_id_)->GetAllVariables(all_variables); + return VarManager::Instance(session_id_)->GetAllVariables(all_variables); } Status InnerSession::GenCheckPointGraph(const std::map &all_variables, Graph &graph) { -return graph_manager_.GenCheckPointGraph(all_variables, graph); + return graph_manager_.GenCheckPointGraph(all_variables, graph); } Status InnerSession::SaveVariables(const Graph &graph, const std::vector &var_names, - const std::vector &outputs, std::vector &var_values) { -return graph_manager_.SaveVariables(graph, var_names, outputs, var_values); + const std::vector &outputs, std::vector &var_values) { + return graph_manager_.SaveVariables(graph, var_names, 
outputs, var_values); } Status InnerSession::AddDumpProperties(const DumpProperties &dump_properties) { -if (!is_dump_server_inited_) { -if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { -GE_IF_BOOL_EXEC(AdxDataDumpServerInit() != kDumpStatus, GELOGE(PARAM_INVALID, "Data dump server init failed"); - return PARAM_INVALID) -GELOGI("Init adx data dump server success"); -is_dump_server_inited_ = true; -} -} -PropertiesManager::Instance().AddDumpProperties(session_id_, dump_properties); -return SUCCESS; + if (!is_dump_server_inited_) { + if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) { + GE_IF_BOOL_EXEC(AdxDataDumpServerInit() != kDumpStatus, GELOGE(PARAM_INVALID, "Data dump server init failed"); + return PARAM_INVALID) + GELOGI("Init adx data dump server success"); + is_dump_server_inited_ = true; + } + } + PropertiesManager::Instance().AddDumpProperties(session_id_, dump_properties); + return SUCCESS; } Status InnerSession::RemoveDumpProperties() { -PropertiesManager::Instance().RemoveDumpProperties(session_id_); -if (is_dump_server_inited_ && PropertiesManager::Instance().GetDumpPropertiesMap().empty()) { -GE_IF_BOOL_EXEC(AdxDataDumpServerUnInit() != kDumpStatus, GELOGE(PARAM_INVALID, "Data dump server uninit failed"); - return PARAM_INVALID) -GELOGI("UnInit adx data dump server success"); -is_dump_server_inited_ = false; -} -return SUCCESS; -} - -void InnerSession::SetRtSocVersion() { - const auto &global_options = GetMutableGlobalOptions(); - auto it = global_options.find(ge::SOC_VERSION); - if (it != global_options.end()) { - const char *soc_version = it->second.c_str(); - rtError_t rt_ret = rtSetSocVersion(soc_version); - if (rt_ret != RT_ERROR_NONE) { - GELOGW("Set soc version %s failed. 
ret:0x%X", soc_version, rt_ret); - } - GELOGI("Set soc version %s success.", soc_version); + PropertiesManager::Instance().RemoveDumpProperties(session_id_); + if (is_dump_server_inited_ && PropertiesManager::Instance().GetDumpPropertiesMap().empty()) { + GE_IF_BOOL_EXEC(AdxDataDumpServerUnInit() != kDumpStatus, GELOGE(PARAM_INVALID, "Data dump server uninit failed"); + return PARAM_INVALID) + GELOGI("UnInit adx data dump server success"); + is_dump_server_inited_ = false; } + return SUCCESS; } } // namespace ge diff --git a/ge/session/inner_session.h b/ge/session/inner_session.h index 25f5c307..94d1ac12 100644 --- a/ge/session/inner_session.h +++ b/ge/session/inner_session.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,8 +57,7 @@ class InnerSession { Status GetVariable(const std::string &name, Tensor &val); Status RegisterCallBackFunc( - const std::string &key, - const std::function &)> &callback); + const std::string &key, const std::function &)> &callback); const GraphManager &getGraphManagerObj() const; @@ -68,8 +67,6 @@ class InnerSession { Status RemoveDumpProperties(); - void SetRtSocVersion(); - private: bool init_flag_; uint64_t session_id_; diff --git a/ge/session/omg.cc b/ge/session/omg.cc index 0d8e084e..0fb342e1 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ #include #include #include "common/auth/file_saver.h" +#include "common/convert/pb2json.h" #include "common/debug/log.h" #include "common/debug/memory_dumper.h" #include "common/ge/ge_util.h" @@ -44,7 +45,6 @@ #include "omg/parser/parser_factory.h" #include "omg/parser/weights_parser.h" #include "parser/common/pre_checker.h" -#include "parser/common/convert/pb2json.h" #include "proto/ge_ir.pb.h" #include "register/op_registry.h" @@ -75,7 +75,7 @@ const std::set kOmBlackFields = {"output", "data_offset", "data", " "memory_size", "weight_size", "size", "bt", "quantize_factor"}; static std::map output_type_str_to_datatype = { - {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; static bool CheckInputTrueOrFalse(const std::string &s, const std::string &atc_param) { if ((s == "true") || (s == "false")) { @@ -257,11 +257,6 @@ void FindParserSo(const string &path, vector &file_list, string &caffe_p if (real_path.empty()) { // plugin path does not exist return; } - struct stat stat_buf; - if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) { - GELOGI("The path %s is not a directory.", real_path.c_str()); - return; - } struct dirent *dent(nullptr); DIR *dir = opendir(real_path.c_str()); @@ -277,11 +272,21 @@ void FindParserSo(const string &path, vector &file_list, string &caffe_p string full_name = real_path + "/" + name; const string so_suff = ".so"; const string caffe_parser_so_suff = "lib_caffe_parser.so"; + const string aicpu_so_suff = "_aicpu.so"; + const string aicpu_host_so_suff = "_online.so"; if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) { if (full_name.size() >= caffe_parser_so_suff.size() && full_name.compare(full_name.size() - caffe_parser_so_suff.size(), caffe_parser_so_suff.size(), caffe_parser_so_suff) == 0) { caffe_parser_path = full_name; + } else 
if ((full_name.size() >= aicpu_so_suff.size() && + full_name.compare(full_name.size() - aicpu_so_suff.size(), aicpu_so_suff.size(), aicpu_so_suff) == + 0) || + (full_name.size() >= aicpu_host_so_suff.size() && + full_name.compare(full_name.size() - aicpu_host_so_suff.size(), aicpu_host_so_suff.size(), + aicpu_host_so_suff) == 0)) { + // aicpu so, Put the file path into the omgcontext and save into the model in the builder stage; + domi::GetContext().aicpu_op_run_paths.push_back(full_name); } else { // save parser so path into file_list vector file_list.push_back(full_name); } @@ -294,6 +299,29 @@ void FindParserSo(const string &path, vector &file_list, string &caffe_p return; } +Status CheckCustomAiCpuOpLib() { + std::vector vec_op_type; + domi::OpRegistry::Instance()->GetOpTypeByImplyType(vec_op_type, domi::ImplyType::CUSTOM); + for (uint32_t i = 0; i < vec_op_type.size(); i++) { + bool aicpu_so_exist = false; + std::string ai_cpu_so_name = "lib" + vec_op_type[i] + "_aicpu.so"; + for (uint32_t j = 0; j < domi::GetContext().aicpu_op_run_paths.size(); j++) { + string bin_file_path = domi::GetContext().aicpu_op_run_paths[j]; + if (bin_file_path.size() >= ai_cpu_so_name.size() && + bin_file_path.compare(bin_file_path.size() - ai_cpu_so_name.size(), ai_cpu_so_name.size(), ai_cpu_so_name) == + 0) { + aicpu_so_exist = true; + break; + } + } + if (!aicpu_so_exist) { + GELOGE(domi::FAILED, "cant find aicpu run so(%s), please check the plugin path!", ai_cpu_so_name.c_str()); + return domi::FAILED; + } + } + return domi::SUCCESS; +} + Status SetOutFormatAndDataTypeAttr(ge::OpDescPtr op_desc, const ge::Format format, const ge::DataType data_type) { if (op_desc == nullptr) { GELOGE(domi::FAILED, "Input op desc invalid."); @@ -419,40 +447,14 @@ Status CheckOutNode(ge::OpDescPtr op_desc, int32_t index) { "out_node [%s] output index:%d must be smaller " "than node output size:%d and can not be negative!", op_desc->GetName().c_str(), index, out_size); - std::string fail_reason = 
"output index:" + to_string(index) + " must be smaller than output size:" + - to_string(out_size) + " and can not be negative!"; + std::string fail_reason = "output index:" + to_string(index) + + " must be smaller than output size:" + to_string(out_size) + " and can not be negative!"; ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, {"out_nodes", op_desc->GetName(), fail_reason}); return domi::FAILED; } return domi::SUCCESS; } -Status GetDefaultOutInfo(ge::ComputeGraphPtr &compute_graph, - std::vector> &output_nodes_info) { - std::vector> default_out_nodes = domi::GetContext().default_out_nodes; - if (domi::GetContext().type == domi::CAFFE && !default_out_nodes.empty()) { - for (uint32_t i = 0; i < default_out_nodes.size(); ++i) { - ge::NodePtr out_node = compute_graph->FindNode(default_out_nodes[i].first); - if (out_node == nullptr) { - ErrorManager::GetInstance().ATCReportErrMessage("E10016", {"parameter", "opname"}, - {"out_nodes", default_out_nodes[i].first}); - GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", default_out_nodes[i].first.c_str()); - return domi::FAILED; - } - output_nodes_info.push_back(std::make_pair(out_node, default_out_nodes[i].second)); - GELOGD("Get default output node:%s.", out_node->GetName().c_str()); - } - return domi::SUCCESS; - } - - for (ge::NodePtr node : compute_graph->GetDirectNode()) { - if (!node->GetInAllNodes().empty() && node->GetOutAllNodes().empty()) { - Status ret = GetOutputLeaf(node, output_nodes_info); - GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); - } - } - return domi::SUCCESS; -} Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output) { ge::ComputeGraphPtr compute_graph = ge::GraphUtils::GetComputeGraph(graph); @@ -503,9 +505,11 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const } // default output node (leaf) if (user_out_nodes.empty()) { - if 
(GetDefaultOutInfo(compute_graph, output_nodes_info) != SUCCESS) { - GELOGE(domi::FAILED, "Get default output info failed."); - return domi::FAILED; + for (ge::NodePtr node : compute_graph->GetDirectNode()) { + if (!node->GetInAllNodes().empty() && node->GetOutAllNodes().empty()) { + Status ret = GetOutputLeaf(node, output_nodes_info); + GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); + } } } GetOutputNodesNameAndIndex(output_nodes_info, output_nodes_name); @@ -549,7 +553,6 @@ Status GetOutputLeaf(NodePtr node, std::vector> if (node->GetType() != NETOUTPUT) { for (size_t index = 0; index < size; ++index) { output_nodes_info.push_back(std::make_pair(node, index)); - GELOGD("Get output leaf node:%s.", node->GetName().c_str()); } } else { const auto in_anchors = node->GetAllInDataAnchors(); @@ -626,8 +629,8 @@ Status ParseOutNodes(const string &out_nodes) { continue; } ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); + "E10001", {"parameter", "value", "reason"}, + {"--out_nodes", node, "the correct format is \"node_name1:0;node_name1:1;node_name2:0\""}); GELOGE(PARAM_INVALID, "The input format of --out_nodes is invalid, the correct format is " "\"node_name1:0;node_name1:1;node_name2:0\", while the actual input is %s.", @@ -637,7 +640,8 @@ Status ParseOutNodes(const string &out_nodes) { if (!domi::GetContext().user_out_nodes_top_vec.empty()) { ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, {"--out_nodes", out_nodes, "is not all index or top_name"}); - GELOGE(PARAM_INVALID, "This out_nodes str must be all index or top_name, while the actual input is %s", out_nodes.c_str()); + GELOGE(PARAM_INVALID, "This out_nodes str must be all index or top_name, while the actual input is %s", + out_nodes.c_str()); return PARAM_INVALID; } // stoi: The method may throw an exception: 
invalid_argument/out_of_range @@ -691,16 +695,16 @@ static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op } std::map &propertiesMap = domi::GetContext().op_conf_map; if (propertiesMap.empty()) { - ErrorManager::GetInstance().ATCReportErrMessage( - "E10003", {"parameter", "value", "reason"}, {"op_name_map", op_conf, "the file content is empty"}); + ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, + {"op_name_map", op_conf, "the file content is empty"}); GELOGE(PARAM_INVALID, "op_name_map file content is empty, please check file!"); return PARAM_INVALID; } for (auto iter = propertiesMap.begin(); iter != propertiesMap.end(); iter++) { GE_IF_BOOL_EXEC(graphNodeTypes.find(iter->second) == graphNodeTypes.end(), ErrorManager::GetInstance().ATCReportErrMessage( - "E10003", {"parameter", "value", "reason"}, - {"op_name_map", op_conf, "type[" + iter->second + "] is not found in model"}); + "E10003", {"parameter", "value", "reason"}, + {"op_name_map", op_conf, "type[" + iter->second + "] is not found in model"}); GELOGE(PARAM_INVALID, "Invalid parameter for op_name_map."); return PARAM_INVALID;); } return SUCCESS; @@ -830,35 +834,35 @@ void GetGroupName(ge::proto::ModelDef &model_def) { auto modelAttrMap = model_def.mutable_attr(); auto fusionModelOpListIter = modelAttrMap->find(MODEL_ATTR_FUSION_MODEL_DEF); GE_IF_BOOL_EXEC( - fusionModelOpListIter != modelAttrMap->end(), int fusionOpIndex = 0; - for (int i = 0; i < model_def.graph_size(); i++) { - auto graph = model_def.mutable_graph(i); - for (int j = 0; j < graph->op_size(); j++) { - int64_t scope_id = 0; - auto bt = fusionModelOpListIter->second.list().bt(fusionOpIndex++); - ge::proto::OpDef fusion_op_def; - GE_CHK_BOOL_EXEC(bt.size() != 0, GELOGW("Invalid bt size"); return;); - - (void)(fusion_op_def.ParseFromArray(bt.data(), bt.size())); - auto fusion_attr_map = fusion_op_def.mutable_attr(); - auto fusion_iter = fusion_attr_map->find(kScopeIdAttr); - 
GE_IF_BOOL_EXEC(fusion_iter == fusion_attr_map->end(), continue;); - - scope_id = fusion_iter->second.i(); - ge::proto::OpDef *opdef = graph->mutable_op(j); - auto attr_map = opdef->mutable_attr(); - - int64_t stream_id = opdef->stream_id(); - - uint16_t l1_id = (((uint64_t)scope_id & 0xFFFF0000)) >> 16; - GE_IF_BOOL_EXEC(l1_id != 0, ostringstream groupName; groupName << "group_op_l1_" << l1_id << "_" << stream_id; - (*attr_map)["group_op_name"].set_s(groupName.str()); continue;); - - uint16_t ub_id = ((uint64_t)scope_id & 0xFFFF); - GE_IF_BOOL_EXEC(ub_id != 0, ostringstream groupName; groupName << "group_op_ub_" << ub_id << "_" << stream_id; - (*attr_map)["group_op_name"].set_s(groupName.str());); - } - }); + fusionModelOpListIter != modelAttrMap->end(), int fusionOpIndex = 0; + for (int i = 0; i < model_def.graph_size(); i++) { + auto graph = model_def.mutable_graph(i); + for (int j = 0; j < graph->op_size(); j++) { + int64_t scope_id = 0; + auto bt = fusionModelOpListIter->second.list().bt(fusionOpIndex++); + ge::proto::OpDef fusion_op_def; + GE_CHK_BOOL_EXEC(bt.size() != 0, GELOGW("Invalid bt size"); return;); + + (void)(fusion_op_def.ParseFromArray(bt.data(), bt.size())); + auto fusion_attr_map = fusion_op_def.mutable_attr(); + auto fusion_iter = fusion_attr_map->find(kScopeIdAttr); + GE_IF_BOOL_EXEC(fusion_iter == fusion_attr_map->end(), continue;); + + scope_id = fusion_iter->second.i(); + ge::proto::OpDef *opdef = graph->mutable_op(j); + auto attr_map = opdef->mutable_attr(); + + int64_t stream_id = opdef->stream_id(); + + uint16_t l1_id = (((uint64_t)scope_id & 0xFFFF0000)) >> 16; + GE_IF_BOOL_EXEC(l1_id != 0, ostringstream groupName; groupName << "group_op_l1_" << l1_id << "_" << stream_id; + (*attr_map)["group_op_name"].set_s(groupName.str()); continue;); + + uint16_t ub_id = ((uint64_t)scope_id & 0xFFFF); + GE_IF_BOOL_EXEC(ub_id != 0, ostringstream groupName; groupName << "group_op_ub_" << ub_id << "_" << stream_id; + 
(*attr_map)["group_op_name"].set_s(groupName.str());); + } + }); } FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, const char *json_file) { @@ -878,66 +882,65 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con uint8_t *model_data = nullptr; uint32_t model_len = 0; - try { - // Parse the contents of the file to get the modeldef object - ret = ModelParserBase::ParseModelContent(model, model_data, model_len); - if (ret == SUCCESS) { - OmFileLoadHelper omFileLoadHelper; - ge::graphStatus status = omFileLoadHelper.Init(model_data, model_len); - if (status != ge::GRAPH_SUCCESS) { - GELOGE(ge::FAILED, "Om file init failed."); - if (model.model_data != nullptr) { - delete[](char *) model.model_data; - model.model_data = nullptr; - } - return status; + + // Parse the contents of the file to get the modeldef object + ret = ModelParserBase::ParseModelContent(model, model_data, model_len); + if (ret == SUCCESS) { + OmFileLoadHelper omFileLoadHelper; + ge::graphStatus status = omFileLoadHelper.Init(model_data, model_len); + if (status != ge::GRAPH_SUCCESS) { + GELOGE(ge::FAILED, "Om file init failed."); + if (model.model_data != nullptr) { + delete[](char *) model.model_data; + model.model_data = nullptr; } + return status; + } - ModelPartition ir_part; - status = omFileLoadHelper.GetModelPartition(MODEL_DEF, ir_part); - if (status != ge::GRAPH_SUCCESS) { - GELOGE(ge::FAILED, "Get model part failed."); - if (model.model_data != nullptr) { - delete[](char *) model.model_data; - model.model_data = nullptr; - } - return status; + ModelPartition ir_part; + status = omFileLoadHelper.GetModelPartition(MODEL_DEF, ir_part); + if (status != ge::GRAPH_SUCCESS) { + GELOGE(ge::FAILED, "Get model part failed."); + if (model.model_data != nullptr) { + delete[](char *) model.model_data; + model.model_data = nullptr; } + return status; + } - ge::proto::ModelDef model_def; + ge::proto::ModelDef model_def; - // De serialization - bool 
flag = ReadProtoFromArray(ir_part.data, ir_part.size, &model_def); - if (flag) { - GetGroupName(model_def); + // De serialization + bool flag = ReadProtoFromArray(ir_part.data, ir_part.size, &model_def); + if (flag) { + GetGroupName(model_def); - json j; - Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); + json j; + Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); - ret = ModelSaver::SaveJsonToFile(json_file, j); - } else { - ret = INTERNAL_ERROR; - GELOGE(ret, "ReadProtoFromArray failed."); - } + ret = ModelSaver::SaveJsonToFile(json_file, j); } else { - GELOGE(PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); + ret = INTERNAL_ERROR; + GELOGE(ret, "ReadProtoFromArray failed."); } + } else { + GELOGE(PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); + } - if (model.model_data != nullptr) { - delete[](char *) model.model_data; - model.model_data = nullptr; - } - return ret; - } catch (const std::exception &e) { - GELOGE(FAILED, "Convert om model to json failed, exception message : %s.", e.what()); - return FAILED; + if (model.model_data != nullptr) { + delete[](char *) model.model_data; + model.model_data = nullptr; } + + return ret; } FMK_FUNC_HOST_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const char *json_file) { ge::ModelData model; + // Mode 2 does not need to verify the priority, and a default value of 0 is passed int32_t priority = 0; + // Load model from file Status ret = ModelParserBase::LoadFromFile(model_file, "", priority, model); auto free_model_data = [](void **ptr) -> void { @@ -951,36 +954,35 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const GELOGE(ret, "LoadFromFile failed."); return ret; } - + bool flag = false; + ge::proto::ModelDef model_def; try { - bool flag = false; - ge::proto::ModelDef model_def; flag = 
google::protobuf::TextFormat::ParseFromString(reinterpret_cast(model.model_data), &model_def); - - if (!flag) { - free_model_data(&model.model_data); - GELOGE(FAILED, "ParseFromString fail."); - return FAILED; - } - GetGroupName(model_def); - json j; - Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); - ret = ModelSaver::SaveJsonToFile(json_file, j); - if (ret != SUCCESS) { - free_model_data(&model.model_data); - GELOGE(ret, "Save json to file fail."); - return ret; - } - free_model_data(&model.model_data); - return SUCCESS; } catch (google::protobuf::FatalException &e) { free_model_data(&model.model_data); GELOGE(FAILED, "ParseFromString fail. exception message : %s", e.what()); return FAILED; - } catch (const std::exception &e) { - GELOGE(FAILED, "Convert pbtxt to json failed, exception message : %s.", e.what()); + } + + if (!flag) { + free_model_data(&model.model_data); + GELOGE(FAILED, "ParseFromString fail."); return FAILED; } + + GetGroupName(model_def); + json j; + Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); + ret = ModelSaver::SaveJsonToFile(json_file, j); + if (ret != SUCCESS) { + free_model_data(&model.model_data); + GELOGE(ret, "Save json to file fail."); + return ret; + } + + free_model_data(&model.model_data); + + return SUCCESS; } FMK_FUNC_HOST_VISIBILITY Status ConvertFwkModelToJson(const domi::FrameworkType framework, const char *model_file, @@ -993,8 +995,8 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertFwkModelToJson(const domi::FrameworkType } ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {"--framework", std::to_string(framework), "only support 0(Caffe) 3(TensorFlow)"}); + "E10001", {"parameter", "value", "reason"}, + {"--framework", std::to_string(framework), "only support 0(Caffe) 3(TensorFlow)"}); GELOGE(PARAM_INVALID, "Input parameter[--framework] is mandatory and it's value must be: 0(Caffe) 3(TensorFlow)."); return PARAM_INVALID; } @@ -1026,35 +1028,13 @@ 
FMK_FUNC_HOST_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const void UpdateOmgCtxWithParserCtx() { domi::GetContext().format = GetParserContext().format; domi::GetContext().input_dims = GetParserContext().input_dims; - domi::GetContext().user_input_dims = GetParserContext().user_input_dims; - domi::GetContext().is_dynamic_input = GetParserContext().is_dynamic_input; - domi::GetContext().type = GetParserContext().type; - domi::GetContext().user_out_nodes = GetParserContext().user_out_nodes; - domi::GetContext().train_flag = GetParserContext().train_flag; - domi::GetContext().run_mode = GetParserContext().run_mode; - domi::GetContext().op_conf_map = GetParserContext().op_conf_map; - domi::GetContext().out_nodes_map = GetParserContext().out_nodes_map; - domi::GetContext().input_nodes_format_map = GetParserContext().input_nodes_format_map; - domi::GetContext().out_top_names = GetParserContext().out_top_names; - domi::GetContext().user_out_nodes_top_vec = GetParserContext().user_out_nodes_top_vec; - domi::GetContext().default_out_nodes = GetParserContext().default_out_nodes; - domi::GetContext().data_top_names = GetParserContext().data_top_names; + return; } void UpdateParserCtxWithOmgCtx() { GetParserContext().format = domi::GetContext().format; GetParserContext().input_dims = domi::GetContext().input_dims; - GetParserContext().user_input_dims = domi::GetContext().user_input_dims; - GetParserContext().is_dynamic_input = domi::GetContext().is_dynamic_input; - GetParserContext().type = domi::GetContext().type; - GetParserContext().user_out_nodes = domi::GetContext().user_out_nodes; - GetParserContext().train_flag = domi::GetContext().train_flag; GetParserContext().run_mode = domi::GetContext().run_mode; - GetParserContext().op_conf_map = domi::GetContext().op_conf_map; - GetParserContext().out_nodes_map = domi::GetContext().out_nodes_map; - GetParserContext().input_nodes_format_map = domi::GetContext().input_nodes_format_map; - 
GetParserContext().out_top_names = domi::GetContext().out_top_names; - GetParserContext().user_out_nodes_top_vec = domi::GetContext().user_out_nodes_top_vec; - GetParserContext().data_top_names = domi::GetContext().data_top_names; + return; } } // namespace ge diff --git a/ge/session/session_manager.cc b/ge/session/session_manager.cc index 6f8c9432..35d97c31 100755 --- a/ge/session/session_manager.cc +++ b/ge/session/session_manager.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -227,8 +227,8 @@ Status SessionManager::GetNextSessionId(SessionId &next_session_id) { } Status SessionManager::RegisterCallBackFunc( - SessionId session_id, const std::string &key, - const std::function &)> &callback) { + SessionId session_id, const std::string &key, + const std::function &)> &callback) { if (!init_flag_) { GELOGE(GE_SESSION_MANAGER_NOT_INIT); return GE_SESSION_MANAGER_NOT_INIT; diff --git a/ge/session/session_manager.h b/ge/session/session_manager.h index 88864f61..1efb47d8 100644 --- a/ge/session/session_manager.h +++ b/ge/session/session_manager.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -132,8 +132,7 @@ class SessionManager { /// @param [out] var_values: variable values /// @return Status result of function /// - Status GetVariables(SessionId session_id, const std::vector &var_names, - std::vector &var_values); + Status GetVariables(SessionId session_id, const std::vector &var_names, std::vector &var_values); /// /// @ingroup ge_graph @@ -144,8 +143,8 @@ class SessionManager { /// @return Status result of function /// Status RegisterCallBackFunc( - SessionId session_id, const std::string &key, - const std::function &)> &callback); + SessionId session_id, const std::string &key, + const std::function &)> &callback); bool IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id); diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index bef3a558..f59fb7bd 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -36,8 +36,7 @@ size_t GetAlignedSize(size_t size) { } } // namespace -SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) { -} +SingleOp::SingleOp(std::mutex *stream_mutex, rtStream_t stream) : stream_mutex_(stream_mutex), stream_(stream) {} FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOp::~SingleOp() { for (auto task : tasks_) { @@ -59,11 +58,13 @@ Status SingleOp::ValidateArgs(const std::vector &inputs, const std:: for (size_t i = 0; i < num_inputs; ++i) { // preventing from read out of bound size_t aligned_size = GetAlignedSize(inputs[i].length); - GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%lu", - i, aligned_size, inputs[i].length, input_sizes_[i]); + GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%lu", i, aligned_size, inputs[i].length, + input_sizes_[i]); if (aligned_size < input_sizes_[i]) { - GELOGE(PARAM_INVALID, "Input size mismatch. 
index = %zu, model expect %zu," - " but given %zu(after align)", i, input_sizes_[i], aligned_size); + GELOGE(PARAM_INVALID, + "Input size mismatch. index = %zu, model expect %zu," + " but given %zu(after align)", + i, input_sizes_[i], aligned_size); return PARAM_INVALID; } } @@ -77,11 +78,13 @@ Status SingleOp::ValidateArgs(const std::vector &inputs, const std:: for (size_t i = 0; i < num_outputs; ++i) { // preventing from write out of bound size_t aligned_size = GetAlignedSize(outputs[i].length); - GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%lu", - i, aligned_size, outputs[i].length, output_sizes_[i]); + GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%lu", i, aligned_size, outputs[i].length, + output_sizes_[i]); if (aligned_size < output_sizes_[i]) { - GELOGE(PARAM_INVALID, "Output size mismatch. index = %zu, model expect %zu," - "but given %zu(after align)", i, output_sizes_[i], aligned_size); + GELOGE(PARAM_INVALID, + "Output size mismatch. 
index = %zu, model expect %zu," + "but given %zu(after align)", + i, output_sizes_[i], aligned_size); return PARAM_INVALID; } } @@ -126,12 +129,8 @@ Status SingleOp::UpdateArgs(const std::vector &inputs, const std::ve GELOGD("Update aicpu_TF task args"); auto *dst_io_addr = const_cast(reinterpret_cast(task->GetIOAddr())); GE_CHECK_NOTNULL(dst_io_addr); - auto rt_ret = rtMemcpyAsync(dst_io_addr, - sizeof(uint64_t) * args_.size(), - &args_[0], - sizeof(uint64_t) * args_.size(), - RT_MEMCPY_HOST_TO_DEVICE_EX, - stream_); + auto rt_ret = rtMemcpyAsync(dst_io_addr, sizeof(uint64_t) * args_.size(), &args_[0], + sizeof(uint64_t) * args_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, stream_); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtMemcpyAsync addresses failed, ret = %d", rt_ret); return RT_FAILED; @@ -180,40 +179,29 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c return ret; } -void SingleOp::SetStream(rtStream_t stream) { - stream_ = stream; -} +void SingleOp::SetStream(rtStream_t stream) { stream_ = stream; } -void SingleOp::SetSessionID(uint64_t session_id) { - aicpu_session_id_ = session_id; -} +void SingleOp::SetSessionID(uint64_t session_id) { aicpu_session_id_ = session_id; } DynamicSingleOp::DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex, rtStream_t stream) - : resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) { -} + : resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) {} DynamicSingleOp::~DynamicSingleOp() { GELOGI("DynamicSingleOp destory sessionId = %lu", aicpu_session_id_); ModelManager::GetInstance()->DestroyAicpuSession(aicpu_session_id_); } -Status DynamicSingleOp::ValidateParams(const vector &input_desc, - const std::vector &inputs, - std::vector &output_desc, - std::vector &outputs) const { +Status DynamicSingleOp::ValidateParams(const vector &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs) const { if (inputs.size() 
!= input_desc.size()) { - GELOGE(PARAM_INVALID, - "Input number mismatches input desc number. Input num = %zu, input desc num = %zu", - inputs.size(), - input_desc.size()); + GELOGE(PARAM_INVALID, "Input number mismatches input desc number. Input num = %zu, input desc num = %zu", + inputs.size(), input_desc.size()); return PARAM_INVALID; } if (outputs.size() != output_desc.size()) { - GELOGE(PARAM_INVALID, - "Output number mismatches output desc number. Output num = %zu, output desc num = %zu", - outputs.size(), - output_desc.size()); + GELOGE(PARAM_INVALID, "Output number mismatches output desc number. Output num = %zu, output desc num = %zu", + outputs.size(), output_desc.size()); return PARAM_INVALID; } @@ -263,10 +251,8 @@ Status DynamicSingleOp::AllocateWorkspaces(const std::vector &workspace return SUCCESS; } -Status DynamicSingleOp::ExecuteTbeTask(const vector &input_desc, - const vector &inputs, - vector &output_desc, - vector &outputs) { +Status DynamicSingleOp::ExecuteTbeTask(const vector &input_desc, const vector &inputs, + vector &output_desc, vector &outputs) { GE_CHK_STATUS_RET_NOLOG(op_task_->UpdateRunInfo(input_desc, output_desc)); std::vector workspace_buffers; @@ -275,10 +261,8 @@ Status DynamicSingleOp::ExecuteTbeTask(const vector &input_desc, return op_task_->LaunchKernel(inputs, outputs, workspace_buffers, stream_); } -Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, - const vector &input_buffers, - vector &output_desc, - vector &output_buffers) { +Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, const vector &input_buffers, + vector &output_desc, vector &output_buffers) { GE_CHECK_NOTNULL(op_task_); GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); std::lock_guard lk(*stream_mutex_); @@ -297,14 +281,11 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, } else if (op_task_->GetOpTaskType() == OP_TASK_AICPU || op_task_->GetOpTaskType() == OP_TASK_AICPUCC) { 
return op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_); } else { - GELOGE(UNSUPPORTED, - "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", + GELOGE(UNSUPPORTED, "Only TBE_Task, AI_CPU_Task and AI_CPUCC_Task are supported, but got %u", op_task_->GetOpTaskType()); return UNSUPPORTED; } } -void DynamicSingleOp::SetSessionID(uint64_t session_id) { - aicpu_session_id_ = session_id; -} +void DynamicSingleOp::SetSessionID(uint64_t session_id) { aicpu_session_id_ = session_id; } } // namespace ge diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h index bd671017..0ca4afef 100755 --- a/ge/single_op/single_op.h +++ b/ge/single_op/single_op.h @@ -62,26 +62,19 @@ class DynamicSingleOp { public: DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex_, rtStream_t stream); ~DynamicSingleOp(); - Status ExecuteAsync(const vector &input_desc, - const std::vector &inputs, - std::vector &output_desc, - std::vector &outputs); + Status ExecuteAsync(const vector &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs); void SetSessionID(uint64_t session_id); private: friend class SingleOpModel; - Status ValidateParams(const vector &input_desc, - const std::vector &inputs, - std::vector &output_desc, - std::vector &outputs) const; + Status ValidateParams(const vector &input_desc, const std::vector &inputs, + std::vector &output_desc, std::vector &outputs) const; - Status AllocateWorkspaces(const std::vector &workspace_sizes, - std::vector &workspaces); + Status AllocateWorkspaces(const std::vector &workspace_sizes, std::vector &workspaces); - Status ExecuteTbeTask(const vector &input_desc, - const vector &inputs, - vector &output_desc, - vector &outputs); + Status ExecuteTbeTask(const vector &input_desc, const vector &inputs, + vector &output_desc, vector &outputs); std::unique_ptr op_task_; uintptr_t resource_id_ = 0; diff --git a/ge/single_op/single_op_manager.cc 
b/ge/single_op/single_op_manager.cc index 56bbdef6..709b238f 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ -96,9 +96,7 @@ StreamResource *SingleOpManager::TryGetResource(uintptr_t resource_id) { return it->second; } -Status SingleOpManager::GetDynamicOpFromModel(const string &model_name, - const ModelData &model_data, - void *stream, +Status SingleOpManager::GetDynamicOpFromModel(const string &model_name, const ModelData &model_data, void *stream, DynamicSingleOp **single_op) { if (!tiling_func_registered_) { RegisterTilingFunc(); diff --git a/ge/single_op/single_op_manager.h b/ge/single_op/single_op_manager.h index e6d10980..09ae0e4e 100644 --- a/ge/single_op/single_op_manager.h +++ b/ge/single_op/single_op_manager.h @@ -34,14 +34,10 @@ class SingleOpManager { return instance; } - Status GetOpFromModel(const std::string &model_name, - const ge::ModelData &model_data, - void *stream, + Status GetOpFromModel(const std::string &model_name, const ge::ModelData &model_data, void *stream, SingleOp **single_op); - Status GetDynamicOpFromModel(const std::string &model_name, - const ge::ModelData &model_data, - void *stream, + Status GetDynamicOpFromModel(const std::string &model_name, const ge::ModelData &model_data, void *stream, DynamicSingleOp **dynamic_single_op); StreamResource *GetResource(uintptr_t resource_id, rtStream_t stream); diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index a1e1e7dd..fb676008 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -108,11 +108,8 @@ Status SingleOpModel::InitModelMem(StreamResource &res) { auto weight_buffer = model_helper_.GetGeModel()->GetWeight(); GELOGI("To copy weight to device. 
weight size = %zu", weight_buffer.GetSize()); - GE_CHK_RT_RET(rtMemcpy(model_params_.weight_base, - model_params_.weight_size, - weight_buffer.GetData(), - weight_buffer.GetSize(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(model_params_.weight_base, model_params_.weight_size, weight_buffer.GetData(), + weight_buffer.GetSize(), RT_MEMCPY_HOST_TO_DEVICE)); } return SUCCESS; @@ -190,7 +187,6 @@ Status SingleOpModel::LoadAllNodes() { } ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc); - ge_model->GetCustAICPUKernelStore().LoadCustAICPUKernelBinToOpDesc(op_desc); } return SUCCESS; @@ -248,7 +244,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); ParseArgTable(tbe_task, single_op); single_op.tasks_.emplace_back(tbe_task); - } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { + } else if (kernel_type == cce::ccKernelType::AI_CPU) { GELOGD("Building AICPU_CC task"); OpTask *task = nullptr; auto ret = BuildCpuKernelTask(task_def.kernel(), &task); @@ -257,7 +253,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { } single_op.tasks_.emplace_back(task); } else { - GELOGE(UNSUPPORTED, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); + GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); return UNSUPPORTED; } } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { @@ -327,8 +323,8 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa return SUCCESS; } -Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, - bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id) { +Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, bool dynamic_flag, + bool &depend_compute_flag, uint64_t 
session_id) { auto iter = op_list_.find(kernel_def.op_index()); if (iter == op_list_.end()) { GELOGE(INTERNAL_ERROR, "op desc not found. op index = %u", kernel_def.op_index()); @@ -392,13 +388,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl TbeOpTask *tbe_task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); single_op.op_task_.reset(tbe_task); - } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { + } else if (kernel_type == cce::ccKernelType::AI_CPU) { GELOGD("Building AICPU_CC task"); OpTask *task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task)); single_op.op_task_.reset(task); } else { - GELOGE(UNSUPPORTED, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); + GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); return UNSUPPORTED; } return SUCCESS; @@ -430,8 +426,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { bool depend_compute_flag = false; uint64_t dynamic_singleop_sessionid = aicpu_sessionid++; GELOGI("Build dynamic singleOp, sessionId = %lu", dynamic_singleop_sessionid); - GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true, - depend_compute_flag, dynamic_singleop_sessionid)); + GE_CHK_STATUS_RET_NOLOG( + BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true, depend_compute_flag, dynamic_singleop_sessionid)); if (depend_compute_flag) { if (i >= tasks.size() - 1) { GELOGE(FAILED, "The copy task of the fourth operator was not found."); diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 2e6b37dc..09b90050 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -45,9 +45,7 @@ struct SingleOpModelParam { class SingleOpModel { public: - SingleOpModel(const std::string &model_name, - const 
void *model_data, - uint32_t model_size); + SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size); ~SingleOpModel() = default; Status Init(); @@ -68,8 +66,8 @@ class SingleOpModel { Status BuildTaskList(SingleOp &single_op); Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); - Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, - bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id); + Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, bool dynamic_flag, + bool &depend_compute_flag, uint64_t session_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task); Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); diff --git a/ge/single_op/stream_resource.cc b/ge/single_op/stream_resource.cc index 388f447e..c2b93974 100755 --- a/ge/single_op/stream_resource.cc +++ b/ge/single_op/stream_resource.cc @@ -22,8 +22,7 @@ #include "single_op/single_op_model.h" namespace ge { -StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) { -} +StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) {} StreamResource::~StreamResource() { for (auto mem : memory_list_) { @@ -61,13 +60,9 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) { return it->second.get(); } -void StreamResource::SetStream(rtStream_t stream) { - stream_ = stream; -} +void StreamResource::SetStream(rtStream_t stream) { stream_ = stream; } -uint8_t *StreamResource::DoMallocMemory(const std::string &purpose, - size_t size, - size_t &max_allocated, +uint8_t *StreamResource::DoMallocMemory(const std::string &purpose, size_t size, size_t &max_allocated, std::vector &allocated) { if (size <= max_allocated && !allocated.empty()) { GELOGD("reuse last memory"); @@ -116,8 +111,7 @@ uint8_t 
*StreamResource::MallocWeight(const std::string &purpose, size_t size) { return buffer; } -Status StreamResource::BuildDynamicOperator(const string &model_name, - const ModelData &model_data, +Status StreamResource::BuildDynamicOperator(const string &model_name, const ModelData &model_data, DynamicSingleOp **single_op) { std::lock_guard lk(mu_); auto it = dynamic_op_map_.find(model_data.model_data); @@ -133,7 +127,8 @@ Status StreamResource::BuildDynamicOperator(const string &model_name, return ret; } - auto new_op = std::unique_ptr(new(std::nothrow) DynamicSingleOp(resource_id_, &stream_mu_, stream_)); + auto new_op = + std::unique_ptr(new (std::nothrow) DynamicSingleOp(resource_id_, &stream_mu_, stream_)); GE_CHECK_NOTNULL(new_op); GELOGI("To build operator: %s", model_name.c_str()); @@ -158,7 +153,7 @@ Status StreamResource::BuildOperator(const string &model_name, const ModelData & return ret; } - auto new_op = std::unique_ptr(new(std::nothrow) SingleOp(&stream_mu_, stream_)); + auto new_op = std::unique_ptr(new (std::nothrow) SingleOp(&stream_mu_, stream_)); if (new_op == nullptr) { GELOGE(MEMALLOC_FAILED, "new SingleOp failed"); return MEMALLOC_FAILED; diff --git a/ge/single_op/stream_resource.h b/ge/single_op/stream_resource.h index 39f08ebe..3c0dd03f 100755 --- a/ge/single_op/stream_resource.h +++ b/ge/single_op/stream_resource.h @@ -49,9 +49,7 @@ class StreamResource { uint8_t *MallocWeight(const std::string &purpose, size_t size); private: - uint8_t *DoMallocMemory(const std::string &purpose, - size_t size, - size_t &max_allocated, + uint8_t *DoMallocMemory(const std::string &purpose, size_t size, size_t &max_allocated, std::vector &allocated); uintptr_t resource_id_; diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index b9c5b9d0..cc334f41 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -15,8 +15,6 @@ */ #include 
"single_op/task/aicpu_kernel_task_builder.h" -#include "cce/taskdown_common.hpp" -#include "graph/load/new_model_manager/model_manager.h" namespace ge { AiCpuCCTaskBuilder::AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def) @@ -29,7 +27,7 @@ Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task) { return RT_FAILED; } std::unique_ptr aicpu_args; - aicpu_args.reset(new(std::nothrow) uint8_t[aicpu_arg_size]()); + aicpu_args.reset(new (std::nothrow) uint8_t[aicpu_arg_size]()); if (aicpu_args == nullptr) { GELOGE(RT_FAILED, "malloc failed, size = %zu", aicpu_arg_size); return RT_FAILED; @@ -57,14 +55,6 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { task.SetkernelName(kernel_name); task.op_desc_ = op_desc_; - const auto &context = kernel_def_.context(); - auto kernel_type = static_cast(context.kernel_type()); - if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { - task.is_custom_ = true; - task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); - } - task.num_inputs_ = op_desc_->GetInputsSize(); task.num_outputs_ = op_desc_->GetOutputsSize(); @@ -72,8 +62,8 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { auto &kernel_ext_info = kernel_def_.kernel_ext_info(); auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, - "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", - kernel_ext_info.size(), kernel_ext_info_size); + "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", kernel_ext_info.size(), + kernel_ext_info_size); ret = task.SetExtInfoAndType(kernel_ext_info); if (ret != SUCCESS) { diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 3adfcd11..9ad52d81 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ 
b/ge/single_op/task/aicpu_task_builder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #include "single_op/task/aicpu_task_builder.h" #include @@ -23,140 +23,145 @@ #include "graph/load/new_model_manager/model_manager.h" namespace ge { - AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) - : op_desc_(op_desc), kernel_def_(kernel_def) {} - - Status AiCpuTaskBuilder::SetInputOutputAddr(void **io_addr, const std::vector &addresses) { - size_t arg_size = kernel_def_.args_size(); - auto rt_ret = rtMalloc(io_addr, arg_size, RT_MEMORY_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtMalloc failed, size = %zu, ret = %d", arg_size, rt_ret); - return RT_FAILED; - } - - const void *src_addr = reinterpret_cast(addresses.data()); - uint64_t src_len = sizeof(void *) * addresses.size(); - rt_ret = rtMemcpy(*io_addr, arg_size, src_addr, src_len, RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - (void)rtFree(*io_addr); - GELOGE(RT_FAILED, "rtMemcpy addresses failed, ret = %d", rt_ret); - return RT_FAILED; - } +AiCpuTaskBuilder::AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def) + : op_desc_(op_desc), kernel_def_(kernel_def) {} + +Status AiCpuTaskBuilder::SetInputOutputAddr(void **io_addr, const std::vector &addresses) { + size_t arg_size = kernel_def_.args_size(); + auto rt_ret = rtMalloc(io_addr, arg_size, RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtMalloc failed, size = %zu, ret = %d", arg_size, rt_ret); + return RT_FAILED; + } - return SUCCESS; 
+ const void *src_addr = reinterpret_cast(addresses.data()); + uint64_t src_len = sizeof(void *) * addresses.size(); + rt_ret = rtMemcpy(*io_addr, arg_size, src_addr, src_len, RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + (void)rtFree(*io_addr); + GELOGE(RT_FAILED, "rtMemcpy addresses failed, ret = %d", rt_ret); + return RT_FAILED; } - Status AiCpuTaskBuilder::SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &fwk_op_kernel) { - auto sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), - kernel_def_.args().data(), kernel_def_.args().size()); - if (sec_ret != EOK) { - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); - return FAILED; - } + return SUCCESS; +} - auto io_addr_val = static_cast(reinterpret_cast(io_addr)); - fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = io_addr_val; - auto ws_addr_val = static_cast(reinterpret_cast(ws_addr)); - fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = ws_addr_val; - return SUCCESS; +Status AiCpuTaskBuilder::SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &fwk_op_kernel) { + auto sec_ret = + memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_def_.args().data(), kernel_def_.args().size()); + if (sec_ret != EOK) { + GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); + return FAILED; } - Status AiCpuTaskBuilder::SetKernelArgs(void **args, STR_FWK_OP_KERNEL &fwk_op_kernel) { - void *fwk_op_args = nullptr; - auto rt_ret = rtMalloc(&fwk_op_args, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "malloc arg memory failed, ret = %d", rt_ret); - return RT_FAILED; - } - - rt_ret = rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, - sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); - if (rt_ret != RT_ERROR_NONE) { - (void)rtFree(fwk_op_args); - GELOGE(RT_FAILED, "copy args failed, ret = %d", rt_ret); - return RT_FAILED; - } - *args = fwk_op_args; - return SUCCESS; + auto io_addr_val = 
static_cast(reinterpret_cast(io_addr)); + fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = io_addr_val; + auto ws_addr_val = static_cast(reinterpret_cast(ws_addr)); + fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = ws_addr_val; + return SUCCESS; +} + +Status AiCpuTaskBuilder::SetKernelArgs(void **args, STR_FWK_OP_KERNEL &fwk_op_kernel) { + void *fwk_op_args = nullptr; + auto rt_ret = rtMalloc(&fwk_op_args, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "malloc arg memory failed, ret = %d", rt_ret); + return RT_FAILED; } - Status AiCpuTaskBuilder::InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, - const SingleOpModelParam ¶m, bool dynamic_flag) { - if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) { - GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", - sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size()); + rt_ret = rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), + RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + (void)rtFree(fwk_op_args); + GELOGE(RT_FAILED, "copy args failed, ret = %d", rt_ret); + return RT_FAILED; + } + *args = fwk_op_args; + return SUCCESS; +} + +Status AiCpuTaskBuilder::InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, const SingleOpModelParam ¶m, + bool dynamic_flag) { + if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) { + GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), + kernel_def_.args_size()); + return PARAM_INVALID; + } + auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param); + auto ws_addr_vec = addresses.at(BuildTaskUtils::kAddressIndexWorkspace); + + if (dynamic_flag) { + GE_CHK_RT_RET(rtMalloc(kernel_workspace, kernel_def_.task_info_size(), RT_MEMORY_HBM)); + } else { + if (ws_addr_vec.empty()) { + GELOGE(PARAM_INVALID, "workspace Data Address is empty."); return PARAM_INVALID; } - 
auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param); - auto ws_addr_vec = addresses.at(BuildTaskUtils::kAddressIndexWorkspace); - - if (dynamic_flag) { - GE_CHK_RT_RET(rtMalloc(kernel_workspace, kernel_def_.task_info_size(), RT_MEMORY_HBM)); - } else { - if (ws_addr_vec.empty()) { - GELOGE(PARAM_INVALID, "workspace Data Address is empty."); - return PARAM_INVALID; - } - *kernel_workspace = ws_addr_vec[0]; - } - GE_CHK_RT_RET(rtMemcpy(*kernel_workspace, kernel_def_.task_info_size(), - kernel_def_.task_info().data(), kernel_def_.task_info_size(), - RT_MEMCPY_HOST_TO_DEVICE)); - - auto ret = SetInputOutputAddr(io_addr, BuildTaskUtils::JoinAddresses(addresses)); - if (ret != SUCCESS) { - return ret; - } - return SUCCESS; + *kernel_workspace = ws_addr_vec[0]; } + GE_CHK_RT_RET(rtMemcpy(*kernel_workspace, kernel_def_.task_info_size(), kernel_def_.task_info().data(), + kernel_def_.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); - Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, - bool dynamic_flag, uint64_t session_id) { - GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(&task.io_addr_, &task.workspace_addr_, param, dynamic_flag)); - - STR_FWK_OP_KERNEL fwk_op_kernel = {0}; - auto ret = SetFmkOpKernel(task.io_addr_, task.workspace_addr_, fwk_op_kernel); - if (ret != SUCCESS) { - return ret; - } - - task.op_desc_ = op_desc_; - task.num_inputs_ = op_desc_->GetInputsSize(); - task.num_outputs_ = op_desc_->GetOutputsSize(); - - // get kernel_ext_info - auto &kernel_ext_info = kernel_def_.kernel_ext_info(); - auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); - GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, - "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", - kernel_ext_info.size(), kernel_ext_info_size); - GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info), "Init ext info failed."); - - if (task.ext_info_addr_dev_ != nullptr) { - 
fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(task.ext_info_addr_dev_); - fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = kernel_ext_info_size; - } - GE_CHK_STATUS_RET(task.InitForSummaryAndCopy(), "AiCpuTask init for summary and copy task failed."); - - // Create session - fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; - GELOGI("Begin to CreateAicpuSession, session id: %lu", session_id); - GE_CHECK_NOTNULL(ModelManager::GetInstance()); - GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS, - GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id); - return FAILED;) - ret = SetKernelArgs(&task.args_, fwk_op_kernel); - if (ret != SUCCESS) { - return ret; - } - - task.arg_size_ = sizeof(STR_FWK_OP_KERNEL); - task.op_type_ = op_desc_->GetName(); - task.task_info_ = kernel_def_.task_info(); - task.dynamic_flag_ = dynamic_flag; + auto ret = SetInputOutputAddr(io_addr, BuildTaskUtils::JoinAddresses(addresses)); + if (ret != SUCCESS) { + return ret; + } + return SUCCESS; +} + +Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, + uint64_t session_id) { + void *io_addr = nullptr; + void *kernel_workspace = nullptr; + GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(&io_addr, &kernel_workspace, param, dynamic_flag)); + + STR_FWK_OP_KERNEL fwk_op_kernel = {0}; + auto ret = SetFmkOpKernel(io_addr, kernel_workspace, fwk_op_kernel); + if (ret != SUCCESS) { + (void)rtFree(io_addr); + return ret; + } - auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); - GELOGI("[TASK_INFO] %s %s", task.task_info_.c_str(), debug_info.c_str()); - return SUCCESS; + task.op_desc_ = op_desc_; + task.num_inputs_ = op_desc_->GetInputsSize(); + task.num_outputs_ = op_desc_->GetOutputsSize(); + + // get kernel_ext_info + auto &kernel_ext_info = kernel_def_.kernel_ext_info(); + auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); + 
GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, + "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", kernel_ext_info.size(), + kernel_ext_info_size); + GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info), "Init ext info failed."); + + if (task.ext_info_addr_dev_ != nullptr) { + fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(task.ext_info_addr_dev_); + fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = kernel_ext_info_size; } + GE_CHK_STATUS_RET(task.InitForSummaryAndCopy(), "AiCpuTask init for summary and copy task failed."); + + // Create session + fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; + GELOGI("Begin to CreateAicpuSession, session id: %lu", session_id); + GE_CHECK_NOTNULL(ModelManager::GetInstance()); + GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS, + GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id); + return FAILED;) + ret = SetKernelArgs(&task.args_, fwk_op_kernel); + if (ret != SUCCESS) { + (void)rtFree(io_addr); + return ret; + } + + task.arg_size_ = sizeof(STR_FWK_OP_KERNEL); + task.op_type_ = op_desc_->GetName(); + task.io_addr_ = io_addr; + task.task_info_ = kernel_def_.task_info(); + task.workspace_addr_ = kernel_workspace; + task.dynamic_flag_ = dynamic_flag; + + auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); + GELOGI("[TASK_INFO] %s %s", task.task_info_.c_str(), debug_info.c_str()); + return SUCCESS; +} } // namespace ge diff --git a/ge/single_op/task/aicpu_task_builder.h b/ge/single_op/task/aicpu_task_builder.h index 6dcd7a0f..76ccb161 100755 --- a/ge/single_op/task/aicpu_task_builder.h +++ b/ge/single_op/task/aicpu_task_builder.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the 
License. @@ -12,7 +12,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef GE_SINGLE_OP_TASK_AICPU_TASK_BUILDER_H_ #define GE_SINGLE_OP_TASK_AICPU_TASK_BUILDER_H_ @@ -24,23 +24,23 @@ #include "cce/aicpu_engine_struct.h" namespace ge { - class AiCpuTaskBuilder { - public: - AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def); - ~AiCpuTaskBuilder() = default; +class AiCpuTaskBuilder { + public: + AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def); + ~AiCpuTaskBuilder() = default; - Status BuildTask(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, uint64_t session_id); + Status BuildTask(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, uint64_t session_id); - private: - static Status SetKernelArgs(void **args, STR_FWK_OP_KERNEL &kernel); - Status SetInputOutputAddr(void **io_addr, const std::vector &addresses); - Status SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &kernel); - Status InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, - const SingleOpModelParam ¶m, bool dynamic_flag); + private: + static Status SetKernelArgs(void **args, STR_FWK_OP_KERNEL &kernel); + Status SetInputOutputAddr(void **io_addr, const std::vector &addresses); + Status SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &kernel); + Status InitWorkspaceAndIO(void **io_addr, void **kernel_workspace, const SingleOpModelParam ¶m, + bool dynamic_flag); - const OpDescPtr op_desc_; - const domi::KernelExDef &kernel_def_; - }; + const OpDescPtr op_desc_; + const domi::KernelExDef &kernel_def_; +}; } // namespace ge #endif // GE_SINGLE_OP_TASK_AICPU_TASK_BUILDER_H_ \ No newline at end of file diff --git a/ge/single_op/task/build_task_utils.cc b/ge/single_op/task/build_task_utils.cc index 29f1657b..268cbfd1 100644 --- 
a/ge/single_op/task/build_task_utils.cc +++ b/ge/single_op/task/build_task_utils.cc @@ -29,7 +29,7 @@ const uint64_t kSessionId = UINT64_MAX; uint8_t *kVarBase = nullptr; const uint64_t kLogicVarBase = 0; const uint64_t kVarSize = 0; -} +} // namespace std::vector> BuildTaskUtils::GetAddresses(const OpDescPtr &op_desc, const SingleOpModelParam ¶m) { @@ -45,7 +45,6 @@ std::vector> BuildTaskUtils::GetAddresses(const OpDescPtr &o runtime_para.logic_var_base = kLogicVarBase; runtime_para.var_base = kVarBase; runtime_para.session_id = kSessionId; - runtime_para.is_single_op = true; ret.emplace_back(ModelUtils::GetInputDataAddrs(runtime_para, op_desc)); ret.emplace_back(ModelUtils::GetOutputDataAddrs(runtime_para, op_desc)); @@ -61,8 +60,7 @@ std::vector BuildTaskUtils::JoinAddresses(const std::vector BuildTaskUtils::GetKernelArgs(const OpDescPtr &op_desc, - const SingleOpModelParam ¶m) { +std::vector BuildTaskUtils::GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m) { auto addresses = GetAddresses(op_desc, param); return JoinAddresses(addresses); } diff --git a/ge/single_op/task/build_task_utils.h b/ge/single_op/task/build_task_utils.h index cddc7a2b..f5885fd2 100644 --- a/ge/single_op/task/build_task_utils.h +++ b/ge/single_op/task/build_task_utils.h @@ -33,9 +33,8 @@ class BuildTaskUtils { static std::vector JoinAddresses(const std::vector> &addresses); static std::vector GetKernelArgs(const OpDescPtr &op_desc, const SingleOpModelParam ¶m); static std::string GetTaskInfo(const OpDescPtr &op_desc); - template - static std::string VectorToString(const std::vector &values) - { + template + static std::string VectorToString(const std::vector &values) { std::stringstream ss; ss << '['; auto size = values.size(); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index f1d293d5..78db835e 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -39,7 +39,7 @@ void FreeHbm(void *var) { (void)rtFree(var); } } -} +} 
// namespace Status OpTask::OpenDump(const std::vector &io_addr, rtStream_t stream) { if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) { @@ -240,14 +240,12 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info) { } int32_t unknown_shape_type_val = 0; - (void) AttrUtils::GetInt(op_desc_, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); + (void)AttrUtils::GetInt(op_desc_, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); GELOGD("Get unknown_type is %d.", unknown_shape_type_val); unknown_type_ = static_cast(unknown_shape_type_val); - aicpu_ext_handle_.reset(new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), - num_inputs_, - num_outputs_, - unknown_type_)); + aicpu_ext_handle_.reset( + new (std::nothrow)::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), num_inputs_, num_outputs_, unknown_type_)); GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); @@ -257,12 +255,12 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info) { } GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, kernel_ext_info.size(), RT_MEMORY_HBM)); - GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, kernel_ext_info.size(), - kernel_ext_info.data(), kernel_ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, kernel_ext_info.size(), kernel_ext_info.data(), kernel_ext_info.size(), + RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } -Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, +Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, std::vector &output_desc) { GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); if (num_inputs_ == 0 && num_outputs_ == 0) { @@ -280,13 +278,14 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, for (size_t j = 0; j < num_outputs_; ++j) { 
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), "Output[%zu] UpdateOutputShapeAndType failed.", j); + // debug code + GELOGD("No input and output, no need update ext info."); } } GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, - aicpu_ext_handle_->GetExtInfoLen(), // check size - aicpu_ext_handle_->GetExtInfo(), - aicpu_ext_handle_->GetExtInfoLen(), + aicpu_ext_handle_->GetExtInfoLen(), // check size + aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE)); GELOGI("Update ext info end."); @@ -300,18 +299,15 @@ Status AiCpuBaseTask::UpdateOutputShape(vector &output_desc) { } GELOGD("Start to update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape."); - GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), - aicpu_ext_handle_->GetExtInfoLen(), - ext_info_addr_dev_, - aicpu_ext_handle_->GetExtInfoLen(), - RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_->GetExtInfo(), aicpu_ext_handle_->GetExtInfoLen(), ext_info_addr_dev_, + aicpu_ext_handle_->GetExtInfoLen(), RT_MEMCPY_DEVICE_TO_HOST)); for (size_t i = 0; i < num_outputs_; ++i) { GeShape shape; DataType data_type; aicpu_ext_handle_->GetOutputShapeAndType(i, shape, data_type); - GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), - "AiCpuCCTask Update [%zu]th output shape failed.", i); + GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, output_desc[i]), "AiCpuCCTask Update [%zu]th output shape failed.", + i); } GELOGD("Update DEPEND_SHAPE_RANGE AiCpuBaseTask outputshape finished."); return SUCCESS; @@ -332,16 +328,15 @@ Status AiCpuBaseTask::UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensor std::vector origin_dims_new; - auto trans_ret = formats::TransShape(format, shape_new.GetDims(), - output_desc.GetDataType(), origin_format, origin_dims_new); - GE_CHK_STATUS_RET(trans_ret, - "AiCpuTask originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.", + auto trans_ret = + 
formats::TransShape(format, shape_new.GetDims(), output_desc.GetDataType(), origin_format, origin_dims_new); + GE_CHK_STATUS_RET(trans_ret, "AiCpuTask originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.", origin_format, format, shape_new.ToString().c_str()); auto origin_shape_new = GeShape(origin_dims_new); output_desc.SetOriginShape(origin_shape_new); - GELOGD("AiCpuTask originFormat[%d] is not same as format[%d], need update from %s ro %s.", - origin_format, format, origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); + GELOGD("AiCpuTask originFormat[%d] is not same as format[%d], need update from %s ro %s.", origin_format, format, + origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); return SUCCESS; } @@ -397,9 +392,8 @@ Status AiCpuTask::PrepareCopyInputs(vector &outputs) { for (size_t i = 0; i < num_outputs_; ++i) { const auto &summary = output_summary_host_[i]; - GELOGI("Node out[%zu] summary, shape data=0x%lx, shape data size=%lu, raw data=0x%lx, raw data size=%lu.", - i, summary.shape_data_ptr, summary.shape_data_size, - summary.raw_data_ptr, summary.raw_data_size); + GELOGI("Node out[%zu] summary, shape data=0x%lx, shape data size=%lu, raw data=0x%lx, raw data size=%lu.", i, + summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, summary.raw_data_size); auto output = outputs[i]; copy_input_release_flag.emplace_back(kReleaseFlag); if (summary.raw_data_size > 0) { @@ -419,14 +413,14 @@ Status AiCpuTask::PrepareCopyInputs(vector &outputs) { const size_t copy_input_buf_len = num_outputs_ * kCopyNum * sizeof(uint64_t); - GE_CHK_RT_RET(rtMemcpy(copy_input_release_flag_dev_, copy_input_buf_len, - copy_input_release_flag.data(), copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT_RET(rtMemcpy(copy_input_data_size_dev_, copy_input_buf_len, - copy_input_data_size.data(), copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT_RET(rtMemcpy(copy_input_src_dev_, 
copy_input_buf_len, - copy_input_src.data(), copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT_RET(rtMemcpy(copy_input_dst_dev_, copy_input_buf_len, - copy_input_dst.data(), copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_release_flag_dev_, copy_input_buf_len, copy_input_release_flag.data(), + copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_data_size_dev_, copy_input_buf_len, copy_input_data_size.data(), copy_input_buf_len, + RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_src_dev_, copy_input_buf_len, copy_input_src.data(), copy_input_buf_len, + RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_input_dst_dev_, copy_input_buf_len, copy_input_dst.data(), copy_input_buf_len, + RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } @@ -434,9 +428,8 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory() { for (size_t i = 0; i < num_outputs_; ++i) { auto &result_summary = output_summary_host_[i]; - GE_CHK_RT_RET(rtMemcpy(&result_summary, sizeof(aicpu::FWKAdapter::ResultSummary), - output_summary_[i], sizeof(aicpu::FWKAdapter::ResultSummary), - RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(&result_summary, sizeof(aicpu::FWKAdapter::ResultSummary), output_summary_[i], + sizeof(aicpu::FWKAdapter::ResultSummary), RT_MEMCPY_DEVICE_TO_HOST)); auto shape_data_size = result_summary.shape_data_size; void *shape_buffer = nullptr; if (shape_data_size > 0) { @@ -447,12 +440,10 @@ Status AiCpuTask::ReadResultSummaryAndPrepareMemory() { return SUCCESS; } -Status AiCpuTask::CopyDataToHbm(vector &outputs, - rtStream_t stream) { +Status AiCpuTask::CopyDataToHbm(vector &outputs, rtStream_t stream) { GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(outputs)); - GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), - RT_KERNEL_DEFAULT, stream)); + GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, stream)); 
GE_CHK_RT_RET(rtStreamSynchronize(stream)); return SUCCESS; } @@ -465,10 +456,10 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc) { const auto &shape_hbm = out_shape_hbm_[i]; uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); - std::unique_ptr shape_addr(new(std::nothrow) int64_t[dim_num]()); + std::unique_ptr shape_addr(new (std::nothrow) int64_t[dim_num]()); GE_CHECK_NOTNULL(shape_addr); - GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, - shape_hbm, result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm, + result_summary.shape_data_size, RT_MEMCPY_DEVICE_TO_HOST)); for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { shape_dims.emplace_back(shape_addr[dim_idx]); @@ -482,8 +473,7 @@ Status AiCpuTask::UpdateShapeByHbmBuffer(vector &output_desc) { return SUCCESS; } -Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output_desc, - vector &outputs, +Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output_desc, vector &outputs, rtStream_t stream) { if (num_outputs_ == 0) { GELOGI("Output num is 0, there is no need to update the output and size."); @@ -496,14 +486,11 @@ Status AiCpuTask::UpdateShapeAndDataByResultSummary(vector &output FreeHbm(out_shape); } out_shape_hbm_.clear(); - GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(), - "Read ResultSummary and update output shape failed."); + GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(), "Read ResultSummary and update output shape failed."); - GE_CHK_STATUS_RET(CopyDataToHbm(outputs, stream), - "Copy data to output failed."); + GE_CHK_STATUS_RET(CopyDataToHbm(outputs, stream), "Copy data to output failed."); - GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc), - "Update shape by hbm buffer failed."); + GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(output_desc), "Update shape by hbm buffer failed."); for (auto out_shape : out_shape_hbm_) 
{ FreeHbm(out_shape); @@ -538,11 +525,8 @@ Status AiCpuTask::SetIO(const vector &inputs, vector &outputs) { if (!io_addrs.empty()) { auto *dst_io_addr = const_cast(reinterpret_cast(io_addr_)); - GE_CHK_RT_RET(rtMemcpy(dst_io_addr, - sizeof(uint64_t) * io_addrs.size(), - &io_addrs[0], - sizeof(uint64_t) * io_addrs.size(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(dst_io_addr, sizeof(uint64_t) * io_addrs.size(), &io_addrs[0], + sizeof(uint64_t) * io_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE)); GE_CHECK_NOTNULL(dst_io_addr); }; return SUCCESS; @@ -580,24 +564,23 @@ Status AiCpuTask::InitForSummaryAndCopy() { GE_CHK_RT_RET(rtMalloc(©_ioaddr_dev_, copy_io_addr_size, RT_MEMORY_HBM)); - GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_, copy_io_addr_size, - copy_io_addr.data(), copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET( + rtMemcpy(copy_ioaddr_dev_, copy_io_addr_size, copy_io_addr.data(), copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { - GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", - sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size()); + GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), + kernel_def.args_size()); return PARAM_INVALID; } GE_CHK_RT_RET(rtMalloc(©_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM)); - GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(), - kernel_def.task_info().data(), kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(), kernel_def.task_info().data(), + kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE)); STR_FWK_OP_KERNEL aicpu_task = {0}; - auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), - kernel_def.args().data(), kernel_def.args().size()); + auto sec_ret = 
memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), kernel_def.args().data(), kernel_def.args().size()); if (sec_ret != EOK) { GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); return FAILED; @@ -608,16 +591,14 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; - GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), - &aicpu_task, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_, sizeof(STR_FWK_OP_KERNEL), &aicpu_task, sizeof(STR_FWK_OP_KERNEL), + RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; } Status AiCpuTask::LaunchKernel(const std::vector &input_desc, - const std::vector &input_buffers, - std::vector &output_desc, - std::vector &output_buffers, - rtStream_t stream) { + const std::vector &input_buffers, std::vector &output_desc, + std::vector &output_buffers, rtStream_t stream) { GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc)); std::vector inputs; std::vector outputs; @@ -659,18 +640,16 @@ const void *AiCpuCCTask::GetArgs() const { return args_.get(); } size_t AiCpuCCTask::GetArgSize() const { return arg_size_; } -AiCpuCCTask::~AiCpuCCTask() { -} +AiCpuCCTask::~AiCpuCCTask() {} Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { GELOGI("To invoke rtCpuKernelLaunch. 
block_dim = %u, so_name is %s, kernel_name is %s", block_dim_, so_name_.data(), kernel_name_.data()); // sm_desc is nullptr, because l2 buffer does not support auto *sm_desc = reinterpret_cast(sm_desc_); - auto ret = rtCpuKernelLaunchWithFlag(static_cast(so_name_.data()), - static_cast(kernel_name_.data()), - block_dim_, args_.get(), static_cast(arg_size_), - sm_desc, stream, dump_flag_); + auto ret = + rtCpuKernelLaunch(static_cast(so_name_.data()), static_cast(kernel_name_.data()), + block_dim_, args_.get(), static_cast(arg_size_), sm_desc, stream); if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); return RT_FAILED; @@ -681,13 +660,10 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { } Status AiCpuCCTask::LaunchKernel(const std::vector &input_desc, - const std::vector &input_buffers, - std::vector &output_desc, - std::vector &output_buffers, - rtStream_t stream) { + const std::vector &input_buffers, std::vector &output_desc, + std::vector &output_buffers, rtStream_t stream) { GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, - "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", - unknown_type_); + "AiCpuCCTask unknown type[%d] is depend compute, it's not supported now.", unknown_type_); GE_CHK_STATUS_RET_NOLOG(UpdateExtInfo(input_desc, output_desc)); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 4325da33..5f742197 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -44,32 +44,28 @@ class OpTask { OpTask() = default; virtual ~OpTask() = default; virtual Status LaunchKernel(rtStream_t stream) = 0; - virtual Status UpdateRunInfo(const vector &input_desc, - const vector &output_desc) { + virtual Status UpdateRunInfo(const vector &input_desc, const vector &output_desc) { return UNSUPPORTED; } - virtual Status LaunchKernel(const std::vector &inputs, - const std::vector &outputs, - const std::vector &workspaces, - rtStream_t 
stream) { + virtual Status LaunchKernel(const std::vector &inputs, const std::vector &outputs, + const std::vector &workspaces, rtStream_t stream) { return UNSUPPORTED; } virtual OpTaskType GetOpTaskType() = 0; virtual const void *GetIOAddr() const = 0; const vector &GetWorkspaceSizes() const; void SetWorkspaceSizes(const vector &workspace_sizes); - const OpDescPtr &GetOpdesc() const {return op_desc_;} + const OpDescPtr &GetOpdesc() const { return op_desc_; } Status OpenDump(const std::vector &io_addr, rtStream_t stream); - virtual Status LaunchKernel(const std::vector &input_desc, - const std::vector &input_buffers, - std::vector &output_desc, - std::vector &output_buffers, + virtual Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, + std::vector &output_desc, std::vector &output_buffers, rtStream_t stream) { return UNSUPPORTED; } private: std::vector workspace_sizes_; + protected: DumpProperties dump_properties_; DumpOp dump_op_; @@ -80,22 +76,15 @@ class TbeOpTask : public OpTask { public: ~TbeOpTask() override; Status LaunchKernel(rtStream_t stream) override; - OpTaskType GetOpTaskType() override { - return OP_TASK_TBE; - } - const void *GetIOAddr() const override { - return nullptr; - } + OpTaskType GetOpTaskType() override { return OP_TASK_TBE; } + const void *GetIOAddr() const override { return nullptr; } void SetSmDesc(void *sm_desc); void SetStubFunc(const std::string &name, const void *stub_func); void SetKernelArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); - Status UpdateRunInfo(const vector &input_desc, - const vector &output_desc) override; + Status UpdateRunInfo(const vector &input_desc, const vector &output_desc) override; - Status LaunchKernel(const vector &inputs, - const vector &outputs, - const vector &workspaces, + Status LaunchKernel(const vector &inputs, const vector &outputs, const vector &workspaces, rtStream_t stream) override; const void *GetArgs() const; @@ 
-105,8 +94,7 @@ class TbeOpTask : public OpTask { private: static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); - Status UpdateNodeByShape(const vector &input_desc, - const vector &output_desc); + Status UpdateNodeByShape(const vector &input_desc, const vector &output_desc); const void *stub_func_ = nullptr; std::unique_ptr args_; @@ -130,8 +118,7 @@ class AiCpuBaseTask : public OpTask { protected: Status SetExtInfoAndType(const std::string &kernel_ext_info); - Status UpdateExtInfo(const std::vector &input_desc, - std::vector &output_desc); + Status UpdateExtInfo(const std::vector &input_desc, std::vector &output_desc); Status UpdateOutputShape(vector &output_desc); Status UpdateShapeToOutputDesc(const GeShape &shape_new, GeTensorDesc &output_desc); @@ -149,15 +136,11 @@ class AiCpuTask : public AiCpuBaseTask { ~AiCpuTask() override; Status LaunchKernel(rtStream_t stream) override; - OpTaskType GetOpTaskType() override { - return OP_TASK_AICPU; - } + OpTaskType GetOpTaskType() override { return OP_TASK_AICPU; } const void *GetIOAddr() const override; - Status LaunchKernel(const std::vector &input_desc, - const std::vector &input_buffers, - std::vector &output_desc, - std::vector &output_buffers, + Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, + std::vector &output_desc, std::vector &output_buffers, rtStream_t stream) override; Status SetMemCopyTask(const domi::KernelExDef &kernel_def); @@ -166,8 +149,7 @@ class AiCpuTask : public AiCpuBaseTask { // for copy task. 
Status InitForSummaryAndCopy(); - Status UpdateShapeAndDataByResultSummary(vector &output_desc, - vector &outputs, + Status UpdateShapeAndDataByResultSummary(vector &output_desc, vector &outputs, rtStream_t stream); Status ReadResultSummaryAndPrepareMemory(); @@ -219,13 +201,11 @@ class AiCpuCCTask : public AiCpuBaseTask { void SetIoAddr(void *io_addr); size_t GetArgSize() const; - Status LaunchKernel(const std::vector &input_desc, - const std::vector &input_buffers, - std::vector &output_desc, - std::vector &output_buffers, - rtStream_t stream) override; + Status LaunchKernel(const std::vector &input_desc, const std::vector &input_buffers, + std::vector &output_desc, std::vector &output_buffers, + rtStream_t stream) override; -private: + private: friend class AiCpuCCTaskBuilder; std::string so_name_; std::string kernel_name_; @@ -234,8 +214,6 @@ private: uint32_t block_dim_ = 1; void *sm_desc_ = nullptr; void *io_addr_ = nullptr; - bool is_custom_ = false; - uint32_t dump_flag_ = RT_KERNEL_DEFAULT; }; } // namespace ge diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 1c6b7a3e..68743bc8 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -238,10 +238,10 @@ enum GraphRunMode { PREDICTION = 0, TRAIN }; // Input/Output tensor info struct InputTensorInfo { - uint32_t data_type; // data type - std::vector dims; // shape description - void *data; // tensor data - int64_t length; // tensor length + uint32_t data_type; // data type + std::vector dims; // shape description + void *data; // tensor data + int64_t length; // tensor length }; struct OutputTensorInfo { @@ -250,11 +250,8 @@ struct OutputTensorInfo { std::unique_ptr data; // tensor data int64_t length; // tensor length OutputTensorInfo() : data_type(0), dims({}), data(nullptr), length(0) {} - OutputTensorInfo(OutputTensorInfo &&out) : - data_type(out.data_type), - dims(out.dims), - data(std::move(out.data)), - length(out.length) {} + 
OutputTensorInfo(OutputTensorInfo &&out) + : data_type(out.data_type), dims(out.dims), data(std::move(out.data)), length(out.length) {} OutputTensorInfo &operator=(OutputTensorInfo &&out) { if (this != &out) { @@ -273,67 +270,55 @@ using Status = uint32_t; using RunAsyncCallback = std::function &)>; // for ir build namespace ir_option { - static const char *const INPUT_FORMAT = "input_format"; - static const char *const INPUT_SHAPE = "input_shape"; - static const char *const OP_NAME_MAP = "op_name_map"; - static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; - static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; - static const char *const DYNAMIC_DIMS = kDynamicDims; - static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); - static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); - static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; - static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); - static const char *const CORE_TYPE = ge::CORE_TYPE.c_str(); - static const char *const SOC_VERSION = ge::SOC_VERSION.c_str(); - static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; - static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); - static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); - static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); - static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); - static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); - static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); - static const char *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); - static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; - static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); - static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); - static const char 
*const LOG_LEVEL = "log"; - static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); - - // for interface: aclgrphBuildModel - const std::set ir_builder_suppported_options = { - INPUT_FORMAT, - INPUT_SHAPE, - OP_NAME_MAP, - DYNAMIC_BATCH_SIZE, - DYNAMIC_IMAGE_SIZE, - DYNAMIC_DIMS, - INSERT_OP_FILE, - PRECISION_MODE, - EXEC_DISABLE_REUSED_MEMORY, - AUTO_TUNE_MODE, - OUTPUT_TYPE, - OUT_NODES, - INPUT_FP16_NODES, - LOG_LEVEL - }; - // for interface: aclgrphBuildInitialize - const std::set global_options = { - CORE_TYPE, - SOC_VERSION, - BUFFER_OPTIMIZE, - ENABLE_COMPRESS_WEIGHT, - COMPRESS_WEIGHT_CONF, - PRECISION_MODE, - EXEC_DISABLE_REUSED_MEMORY, - AUTO_TUNE_MODE, - ENABLE_SINGLE_STREAM, - AICORE_NUM, - FUSION_SWITCH_FILE, - ENABLE_SMALL_CHANNEL, - OP_SELECT_IMPL_MODE, - OPTYPELIST_FOR_IMPLMODE - }; -} +static const char *const INPUT_FORMAT = "input_format"; +static const char *const INPUT_SHAPE = "input_shape"; +static const char *const OP_NAME_MAP = "op_name_map"; +static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; +static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; +static const char *const DYNAMIC_DIMS = kDynamicDims; +static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); +static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); +static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; +static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); +static const char *const CORE_TYPE = ge::CORE_TYPE.c_str(); +static const char *const SOC_VERSION = ge::SOC_VERSION.c_str(); +static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; +static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); +static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); +static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); +static const char *const OP_SELECT_IMPL_MODE = 
ge::OP_SELECT_IMPL_MODE.c_str(); +static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); +static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); +static const char *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); +static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; +static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); +static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); +static const char *const LOG_LEVEL = "log"; +static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); + +// for interface: aclgrphBuildModel +const std::set ir_builder_suppported_options = { + INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, + DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE, DYNAMIC_DIMS, + INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, + AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, + INPUT_FP16_NODES, LOG_LEVEL}; +// for interface: aclgrphBuildInitialize +const std::set global_options = {CORE_TYPE, + SOC_VERSION, + BUFFER_OPTIMIZE, + ENABLE_COMPRESS_WEIGHT, + COMPRESS_WEIGHT_CONF, + PRECISION_MODE, + EXEC_DISABLE_REUSED_MEMORY, + AUTO_TUNE_MODE, + ENABLE_SINGLE_STREAM, + AICORE_NUM, + FUSION_SWITCH_FILE, + ENABLE_SMALL_CHANNEL, + OP_SELECT_IMPL_MODE, + OPTYPELIST_FOR_IMPLMODE}; +} // namespace ir_option } // namespace ge #endif // INC_EXTERNAL_GE_GE_API_TYPES_H_ diff --git a/inc/external/ge/ge_ir_build.h b/inc/external/ge/ge_ir_build.h index e6401093..acf6991a 100644 --- a/inc/external/ge/ge_ir_build.h +++ b/inc/external/ge/ge_ir_build.h @@ -1,18 +1,18 @@ /** -* Copyright 2020 Huawei Technologies Co., Ltd - -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at - -* http://www.apache.org/licenses/LICENSE-2.0 - -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef INC_EXTERNAL_GE_IR_BUILD_H_ #define INC_EXTERNAL_GE_IR_BUILD_H_ @@ -27,12 +27,11 @@ namespace { #define IR_MAJOR_VERSION (int(1)) #define IR_MINOR_VERSION (int(0)) #define IR_PATCH_VERSION (int(0)) -} +} // namespace -namespace ge{ +namespace ge { -struct ModelBufferData -{ +struct ModelBufferData { std::shared_ptr data = nullptr; uint64_t length; }; @@ -64,7 +63,8 @@ void aclgrphBuildFinalize(); * @retval GRAPH_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map &build_options, ModelBufferData& model); +graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map &build_options, + ModelBufferData &model); /** * @ingroup AscendCL @@ -75,7 +75,7 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map +#include #if defined(BUILD_VERSION_PERF) #define DOMI_LOGE(fmt, ...) 
#else @@ -83,12 +83,12 @@ } while (0); // If expr is not GRAPH_SUCCESS, print the log and return FAILED -#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \ - do { \ - if ((expr) != ge::GRAPH_SUCCESS) { \ - DOMI_LOGE(__VA_ARGS__); \ - return FAILED; \ - } \ +#define GE_CHK_GRAPH_STATUS_RET(expr, ...) \ + do { \ + if ((expr) != ge::GRAPH_SUCCESS) { \ + DOMI_LOGE(__VA_ARGS__); \ + return FAILED; \ + } \ } while (0); // If expr is not SUCCESS, print the log and execute a custom statement @@ -99,13 +99,13 @@ } while (0); // If expr is not true, print the log and return the specified status -#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ - do { \ - bool b = (expr); \ - if (!b) { \ - GELOGE(_status, __VA_ARGS__); \ - return _status; \ - } \ +#define GE_CHK_BOOL_RET_STATUS(expr, _status, ...) \ + do { \ + bool b = (expr); \ + if (!b) { \ + GELOGE(_status, __VA_ARGS__); \ + return _status; \ + } \ } while (0); // If expr is not true, print the log and return the specified status diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index 3697a526..3ab6cf06 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -125,13 +125,13 @@ GE_ERRORNO_CLIENT(GE_CLI_GE_ALREADY_INITIALIZED, 10, "GE is already initialized. 
GE_ERRORNO_CLIENT(GE_CLI_GE_NOT_INITIALIZED, 11, "GE is not yet initialized or is finalized."); // 1343229963 // Init module error code definition -GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 -GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 -GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 -GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 -GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 +GE_ERRORNO_INIT(GE_MULTI_INIT, 0, "Multiple initializations are not supported."); // 1343234048 +GE_ERRORNO_INIT(GE_FINALIZE_NOT_INIT, 1, "Finalize is not allowed before initialization."); // 1343234049 +GE_ERRORNO_INIT(GE_MULTI_FINALIZE, 2, "Multiple finalizations are not supported."); // 1343234050 +GE_ERRORNO_INIT(GE_PROF_MULTI_INIT, 3, "Multiple profiling initializations are not supported."); // 1343234051 +GE_ERRORNO_INIT(GE_PROF_NOT_INIT, 4, "Profing initializations have not been done."); // 1343234052 GE_ERRORNO_INIT(GE_PROF_MODE_CONFLICT, 5, - "Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 + "Profiling command mode which is preferred is running, the api mode will not work."); // 1343234053 // Session module error code definition GE_ERRORNO_SESSION(GE_SESS_INIT_FAILED, 0, "Failed to initialize session."); // 1343238144 @@ -216,8 +216,8 @@ GE_ERRORNO_ENGINE(GE_ENG_FINALIZE_FAILED, 1, "Engine finalize failed."); GE_ERRORNO_ENGINE(GE_ENG_MEMTYPE_ERROR, 2, "Memory type HBM is necessary when engine is in device"); // 1343246338 // Optimize errocode -GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, "The node of the graph to be deleted."); // 1343242303 -GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 +GE_ERRORNO_GRAPH(TO_BE_DELETED, 63, 
"The node of the graph to be deleted."); // 1343242303 +GE_ERRORNO_GRAPH(NOT_CHANGED, 64, "The node of the graph no changed."); // 1343242304 // Ops module error code definition GE_ERRORNO_OPS(GE_OPS_KERNEL_STORE_INIT_FAILED, 0, "Failed to initialize OpsKernelInfoStore."); // 1343250432 @@ -304,7 +304,6 @@ GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, 16, "Failed to allocate wei GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_VAR_MEM_FAILED, 17, "Failed to allocate variable memory."); GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 18, "GE AIPP is not exist."); GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 19, "GE Dynamic AIPP is not support to query temporarily."); -GE_ERRORNO_EXECUTOR(GE_EXEC_ALLOC_P2P_MEM_FAILED, 20, "Failed to allocate P2P memory"); // Generator module error code definition GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed."); diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 58ae9e3e..6033521c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -1,18 +1,18 @@ /** -* Copyright 2019-2020 Huawei Technologies Co., Ltd -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef INC_FRAMEWORK_COMMON_GE_TYPES_H_ #define INC_FRAMEWORK_COMMON_GE_TYPES_H_ @@ -28,38 +28,24 @@ #include "external/ge/ge_api_types.h" namespace ge { -enum RuntimeType { -HOST = 0, -DEVICE = 1 -}; +enum RuntimeType { HOST = 0, DEVICE = 1 }; -enum PerfLevel { -GEN_TASK_WITH_FUSION = -1, -GEN_TASK_WITHOUT_L2FUSION = 3, -GEN_TASK_WITHOUT_FUSION = 4 -}; +enum PerfLevel { GEN_TASK_WITH_FUSION = -1, GEN_TASK_WITHOUT_L2FUSION = 3, GEN_TASK_WITHOUT_FUSION = 4 }; enum FrameworkType { -CAFFE = 0, -MINDSPORE = 1, -TENSORFLOW = 3, -ANDROID_NN, -FRAMEWORK_RESERVED, + CAFFE = 0, + MINDSPORE = 1, + TENSORFLOW = 3, + ANDROID_NN, + FRAMEWORK_RESERVED, }; enum OpEngineType { -ENGINE_SYS = 0, // default engine -ENGINE_AICORE = 1, -ENGINE_VECTOR = 2, -ENGINE_AICUBE = 3, // not support -ENGINE_AIVECTOR = 4 // not support -}; - -enum InputAippType{ - DATA_WITHOUT_AIPP = 0, - DATA_WITH_STATIC_AIPP, - DATA_WITH_DYNAMIC_AIPP, - DYNAMIC_AIPP_NODE + ENGINE_SYS = 0, // default engine + ENGINE_AICORE = 1, + ENGINE_VECTOR = 2, + ENGINE_AICUBE = 3, // not support + ENGINE_AIVECTOR = 4 // not support }; enum InputAippType { DATA_WITHOUT_AIPP = 0, DATA_WITH_STATIC_AIPP, DATA_WITH_DYNAMIC_AIPP, DYNAMIC_AIPP_NODE }; @@ -109,7 +95,7 @@ struct OutputData { struct Command { std::string cmd_type; // Command type std::vector cmd_params; // Command params - uint64_t module_index; // prof module + uint64_t module_index; // prof module }; // The definition of I/O shape description diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h 
index 949d8b4c..fbe7e73f 100644 --- a/inc/framework/common/helper/model_helper.h +++ b/inc/framework/common/helper/model_helper.h @@ -32,10 +32,10 @@ class ModelHelper { ModelHelper() = default; ~ModelHelper(); - Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, - const std::string &output_file, ge::ModelBufferData &model); + Status SaveToOmModel(const GeModelPtr& ge_model, const SaveParam& save_param, const std::string& output_file, + ge::ModelBufferData& model); Status SaveOriginalGraphToOmModel(const ge::Graph& graph, const std::string& output_file); - Status LoadModel(const ge::ModelData &model_data); + Status LoadModel(const ge::ModelData& model_data); Status GetModelBufferData(ge::ModelBufferData& model); const ModelFileHeader* GetFileHeader() const { return file_header_; } @@ -44,15 +44,15 @@ class ModelHelper { void SetSaveMode(bool val) { is_offline_ = val; } bool GetSaveMode(void) const { return is_offline_; } - Status GetBaseNameFromFileName(const std::string &file_name, std::string &base_name); - Status GetModelNameFromMergedGraphName(const std::string &graph_name, std::string &model_name); + Status GetBaseNameFromFileName(const std::string& file_name, std::string& base_name); + Status GetModelNameFromMergedGraphName(const std::string& graph_name, std::string& model_name); private: bool is_assign_model_ = false; bool is_offline_ = true; ModelFileHeader* file_header_ = nullptr; // Encrypted model need delete temp model and unencrypted model need not delete model - uint8_t *model_addr_tmp_ = nullptr; + uint8_t* model_addr_tmp_ = nullptr; uint32_t model_len_tmp_ = 0; GeModelPtr model_; @@ -66,8 +66,8 @@ class ModelHelper { Status LoadTBEKernelStore(OmFileLoadHelper& om_load_helper); Status LoadCustAICPUKernelStore(OmFileLoadHelper& om_load_helper); Status ReleaseLocalModelData() noexcept; - Status SaveModelPartition(std::shared_ptr& om_file_save_helper, - ModelPartitionType type, const uint8_t* data, size_t size); + Status 
SaveModelPartition(std::shared_ptr& om_file_save_helper, ModelPartitionType type, + const uint8_t* data, size_t size); }; } // namespace ge #endif // INC_FRAMEWORK_COMMON_HELPER_MODEL_HELPER_H_ diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h index 4ca54b50..fec7e294 100644 --- a/inc/framework/common/helper/om_file_helper.h +++ b/inc/framework/common/helper/om_file_helper.h @@ -32,7 +32,7 @@ using std::vector; namespace ge { struct ModelPartition { ModelPartitionType type; - uint8_t* data = 0; + uint8_t *data = 0; uint32_t size = 0; }; @@ -81,8 +81,8 @@ class OmFileSaveHelper { const std::vector &GetModelPartitions() const; - Status SaveModel(const SaveParam &save_param, const char *target_file, - ge::ModelBufferData& model, bool is_offline = true); + Status SaveModel(const SaveParam &save_param, const char *target_file, ge::ModelBufferData &model, + bool is_offline = true); Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true); diff --git a/inc/framework/common/op/attr_value_util.h b/inc/framework/common/op/attr_value_util.h index e3803b78..8a90cfa2 100644 --- a/inc/framework/common/op/attr_value_util.h +++ b/inc/framework/common/op/attr_value_util.h @@ -21,6 +21,7 @@ #include #include +#include "common/types.h" #include "graph/debug/ge_attr_define.h" #include "proto/om.pb.h" @@ -155,6 +156,6 @@ bool GetAttrDefListValue(const std::string &key, int idx, int32_t *value, const bool GetAttrDefListValue(const std::string &key, int idx, uint32_t *value, const AttrDefMap &attr); bool GetAttrDefListValue(const std::string &key, int idx, float *value, const AttrDefMap &attr); bool GetAttrDefListValue(const std::string &key, int idx, double *value, const AttrDefMap &attr); -} +} // namespace ge #endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ diff --git a/inc/framework/common/op/ge_op_utils.h b/inc/framework/common/op/ge_op_utils.h index 4718b180..87cf54d8 100644 --- 
a/inc/framework/common/op/ge_op_utils.h +++ b/inc/framework/common/op/ge_op_utils.h @@ -22,8 +22,7 @@ #include #include "common/op/attr_value_util.h" -#include "register/register_types.h" -#include "register/register_error_codes.h" +#include "common/types.h" #include "common/util.h" #include "graph/attr_value.h" #include "graph/ge_tensor.h" diff --git a/inc/framework/common/scope_guard.h b/inc/framework/common/scope_guard.h index 001a0e75..2154648d 100644 --- a/inc/framework/common/scope_guard.h +++ b/inc/framework/common/scope_guard.h @@ -42,8 +42,9 @@ class ScopeGuard { if (on_exit_scope_ != nullptr) { try { on_exit_scope_(); - } catch (std::bad_function_call &e) { } - catch (...) { } + } catch (std::bad_function_call &e) { + } catch (...) { + } } } } diff --git a/inc/framework/common/string_util.h b/inc/framework/common/string_util.h index 3d7f6488..3e4bf093 100644 --- a/inc/framework/common/string_util.h +++ b/inc/framework/common/string_util.h @@ -37,7 +37,7 @@ class StringUtils { return s; } // lint -esym(551,*) - static std::string &Rtrim(std::string &s) { /*lint !e618*/ + static std::string &Rtrim(std::string &s) { /*lint !e618*/ #if __cplusplus >= 201103L (void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); #else diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index 0644b0f2..ad284d07 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -434,7 +434,6 @@ REGISTER_OPTYPE_DECLARE(HCOMREDUCESCATTER, "HcomReduceScatter"); REGISTER_OPTYPE_DECLARE(HCOMSEND, "HcomSend"); REGISTER_OPTYPE_DECLARE(HCOMRECEIVE, "HcomReceive"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEREAD, "HcomRemoteRead"); -REGISTER_OPTYPE_DECLARE(HCOMREMOTEREFREAD, "HcomRemoteRefRead"); REGISTER_OPTYPE_DECLARE(HCOMREMOTEWRITE, "HcomRemoteWrite"); REGISTER_OPTYPE_DECLARE(VARASSIGN, "VarAssign"); @@ -449,7 +448,6 @@ REGISTER_OPTYPE_DECLARE(MEMCPYASYNC, "MemcpyAsync"); REGISTER_OPTYPE_DECLARE(MEMCPYADDRASYNC, 
"MemcpyAddrAsync"); REGISTER_OPTYPE_DECLARE(STREAMMERGE, "StreamMerge"); REGISTER_OPTYPE_DECLARE(ENDGRAPH, "EndGraph"); -REGISTER_OPTYPE_DECLARE(MODELEXIT, "ModelExit"); REGISTER_OPTYPE_DECLARE(SEND, "Send"); REGISTER_OPTYPE_DECLARE(RECV, "Recv"); REGISTER_OPTYPE_DECLARE(ENDOFSEQUENCE, "EndOfSequence"); @@ -567,10 +565,10 @@ enum ModelCheckType { /// @brief dynamic input type /// enum DynamicInputType { - FIXED = 0, // default mode - DYNAMIC_BATCH = 1, - DYNAMIC_IMAGE = 2, - DYNAMIC_DIMS = 3 + FIXED = 0, // default mode + DYNAMIC_BATCH = 1, + DYNAMIC_IMAGE = 2, + DYNAMIC_DIMS = 3 }; /// diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index 42ab3868..b1c278d8 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -30,12 +30,12 @@ #include "framework/common/ge_inner_error_codes.h" #include "mmpa/mmpa_api.h" -#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ - do { \ - if (size <= 0) { \ - DOMI_LOGE("param[%s] is not a positive number", #size); \ - return PARAM_INVALID; \ - } \ +#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ + do { \ + if (size <= 0) { \ + DOMI_LOGE("param[%s] is not a positive number", #size); \ + return PARAM_INVALID; \ + } \ } while (0) #define CHECK_FALSE_EXEC(expr, exec_expr, ...) \ @@ -113,84 +113,84 @@ } while (0) // Check if the parameter is null. If yes, return PARAM_INVALID and record the error -#define GE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return ge::PARAM_INVALID; \ + } \ } while (0) // Check if the parameter is null. 
If yes, just return and record the error -#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return; \ - } \ +#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return; \ + } \ } while (0) // Check whether the parameter is null. If so, execute the exec_expr expression and record the error log -#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - exec_expr; \ - } \ +#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + exec_expr; \ + } \ } while (0) // Check whether the parameter is null. If yes, return directly and record the error log -#define GE_RT_VOID_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return; \ - } \ +#define GE_RT_VOID_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return; \ + } \ } while (0) // Check if the parameter is null. 
If yes, return false and record the error log -#define GE_RT_FALSE_CHECK_NOTNULL(val) \ - do { \ - if (val == nullptr) { \ - DOMI_LOGE("param[%s] must not be null.", #val); \ - return false; \ - } \ +#define GE_RT_FALSE_CHECK_NOTNULL(val) \ + do { \ + if (val == nullptr) { \ + DOMI_LOGE("param[%s] must not be null.", #val); \ + return false; \ + } \ } while (0) // Check if the parameter is out of bounds -#define GE_CHECK_SIZE(size) \ - do { \ - if (size == 0) { \ - DOMI_LOGE("param[%s] is out of range", #size); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_SIZE(size) \ + do { \ + if (size == 0) { \ + DOMI_LOGE("param[%s] is out of range", #size); \ + return ge::PARAM_INVALID; \ + } \ } while (0) // Check if the container is empty -#define GE_CHECK_VECTOR_NOT_EMPTY(vector) \ - do { \ - if (vector.empty()) { \ - DOMI_LOGE("param[%s] is empty!", #vector); \ - return ge::FAILED; \ - } \ +#define GE_CHECK_VECTOR_NOT_EMPTY(vector) \ + do { \ + if (vector.empty()) { \ + DOMI_LOGE("param[%s] is empty!", #vector); \ + return ge::FAILED; \ + } \ } while (0) // Check if the value on the left is greater than or equal to the value on the right -#define GE_CHECK_GE(lhs, rhs) \ - do { \ - if (lhs < rhs) { \ - DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_GE(lhs, rhs) \ + do { \ + if (lhs < rhs) { \ + DOMI_LOGE("param[%s] is less than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ } while (0) // Check if the value on the left is less than or equal to the value on the right -#define GE_CHECK_LE(lhs, rhs) \ - do { \ - if (lhs > rhs) { \ - DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ - return ge::PARAM_INVALID; \ - } \ +#define GE_CHECK_LE(lhs, rhs) \ + do { \ + if (lhs > rhs) { \ + DOMI_LOGE("param[%s] is greater than[%s]", #lhs, #rhs); \ + return ge::PARAM_INVALID; \ + } \ } while (0) #define GE_DELETE_NEW_SINGLE(var) \ @@ -345,7 +345,7 @@ std::string ToString(const 
google::protobuf::RepeatedField &rpd_field) { /// @return Timestamp, in microseconds (US) /// /// -uint64_t GetCurrentTimestamp(); +uint64_t GetCurrentTimestap(); /// /// @ingroup domi_common diff --git a/inc/framework/engine/dnnengine.h b/inc/framework/engine/dnnengine.h index 1bcf5e07..65897ac5 100644 --- a/inc/framework/engine/dnnengine.h +++ b/inc/framework/engine/dnnengine.h @@ -30,7 +30,6 @@ enum PriorityEnum { COST_0 = 0, COST_1, COST_2, - COST_3, COST_9 = 9, COST_10 = 10, }; diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index ba90fd03..6e82bb96 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -38,14 +38,14 @@ class DynamicSingleOp; struct RunModelData { uint32_t index; // Data index uint32_t modelId; - std::vector blobs; // All input/output data buffer - uint32_t timestamp; // Data creation time - uint32_t timeout; // Processing timeout - uint64_t request_id = 0; // Request ID - uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 - uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 - uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 - std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty + std::vector blobs; // All input/output data buffer + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint64_t request_id = 0; // Request ID + uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 + uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 + uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 + std::vector dynamic_dims; // Dynamic dims scene, set dynamic dims, not 
supported by default:empty }; class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { @@ -264,10 +264,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { static ge::Status LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream, DynamicSingleOp **single_op); - static ge::Status ExecuteAsync(DynamicSingleOp *executor, - const std::vector &input_desc, - const std::vector &inputs, - std::vector &output_desc, + static ge::Status ExecuteAsync(DynamicSingleOp *executor, const std::vector &input_desc, + const std::vector &inputs, std::vector &output_desc, std::vector &outputs); static ge::Status ReleaseSingleOpResource(void *stream); diff --git a/inc/framework/ge_runtime/davinci_model.h b/inc/framework/ge_runtime/davinci_model.h index 8b6ca978..91e70159 100644 --- a/inc/framework/ge_runtime/davinci_model.h +++ b/inc/framework/ge_runtime/davinci_model.h @@ -27,10 +27,10 @@ namespace ge { namespace model_runner { class DavinciModel { public: - DavinciModel(const std::vector> &task_info_list, /*lint !e151*/ + DavinciModel(const std::vector> &task_info_list, const std::vector> &data_info_list, - const std::vector> &output_info_list, /*lint !e151*/ - const std::vector> &constant_info_list, /*lint !e1049*/ + const std::vector> &output_info_list, + const std::vector> &constant_info_list, const std::vector &variable_info_list, const std::vector &wait_active_stream_list, const std::vector &force_copy_stream_list, uint64_t mem_size = 0, uint64_t weight_size = 0, @@ -68,12 +68,12 @@ class DavinciModel { uint32_t GetBatchNum() const { return batch_num_; } uint32_t GetEventNum() const { return event_num_; } - const std::vector &GetWaitActiveStreams() const { return wait_active_stream_list_; } /*lint !e1413*/ - const std::vector &GetForceCopyStreams() const { return force_copy_stream_list_; } /*lint !e1413*/ + const std::vector &GetWaitActiveStreams() const { return wait_active_stream_list_; } + const std::vector 
&GetForceCopyStreams() const { return force_copy_stream_list_; } int32_t GetPriority() const { return priority_; } - const std::vector> &GetTaskInfoList() const { return task_info_list_; } /*lint !e151*/ + const std::vector> &GetTaskInfoList() const { return task_info_list_; } const std::vector> &GetDataInfoList() const { return data_info_list_; } const std::vector> &GetOutputInfoList() const { return output_info_list_; } const std::vector> &GetConstantInfoList() const { return output_info_list_; } @@ -81,7 +81,7 @@ class DavinciModel { private: std::vector> task_info_list_; - std::vector> data_info_list_; /*lint !e151*/ + std::vector> data_info_list_; std::vector> output_info_list_; std::vector> constant_info_list_; std::vector variable_info_list_; diff --git a/inc/framework/ge_runtime/model_runner.h b/inc/framework/ge_runtime/model_runner.h index a5256af7..e495dfdf 100644 --- a/inc/framework/ge_runtime/model_runner.h +++ b/inc/framework/ge_runtime/model_runner.h @@ -52,11 +52,8 @@ class ModelRunner { bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data); - bool GetInputOutputDescInfo(uint32_t model_id, - bool zero_copy, - std::vector *input_desc, - std::vector *output_desc, - std::vector *input_format, + bool GetInputOutputDescInfo(uint32_t model_id, bool zero_copy, std::vector *input_desc, + std::vector *output_desc, std::vector *input_format, std::vector *output_format); private: diff --git a/inc/framework/ge_runtime/task_info.h b/inc/framework/ge_runtime/task_info.h index 86119219..e36c4333 100644 --- a/inc/framework/ge_runtime/task_info.h +++ b/inc/framework/ge_runtime/task_info.h @@ -161,12 +161,13 @@ class TbeTaskInfo : public TaskInfo { class AicpuTaskInfo : public TaskInfo { public: AicpuTaskInfo(const std::string &op_name, uint32_t stream_id, const string &so_name, const std::string &kernel_name, - const std::string &node_def, const std::vector &input_data_addrs, + const std::string &node_def, const std::string &ext_info, 
const std::vector &input_data_addrs, const std::vector &output_data_addrs, bool dump_flag) : TaskInfo(op_name, stream_id, TaskInfoType::AICPU, dump_flag), so_name_(so_name), kernel_name_(kernel_name), node_def_(node_def), + ext_info_(ext_info), input_data_addrs_(input_data_addrs), output_data_addrs_(output_data_addrs) {} ~AicpuTaskInfo() override {} @@ -176,11 +177,13 @@ class AicpuTaskInfo : public TaskInfo { const std::string &node_def() const { return node_def_; } const std::vector &input_data_addrs() const { return input_data_addrs_; } const std::vector &output_data_addrs() const { return output_data_addrs_; } + const std::string &ext_info() const { return ext_info_; } private: std::string so_name_; std::string kernel_name_; std::string node_def_; + std::string ext_info_; std::vector input_data_addrs_; std::vector output_data_addrs_; }; @@ -293,19 +296,19 @@ class HcclTaskInfo : public TaskInfo { hcom_distribute_task_(hcom_distribute_task) {} ~HcclTaskInfo() override {} - const std::string &hccl_type() const { return hccl_type_; } /*lint !e1413*/ + const std::string &hccl_type() const { return hccl_type_; } void *input_data_addr() const { return input_data_addr_; } void *output_data_addr() const { return output_data_addr_; } void *workspace_addr() const { return workspace_addr_; } int64_t workspace_size() const { return workspace_size_; } int64_t hccl_stream_num() const { return hccl_stream_num_; } - const std::vector &private_def() const { return private_def_; } /*lint !e1413*/ + const std::vector &private_def() const { return private_def_; } void *ops_kernel_store() const { return ops_kernel_store_; } int32_t count() const { return count_; } int64_t root_id() const { return root_id_; } int64_t op_type() const { return op_type_; } int64_t data_type() const { return data_type_; } - const std::string group() const { return group_; } + const std::string &group() const { return group_; } std::function hcom_bind_model() const { return hcom_bind_model_; } 
std::function hcom_unbind_model() const { return hcom_unbind_model_; } std::function, void *)> hcom_distribute_task() const { diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index c446b983..37bca897 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -86,7 +86,6 @@ class GeGenerator { Status BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline = true); - Status CheckForSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs); class Impl; diff --git a/inc/framework/memory/memory_api.h b/inc/framework/memory/memory_api.h index d8b06125..ebb7e68c 100644 --- a/inc/framework/memory/memory_api.h +++ b/inc/framework/memory/memory_api.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,6 @@ #include #include "ge/ge_api_error_codes.h" -#include "graph//types.h" #include "runtime/mem.h" namespace ge { @@ -36,12 +35,6 @@ struct HostVarInfo { uint64_t var_size; }; -struct TensorInfo { - std::string var_name; - std::vector dims; - DataType data_type; -}; - /// /// \param size [in] rdma pool memory size to be allocated. /// \param mem_type [in] memory type for rdma pool. @@ -55,13 +48,6 @@ Status InitRdmaPool(size_t size, rtMemType_t mem_type = RT_MEMORY_HBM); Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t mem_type = RT_MEMORY_HBM); /// -/// \param tensor_info [in] description for tensor stored shared memory. -/// \param dev_addr [out] malloced shared memory addr. -/// \param memory_size [out] malloced shared memory size. 
-/// \return Status result of function -Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uint64_t &memory_size); - -/// /// \param var_name [in] var_name name of host variable. /// \param base_addr [out] base_addr vase addr of host variable. /// \param var_size [out] var_size memory_size of host variable. diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h index 4552fa7c..bbec014b 100644 --- a/inc/framework/memory/memory_assigner.h +++ b/inc/framework/memory/memory_assigner.h @@ -33,7 +33,7 @@ class MemoryAssigner { MemoryAssigner &operator=(const MemoryAssigner &) = delete; - Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); + Status AssignMemory(bool is_loop_graph, size_t &mem_offset, size_t &zero_copy_mem_size); private: ge::ComputeGraphPtr compute_graph_; diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h index e7ca05f7..45a8896d 100644 --- a/inc/framework/omg/omg.h +++ b/inc/framework/omg/omg.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ #include #include #include +#include "framework/common/types.h" #include "framework/omg/omg_inner_types.h" #include "framework/omg/parser/parser_inner_ctx.h" #include "proto/ge_ir.pb.h" @@ -91,6 +92,8 @@ void GetGroupName(ge::proto::ModelDef &model); void FindParserSo(const string &path, vector &fileList, string &caffe_parser_path); +Status CheckCustomAiCpuOpLib(); + Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format); diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 6cc4afd3..e1a7da0b 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -25,6 +25,7 @@ #include #include #include "framework/common/fmk_error_codes.h" +#include "framework/common/types.h" #include "register/register_fmk_types.h" using domi::DOMI_TENSOR_ND; @@ -91,8 +92,6 @@ struct OmgContext { std::map> out_nodes_map; // user-designate out nodes (this is used for determing the orders) std::vector> user_out_nodes; - // default out nodes (this is used for determing the orders) - std::vector> default_out_nodes; // save the output node of the network, value = topName, // topName indicates the output name of the operator. 
std::vector user_out_nodes_top_vec; @@ -100,8 +99,8 @@ struct OmgContext { std::vector net_out_nodes; // net out nodes top names(only caffe has top) std::vector out_top_names; - // net data nodes top names(only caffe has top) - std::vector data_top_names; + // path for the aicpu custom operator so_file + std::vector aicpu_op_run_paths; // preferential format used by the entire network domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; domi::FrameworkType type = domi::FRAMEWORK_RESERVED; diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 20bfcef4..3a8aa6ce 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -18,7 +18,7 @@ #define INC_FRAMEWORK_OMG_PARSER_MODEL_PARSER_H_ #include -#include "framework/omg/parser/parser_types.h" +#include "framework/common/types.h" #include "framework/omg/omg_inner_types.h" #include "graph/attr_value.h" #include "graph/compute_graph.h" diff --git a/inc/framework/omg/parser/op_parser.h b/inc/framework/omg/parser/op_parser.h index 087bad32..251c0447 100644 --- a/inc/framework/omg/parser/op_parser.h +++ b/inc/framework/omg/parser/op_parser.h @@ -18,7 +18,7 @@ #define INC_FRAMEWORK_OMG_PARSER_OP_PARSER_H_ #include -#include "framework/omg/parser/parser_types.h" +#include "common/types.h" #include "omg/omg_inner_types.h" #include "proto/om.pb.h" #include "graph/ge_tensor.h" diff --git a/inc/framework/omg/parser/parser_factory.h b/inc/framework/omg/parser/parser_factory.h index 4845606f..90d441d7 100644 --- a/inc/framework/omg/parser/parser_factory.h +++ b/inc/framework/omg/parser/parser_factory.h @@ -21,8 +21,8 @@ #include #include #include +#include "framework/common/types.h" #include "framework/omg/omg_inner_types.h" -#include "framework/omg/parser/parser_types.h" using Status = domi::Status; diff --git a/inc/framework/omg/parser/parser_inner_ctx.h b/inc/framework/omg/parser/parser_inner_ctx.h index b92c6155..53f79895 100644 --- 
a/inc/framework/omg/parser/parser_inner_ctx.h +++ b/inc/framework/omg/parser/parser_inner_ctx.h @@ -29,42 +29,12 @@ namespace ge { struct ParserContext { - // format of the input specified by the command line - std::unordered_map input_nodes_format_map; - // user-designate input dims - std::vector>> user_input_dims; std::unordered_map> input_dims; - // resolve the mapping between operators with the same name and corresponding network. format e.g. - // Detectionoutput:SsdDetectiontOutput - std::map op_conf_map; - // user-designate out nodes (this is used for determing the orders) - std::vector> user_out_nodes; - // default out nodes (this is used for determing the orders) - std::vector> default_out_nodes; - // save the output node of the network. key = operator name, value = index, index indicates the output index of the - // operator - std::map> out_nodes_map; - // save the output node of the network, value = topName, - // topName indicates the output name of the operator. - std::vector user_out_nodes_top_vec; - // net out nodes (where user_out_nodes or leaf nodes) - std::vector net_out_nodes; - // net data nodes top names(only caffe has top) - std::vector data_top_names; - // net out nodes top names(only caffe has top) - std::vector out_top_names; - // Whether to use dynamic batch size or dynamic image size - bool is_dynamic_input = false; - bool train_flag = false; domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; - domi::FrameworkType type = domi::FRAMEWORK_RESERVED; RunMode run_mode = ONLY_PRE_CHECK; - // save caffe custom proto path, used by caffe parse - std::string custom_proto_path; - // save caffe proto path, used by caffe parse - std::string caffe_proto_path; - // name of the pass that needs to take effect - std::string enable_scope_fusion_passes; + std::string custom_proto_path; // save caffe custom proto path, used by caffe parse + std::string caffe_proto_path; // save caffe proto path, used by caffe parse + std::string enable_scope_fusion_passes; 
// name of the pass that needs to take effect }; ParserContext &GetParserContext(); diff --git a/inc/framework/omg/parser/parser_types.h b/inc/framework/omg/parser/parser_types.h deleted file mode 100644 index 62c9c750..00000000 --- a/inc/framework/omg/parser/parser_types.h +++ /dev/null @@ -1,508 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PARSER_COMMON_TYPES_H_ -#define PARSER_COMMON_TYPES_H_ - -#include -#include - -#include "register/register_types.h" - -#if !defined(__ANDROID__) && !defined(ANDROID) -#ifndef DOMI_DYNAMIC_CAST -#define DOMI_DYNAMIC_CAST static_cast -#endif -#ifndef DOMI_DYNAMIC_POINTER_CAST -#define DOMI_DYNAMIC_POINTER_CAST std::static_pointer_cast -#endif -#else -#ifndef DOMI_DYNAMIC_CAST -#define DOMI_DYNAMIC_CAST static_cast -#endif -#ifndef DOMI_DYNAMIC_POINTER_CAST -#define DOMI_DYNAMIC_POINTER_CAST std::static_pointer_cast -#endif -#endif - -namespace ge { -namespace parser { -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DATA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *AIPPDATA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONVOLUTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CORRELATION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CORRELATIONV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char 
*DECONVOLUTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *POOLING; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ELTWISE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RELU; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RELU6; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SIGMOID; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ABSVAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TANH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PRELU; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BATCHNORM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FUSIONBATCHNORM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SCALE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FULL_CONNECTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PLUS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACTIVATION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FLATTEN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ADD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SUB; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MUL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MATMUL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RSQRT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BIASADD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RESHAPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFORMAT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPCONVOLUTION; -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *DROPOUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DROPOUTGENMASK; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DROPOUTDOMASK; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCAT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ROIPOOLING; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PROPOSAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FSRDETECTIONOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DETECTIONPOSTPROCESS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LRN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TRANSDATA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PERMUTE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDNORMALIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDPRIORBOX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NETOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDDETECTIONOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFINEDETDETECTIONOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CHANNELAXPY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PSROIPOOLING; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *POWER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *POW; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ROIALIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PYTHON; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FREESPACEEXTRACT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPATIALTF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const 
char *SHAPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SHAPEN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ARGMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GATHERND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GATHER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REALDIV; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PACK; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SLICE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SLICED; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FLOORDIV; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQUEEZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *UNSQUEEZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STRIDEDSLICE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RANGE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RPNPROPOSALS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DECODEBBOX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PADV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MIRRORPAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CLIPBOXES; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTRCNNPREDICTIONS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPLIT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPLITV; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EXPANDDIMS; -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *EMPTY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MEAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GREATER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SWITCH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SWITCHN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MERGE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SYMBOLICGRADIENT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REMOTECALL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_IF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSIF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *IF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CASE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *_WHILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *WHILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATELESSWHILE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PARTITIONEDCALL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STATEFULPARTITIONEDCALL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FAKEPARAM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TRANSPOSE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TRANSPOSED; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CAST; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REGION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *YOLO; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *YOLODETECTIONOUTPUT; -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *FILL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REVERSE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *UNPACK; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *YOLO2REORG; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCESUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONSTANT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RESIZEBILINEAR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RESIZEBILINEARGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MAXIMUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FRAMEWORKOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ARG; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FUSEDBATCHNORMGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LSTM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HIGHWAY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RNN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATTENTIONDECODER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LOGICAL_NOT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LOGICAL_AND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LOGICAL_OR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EQUAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NOTEQUAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *INTERP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SHUFFLECHANNEL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *AIPP; -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *MULTISHAPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RECIPROCAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SELU; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ELU; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACOSH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASINH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MINIMUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CLIP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *L2NORMALIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CROPANDRESIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *UNUSEDCONST; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPARSETODENSE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NONMAXSUPPRESSION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TOPKV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *INVERTPERMUTATION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MULTINOMIAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REVERSESEQUENCE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCEPROD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCEMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCEMIN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EXTRACTIMAGEPATCHES; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQRT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCEALL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RESIZENEARESTNEIGHBOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char 
*SPACETOBATCHND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BATCHTOSPACEND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSERT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GREATEREQUAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FLOOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RANDOMUNIFORM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BATCHMATMUL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPACETODEPTH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHTOSPACE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RINT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATAN2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATANH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACOS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASIN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NEG; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LOG; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ROUND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *UPSAMPLE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FLOORMOD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LESS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LESSEQUAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ONEHOT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFSWITCH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFMERGE; -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *ENTER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFENTER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LOOPCOND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NEXTITERATION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFNEXTITERATION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EXIT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFEXIT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONTROLTRIGGER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ZEROSLIKE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EXP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *WHERE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FAKEQUANTWITHMINMAXVARS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTPLUS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SOFTSIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *COSH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SINH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQUAREDDIFFERENCE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char - *REQUIREDSPACETOBATCHPADDINGS; // for retinanet scope fusion -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDPOSTPROCESSOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETBOXES; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINAMULTIANCHORS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETCLIPPEDBOXES; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETFILTEREDDETECTIONS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char 
*RETINANETPOSTPROCESSOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RETINANETANCHORS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNMAP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNMAP1; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNSECONDSTAGEPOSTPROCESSOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNROIINTERPOOLING; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNFIRSTSTAGEPOSTPROCESSOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNGRIDANCHORGENERATOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ROIINTERPOOLING; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FASTERRCNNCLIPTOWINDOW; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *EMBEDLOOKUP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HASHLOOKUP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LSH_PROJ; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SVDF; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDANCHORGENERATOR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *IDENTITY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *IDENTITYN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PLACEHOLDERWITHDEFAULT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SELECT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GETSPAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STOPGRADIENT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PREVENTGRADIENT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GUARANTEECONST; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char 
*BROADCASTGRADIENTARGS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BROADCASTARGS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONFUSIONMATRIX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RANK; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PLACEHOLDER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *END; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BASICLSTMCELL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GETNEXT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *INITDATA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REFIDENTITY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BITCAST; - -/***************Ann special operator*************************/ -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_MEAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_CONVOLUTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_DEPCONVOLUTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_FULLCONNECTION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_NETOUTPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_DATA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_RESHAPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_ADD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_MUL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_SUB; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_DIV; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_DEQUANTIZE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_QUANTIZE; -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *ANN_PAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANN_RESIZE_BILINEAR; - -/***************************************************/ -/******************Training operator*************************/ -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *GATHERV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONVGRADFILTER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONV2D; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONV2DBACKPROPINPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FUSEDBATCHNORM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BIASADDGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACTIVATIONGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MAXPOOLWITHARGMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MAXPOOLGRADWITHARGMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPARSESOFTMAXCROSSENTROPYWITHLOGITS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SNAPSHOT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VAR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MEANGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TRANSLATE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ADDN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *L2LOSS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MULTIPLY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HUBERLOSSGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HUBERLOSS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NEGATIVE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDCAST; 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPARSESOFTMAXCROSSENTROPY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SPARSESOFTMAXCROSSENTROPYGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDSQUEEZEFUSION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCATFOUR2FIVE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCATFIVE2FOUR; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDREALDIVTILEMUL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SSDSUMMULREALDIVMEAN; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARIABLEV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARHANDLEOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TEMPORARYVARIABLE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DESTROYTEMPORARYVARIABLE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARIABLE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGNVARIABLEOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGNADD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGNADDVARIABLEOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGNSUB; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASSIGNSUBVARIABLEOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYMOMENTUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RESOURCEAPPLYMOMENTUM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SGD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *NOOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *READVARIABLEOP; 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *PARALLELCONCATSTART; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONSTANTOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHWISECONV2DBACKPROPFILTER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHWISECONV2DBACKPORPINPUT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHWISECONV2DFORWARDNATIVE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DROPOUTGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYRMSPROPMIXEDPRECISION; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYRMSPROP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RELU6GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *AVGPOOLGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCATV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCATOFFSET; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LAYERNORMGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LAYERNORM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LARS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DYNAMICSTITCH; - -/***************************************************/ -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQUARE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMBROADCAST; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMALLGATHER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMALLREDUCE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREDUCESCATTER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMSEND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char 
*HCOMRECEIVE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEREAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HCOMREMOTEWRITE; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARASSIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *VARISINITIALIZEDOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LogTimeStamp; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ISVARIABLEINITIALIZED; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STREAMSWITCH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STREAMSWITCHN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STREAMACTIVE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MEMCPYASYNC; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *MEMCPYADDRASYNC; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *STREAMMERGE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ENDGRAPH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SEND; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *RECV; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ENDOFSEQUENCE; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LABELSET; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LABELGOTO; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LABELGOTOEX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LABELSWITCH; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *LABELSWITCHBYINDEX; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATOMICADDRCLEAN; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ABS_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char 
*ACCUMULATE_N_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACOS_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ACOSH_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ANY; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPROXIMATE_EQUAL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASIN_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ASINH_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ATAN_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BROADCAST_TO; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ELU_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ADD_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DATAFORMATDIMMAP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DATAFORMATVECPERMUTE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BESSELI0E; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *BESSELI1E; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADADELTA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADAGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADAGRADDA; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADAM; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADAMAX; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYADDSIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYCENTEREDRMSPROP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYFTRL; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYFTRLV2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYGRADIENTDESCENT; 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYPOWERSIGN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYPROXIMALADAGRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *APPLYPROXIMALGRADIENTDESCENT; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEQUANTIZE; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FOCAL_LOSS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *FOCAL_LOSS_GRAD; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SMOOTHL1_LOSS; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SMOOTHL1_LOSS_grad; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *REDUCEMEAN; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CONCAT_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *ONEHOT_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SLICE_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TILE_V2; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SUM_V2; -// Common type when the operator has the same name -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DETECTIONOUTPUT; -// Custom operator -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CUSTOMOP; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CUSTOMOP_NCHW; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CUSTOMOP_NHWC; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *CUSTOMOP_NC1HWC0; - -// Depthwise 4d_2_6d,6d_2_4d -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHWISEWEIGHT4D26D; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *DEPTHWISEWEIGHT6D24D; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *SQRTGRAD; -FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY extern const char *SIGMOIDGRAD; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *TRANSSHAPE; - -// Horovod operator -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HVDCALLBACKALLREDUCE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HVDCALLBACKALLGATHER; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HVDCALLBACKBROADCAST; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *HVDWAIT; - -/// -/// @brief Magic number of model file -/// -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FILE_MAGIC_NUM; // magic number - -/// -/// @brief Model head length -/// -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FILE_HEAD_LEN; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_VERSION; ///< Model version 1.0/// - -// alpha default value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const float ALPHA_DEFAULT_VALUE; - -// beta default value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const float BETA_DEFAULT_VALUE; - -/// -/// @ingroup domi_omg -/// @brief INPUT node type -/// -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string INPUT_TYPE; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DUMMY_DATA; - -// dim default size value -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY static const int32_t DIM_DEFAULT_SIZE = 4; - -// for fusion op plugin -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ATTR_NAME_FUSIONOP_ORIGINAL_TYPE; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ATTR_NAME_INPUT_TENSOR_DESC; -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string ATTR_NAME_OUTPUT_TENSOR_DESC; - -// DATA node type -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DATA_TYPE; - -// framework Operator Type 
-FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string FRAMEWORK_OP_TYPE; - -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_NET_OUTPUT; - -#pragma pack() // Cancels single-byte alignment -} // namespace parser -} // namespace ge - -#endif // PARSER_COMMON_TYPES_H_ diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index 94253bf4..00c220f1 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -61,16 +61,6 @@ struct model_feature { float *gradient_time; /**< The BP compution time of each gradient */ }; -/** - * @brief Memory Register Address Struct for Remote Access - */ -struct MemRegisterAddr { - u64 addr; - u64 length; -}; - -const u32 HCCL_MAX_MEM_REGISTER_NUM = 8; // The max number of memory register address. - enum GradSplitForceMode { FORCE_NONE, /**< no force */ FORCE_SIZE, /**< force split gradient by size */ diff --git a/third_party/fwkacllib/inc/hccl/hccl_types.h b/third_party/fwkacllib/inc/hccl/hccl_types.h index 50a64795..276516e7 100644 --- a/third_party/fwkacllib/inc/hccl/hccl_types.h +++ b/third_party/fwkacllib/inc/hccl/hccl_types.h @@ -81,8 +81,6 @@ typedef enum { HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ - HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ - HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ HCCL_DATA_TYPE_RESERVED /**< reserved */ } HcclDataType; diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index 90b96ac7..4399d3a8 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -270,15 +270,6 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen */ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); -/** - * @brief Register memories and init resources for remote access. 
- * - * @param addrList memory addresses for remote access. - * @param count number of remote memory addresses. - * @return HcclResult - */ -extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); - #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h index f5b80269..ce1c9720 100644 --- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h +++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h @@ -17,8 +17,8 @@ #ifndef _MMPA_API_H_ #define _MMPA_API_H_ -#define LINUX 0 -#define WIN 1 +#define LINUX 0 +#define WIN 1 #if(OS_TYPE == LINUX) //lint !e553 @@ -75,7 +75,6 @@ #include #include #include -#include #include "securec.h" @@ -104,19 +103,16 @@ #include #include "shlwapi.h" #include +#include "sub_inc/mmpa_typedef_win.h" +#include "sub_inc/mmpa_win.h" #include #include #include #include -#include + #include #include -#include "securec.h" - -#include "sub_inc/mmpa_typedef_win.h" -#include "sub_inc/mmpa_win.h" - #pragma comment(lib, "ws2_32.lib") #pragma comment(lib, "mswsock.lib") #pragma comment(lib, "Kernel32.lib") diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index aced4968..6ac8f8f6 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -30,26 +30,18 @@ typedef pthread_t mmThread; typedef pthread_mutex_t mmMutex_t; typedef pthread_cond_t mmCond; typedef pthread_mutex_t mmMutexFC; -typedef pthread_rwlock_t mmRWLock_t; typedef signed int mmProcess; typedef int mmPollHandle; typedef int mmPipeHandle; -typedef int mmFileHandle; typedef int mmComPletionKey; typedef int mmCompletionHandle; -typedef int mmErrorMsg; -typedef int mmFd_t; typedef VOID *mmExitCode; typedef key_t mmKey_t; typedef int mmMsgid; typedef struct dirent mmDirent; -typedef struct shmid_ds mmshmId_ds; typedef int (*mmFilter)(const mmDirent 
*entry); typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); -typedef size_t mmSize_t; -typedef off_t mmOfft_t; -typedef pid_t mmPid_t; typedef VOID *(*userProcFunc)(VOID *pulArg); @@ -59,16 +51,6 @@ typedef struct { } mmUserBlock_t; typedef struct { - const char *dli_fname; - void *dli_fbase; - const char *dli_sname; - void *dli_saddr; - size_t dli_size; /* ELF only */ - int dli_bind; /* ELF only */ - int dli_type; -} mmDlInfo; - -typedef struct { int wSecond; // Seconds. [0-60] (1 leap second) int wMinute; // Minutes. [0-59] int wHour; // Hours. [0-23] @@ -91,7 +73,6 @@ typedef pthread_key_t mmThreadKey; typedef int mmOverLap; typedef ssize_t mmSsize_t; -typedef size_t mmSize; // size typedef struct { UINT32 createFlag; @@ -220,17 +201,6 @@ typedef struct { #define M_RDWR O_RDWR #define M_CREAT O_CREAT #define M_BINARY O_RDONLY -#define M_TRUNC O_TRUNC -#define M_IRWXU S_IRWXU - -#define M_IN_CREATE IN_CREATE -#define M_IN_CLOSE_WRITE IN_CLOSE_WRITE -#define M_IN_IGNORED IN_IGNORED - -#define M_OUT_CREATE IN_CREATE -#define M_OUT_CLOSE_WRITE IN_CLOSE_WRITE -#define M_OUT_IGNORED IN_IGNORED -#define M_OUT_ISDIR IN_ISDIR #define M_IREAD S_IREAD #define M_IRUSR S_IRUSR @@ -266,20 +236,13 @@ typedef struct { #define MMPA_OPTIONAL_ARGUMENT 2 #define MMPA_MAX_PATH PATH_MAX -#define M_NAME_MAX MAX_FNAME #define M_F_OK F_OK #define M_R_OK R_OK #define M_W_OK W_OK -#define MMPA_STDIN STDIN_FILENO -#define MMPA_STDOUT STDOUT_FILENO -#define MMPA_STDERR STDERR_FILENO - #define MMPA_RTLD_NOW RTLD_NOW #define MMPA_RTLD_GLOBAL RTLD_GLOBAL -#define MMPA_RTLD_LAZY RTLD_LAZY -#define MMPA_RTLD_NODELETE RTLD_NODELETE #define MMPA_DL_EXT_NAME ".so" @@ -287,7 +250,6 @@ extern INT32 mmCreateTask(mmThread *threadHandle, mmUserBlock_t *funcBlock); extern INT32 mmJoinTask(mmThread *threadHandle); extern INT32 mmMutexInit(mmMutex_t *mutex); extern INT32 mmMutexLock(mmMutex_t *mutex); -extern INT32 mmMutexTryLock(mmMutex_t *mutex); extern INT32 mmMutexUnLock(mmMutex_t *mutex); 
extern INT32 mmMutexDestroy(mmMutex_t *mutex); extern INT32 mmCondInit(mmCond *cond); @@ -295,14 +257,6 @@ extern INT32 mmCondLockInit(mmMutexFC *mutex); extern INT32 mmCondLock(mmMutexFC *mutex); extern INT32 mmCondUnLock(mmMutexFC *mutex); extern INT32 mmCondLockDestroy(mmMutexFC *mutex); -extern INT32 mmRWLockInit(mmRWLock_t *rwLock); -extern INT32 mmRWLockRDLock(mmRWLock_t *rwLock); -extern INT32 mmRWLockTryRDLock(mmRWLock_t *rwLock); -extern INT32 mmRWLockWRLock(mmRWLock_t *rwLock); -extern INT32 mmRWLockTryWRLock(mmRWLock_t *rwLock); -extern INT32 mmRDLockUnLock(mmRWLock_t *rwLock); -extern INT32 mmWRLockUnLock(mmRWLock_t *rwLock); -extern INT32 mmRWLockDestroy(mmRWLock_t *rwLock); extern INT32 mmCondWait(mmCond *cond, mmMutexFC *mutex); extern INT32 mmCondTimedWait(mmCond *cond, mmMutexFC *mutex, UINT32 milliSecond); extern INT32 mmCondNotify(mmCond *cond); @@ -312,7 +266,6 @@ extern INT32 mmGetPid(); extern INT32 mmGetTid(); extern INT32 mmGetPidHandle(mmProcess *processHandle); extern INT32 mmGetLocalTime(mmSystemTime_t *sysTime); -extern INT32 mmGetSystemTime(mmSystemTime_t *sysTime); extern INT32 mmSemInit(mmSem_t *sem, UINT32 value); extern INT32 mmSemWait(mmSem_t *sem); @@ -320,9 +273,7 @@ extern INT32 mmSemPost(mmSem_t *sem); extern INT32 mmSemDestroy(mmSem_t *sem); extern INT32 mmOpen(const CHAR *pathName, INT32 flags); extern INT32 mmOpen2(const CHAR *pathName, INT32 flags, MODE mode); -extern FILE *mmPopen(CHAR *command, CHAR *type); extern INT32 mmClose(INT32 fd); -extern INT32 mmPclose(FILE *stream); extern mmSsize_t mmWrite(INT32 fd, VOID *buf, UINT32 bufLen); extern mmSsize_t mmRead(INT32 fd, VOID *buf, UINT32 bufLen); extern mmSockHandle mmSocket(INT32 sockFamily, INT32 type, INT32 protocol); @@ -333,22 +284,9 @@ extern INT32 mmConnect(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLe extern INT32 mmCloseSocket(mmSockHandle sockFd); extern mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); extern 
mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); -extern INT32 mmSocketSendTo(mmSockHandle sockFd, - VOID *sendMsg, - INT32 sendLen, - UINT32 sendFlag, - const mmSockAddr* addr, - INT32 tolen); -extern mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, - VOID *recvBuf, - mmSize recvLen, - UINT32 recvFlag, - mmSockAddr* addr, - mmSocklen_t *FromLen); extern INT32 mmSAStartup(); extern INT32 mmSACleanup(); extern VOID *mmDlopen(const CHAR *fileName, INT32 mode); -extern INT32 mmDladdr(VOID *addr, mmDlInfo *info); extern VOID *mmDlsym(VOID *handle, CHAR *funcName); extern INT32 mmDlclose(VOID *handle); extern CHAR *mmDlerror(); @@ -356,7 +294,6 @@ extern INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock extern INT32 mmDeleteTimer(mmTimer timerHandle); extern INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); extern INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); -extern INT32 mmFStatGet(INT32 fd, mmStat_t *buffer); extern INT32 mmMkdir(const CHAR *pathName, mmMode_t mode); extern INT32 mmSleep(UINT32 milliSecond); @@ -400,7 +337,6 @@ extern VOID mmCloseCompletionPort(mmCompletionHandle handle); extern INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, pmmPollData polledData, mmPollBack pollBack); extern INT32 mmGetErrorCode(); -extern CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); extern INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); extern mmTimespec mmGetTickCount(); extern INT32 mmGetRealPath(CHAR *path, CHAR *realPath); @@ -446,7 +382,6 @@ extern INT32 mmTlsDelete(mmThreadKey key); extern INT32 mmGetOsType(); extern INT32 mmFsync(mmProcess fd); -extern INT32 mmFsync2(INT32 fd); extern INT32 mmChdir(const CHAR *path); extern INT32 mmUmask(INT32 pmode); extern INT32 mmThreadKill(mmThread id); @@ -504,10 +439,6 @@ extern INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const c extern INT32 
mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, const mmThreadAttr *threadAttr); -extern mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); -extern INT32 mmShmUnlink(const CHAR *name); -extern VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); -extern INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); #define MMPA_DLL_API #ifdef __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h index 9df5b9ce..fc862a72 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_linux.h @@ -79,8 +79,6 @@ typedef long LONG; #define MMPA_THREAD_SCHED_OTHER SCHED_OTHER #define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN -#define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER - #define MMPA_MAX_NI 19 #define MMPA_MIN_NI (-20) @@ -88,7 +86,6 @@ typedef long LONG; #define EN_ERR 1 #define EN_ERROR (-1) #define EN_INVALID_PARAM (-2) -#define EN_TIMEOUT (-3) #ifdef __cplusplus #if __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h index 8200bea6..fc1b4858 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_typedef_win.h @@ -1,4 +1,4 @@ -/** +/** * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -35,7 +35,6 @@ extern "C" { #define EN_ERR 1 #define EN_ERROR (-1) #define EN_INVALID_PARAM (-2) -#define EN_TIMEOUT (-3) #define HANDLE_INVALID_VALUE (-1) #define INVALID_SOCKET_HANDLE INVALID_SOCKET @@ -61,7 +60,6 @@ extern "C" { #define MMPA_MIDDLE_NI 5 #define MMPA_LOW_NI (-5) #define MMPA_MIN_NI (-20) -#define MMPA_MAX_FILE 128 #define MMPA_MAX_THREAD_PIO 99 #define MMPA_MIDDLE_THREAD_PIO 66 @@ 
-73,8 +71,6 @@ extern "C" { #define MMPA_THREAD_SCHED_OTHER 0 #define MMPA_THREAD_MIN_STACK_SIZE 0 -#define MM_MUTEX_INITIALIZER NULL - #ifdef __cplusplus #if __cplusplus } diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index 6adc1426..68a70c27 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -1,4 +1,4 @@ -/** +/** * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -43,9 +43,8 @@ typedef HANDLE mmThread; typedef HANDLE mmProcess; typedef HANDLE mmPollHandle; typedef HANDLE mmPipeHandle; -typedef HANDLE mmFileHandle; typedef HANDLE mmCompletionHandle; -typedef HANDLE mmFd_t; + typedef CRITICAL_SECTION mmMutexFC; typedef CONDITION_VARIABLE mmCond; @@ -60,22 +59,15 @@ typedef SYSTEMTIME mmSystemTime_t; typedef HANDLE mmSem_t; typedef SOCKET mmSockHandle; -typedef SRWLOCK mmRWLock_t; typedef struct sockaddr mmSockAddr; typedef int mmSocklen_t; typedef int mmSemTimeout_t; typedef long mmAtomicType; typedef DWORD mmExitCode; -typedef DWORD mmErrorMsg; typedef int mmKey_t; typedef HANDLE mmMsgid; -typedef long int mmOfft_t; -typedef int mmPid_t; typedef INT32 mmSsize_t; -typedef int mmSize; // size -typedef size_t mmSize_t; -typedef VOID mmshmId_ds; typedef enum { DT_DIR = FILE_ATTRIBUTE_DIRECTORY, @@ -190,16 +182,6 @@ typedef struct { } mmDiskSize; typedef struct { - const char *dli_fname; - void *dli_fbase; - const char *dli_sname; - void *dli_saddr; - size_t dli_size; /* ELF only */ - int dli_bind; /* ELF only */ - int dli_type; -} mmDlInfo; - -typedef struct { char addr[MMPA_MACINFO_DEFAULT_SIZE]; // ex:aa-bb-cc-dd-ee-ff\0 } mmMacInfo; @@ -241,10 +223,8 @@ typedef VOID (*mmPf)(VOID); #define M_RDONLY _O_RDONLY #define M_WRONLY _O_WRONLY #define M_RDWR _O_RDWR -#define M_IRWXU _O_RDWR #define M_CREAT _O_CREAT #define M_BINARY _O_BINARY -#define M_TRUNC _O_TRUNC 
#define M_IREAD _S_IREAD #define M_IRUSR _S_IREAD @@ -252,15 +232,6 @@ typedef VOID (*mmPf)(VOID); #define M_IWUSR _S_IWRITE #define M_IXUSR 0 -#define M_IN_CREATE FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME -#define M_IN_CLOSE_WRITE FILE_NOTIFY_CHANGE_LAST_WRITE -#define M_IN_IGNORED FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME - -#define M_OUT_CREATE 0x00000100 -#define M_OUT_CLOSE_WRITE 0x00000008 -#define M_OUT_IGNORED 0x00008000 -#define M_OUT_ISDIR 0x40000000 - #define M_MSG_CREAT 1 #define M_MSG_EXCL 2 #define M_MSG_NOWAIT 3 @@ -280,16 +251,6 @@ typedef VOID (*mmPf)(VOID); #define M_UMASK_GRPEXEC 0 #define M_UMASK_OTHEXEC 0 -#define DT_UNKNOWN 0 -#define DT_FIFO 1 -#define DT_CHR 2 -#define DT_DIR 4 -#define DT_BLK 6 -#define DT_REG 8 -#define DT_LNK 10 -#define DT_SOCK 12 -#define DT_WHT 14 - #define mmConstructor(x) __declspec(allocate(".CRT$XCU")) mmPf con = x #define mmDestructor(x) __declspec(allocate(".CRT$XPU")) mmPf de = x @@ -308,20 +269,13 @@ typedef VOID (*mmPf)(VOID); #define MMPA_EMSG "" #define MMPA_MAX_PATH MAX_PATH -#define M_NAME_MAX _MAX_FNAME #define M_F_OK 0 #define M_W_OK 2 #define M_R_OK 4 -#define MMPA_STDIN stdin -#define MMPA_STDOUT stdout -#define MMPA_STDERR stderr - #define MMPA_RTLD_NOW 0 #define MMPA_RTLD_GLOBAL 0 -#define MMPA_RTLD_LAZY 0 -#define MMPA_RTLD_NODELETE 0 #define MMPA_DL_EXT_NAME ".dll" @@ -331,7 +285,6 @@ _declspec(dllexport) INT32 mmCreateTask(mmThread *threadHandle, mmUserBlock_t *f _declspec(dllexport) INT32 mmJoinTask(mmThread *threadHandle); _declspec(dllexport) INT32 mmMutexInit(mmMutex_t *mutex); _declspec(dllexport) INT32 mmMutexLock(mmMutex_t *mutex); -_declspec(dllexport) INT32 mmMutexTryLock(mmMutex_t *mutex); _declspec(dllexport) INT32 mmMutexUnLock(mmMutex_t *mutex); _declspec(dllexport) INT32 mmMutexDestroy(mmMutex_t *mutex); _declspec(dllexport) INT32 mmCondInit(mmCond *cond); @@ -339,14 +292,6 @@ _declspec(dllexport) INT32 mmCondLockInit(mmMutexFC *mutex); 
_declspec(dllexport) INT32 mmCondLock(mmMutexFC *mutex); _declspec(dllexport) INT32 mmCondUnLock(mmMutexFC *mutex); _declspec(dllexport) INT32 mmCondLockDestroy(mmMutexFC *mutex); -_declspec(dllexport) INT32 mmRWLockInit(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRWLockRDLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRWLockTryRDLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRWLockWRLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRWLockTryWRLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRDLockUnLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmWRLockUnLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRWLockDestroy(mmRWLock_t *rwLock); _declspec(dllexport) INT32 mmCondWait(mmCond *cond, mmMutexFC *mutex); _declspec(dllexport) INT32 mmCondTimedWait(mmCond *cond, mmMutexFC *mutex, UINT32 milliSecond); @@ -357,16 +302,13 @@ _declspec(dllexport) INT32 mmGetPid(VOID); _declspec(dllexport) INT32 mmGetTid(VOID); _declspec(dllexport) INT32 mmGetPidHandle(mmProcess *processHandle); _declspec(dllexport) INT32 mmGetLocalTime(mmSystemTime_t *sysTime); -_declspec(dllexport) INT32 mmGetSystemTime(mmSystemTime_t *sysTime); _declspec(dllexport) INT32 mmSemInit(mmSem_t *sem, UINT32 value); _declspec(dllexport) INT32 mmSemWait(mmSem_t *sem); _declspec(dllexport) INT32 mmSemPost(mmSem_t *sem); _declspec(dllexport) INT32 mmSemDestroy(mmSem_t *sem); _declspec(dllexport) INT32 mmOpen(const CHAR *pathName, INT32 flags); _declspec(dllexport) INT32 mmOpen2(const CHAR *pathName, INT32 flags, MODE mode); -_declspec(dllexport) FILE *mmPopen(CHAR *command, CHAR *type); _declspec(dllexport) INT32 mmClose(INT32 fd); -_declspec(dllexport) INT32 mmPclose(FILE *stream); _declspec(dllexport) mmSsize_t mmWrite(INT32 fd, VOID *buf, UINT32 bufLen); _declspec(dllexport) mmSsize_t mmRead(INT32 fd, VOID *buf, UINT32 bufLen); _declspec(dllexport) mmSockHandle mmSocket(INT32 sockFamily, INT32 type, INT32 protocol); @@ -377,22 +319,9 @@ 
_declspec(dllexport) INT32 mmConnect(mmSockHandle sockFd, mmSockAddr *addr, mmSo _declspec(dllexport) INT32 mmCloseSocket(mmSockHandle sockFd); _declspec(dllexport) mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); _declspec(dllexport) mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); -_declspec(dllexport) INT32 mmSocketSendTo(mmSockHandle sockFd, - VOID *sendMsg, - INT32 sendLen, - UINT32 sendFlag, - const mmSockAddr* addr, - INT32 tolen); -_declspec(dllexport) mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, - VOID *recvBuf, - mmSize recvLen, - UINT32 recvFlag, - mmSockAddr* addr, - mmSocklen_t *FromLen); _declspec(dllexport) INT32 mmSAStartup(VOID); _declspec(dllexport) INT32 mmSACleanup(VOID); _declspec(dllexport) VOID *mmDlopen(const CHAR *fileName, INT mode); -_declspec(dllexport) INT32 mmDladdr(VOID *addr, mmDlInfo *info); _declspec(dllexport) VOID *mmDlsym(VOID *handle, CHAR *fileName); _declspec(dllexport) INT32 mmDlclose(VOID *handle); _declspec(dllexport) CHAR *mmDlerror(VOID); @@ -401,7 +330,6 @@ _declspec(dllexport) INT32 _declspec(dllexport) INT32 mmDeleteTimer(mmTimer timerHandle); _declspec(dllexport) INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); _declspec(dllexport) INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); -_declspec(dllexport) INT32 mmFStatGet(INT32 fd, mmStat_t *buffer); _declspec(dllexport) INT32 mmMkdir(const CHAR *pathName, mmMode_t mode); _declspec(dllexport) INT32 mmSleep(UINT32 milliSecond); _declspec(dllexport) INT32 mmCreateTaskWithAttr(mmThread *threadHandle, mmUserBlock_t *funcBlock); @@ -443,7 +371,6 @@ _declspec(dllexport) INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, m pmmPollData polledData, mmPollBack pollBack); _declspec(dllexport) INT32 mmGetErrorCode(); -_declspec(dllexport) CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); _declspec(dllexport) INT32 mmGetTimeOfDay(mmTimeval *timeVal, 
mmTimezone *timeZone); _declspec(dllexport) mmTimespec mmGetTickCount(); _declspec(dllexport) INT32 mmGetRealPath(CHAR *path, CHAR *realPath); @@ -480,7 +407,7 @@ _declspec(dllexport) INT32 mmTlsDelete(mmThreadKey key); _declspec(dllexport) INT32 mmGetOsType(); _declspec(dllexport) INT32 mmFsync(mmProcess fd); -_declspec(dllexport) INT32 mmFsync2(INT32 fd); + _declspec(dllexport) INT32 mmChdir(const CHAR *path); _declspec(dllexport) INT32 mmUmask(INT32 pmode); _declspec(dllexport) INT32 mmWaitPid(mmProcess pid, INT32 *status, INT32 options); @@ -528,10 +455,7 @@ _declspec(dllexport) INT32 _declspec(dllexport) INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, const mmThreadAttr *threadAttr); -_declspec(dllexport) mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); -_declspec(dllexport) INT32 mmShmUnlink(const CHAR *name); -_declspec(dllexport) VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); -_declspec(dllexport) INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); + #ifdef __cplusplus #if __cplusplus } diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h index dbd80a09..dd01ac5f 100644 --- a/third_party/fwkacllib/inc/ops/aipp.h +++ b/third_party/fwkacllib/inc/ops/aipp.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file aipp.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_ +#ifndef GE_OP_AIPP_H +#define GE_OP_AIPP_H #include "graph/operator_reg.h" @@ -78,4 +78,4 @@ REG_OP(AippData) .OP_END_FACTORY_REG(AippData) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_AIPP_H_ +#endif // GE_OP_AIPP_H diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h index 614b06e2..84ff3d08 100644 --- a/third_party/fwkacllib/inc/ops/all_ops.h +++ b/third_party/fwkacllib/inc/ops/all_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file all_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ +#ifndef BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ +#define BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ #include "aipp.h" #include "array_ops.h" @@ -76,4 +76,4 @@ #include "transformation_ops.h" #include "condtake_ops.h" #include "warp_perspective_ops.h" -#endif // OPS_BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ +#endif // BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index 691b51f6..1af02b05 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file array_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ +#ifndef GE_OP_ARRAY_OPS_H_ +#define GE_OP_ARRAY_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -659,7 +659,8 @@ REG_OP(IdentityN) .OP_END_FACTORY_REG(IdentityN) /** -*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without changing the data. \n +*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without + changing the data. \n *@par Inputs: *@li x: A tensor. @@ -737,7 +738,8 @@ REG_OP(Reshape) *x: A tensor. \n *@par Attributes: -*axis: An optional list of int32 or int64. If not specified, squeezes all dimensions of size 1. If specified, only squeezes the dimensions listed. It is an error to squeeze a dimension that is not 1. \n +*axis: An optional list of int32 or int64. If not specified, squeezes all dimensions of size 1. +If specified, only squeezes the dimensions listed. It is an error to squeeze a dimension that is not 1. \n *@par Outputs: *y: A tensor. \n @@ -752,7 +754,8 @@ REG_OP(Squeeze) .OP_END_FACTORY_REG(Squeeze) /** -*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n +*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of +indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n *@par Inputs: *x: A tensor. \n @@ -886,29 +889,14 @@ REG_OP(ReadVariableOp) .ATTR(dtype, Int, DT_INT32) .OP_END_FACTORY_REG(ReadVariableOp) -/** -*@brief Mark outputs of one sub graph which partitioned by engine type. - -*@par Inputs: -*x: A tensor. \n - -*@par Outputs: -*y: A tensor. 
\n - -*@par Attributes: -*@li peerIndex: The index of the corresponding 'placeholder' node it's connected to. -*@li parentOpType: Op type of original node. - -*@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. -*/ REG_OP(End) .INPUT(x, TensorType::ALL()) .OUTPUT(y, TensorType::ALL()) - .ATTR(peerIndex, Int, 0) - .ATTR(parentOpType, String, "") + .ATTR(peerIndex, Int, 0) // the index of the corresponding 'placeholder' node it's connected to + .ATTR(parentOpType, String, "") // op type of original node .OP_END_FACTORY_REG(End) + /** *@brief Operations for writing summary data, for use in analysis and visualization. @@ -976,7 +964,8 @@ REG_OP(ShapeN) *@par Attributes: *@li dtype: Optional. The data type of the output tensor. Defaults to "int32". -*@li init: An optional bool. If true, initializes the returned tensor with the default value of "dtype". Defaults to "false". \n +*@li init: An optional bool. If true, initializes the returned tensor with the default value of "dtype". +Defaults to "false". \n *@par Outputs: *y: A tensor. \n @@ -1155,4 +1144,4 @@ REG_OP(EditDistance) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ +#endif // GE_OP_ARRAY_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h index f05135d1..149c57d5 100644 --- a/third_party/fwkacllib/inc/ops/audio_ops.h +++ b/third_party/fwkacllib/inc/ops/audio_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file audio_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ +#ifndef GE_OP_AUDIO_OPS_H_ +#define GE_OP_AUDIO_OPS_H_ #include "graph/operator_reg.h" @@ -159,4 +159,4 @@ REG_OP(EncodeWav) .OP_END_FACTORY_REG(EncodeWav) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_AUDIO_OPS_H_ +#endif // GE_OP_AUDIO_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h index a4786cd3..0e1562c0 100644 --- a/third_party/fwkacllib/inc/ops/batch_ops.h +++ b/third_party/fwkacllib/inc/ops/batch_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file batch_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_ +#ifndef GE_OP_BATCH_OPS_H_ +#define GE_OP_BATCH_OPS_H_ #include "graph/operator_reg.h" @@ -158,4 +158,4 @@ REG_OP(UnbatchGrad) .OP_END_FACTORY_REG(UnbatchGrad) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_BATCH_OPS_H_ +#endif // GE_OP_BATCH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h index 39a28cf3..5b35a38a 100644 --- a/third_party/fwkacllib/inc/ops/bitwise_ops.h +++ b/third_party/fwkacllib/inc/ops/bitwise_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file bitwise_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_ +#ifndef GE_OP_BITWISE_OPS_H_ +#define GE_OP_BITWISE_OPS_H_ #include "graph/operator_reg.h" @@ -56,4 +56,4 @@ REG_OP(RightShift) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_BITWISE_OPS_H_ +#endif // GE_OP_BITWISE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h index 08e54824..f1b4e7a9 100644 --- a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h +++ b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file boosted_trees_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_ +#ifndef GE_OP_BOOSTED_TREES_OPS_H_ +#define GE_OP_BOOSTED_TREES_OPS_H_ #include "graph/operator_reg.h" @@ -61,4 +61,4 @@ REG_OP(BoostedTreesBucketize) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_BOOSTED_TREES_OPS_H_ +#endif // GE_OP_BOOSTED_TREES_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h index 890c52ae..9b9ce314 100644 --- a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h +++ b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file candidate_sampling_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_ +#ifndef GE_OP_CANDIDATE_SAMPLING_OPS_H_ +#define GE_OP_CANDIDATE_SAMPLING_OPS_H_ #include "graph/operator_reg.h" @@ -412,4 +412,4 @@ REG_OP(ComputeAccidentalHits) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_CANDIDATE_SAMPLING_OPS_H_ +#endif // GE_OP_CANDIDATE_SAMPLING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/condtake_ops.h b/third_party/fwkacllib/inc/ops/condtake_ops.h index 029cffbf..554c18f1 100644 --- a/third_party/fwkacllib/inc/ops/condtake_ops.h +++ b/third_party/fwkacllib/inc/ops/condtake_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file condtake_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_ +#ifndef GE_OP_CONDTAKE_OPS_H_ +#define GE_OP_CONDTAKE_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -56,4 +56,4 @@ REG_OP(CondTake) .OP_END_FACTORY_REG(CondTake) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_CONDTAKE_OPS_H_ +#endif // GE_OP_CONDTAKE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h index c0b6ad72..e2fd4715 100644 --- a/third_party/fwkacllib/inc/ops/control_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file control_flow_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_ +#ifndef GE_CONTROL_FLOW_OPS_H_ +#define GE_CONTROL_FLOW_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -404,4 +404,4 @@ REG_OP(MapIndex) .OP_END_FACTORY_REG(MapIndex) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_CONTROL_FLOW_OPS_H_ +#endif // GE_CONTROL_FLOW_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h index c6a265cc..383568dc 100644 --- a/third_party/fwkacllib/inc/ops/ctc_ops.h +++ b/third_party/fwkacllib/inc/ops/ctc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file ctc_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_ +#ifndef GE_OP_CTC_OPS_H +#define GE_OP_CTC_OPS_H #include "graph/operator.h" #include "graph/operator_reg.h" @@ -139,4 +139,4 @@ REG_OP(CTCBeamSearchDecoder) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_CTC_OPS_H_ \ No newline at end of file +#endif //GE_OP_CTC_OPS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index 02d2bfdd..3bfcfe01 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file data_flow_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ +#ifndef GE_OP_DATA_FLOW_OPS_H_ +#define GE_OP_DATA_FLOW_OPS_H_ #include #include "graph/operator_reg.h" @@ -2242,4 +2242,4 @@ REG_OP(OutfeedEnqueueOp) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ +#endif // GE_OP_DATA_FLOW_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 6c7904a6..6d865399 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file elewise_calculation_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ +#ifndef GE_OP_ELEWISE_CALCULATION_OPS_H +#define GE_OP_ELEWISE_CALCULATION_OPS_H #include "graph/operator_reg.h" namespace ge { @@ -3314,4 +3314,4 @@ REG_OP(TensorRedirect) -#endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ +#endif // GE_OP_ELEWISE_CALCULATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/functional_ops.h b/third_party/fwkacllib/inc/ops/functional_ops.h index 07cf57a0..bf5ebd51 100644 --- a/third_party/fwkacllib/inc/ops/functional_ops.h +++ b/third_party/fwkacllib/inc/ops/functional_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file functional_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ +#ifndef GE_FUNCTIONAL_OPS_H_ +#define GE_FUNCTIONAL_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -330,4 +330,4 @@ REG_OP(StatefulPartitionedCall) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_FUNCTIONAL_OPS_H_ +#endif // GE_FUNCTIONAL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/get_data_ops.h b/third_party/fwkacllib/inc/ops/get_data_ops.h index e5518ef8..33a64903 100644 --- a/third_party/fwkacllib/inc/ops/get_data_ops.h +++ b/third_party/fwkacllib/inc/ops/get_data_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file get_data_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_ +#ifndef GE_OP_GET_DATA_OPS_H_ +#define GE_OP_GET_DATA_OPS_H_ #include "graph/operator_reg.h" @@ -100,4 +100,4 @@ REG_OP(DeviceQueueDataset) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_GET_DATA_OPS_H_ +#endif // GE_OP_GET_DATA_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index a8fc1106..7e985efc 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file hcom_ops.h * \brief huawei collective communication library ops. 
*/ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ +#ifndef GE_OP_HCOM_OPS_H_ +#define GE_OP_HCOM_OPS_H_ #include "graph/operator_reg.h" @@ -41,8 +41,8 @@ namespace ge { as the name of a world group. */ REG_OP(HcomAllGather) - .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) .REQUIRED_ATTR(rank_size, Int) .REQUIRED_ATTR(group, String) .ATTR(alpha, Float, 1.0) @@ -99,8 +99,8 @@ REG_OP(HcomAllReduce) as the name of a world group. */ REG_OP(HcomBroadcast) - .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) - .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) .REQUIRED_ATTR(root_rank, Int) .REQUIRED_ATTR(group, String) .ATTR(alpha, Float, 1.0) @@ -157,7 +157,7 @@ REG_OP(HcomReduceScatter) * @see HcomReceive */ REG_OP(HcomSend) - .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(sr_tag, Int) .REQUIRED_ATTR(dest_rank, Int) @@ -190,7 +190,7 @@ REG_OP(HcomSend) * @see HcomSend */ REG_OP(HcomReceive) - .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16})) .REQUIRED_ATTR(group, String) .REQUIRED_ATTR(sr_tag, Int) .REQUIRED_ATTR(src_rank, Int) @@ -200,30 +200,5 @@ 
REG_OP(HcomReceive) .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomReceive) -/** - * @brief Performs Remote Read of input tensors - * @par Inputs: - * remote: A tensor. describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length - * @par Outputs: - * local: A Tensor. whose value is length / size_of(Type) - */ -REG_OP(HcomRemoteRead) - .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) - .OUTPUT(local, TensorType::ALL()) - .REQUIRED_ATTR(dtype, Type) - .OP_END_FACTORY_REG(HcomRemoteRead) - -/** - * @brief Performs Remote Write of input tensors - * @par Inputs: - * remote: A tensor. describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length - * @par Inputs: - * local: A Tensor. whose value is length / size_of(Type) - */ -REG_OP(HcomRemoteWrite) - .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) - .INPUT(local, TensorType::ALL()) - .OP_END_FACTORY_REG(HcomRemoteWrite) - } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ +#endif // GE_OP_HCOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/hvd_ops.h b/third_party/fwkacllib/inc/ops/hvd_ops.h index 00299ef7..bde8486c 100644 --- a/third_party/fwkacllib/inc/ops/hvd_ops.h +++ b/third_party/fwkacllib/inc/ops/hvd_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file hvd_ops.h * \brief Horovod collective communication library ops. 
*/ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_ +#ifndef GE_OP_HVD_OPS_H_ +#define GE_OP_HVD_OPS_H_ #include "graph/operator_reg.h" @@ -78,4 +78,4 @@ REG_OP(HorovodBroadcast) .OP_END_FACTORY_REG(HorovodBroadcast) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_HVD_OPS_H_ +#endif // GE_OP_HVD_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 5f7aee41..302823a2 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file image_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ +#ifndef GE_OP_MAGE_OPS_H_ +#define GE_OP_MAGE_OPS_H_ #include "graph/operator_reg.h" @@ -1345,4 +1345,4 @@ REG_OP(SpatialTransformerD) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ +#endif // GE_OP_MAGE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/internal_ops.h b/third_party/fwkacllib/inc/ops/internal_ops.h index bcc3f1c3..7e9fd4a4 100644 --- a/third_party/fwkacllib/inc/ops/internal_ops.h +++ b/third_party/fwkacllib/inc/ops/internal_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file internal_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_ +#ifndef GE_OP_INTERNAL_OPS_H_ +#define GE_OP_INTERNAL_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -81,4 +81,4 @@ REG_OP(InternalDataMove) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_INTERNAL_OPS_H_ +#endif // GE_OP_INTERNAL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index d8f45c5d..5d98f999 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file linalg_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ +#ifndef GE_OP_LINALG_OPS_H_ +#define GE_OP_LINALG_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -432,4 +432,4 @@ REG_OP(TridiagonalSolve) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_LINALG_OPS_H_ +#endif // GE_OP_LINALG_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h index 03be7757..db9097ce 100644 --- a/third_party/fwkacllib/inc/ops/logging_ops.h +++ b/third_party/fwkacllib/inc/ops/logging_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file logging_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_ +#ifndef GE_OP_LOGGING_OPS_H +#define GE_OP_LOGGING_OPS_H #include "graph/operator.h" #include "graph/operator_reg.h" @@ -113,4 +113,4 @@ REG_OP(PrintV2) .OP_END_FACTORY_REG(PrintV2) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_LOGGING_OPS_H_ +#endif // GE_OP_LOGGING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h index 5d928e5a..84b138c4 100644 --- a/third_party/fwkacllib/inc/ops/lookup_ops.h +++ b/third_party/fwkacllib/inc/ops/lookup_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file lookup_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ +#ifndef GE_OP_LOOKUP_OPS_H_ +#define GE_OP_LOOKUP_OPS_H_ #include "graph/operator_reg.h" @@ -305,4 +305,4 @@ REG_OP(MutableHashTable) .OP_END_FACTORY_REG(MutableHashTable) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_LOOKUP_OPS_H_ +#endif // GE_OP_LOOKUP_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 330d85e7..3d7ff1d9 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file math_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ +#ifndef GE_OP_MATH_OPS_H_ +#define GE_OP_MATH_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -512,23 +512,6 @@ REG_OP(IsFinite) .OP_END_FACTORY_REG(IsFinite) /** - * *@brief Compute element-wise infiniteness, return a boolean tensor. - * - * *@par Inputs: - * *x:A Tensor. - * - * *@par Outputs: - * *y:A Tensor. Has the same shape as x. - * - * *@par Third-party framework compatibility. - * *Compatible with tensorflow IsInf operator. - * */ -REG_OP(IsInf) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_BOOL})) - .OP_END_FACTORY_REG(IsInf) - -/** * *@brief Computes the complex absolute value of a tensor. * * *@par Inputs: @@ -694,4 +677,4 @@ REG_OP(IFMR) .OP_END_FACTORY_REG(IFMR) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ +#endif // GE_OP_MATH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index daf0939c..bceff0cd 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file matrix_calculation_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ +#ifndef GE_OP_MATRIX_CALCULATION_OPS_H +#define GE_OP_MATRIX_CALCULATION_OPS_H #include "graph/operator_reg.h" @@ -95,10 +95,6 @@ REG_OP(MatMulV2) /** *@brief Performs Matrix-to-matrix Multiply, producing c=alpha[0]*a*b+beta[0]*c . 
\n -*@attention Constraints: -* For better performance, The k-axis must be aligned to 16 (input type -* is float16) or 32 (input type is int8). \n - *@par Inputs: *Five inputs, including: *@li a: A matrix Tensor. Must be one of the following types: float16, int8. @@ -402,8 +398,8 @@ REG_OP(TensorScatterUpdate) *Must be one of the following types: float16, float32, int32, int8, uint8 *@par Attributes: -* use_locking: An optional bool. Defaults to "False". If "True", the operation -* will be protected by a lock . \n +*use_locking: An optional bool. Defaults to "False". If "True", the operation + * will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -434,7 +430,7 @@ REG_OP(ScatterAdd) *@par Attributes: *@li use_locking: An optional bool. Defaults to "False". If "True", -* the operation will be protected by a lock . \n + * the operation will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -463,7 +459,7 @@ REG_OP(ScatterDiv) *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", -* the operation will be protected by a lock . \n + * the operation will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -492,7 +488,7 @@ REG_OP(ScatterNdAdd) *Must be one of the following types: int32 *@li updates: An ND Tensor. \n -* Must be one of the following types: float16, float32, int32, int8, uint8 +*Must be one of the following types: float16, float32, int32, int8, uint8 *@par Outputs: *y: A Tensor. Has the same type and format as input "x" . \n @@ -521,10 +517,10 @@ REG_OP(TensorScatterAdd) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", -* the operation will be protected by a lock . \n + * the operation will be protected by a lock . 
\n *@par Outputs: -* var: A Tensor. Has the same type and format as input "var" . \n +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNdSub. @@ -553,7 +549,7 @@ REG_OP(ScatterNdSub) *Must be one of the following types: float16, float32, int32, int8, uint8 *@par Outputs: -* y: A Tensor. Has the same type and format as input "x" . \n +*y: A Tensor. Has the same type and format as input "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator TensorScatterSub. @@ -578,10 +574,10 @@ REG_OP(TensorScatterSub) *Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", -* the operation will be protected by a lock . \n + * the operation will be protected by a lock . \n *@par Outputs: -* var: A Tensor. Has the same type and format as input "var" . \n +*var: A Tensor. Has the same type and format as input "var" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterSub. @@ -651,7 +647,7 @@ REG_OP(DiagPart) *@li num_output: Reserved. *@li transpose: A bool, specifying weight whether to transpose, either "true" or "false". Defaults to "false". *@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. -* The product of the subsequent dimensions starting form first dimension or the second dimension is "K". + * The product of the subsequent dimensions starting form first dimension or the second dimension is "K". *@li offset_x: Reserved . \n *@par Outputs: @@ -768,7 +764,7 @@ REG_OP(ConfusionMatrix) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", the operation -* will be protected by a lock . \n + * will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . 
\n @@ -801,7 +797,7 @@ REG_OP(ScatterMul) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", the operation -* will be protected by a lock . \n + * will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -834,7 +830,7 @@ REG_OP(ScatterMin) *@par Attributes: *use_locking: An optional bool. Defaults to "False". -* If "True", the operation will be protected by a lock . \n + * If "True", the operation will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -867,7 +863,7 @@ REG_OP(ScatterMax) *@par Attributes: *use_locking: An optional bool. Defaults to "False". If "True", -* the operation will be protected by a lock . \n + * the operation will be protected by a lock . \n *@par Outputs: *var: A Tensor. Has the same type and format as input "var" . \n @@ -981,4 +977,4 @@ REG_OP(MatrixDiagV2) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ +#endif // GE_OP_MATRIX_CALCULATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 848e9f86..073d541d 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file nn_batch_norm_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_ +#ifndef GE_OP_NN_BATCH_NORM_OPS_H +#define GE_OP_NN_BATCH_NORM_OPS_H #include "graph/operator_reg.h" @@ -432,4 +432,4 @@ REG_OP(BNInferenceD) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_BATCH_NORM_OPS_H_ +#endif // GE_OP_NN_BATCH_NORM_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 08253522..6307889d 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file nn_calculation_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ +#ifndef GE_OP_NN_CALCULATION_OPS_H +#define GE_OP_NN_CALCULATION_OPS_H #include "graph/operator_reg.h" @@ -636,7 +636,7 @@ REG_OP(Conv2DBackpropFilterD) *@verbatim |Name | Field | Scope ------------------|----------|---------- - |Input Image Size | H | [1, 100000] + |Input Image Size | H | [1, 4096] | | W | [1, 4096] ------------------|----------|---------- |Filter Size | H | [1, 255] @@ -722,14 +722,14 @@ REG_OP(Conv2D) | | int8 | int8 | int32 | int8 | int32 -----------|---------|---------|---------|----------|-------- |Format | NCHW | NCHW | ND | ND | NCHW - | | NHWC | HWCN | | | NHWC + | | NHWC | NHWC | | | NHWC + | | | HWCN | | | @endverbatim -* Type float32 is allowed only in mixed precision (float32->float16) scenarios. -* Mixed precision is enabled by default. 
-* \n -* +* It should be noted that the data types must correspond to each other, but the +* format does not need to . \n + *@par Attributes: -*@li strides: Required. A list of 4 integers. Specifying the strides of the +* @li strides: A list of 4 integers. Specifying the strides of the * convolution along the height and width. The dimension order is determined * by the data format of "x". By default the N and C dimensions are set to 1. * @li pads: A list of 4 integers. Specifying the top, bottom, left and right @@ -767,122 +767,6 @@ REG_OP(Conv2DCompress) .OP_END_FACTORY_REG(Conv2DCompress) /** -*@brief Computes a 2D convolution given 4D "x", "filter" and "offsets" -* tensors. -*@par Inputs: -* @li x: A 4D tensor of input images. With shape of -* [batch, in_height, in_width, in_channels] when format is "NHWC". -* @li filter: A 4D tensor of filters. Must have the same type as "x". With -* shape of [filter_height, filter_width, in_channels, out_channels] when format -* is "HWCN". -* @li offsets: A 4D tensor of offsets. With shape of -* [batch, deformable_groups * filter_height * filter_width * 3, in_height, -* in_width] when format is "NCHW". -* @li bias: An optional 1D tensor. Shape is [out_channels]. -* -* The input and output tensor attributes are listed as follows: -* @verbatim - |Tensor | x | filter | offsets | bias | y - -----------|---------|---------|---------|----------|-------- - |Data Type | float16 | float16 | float16 | float16 | float16 - -----------|---------|---------|---------|----------|-------- - |Format | NCHW | NCHW | NCHW | ND | NCHW - | | NHWC | HWCN | | | NHWC -@endverbatim -* It should be noted that the data types must correspond to each other, but -* the format does not need to. - -*@par Attributes: -* @li strides: Required. A list of 4 integers. Specifying the strides of the -* convolution along the height and width. The dimension order is determined -* by the data format of "x". By default the N and C dimensions are set to 1. 
-* @li pads: Required. A list of 4 integers. Specifying the top, bottom, left -* and right padding. -* @li dilations: Optional. A list of 4 integers. Specifying the dilation rate -* to use for dilated convolution. Has the same dimension order and value as -* "strides". -* @li groups: Optional. Number of blocked connections from input channels to -* output channels. Input channels and output channels must both be divisible -* by "groups".Type is int32. -* @li data_format: Optional. An optional string from: "NHWC", "NCHW". Specifying the -* data format of the input and output images. Type is string. Defaults to -* "NHWC". Reserved. -* @li deformable_groups: Optional. Cut the c chanel of input X into deformable_groups, -* each share a different offsets. Input channels must be divisible by -* "deformable_groups". Type is int32. - -*@par Outputs: -* @li y: A 4D Tensor of output images. Must have the same type and format as -* "x". With shape of [batch, out_channels, out_height, out_width] when format -* is "NHWC". 
-* @li output_height = (in_height + top_pad + botton_pad - -* dilation_h * (filter_height - 1) -1) / stride_h + 1 -* @li output_width = (in_width + left_pad + right_pad - -* dilation_w * (filter_width - 1) -1) / stride_w + 1 - -*@attention -* @li The parameter scope is listed as follows: -* @verbatim - |Name | Field | Scope - ------------------|--------------|---------------------------------------- - |Input Image Size | H dimension | 1 <= in_height * filter_height <= 4096 - | | W dimension | 1 <= in_width * filter_width <=4096 - ------------------|--------------|---------------------------------------- - |Filter Size | H dimension | [1, 255] - | | W dimension | [1, 255] - ------------------|--------------|---------------------------------------- - |offsets Size | C dimension | offsets_c = deformable_groups * - | | | filter_width * filter_height * 3 - | | H dimension | the same as output H dimension - | | W dimension | the same as output W dimension - ------------------|--------------|---------------------------------------- - |Stride Size | H dimension | [1, 63] - | | W dimension | [1, 63] - ------------------|--------------|---------------------------------------- - |Padding Size | top side | [0, 255] - | | bottom side | [0, 255] - | | left side | [0, 255] - | | right side | [0, 255] - ------------------|--------------|---------------------------------------- - |Dilation Size | H dimension | [1, 255] - | | W dimension | [1, 255] -@endverbatim - -* @li There are restrictions for certain scenarios: -* @verbatim - | Output | Restrictions - -------------------|--------------------------- - | W dimension == 1 | HxW(input) == HxW(filter) - | H dimension == 1 | - -------------------|--------------------------- - | W dimension == 1 | Not supported - | H dimension != 1 | -@endverbatim -* As shown above, "HxW(input)" indicates the image size after padding and -* "HxW(filter)" indicates the filter size after dilation. 
- -*@par Quantization supported or not -*@li Yes -* -*@par Third-party framework compatibility -*@li Compatible with the TensorFlow operator "conv2d". -*@li Compatible with the Caffe operator 2D "Convolution". -*/ -REG_OP(DeformableConv2D) - .INPUT(x, TensorType({DT_FLOAT16})) - .INPUT(filter, TensorType({DT_FLOAT16})) - .INPUT(offsets, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT16})) - .REQUIRED_ATTR(strides, ListInt) - .REQUIRED_ATTR(pads, ListInt) - .ATTR(dilations, ListInt, {1, 1, 1, 1}) - .ATTR(groups, Int, 1) - .ATTR(data_format, String, "NHWC") - .ATTR(deformable_groups, Int, 1) - .OP_END_FACTORY_REG(DeformableConv2D) - -/** *@brief Computes a 3D convolution given 5D "x" and "filter" tensors. *@par Inputs: * @li x: A 5D tensor. Must be one of the following types: float16, @@ -1391,39 +1275,5 @@ REG_OP(Conv2DTransposeD) .ATTR(offset_x, Int, 0) .OP_END_FACTORY_REG(Conv2DTransposeD) -/** -*@brief In the deformable convolution operator, the original input FeatureMap is expanded to a ksize_y * H * ksize_x *W -*FeatureMap by bilinear interpolation according to the offset offset. -*@par Inputs: - * Four inputs: - * @li x: A Tensor of type float16 - * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. -*@par Required Attributes: - * @li strides: A tuple/list of 2 integers.The stride of the sliding window for - * height and width for H/W dimension. - * @li pads: A tuple/list of 4 integers.Padding added to each dimension - * of the input. - * @li ksize: A tuple/list of 2 integers.kernel size. -*@par Attributes: - * Three attributes: - * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension - * of input. Defaults to [0, 0, 0, 0] - * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. - * @li deformable_groups: Specify the c-axis grouping number of input x. 
-*@par Outputs: - * y: A Tensor. A Tensor of type float16. -*/ -REG_OP(DeformableOffsets) - .INPUT(x, TensorType({DT_FLOAT16})) - .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT32})) - .OUTPUT(y, TensorType({DT_FLOAT16})) - .REQUIRED_ATTR(strides, ListInt) - .REQUIRED_ATTR(pads, ListInt) - .REQUIRED_ATTR(ksize, ListInt) - .ATTR(dilations, ListInt, {0, 0, 0, 0}) - .ATTR(data_format, String, "NCHW") - .ATTR(deformable_groups, Int, 1) - .OP_END_FACTORY_REG(DeformableOffsets) - } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ +#endif // GE_OP_NN_CALCULATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index f5a6201e..bd8bb9bf 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file nn_detect_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ +#ifndef GE_OP_NN_DETECT_OPS_H_ +#define GE_OP_NN_DETECT_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -525,11 +525,11 @@ as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n *@par Outputs: *@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], -* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. +where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box. 
*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2], -* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. +where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence. *@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2], -* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n +where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n *@attention Constraints: *@li This operator applies to YOLO v2 and v3 networks. @@ -557,9 +557,9 @@ REG_OP(Yolo) *@par Inputs: * Four inputs, including: *@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov3DetectionOutput. -* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. *@li img_info: A float16 or float32, describing the image information including the required image height and width -* and the actual image height and width. +and the actual image height and width. * *@par Attributes: *@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" @@ -568,7 +568,7 @@ REG_OP(Yolo) *@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20]. *@li relative: An optional bool. Defaults to and must be "true". *@li obj_threshold: A required float, specifying the confidence threshold for box filtering, -* which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n +which is the output "obj" of operator Yolo). 
The value range is [0.0, 1.0] . \n *@li post_nms_topn: An optional int32. This attribute is reserved. *@li score_threshold: A required float, specifying the class score threshold for box filtering, @@ -615,11 +615,11 @@ REG_OP(YoloV2DetectionOutput) *@par Inputs: *Six inputs, including: *@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput. -* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. *@li imginfo: A float16, describing the image information including the required image height and width -* and the actual image height and width. +and the actual image height and width. *@li windex: A windex tensor with shape [height, weight]. Has the same type as the inputs. -* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed. +[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed. *@li hindex: A hindex tensor with shape [height, weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]]. @@ -680,10 +680,10 @@ REG_OP(YoloV2DetectionOutputD) *@par Inputs: *Ten inputs, including: *@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". -* There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. +There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. 
*@li img_info: A float16 or float32, describing the image information including the required image height and width -* and the actual image height and width. - +and the actual image height and width. +* *@par Attributes: *@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. @@ -698,13 +698,13 @@ REG_OP(YoloV2DetectionOutputD) *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". - +* *@par Outputs: *@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 - +* *@attention Constraints: *@li This operator applies only to the YOLO v3 network. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators . \n @@ -746,15 +746,16 @@ REG_OP(YoloV3DetectionOutput) *@par Inputs: *16 Input, including: *@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. -* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. 
*@li imginfo: A float16, describing the image information including the required image height and width -* and the actual image height and width. +and the actual image height and width. *@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. -* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n +[[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n *@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. -* [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n -s +[[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n + +* *@par Attributes: *@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. @@ -766,13 +767,13 @@ s *@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0]. *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". - +* *@par Outputs: *@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. 
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 - +* *@attention Constraints: *@li This operator applies only to the YOLO v3 network. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. @@ -823,8 +824,8 @@ REG_OP(YoloV3DetectionOutputD) *@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. *@li img_info: A float16 or float32, describing the image information including the required image height and width \n -* and the actual image height and width. - +and the actual image height and width. +* *@par Attributes: *@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes" *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. @@ -839,13 +840,13 @@ There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yol *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". - +* *@par Outputs: *@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. 
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 - +* *@attention Constraints:\n *@li This operator applies only to the YOLO v3 network. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. @@ -874,19 +875,19 @@ REG_OP(YoloV3DetectionOutputV2) .OP_END_FACTORY_REG(YoloV3DetectionOutputV2) /** -*@brief Performs YOLO V3 detection. +*@brief Performs YOLO V3 detection . \n *@par Inputs: *16 Input, including: *@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput. -* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. +A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. *@li imginfo: A float16, describing the image information including the required image height and width -* and the actual image height and width. -*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. -* [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] -* is formed for the three Yolo outputs, respectively .It's a dynamic input. \n +and the actual image height and width. +*@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs. [[0,1,2...(weight-1)],[0,1,2...(w-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n *@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n + +* *@par Attributes: *@li biases: A required float32. 
"biases = Number of Yolo operators at the preceding layer x 2 x boxes" *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer. @@ -901,7 +902,6 @@ REG_OP(YoloV3DetectionOutputV2) * *@par Outputs: *@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2), -* describing the information of each output box. * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes. * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 @@ -912,9 +912,6 @@ REG_OP(YoloV3DetectionOutputV2) *@see Yolo() *@par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead. */ REG_OP(YoloV3DetectionOutputV2D) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -1031,15 +1028,15 @@ REG_OP(ROIPooling) /** *@brief Computes decode bbox function. - +* *@par Inputs: *Inputs include: * @li box_predictions: A Tensor. Must be float16. * @li anchors: A Tensor. Must have the same type as box_predictions. - +* *@par Attributes: * @ decode_clip: required, float, threahold of decode process. - +* *@par Outputs: * @ decoded_boxes: A Tensor. Must have the same type as box_predictions. * N-D with shape [N, 4]. @@ -1210,12 +1207,12 @@ REG_OP(RpnProposalsD) /** *@brief Computes Score Filte Pre-Sort function. - +* *@par Inputs: *Inputs include: * @li rois: A Tensor. Must be float16. N-D with shape [N, 4]. * @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1]. - +* *@par Attributes: * @li score_threshold: required, float, threahold of topk process. 
* @li k: required, Int, threahold of topk process. @@ -1276,12 +1273,12 @@ REG_OP(RpnProposalPostProcessing) .OP_END_FACTORY_REG(RpnProposalPostProcessing) /** *@brief Computes DecodeBoundariesTarget function. - +* *@par Inputs: *Inputs include: * @li boundary_predictions: A Tensor. Must be float16. * @li anchors: A Tensor. Must be float16. - +* *@par Outputs: * @ boundary_encoded: A Tensor. Must be float16. @@ -1479,21 +1476,7 @@ REG_OP(DecodeBboxV2) .ATTR(reversed_box, Bool, false) .OP_END_FACTORY_REG(DecodeBboxV2) -/** -*@brief Computes sort function. -* -*@par Inputs: -*Inputs include: -* x: A Tensor. Must be float16 or float32. -* -*@par Attributes: -* @li axis: optional, int. -* @li descending: optional,bool. -* -*@par Outputs: -* @li y1: A Tensor. Must have the same type as x. -* @li y2: A Tensor. Indices of y1 in x.Dtype must be int32. -*/ + REG_OP(Sort) .INPUT(x, TensorType({ DT_FLOAT16 })) .OUTPUT(y1, TensorType({ DT_FLOAT16 })) @@ -1502,6 +1485,7 @@ REG_OP(Sort) .ATTR(descending, Bool, false) .OP_END_FACTORY_REG(Sort) + } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ +#endif // GE_OP_NN_DETECT_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 0c607162..0d0032cf 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file nn_norm_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ +#ifndef GE_OP_NN_NORM_OPS_H +#define GE_OP_NN_NORM_OPS_H #include "graph/operator_reg.h" namespace ge { @@ -160,34 +160,6 @@ REG_OP(SigmoidCrossEntropyWithLogits) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogits) /** -*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n - -*@par Inputs: -* four inputs, including: -*@li predict: A multi-dimensional Tensor of type float16 or float32, specifying the predictive value. -*@li target: A multi-dimensional Tensor of type float16 or float32, specifying the target value . \n -*@li weight: An multi-dimensional Tensor, specifying the weight value. \n -*@li pos_weight: An multi-dimensional Tensor, specifying the pos weight value. \n - -*@par Attributes: -*reduction: A character string from "none", "mean", and "sum", specifying the reduction type to be applied to the output. Defaults to "mean" . \n - -*@par Outputs: -*loss: Sigmoid cross entropy between the predictive value and target value. Has the same dimensions as "predict" . \n - -*@par Third-party framework compatibility -* Compatible with PyTorch operator BCEWithLogitsLoss. -*/ -REG_OP(SigmoidCrossEntropyWithLogitsV2) - .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT})) - .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(weight, TensorType({DT_FLOAT16, DT_FLOAT})) - .OPTIONAL_INPUT(pos_weight, TensorType({DT_FLOAT16, DT_FLOAT})) - .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) - .ATTR(reduction, String, "mean") - .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsV2) - -/** *@brief Computes the regression box of the RPN. It is a FasterRCNN operator . 
\n *@par Inputs: @@ -958,4 +930,4 @@ REG_OP(InHost) .OP_END_FACTORY_REG(InHost) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ +#endif //GE_OP_NN_NORM_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h index 16552eee..ea4a5ba3 100644 --- a/third_party/fwkacllib/inc/ops/nn_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,9 +18,9 @@ * \file nn_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ +#ifndef GE_OP_NN_OPS_H_ +#define GE_OP_NN_OPS_H_ #include "nn_pooling_ops.h" -#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ +#endif // GE_OP_NN_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 471c0062..fb7fc127 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file nn_pooling_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H_ +#ifndef GE_OP_NN_POOLING_OPS_H +#define GE_OP_NN_POOLING_OPS_H #include "graph/operator_reg.h" #include "graph/operator.h" @@ -31,7 +31,7 @@ namespace ge { *@par Inputs: *@li x: An NCHW tensor of type float16, float32, int8. *@par Attributes: -*@li mode: An optional int32, specifying the pooling algorithm, either "0" (max pooling) or "1" (avg pooling). Defaults to "0". 
+*@li mode: An optional int32, specifying the pooling algorithm, either "1" (max pooling) or "0" (avg pooling). Defaults to "0". *@li global_pooling: An optional bool. Defaults to "false". *@li window: Optional, including: *window[0]: An optional int32, specifying the window size along in the H dimension. The value range is [1, 32768]. Defaults to "1". @@ -109,47 +109,7 @@ REG_OP(AvgPool) *@brief Performs average pooling on the input . \n *@par Inputs: -*x: A tensor of type float16, float32, double. - -*@par Attributes: -*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 32768]. -*@li strides: A required list of 4 ints, specifying the stride of the sliding window. The strides of the N and C dimensions are 1. The strides of the H and W dimensions are positive integers within the range [1, 63]. -*@li padding_mode: A required string, specifying the padding algorithm, either "VALID", "SAME" and "CALCULATED". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. With "VALID" means no padding. -*@li pads: Pad value when padding_mode is "CALCULATED". -*@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default). -*@li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] -*@li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". -*@li exclusive: Ignore padding area or not when calculating average. - -*@par Outputs: -*y: The average pooled output tensor. Has the same type and format as input "x". - -*@attention Constraints: -*@li Only single input and single output are supported. -*@li Global pooling is supported. -*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 32768]. 
ksize_H * ksize_W < 256 -*@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. -*@par Third-party framework compatibility -* Compatible with the TensorFlow operator AvgPoolV2. -*/ -REG_OP(AvgPoolV2) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) - .REQUIRED_ATTR(ksize, ListInt) - .REQUIRED_ATTR(strides, ListInt) - .ATTR(padding_mode, String, "CALCULATED") - .ATTR(pads, ListInt, {0, 0, 0, 0}) - .ATTR(data_format, String, "NCHW") - .ATTR(global_pooling, Bool, false) - .ATTR(ceil_mode, Bool, false) - .ATTR(exclusive, Bool, true) - .OP_END_FACTORY_REG(AvgPoolV2) - -/** -*@brief Performs average pooling on the input. - -*@par Inputs: -*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double . \n *@par Attributes: *@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. @@ -225,15 +185,15 @@ REG_OP(MaxPoolExt2) *@par Inputs: * One input: *x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int8, int16, -* int32, int64, uint8, uint16, qint8 + * int32, int64, uint8, uint16, qint8 *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, -* specifying the size of the window for each dimension of the input tensor. -* No default value. + * specifying the size of the window for each dimension of the input tensor. + * No default value. *@li strides: A required list of int8, int16, int32, or int64 values, -* specifying the stride of the sliding window for each dimension of -* the input tensor. No default value. + * specifying the stride of the sliding window for each dimension of + * the input tensor. No default value. *@li padding: A required string. No default value. 
*@li data_format: An optional string. Defaults to "NHWC" . \n @@ -242,9 +202,9 @@ REG_OP(MaxPoolExt2) *@attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, -* ksize[1] * ksize[2] <= 255. + * ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, -* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. + * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. *@li "padding" is either "SAME" or "VALID". @@ -666,7 +626,7 @@ REG_OP(AvgPoolGrad) * @par Inputs: * @input_grad: An NHWC tensor of type float16. * @mean_matrix: Assist matrix, an NHWC tensor of type float16. -* @kernel_matrix: Assist matrix, an NHWC tensor of type float16. +* @kernel_matrix: Assist matrix, an NHWC tensor of type float16. \n * @par Attributes: * @li orig_input_shape: A required Original input dimensions. @@ -696,88 +656,6 @@ REG_OP(AvgPoolGradD) .ATTR(data_format, String, "NHWC") .OP_END_FACTORY_REG(AvgPoolGradD) -/** -* @brief Computes avgpoolv2grad function. - -* @par Inputs: -* @li orig_input_shape: An NHWC tensor of type int32. -* @li input_grad: An NHWC tensor of type float16, float32, or double. - -* @par Attributes: -* @li ksize: A required tuple or list, specifying the size of the window for -* each dimension of the input tensor. -* @li strides: A required tuple or list, specifying the stride of the sliding -* window for each dimension of the input tensor. -* @li padding_mode: A required string, specifying the type of -* the padding algorithm to use. -* @li global_pooling: Whether to use the global pooling. If global_pooling=true, -* ksize and pads will be ignored. Default False. -* @li ceil_mode: Whether to use the ceil function to calculate output height and -* width. Default False. -* @li exclusive: Whether to exclude padding points. default is true. -* @li data_format: An optional string. Defaults to "NHWC". 
- -* @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input". - -* @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator AvgPoolGrad. -*/ -REG_OP(AvgPoolV2Grad) - .INPUT(orig_input_shape, TensorType({DT_INT32})) - .INPUT(input_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) - .OUTPUT(out_grad, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) - .REQUIRED_ATTR(ksize, ListInt) - .REQUIRED_ATTR(strides, ListInt) - .ATTR(padding_mode, String, "CALCULATED") - .ATTR(pads, ListInt, {0,0,0,0}) - .ATTR(data_format, String, "NCHW") - .ATTR(global_pooling, Bool, false) - .ATTR(ceil_mode, Bool, false) - .ATTR(exclusive, Bool, true) - .OP_END_FACTORY_REG(AvgPoolV2Grad) -/** -* @brief Computes gradients of averagev2 pooling function. - -* @par Inputs: -* @li input_grad: An NHWC tensor of type float16, float32, or double. - -* @par Attributes: -* @li orig_input_shape: A required tuple or list of type int32. -* @li ksize: A required tuple or list, specifying the size of the window for -* each dimension of the input tensor. -* @li strides: A required tuple or list, specifying the stride of the sliding -* window for each dimension of the input tensor. -* @li padding_mode: A required string, specifying the type of -* the padding algorithm to use. -* @li global_pooling: Whether to use the global pooling. If global_pooling=true, -* ksize and pads will be ignored. Default False. -* @li ceil_mode: Whether to use the ceil function to calculate output height and -* width. Default False. -* @li exclusive: Whether to exclude padding points. default is true. -* @li data_format: An optional string. Defaults to "NHWC". - -* @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input". - -* @par Third-party framework compatibility -* @li Compatible with the TensorFlow operator AvgPoolGrad. 
-*/ -REG_OP(AvgPoolV2GradD) - .INPUT(input_grad, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(mean_matrix, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(kernel_matrix, TensorType({DT_FLOAT16})) - .OUTPUT(out_grad, TensorType({DT_FLOAT16})) - .REQUIRED_ATTR(orig_input_shape, ListInt) - .REQUIRED_ATTR(ksize, ListInt) - .REQUIRED_ATTR(strides, ListInt) - .ATTR(padding_mode, String, "CALCULATED") - .ATTR(pads, ListInt, {0,0,0,0}) - .ATTR(data_format, String, "NCHW") - .ATTR(global_pooling, Bool, false) - .ATTR(ceil_mode, Bool, false) - .ATTR(exclusive, Bool, true) - .OP_END_FACTORY_REG(AvgPoolV2GradD) /** *@brief :upsample the layer @@ -1187,108 +1065,6 @@ REG_OP(MaxPoolGradWithArgmaxV2) .ATTR(dilation, ListInt, {1,1,1,1}) .ATTR(ceil_mode, Bool, false) .OP_END_FACTORY_REG(MaxPoolGradWithArgmaxV2) - -/** -* @brief Performs max pooling on the input . \n - -* @par Inputs: -* One input: -* x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int8, int16, -* int32, int64, uint8, uint16, qint8 - -* @par Attributes: -* @li ksize: A required list of int8, int16, int32, or int64 values, -* specifying the size of the window for each dimension of the input tensor. -* No default value. -* @li strides: A required list of int8, int16, int32, or int64 values, -* specifying the stride of the sliding window for each dimension of -* the input tensor. No default value. -* @li padding_mode: A required string. Defaults to "CALCULATED". -* @li pads:A required list of int8, int16, int32, or int64 values, -* a data to caculate when padding_mode is "SAME" and "CALCULATED". -* @li data_format: An optional string. Defaults to "NHWC" . -* @li global_pooling bool, Whether to use the global pooling. -* If global_pooling = true, kernel size and paddings will be ignored. -* Default False -* @li ceil_mode:global_pooling (bool) – (bool) Whether to use the global pooling. -* If global_pooling = true, kernel size and paddings will be ignored. -* Default False \n - -* @par Outputs: -* y: A Tensor. 
Has the same type and format as input "x" . \n - -* @attention Constraints: -* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, -* ksize[1] * ksize[2] <= 255. -* @li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, -* strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. -* @li "padding" is "SAME" "VALID" or "CACULATE" . - - -* @par Third-party framework compatibility -* Compatible with the TensorFlow operator MaxPool. -*/ -REG_OP(MaxPoolV3) - .INPUT(x,TensorType({DT_FLOAT16, DT_FLOAT32})) - .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT32})) - .REQUIRED_ATTR(ksize, ListInt) - .REQUIRED_ATTR(strides, ListInt) - .ATTR(padding_mode, String, "CALCULATED") - .ATTR(pads, ListInt, {0,0,0,0}) - .ATTR(data_format, String, "NCHW") - .ATTR(global_pooling,Bool,false) - .ATTR(ceil_mode, Bool, false) - .OP_END_FACTORY_REG(MaxPoolV3) - -/** -* @brief Computes gradients of the maxpooling function . \n - -* @par Inputs: -* @li orig_input: A mutable NC1HWC0 tensor of type RealNumberType. -* @li orig_output: A mutable NC1HWC0 tensor of type RealNumberTypex. -* @li grad: A mutable NC1HWC0 tensor of type RealNumberType . \n - -* @par Attributes: -* @li ksize: A required list of int8, int16, int32, or int64 values, -* specifying the size of the window for each dimension of the input tensor. -* No default value. -* @li strides: A required list of int8, int16, int32, or int64 values, -* specifying the stride of the sliding window for each dimension of -* the input tensor. No default value. -* @li padding_mode: A required string. Defaults to "CALCULATED". -* @li pads:A required list of int8, int16, int32, or int64 values, -* a data to caculate when padding_mode is "SAME" and "CALCULATED". -* @li data_format: An optional string. Defaults to "NHWC" . -* @li global_pooling bool, Whether to use the global pooling. -* If global_pooling = true, kernel size and paddings will be ignored. 
-* Default False -* @li ceil_mode:global_pooling (bool) – (bool) Whether to use the global pooling. -* If global_pooling = true, kernel size and paddings will be ignored. -* Default False \n - -* @par Outputs: -* y: A mutable tensor. Has the same shape and type as "x1" . \n - -* @attention Constraints: -* @li Computing gradients of global pooling is not supported, which means -* "ksize < x1". -* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] - -* @par Third-party framework compatibility -* Compatible with the TensorFlow operator MaxPoolGrad. -*/ -REG_OP(MaxPoolV3Grad) - .INPUT(orig_input, TensorType::RealNumberType()) - .INPUT(orig_output, TensorType::RealNumberType()) - .INPUT(grad, TensorType::RealNumberType()) - .OUTPUT(out_grad, TensorType::RealNumberType()) - .REQUIRED_ATTR(ksize, ListInt) - .REQUIRED_ATTR(strides, ListInt) - .ATTR(padding_mode, String, "CALCULATED") - .ATTR(pads, ListInt, {0, 0, 0, 0}) - .ATTR(data_format, String, "NCHW") - .ATTR(global_pooling, Bool, false) - .ATTR(ceil_mode, Bool, false) - .OP_END_FACTORY_REG(MaxPoolV3Grad) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_POOLING_OPS_H \ No newline at end of file + +#endif // GE_OP_NN_POOLING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index d50b3d2b..0621a96c 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file nn_training_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ +#ifndef GE_OP_TRAINING_OPS_H +#define GE_OP_TRAINING_OPS_H #include "graph/operator_reg.h" namespace ge { @@ -2556,4 +2556,4 @@ REG_OP(AtomicAddrClean) .OP_END_FACTORY_REG(AtomicAddrClean) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_TRAINING_OPS_H_ +#endif // GE_OP_TRAINING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h index b27b1fa0..503d97b1 100644 --- a/third_party/fwkacllib/inc/ops/no_op.h +++ b/third_party/fwkacllib/inc/ops/no_op.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file no_op.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_ +#ifndef GE_NO_OP_H_ +#define GE_NO_OP_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -38,4 +38,4 @@ REG_OP(NoOp) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_NO_OP_H_ +#endif // GE_NO_OP_H_ diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index ce8383db..b50b7cd1 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file nonlinear_fuc_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ +#ifndef GE_OP_NONLINEAR_FUC_OPS_H +#define GE_OP_NONLINEAR_FUC_OPS_H #include "graph/operator_reg.h" @@ -642,4 +642,4 @@ REG_OP(Mish) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ +#endif // GE_OP_NONLINEAR_FUC_OPS_H diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h index f36d2935..e94dafa7 100644 --- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h +++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,9 @@ * \file npu_loss_scale_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ + +#ifndef GE_OP_NN_LOSS_SCALE_OPS_H +#define GE_OP_NN_LOSS_SCALE_OPS_H #include "graph/operator_reg.h" namespace ge { @@ -119,4 +120,4 @@ REG_OP(NPUGetFloatStatus) .OP_END_FACTORY_REG(NPUGetFloatStatus) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ +#endif // GE_OP_NN_LOSS_SCALE_OPS_H diff --git a/third_party/fwkacllib/inc/ops/outfeed_ops.h b/third_party/fwkacllib/inc/ops/outfeed_ops.h index 53b9d701..139e4880 100644 --- a/third_party/fwkacllib/inc/ops/outfeed_ops.h +++ b/third_party/fwkacllib/inc/ops/outfeed_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,10 +18,10 @@ * \file outfeed_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_ +#ifndef GE_OP_OUTFEED_OPS_H +#define GE_OP_OUTFEED_OPS_H #include "data_flow_ops.h" -#endif // OPS_BUILT_IN_OP_PROTO_INC_OUTFEED_OPS_H_ +#endif // GE_OP_OUTFEED_OPS_H diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index ed10648e..567bc63d 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file pad_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ +#ifndef GE_OP_PAD_OPS_H +#define GE_OP_PAD_OPS_H #include "graph/operator_reg.h" namespace ge { @@ -233,9 +233,6 @@ REG_OP(PadV3) *@par Third-party framework compatibility: * Compatible with ONNX operator Pad. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use PadV3 instead. 
*/ REG_OP(PadV3D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8})) @@ -331,7 +328,7 @@ REG_OP(AscendPadding) */ REG_OP(EmbeddingRankId) .INPUT(addr_table, TensorType({DT_UINT64})) - .INPUT(index, TensorType({DT_INT64,DT_INT32,DT_UINT64})) + .INPUT(index, TensorType({DT_UINT32})) .OUTPUT(rank_id, TensorType({DT_UINT64})) .ATTR(row_memory, Int, 320) .ATTR(mode, String, "mod") @@ -339,4 +336,4 @@ REG_OP(EmbeddingRankId) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_PAD_OPS_H_ +#endif //GE_OP_PAD_OPS_H diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index 9a5cf504..b3c50654 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file parsing_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ +#ifndef GE_OP_PARSING_OPS_H +#define GE_OP_PARSING_OPS_H #include "graph/operator_reg.h" #include "graph/operator.h" @@ -53,4 +53,4 @@ REG_OP(StringToNumber) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_PARSING_OPS_H_ +#endif // GE_OP_PARSING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index 806e28df..31ba266b 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file quantize_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_ +#ifndef GE_OP_QUANTIZE_OPS_H +#define GE_OP_QUANTIZE_OPS_H #include "graph/operator_reg.h" namespace ge { @@ -221,4 +221,4 @@ REG_OP(AscendRequantS16) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_QUANTIZE_OPS_H_ +#endif // GE_OP_QUANTIZE_OPS_H diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h index 20484623..4c62ec86 100644 --- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file ragged_array_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_ +#ifndef GE_OP_RAGGED_ARRAY_OPS_H +#define GE_OP_RAGGED_ARRAY_OPS_H #include "graph/operator.h" #include "graph/operator_reg.h" @@ -62,4 +62,4 @@ REG_OP(RaggedGather) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_ARRAY_OPS_H_ \ No newline at end of file +#endif //GE_OP_RAGGED_ARRAY_OPS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index 099d2a85..ec88c618 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file ragged_conversion_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_ +#ifndef GE_OP_RAGGED_CONVERSION_OPS_H +#define GE_OP_RAGGED_CONVERSION_OPS_H #include "graph/operator_reg.h" namespace ge { @@ -95,4 +95,4 @@ REG_OP(RaggedTensorToTensor) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_CONVERSION_OPS_H_ \ No newline at end of file +#endif // GE_OP_RAGGED_CONVERSION_OPS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h index 258b0ca1..ab871b7e 100644 --- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file ragged_math_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_ +#ifndef GE_OP_RAGGED_MATH_OPS_H +#define GE_OP_RAGGED_MATH_OPS_H #include "graph/operator.h" #include "graph/operator_reg.h" @@ -57,4 +57,4 @@ REG_OP(RaggedRange) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_RAGGED_MATH_OPS_H_ \ No newline at end of file +#endif //GE_OP_RAGGED_MATH_OPS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index df6cf33a..24a9edd1 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file random_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ +#ifndef GE_OP_RANDOM_OPS_H_ +#define GE_OP_RANDOM_OPS_H_ #include @@ -408,25 +408,6 @@ REG_OP(LinSpace) .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) .OP_END_FACTORY_REG(LinSpace) - - -/** -*@brief The dropout operator randomly sets (according to the given dropout probability) -*the outputs of some units to zero, while others are remain unchanged. . \n - -*@par Inputs: -*One input, including: -*@li x:The input tensor variable. The data type is float32. \n - -*@par Attributes: -*@li dropout_ratio:Float between 0 and 1. Fraction of the input units to drop.Defaults to "0.5". -*@li scale_train: Bool,default to true. -*@li alpha: An optional float32. A scaling factor. Defaults to "1.0". -*@li beta: An optional float32. An exponent. Defaults to "0.0". 
\n - -*@par Outputs: -*y: A Variable holding Tensor representing the dropout, has same shape and data type with x. \n -*/ REG_OP(Dropout) .INPUT(x, TensorType{DT_FLOAT}) .OUTPUT(y, TensorType{DT_FLOAT}) @@ -494,4 +475,4 @@ REG_OP(ShuffleChannel) .OP_END_FACTORY_REG(ShuffleChannel) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ +#endif // GE_OP_RANDOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 386c88c3..80169344 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file reduce_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ +#ifndef GE_OP_REDUCE_OPS_H +#define GE_OP_REDUCE_OPS_H #include "graph/operator_reg.h" @@ -960,4 +960,4 @@ REG_OP(GNTrainingUpdate) } //namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ +#endif /* GE_OP_REDUCE_OPS_H */ diff --git a/third_party/fwkacllib/inc/ops/resource_variable_ops.h b/third_party/fwkacllib/inc/ops/resource_variable_ops.h index 74ac83f8..fdc76391 100644 --- a/third_party/fwkacllib/inc/ops/resource_variable_ops.h +++ b/third_party/fwkacllib/inc/ops/resource_variable_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file resource_variable_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_RESOURCE_VARIABLE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_RESOURCE_VARIABLE_OPS_H_ +#ifndef GE_OP_RESOURCE_VARIABLE_OPS_H +#define GE_OP_RESOURCE_VARIABLE_OPS_H #include "graph/operator.h" #include "graph/operator_reg.h" @@ -111,4 +111,4 @@ REG_OP(AssignSubVariableOp) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_RESOURCE_VARIABLE_OPS_H_ \ No newline at end of file +#endif //GE_OP_RESOURCE_VARIABLE_OPS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/rnn.h b/third_party/fwkacllib/inc/ops/rnn.h index 07b8c0c7..0766d2c6 100644 --- a/third_party/fwkacllib/inc/ops/rnn.h +++ b/third_party/fwkacllib/inc/ops/rnn.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file rnn.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ +#ifndef GE_OP_RNN_H +#define GE_OP_RNN_H #include "graph/operator_reg.h" @@ -113,8 +113,8 @@ REG_OP(DynamicLSTM) *@li f:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li o:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li tanhct:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. -*@li seq_length:A 1D Tensor. Must be one of the following types: int32. -*@li mask:A 1D Tensor. Must be one of the following types: int8. +*@li seq_length:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. +*@li mask:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li wci:A 4D Tensor. 
Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li wcf:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. *@li wco:A 4D Tensor. Must be one of the following types: float16, float32. The format must be FRACTAL_NZ. @@ -594,4 +594,4 @@ REG_OP(DynamicGRUV2) .OP_END_FACTORY_REG(DynamicGRUV2) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ +#endif // GE_OP_RNN_H diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h index 089af326..39583293 100644 --- a/third_party/fwkacllib/inc/ops/rpn_ops.h +++ b/third_party/fwkacllib/inc/ops/rpn_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file rpn_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_RPN_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_RPN_OPS_H_ +#ifndef GE_OP_RPN_OPS_H +#define GE_OP_RPN_OPS_H #include "graph/operator_reg.h" namespace ge { @@ -58,4 +58,4 @@ REG_OP(NMSWithMask) .OP_END_FACTORY_REG(NMSWithMask) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_RPN_OPS_H_ +#endif // GE_OP_TRAINING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/save_ops.h b/third_party/fwkacllib/inc/ops/save_ops.h index a232e7ba..7fd853d3 100644 --- a/third_party/fwkacllib/inc/ops/save_ops.h +++ b/third_party/fwkacllib/inc/ops/save_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file save_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_SAVE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_SAVE_OPS_H_ +#ifndef GE_OP_SAVE_OPS_H_ +#define GE_OP_SAVE_OPS_H_ #include "graph/operator_reg.h" @@ -39,4 +39,4 @@ REG_OP(Save) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_SAVE_OPS_H_ +#endif // GE_OP_SAVE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index 318e05a2..acf1c34d 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file sdca_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_SDCA_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_SDCA_OPS_H_ +#ifndef GE_OP_SDCA_OPS_H +#define GE_OP_SDCA_OPS_H #include "graph/operator.h" #include "graph/operator_reg.h" @@ -88,4 +88,4 @@ REG_OP(SdcaOptimizerV2) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_SDCA_OPS_H_ \ No newline at end of file +#endif //GE_OP_SDCA_OPS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index b3ff7297..8ef4a42c 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file selection_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ +#ifndef GE_OP_SELECTION_OPS_H +#define GE_OP_SELECTION_OPS_H #include "graph/operator_reg.h" namespace ge { @@ -807,7 +807,7 @@ REG_OP(SliceD) * @attention Constraints: * @li k =< 5120 -* @li Size of the last dimension =< 1458176 +* @li Size of the last dimension =< 65500 * @li sorted = true * @li It's unstable sorted indices on the platform of Ascend310 @@ -903,9 +903,6 @@ REG_OP(ScatterNd) *@li "y" has the same type as "x". *@par Third-party framework compatibility * Compatible with the TensorFlow operator ScatterNd. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use ScatterNd instead. */ REG_OP(ScatterNdD) .INPUT(indices, TensorType::IndexNumberType()) @@ -1149,9 +1146,6 @@ REG_OP(Cumprod) *y: A Tensor. Has the same type as "x". *@par Third-party framework compatibility * Compatible with the TensorFlow operator Cumprod. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Cumprod instead. */ REG_OP(CumprodD) .INPUT(x, TensorType::NumberType()) @@ -1206,9 +1200,6 @@ REG_OP(Cumsum) *y: A Tensor. Has the same type as "x". *@par Third-party framework compatibility * Compatible with the TensorFlow operator Cumsum. - -* @par Restrictions: -* Warning: THIS FUNCTION IS DEPRECATED. Please use Cumsum instead. 
*/ REG_OP(CumsumD) .INPUT(x, TensorType::NumberType()) @@ -1875,4 +1866,4 @@ REG_OP(CumulativeLogsumexpD) .OP_END_FACTORY_REG(CumulativeLogsumexpD) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ +#endif // GE_OP_SELECTION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h index 04e04f1b..18df6edf 100644 --- a/third_party/fwkacllib/inc/ops/set_ops.h +++ b/third_party/fwkacllib/inc/ops/set_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file set_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_SET_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_SET_OPS_H_ +#ifndef GE_OP_SET_OPS_H_ +#define GE_OP_SET_OPS_H_ #include "graph/operator.h" #include "graph/operator_reg.h" @@ -178,4 +178,4 @@ REG_OP(SetSize) .OP_END_FACTORY_REG(SetSize) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_SET_OPS_H_ +#endif // GE_OP_SET_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index 09d8ced9..3eecbeab 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file sparse_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_ +#ifndef GE_OP_SPARSE_OPS_H_ +#define GE_OP_SPARSE_OPS_H_ #include "graph/operator_reg.h" @@ -1044,4 +1044,4 @@ REG_OP(DeserializeManySparse) .OP_END_FACTORY_REG(DeserializeManySparse) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_SPARSE_OPS_H_ +#endif // GE_OP_SPARSE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index be3d7d00..460dada4 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file spectral_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ +#ifndef GE_OP_SPECTRAL_OPS_H +#define GE_OP_SPECTRAL_OPS_H #include "graph/operator.h" #include "graph/operator_reg.h" @@ -49,4 +49,4 @@ REG_OP(RFFT) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ \ No newline at end of file +#endif //GE_OP_SPECTRAL_OPS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index 6d511728..b66a0213 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file split_combination_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ +#ifndef GE_OP_SPLIT_COMBINATION_OPS_H +#define GE_OP_SPLIT_COMBINATION_OPS_H #include "graph/operator_reg.h" namespace ge { @@ -379,4 +379,4 @@ REG_OP(ConcatOffsetD) .OP_END_FACTORY_REG(ConcatOffsetD) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ +#endif // GE_OP_SPLIT_COMBINATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h index 3c8e32b6..ca85067b 100644 --- a/third_party/fwkacllib/inc/ops/state_ops.h +++ b/third_party/fwkacllib/inc/ops/state_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file state_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_STATE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_STATE_OPS_H_ +#ifndef GE_OP_STATE_OPS_H_ +#define GE_OP_STATE_OPS_H_ #include "graph/operator_reg.h" @@ -164,4 +164,4 @@ REG_OP(CountUpTo) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_STATE_OPS_H_ +#endif // GE_OP_STATE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h index c2f65c6a..779e7cea 100644 --- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file stateful_random_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_STATEFUL_RANDOM_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_STATEFUL_RANDOM_OPS_H_ +#ifndef GE_OP_STATEFUL_RANDOM_OPS_H +#define GE_OP_STATEFUL_RANDOM_OPS_H #include "graph/operator.h" #include "graph/operator_reg.h" @@ -233,4 +233,4 @@ REG_OP(StatefulUniformInt) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_STATEFUL_RANDOM_OPS_H_ \ No newline at end of file +#endif //GE_OP_STATEFUL_RANDOM_OPS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h index ff9daaa3..d91bc38a 100644 --- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file stateless_random_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_ +#ifndef GE_OP_STATELESS_RANDOM_OPS_H +#define GE_OP_STATELESS_RANDOM_OPS_H #include "graph/operator.h" #include "graph/operator_reg.h" @@ -81,4 +81,4 @@ REG_OP(StatelessRandomUniformInt) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_STATELESS_RANDOM_OPS_H_ \ No newline at end of file +#endif //GE_OP_STATELESS_RANDOM_OPS_H \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index ec84cc83..90ee700d 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file string_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ +#ifndef GE_OP_STRING_OPS_H_ +#define GE_OP_STRING_OPS_H_ #include #include "graph/operator_reg.h" @@ -559,4 +559,4 @@ REG_OP(DecodeBase64) .OP_END_FACTORY_REG(DecodeBase64) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_STRING_OPS_H_ +#endif // GE_OP_STRING_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/swap_co_ops.h b/third_party/fwkacllib/inc/ops/swap_co_ops.h index 6e8eaac3..fb25c741 100644 --- a/third_party/fwkacllib/inc/ops/swap_co_ops.h +++ b/third_party/fwkacllib/inc/ops/swap_co_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file swap_co_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_SWAP_CO_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_SWAP_CO_OPS_H_ +#ifndef GE_OP_SWAP_CO_OPS_H_ +#define GE_OP_SWAP_CO_OPS_H_ #include "graph/operator_reg.h" @@ -59,4 +59,4 @@ REG_OP(SwapCo) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_SWAP_CO_OPS_H_ +#endif // GE_OP_SWAP_CO_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index eb5ae258..ed46d95c 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ * \file transformation_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ +#ifndef GE_OP_TRANSFORMATION_OPS_H +#define GE_OP_TRANSFORMATION_OPS_H #include "graph/operator_reg.h" @@ -695,4 +695,4 @@ REG_OP(CompressFcOp) .OP_END_FACTORY_REG(CompressFcOp) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_ +#endif // GE_OP_TRANSFORMATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h index 8ef69d8b..c96b96be 100644 --- a/third_party/fwkacllib/inc/ops/warp_perspective_ops.h +++ b/third_party/fwkacllib/inc/ops/warp_perspective_ops.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,8 @@ * \file warp_perspective_ops.h * \brief */ -#ifndef OPS_BUILT_IN_OP_PROTO_INC_WARP_PERSPECTIVE_OPS_H_ -#define OPS_BUILT_IN_OP_PROTO_INC_WARP_PERSPECTIVE_OPS_H_ +#ifndef GE_OP_WARP_PERSPECTIVE_OPS_H_ +#define GE_OP_WARP_PERSPECTIVE_OPS_H_ #include "graph/operator_reg.h" #include "graph/operator.h" @@ -56,4 +56,4 @@ REG_OP(WarpPerspective) .OP_END_FACTORY_REG(WarpPerspective) } // namespace ge -#endif // OPS_BUILT_IN_OP_PROTO_INC_WARP_PERSPECTIVE_OPS_H_ +#endif // GE_OP_WARP_PERSPECTIVE_OPS_H_ diff --git a/third_party/fwkacllib/inc/register/ops_kernel_builder_registry.h b/third_party/fwkacllib/inc/register/ops_kernel_builder_registry.h deleted file mode 100644 index 96ac931b..00000000 --- a/third_party/fwkacllib/inc/register/ops_kernel_builder_registry.h +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_REGISTER_OPS_KERNEL_BUILDER_REGISTRY_H_ -#define INC_REGISTER_OPS_KERNEL_BUILDER_REGISTRY_H_ - -#include -#include "register/register_types.h" -#include "common/opskernel/ops_kernel_builder.h" - -namespace ge { -using OpsKernelBuilderPtr = std::shared_ptr; - -class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpsKernelBuilderRegistry { - public: - static OpsKernelBuilderRegistry &GetInstance(); - - void Register(const std::string &lib_name, const OpsKernelBuilderPtr &instance); - - void UnregisterAll(); - - const std::map &GetAll() const; - - private: - std::map kernel_builders_; -}; - -class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpsKernelBuilderRegistrar { - public: - using CreateFn = OpsKernelBuilder *(*)(); - OpsKernelBuilderRegistrar(const std::string &kernel_lib_name, CreateFn fn); - ~OpsKernelBuilderRegistrar() = default; -}; - -#define REGISTER_OPS_KERNEL_BUILDER(kernel_lib_name, builder) \ - REGISTER_OPS_KERNEL_BUILDER_UNIQ_HELPER(__COUNTER__, kernel_lib_name, builder) - -#define REGISTER_OPS_KERNEL_BUILDER_UNIQ_HELPER(ctr, kernel_lib_name, builder) \ - REGISTER_OPS_KERNEL_BUILDER_UNIQ(ctr, kernel_lib_name, builder) - -#define REGISTER_OPS_KERNEL_BUILDER_UNIQ(ctr, kernel_lib_name, builder) \ - static ::ge::OpsKernelBuilderRegistrar register_op_kernel_builder_##ctr \ - __attribute__((unused)) = \ - ::ge::OpsKernelBuilderRegistrar(kernel_lib_name, []()->::ge::OpsKernelBuilder* { \ - return new (std::nothrow) builder(); \ - }) -} // namespace ge - -#endif // INC_REGISTER_OPS_KERNEL_BUILDER_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index a8341e32..17243802 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not 
use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef __CCE_RUNTIME_BASE_H__ #define __CCE_RUNTIME_BASE_H__ @@ -100,9 +100,6 @@ typedef enum tagRtError { RT_ERROR_MODEL_ID, RT_ERROR_MODEL_EXE_FAILED, RT_ERROR_END_OF_SEQUENCE, // end of sequence - RT_ERROR_MODEL_EXIT, - RT_ERROR_MODEL_EXIT_STREAM_UNBIND, - RT_ERROR_MODEL_EXIT_ID, RT_ERROR_EVENT_BASE = 0x07050000, RT_ERROR_EVENT_NULL, @@ -584,16 +581,6 @@ RTS_API rtError_t rtLabelListCpy(rtLabel_t *label, uint32_t labelNumber, void *d */ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); -/** - * @ingroup dvrt_base - * @brief get current thread last stream id and task id - * @param [out] stream id and task id - * @param [in] null - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for input null ptr - */ -RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskid, uint32_t *streamid); - #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index f1a70eaa..6de84c02 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef __CCE_RUNTIME_CONFIG_H__ #define __CCE_RUNTIME_CONFIG_H__ diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index ffcf7e4b..39651817 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ #ifndef __CCE_RUNTIME_CONTEXT_H__ #define __CCE_RUNTIME_CONTEXT_H__ @@ -149,13 +149,6 @@ RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t* groupInfo, uint */ RTS_API rtError_t rtGetGroupCount(uint32_t *count); -/** - * @ingroup rt_context - * @brief set context INF mode - * @param [in] mode - * @return RT_ERROR_NONE for ok - */ -RTS_API rtError_t rtSetCtxINFMode(bool mode); #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index f3f2937c..0bff548b 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ #ifndef __CCE_RUNTIME_DEVICE_H__ #define __CCE_RUNTIME_DEVICE_H__ @@ -339,23 +339,6 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3 * @return RT_ERROR_NONE for ok */ RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value); - -/** - * @ingroup dvrt_dev - * @brief set target device for current thread - * @param [int] device the device id - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - */ -RTS_API rtError_t rtSetDeviceWithoutTsd(int32_t device); - -/** - * @ingroup dvrt_dev - * @brief reset all opened device - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - */ -RTS_API rtError_t rtDeviceResetWithoutTsd(int32_t device); #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/dvfsprofile.h b/third_party/fwkacllib/inc/runtime/dvfsprofile.h index 6e451695..e27cd832 100644 --- a/third_party/fwkacllib/inc/runtime/dvfsprofile.h +++ b/third_party/fwkacllib/inc/runtime/dvfsprofile.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ #ifndef __CCE_RUNTIME_DVFSPROFILE_H__ #define __CCE_RUNTIME_DVFSPROFILE_H__ diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index c8a227e8..af7b16d8 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef __CCE_RUNTIME_EVENT_H__ #define __CCE_RUNTIME_EVENT_H__ diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index de806a9e..2030634a 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
-*/ + */ #ifndef __CCE_RUNTIME_KERNEL_H__ #define __CCE_RUNTIME_KERNEL_H__ diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 22a00ac6..a506e94a 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef __CCE_RUNTIME_MEM_H__ #define __CCE_RUNTIME_MEM_H__ @@ -177,28 +177,6 @@ typedef struct tagRtPointerAttributes { uint32_t pageSize; } rtPointerAttributes_t; - -typedef struct rtMallocHostSharedMemoryIn { - const char* name; - const uint64_t size; - uint32_t flag; -} rtMallocHostSharedMemoryIn; - -typedef struct rtMallocHostSharedMemoryOut { - int fd; - void* ptr; - void* devPtr; -} rtMallocHostSharedMemoryOut; - -typedef struct rtFreeHostSharedMemoryIn { - const char* name; - const uint64_t size; - int fd; - void* ptr; - void* devPtr; -} rtFreeHostSharedMemoryIn; - - /** * @ingroup dvrt_mem * @brief alloc device memory @@ -259,28 +237,6 @@ RTS_API rtError_t rtFreeHost(void *hostPtr); /** * @ingroup dvrt_mem - * @brief alloc host shared memory - * @param [in] in alloc host shared memory inputPara pointer - * @param [in] out alloc host shared memory outputInfo pointer - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - */ - -RTS_API rtError_t 
rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in, - rtMallocHostSharedMemoryOut *out); - -/** - * @ingroup dvrt_mem - * @brief free host memory - * @param [in] in free host shared memory inputPara pointer - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - */ - -RTS_API rtError_t rtFreeHostSharedMemory(rtFreeHostSharedMemoryIn *in); - -/** - * @ingroup dvrt_mem * @brief alloc managed memory * @param [in|out] ptr memory pointer * @param [in] size memory size diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index d3d5956f..c1872941 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef __CCE_RUNTIME_RT_H__ #define __CCE_RUNTIME_RT_H__ diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 78bb2321..59a1ba7d 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef __CCE_RUNTIME_MODEL_H__ #define __CCE_RUNTIME_MODEL_H__ @@ -49,7 +49,6 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_MEMCPY_ADDR_ASYNC, RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, RT_MODEL_TASK_STREAM_LABEL_GOTO, - RT_MODEL_TASK_MODEL_EXIT, } rtModelTaskType_t; typedef enum tagModelStreamType { @@ -225,13 +224,6 @@ typedef struct tagrtModelEndGraphTaskInfo { uint32_t reserved[8]; } rtModelEndGraphTaskInfo_t; -typedef struct tagrtModelExitInfo { - uint32_t modelId; - uint32_t streamId; - uint32_t reserved[8]; -} rtModelExitTaskInfo_t; - - typedef struct tagrtStreamLabelSwitchByIndexTask_t { uint64_t indexPtr; uint64_t labelInfoPtr; @@ -264,7 +256,6 @@ typedef struct tagTaskInfo { rtRdmaSendTaskInfo_t rdmaSendTask; rtRdmaDbSendTaskInfo_t rdmaDbSendTask; rtModelEndGraphTaskInfo_t modelEndGraphTask; - rtModelExitTaskInfo_t modelExitTask; rtStreamSwitchNTaskInfo_t streamSwitchNTask; rtStreamLabelSwitchByIndexTask_t streamLabelSwitchIndexTask; rtStreamLabelGotoTask_t streamLabelGotoTask; @@ -400,16 +391,6 @@ RTS_API rtError_t rtModelAbort(rtModel_t model); /** * @ingroup rt_model - * @brief end graph task to model default stream - * @param [in] model model to execute - * @param [in] end graph stream - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - */ -RTS_API rtError_t rtModelExit(rtModel_t model, rtStream_t stream); - -/** - * @ingroup rt_model * @brief bind queue * @param [in] model model to bind * @param [in] queueId queueId to bind diff --git a/third_party/fwkacllib/inc/runtime/stream.h 
b/third_party/fwkacllib/inc/runtime/stream.h index 81810a21..ab542d89 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -1,18 +1,18 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd - + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - + * * http://www.apache.org/licenses/LICENSE-2.0 - + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. -*/ + */ #ifndef __CCE_RUNTIME_STREAM_H__ #define __CCE_RUNTIME_STREAM_H__ diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h index a42f38a5..185d2b9c 100644 --- a/third_party/fwkacllib/inc/tdt/status.h +++ b/third_party/fwkacllib/inc/tdt/status.h @@ -1,4 +1,4 @@ -/** +/** * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h index 1cab6fd1..0e62a85c 100644 --- a/third_party/fwkacllib/inc/tdt/tdt_host_interface.h +++ b/third_party/fwkacllib/inc/tdt/tdt_host_interface.h @@ -135,93 +135,6 @@ int32_t TdtHostPopData(const std::string &channelName, std::vector &it * @li tdt_host_interface.h: Header file where the interface declaration is located. 
*/ int32_t TdtHostStop(const std::string &channelName); - -/** -* @ingroup TdtInFeedInit -* @brief Initialize the interface, start and initialize various general thread, log and other services -* -* @par Function -* Initialize the interface, start and initialize various general thread, log and other services -* -* @param deviceId [IN] type #unsigned int. logic device ID -* @retval #0 Success -* @retval #Not 0 Fail -* -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li tdt_host_interface.h: Header file where the interface declaration is located. -*/ -int32_t TdtInFeedInit(uint32_t deviceId); - -/** -* @ingroup TdtOutFeedInit -* @brief Initialize the interface, start and initialize various general thread, log and other services -* -* @par Function -* Initialize the interface, start and initialize various general thread, log and other services -* -* @param deviceId [IN] type #unsigned int. logic device ID -* @retval #0 Success -* @retval #Not 0 Fail -* -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li tdt_host_interface.h: Header file where the interface declaration is located. -*/ -int32_t TdtOutFeedInit(uint32_t deviceId); - -/** -* @ingroup TdtInFeedDestroy -* @brief Notify TDT component to close related resources -* -* @par Function -* Notify TDT component to close related resources -* -* @param NA -* @retval 0 Success -* @retval OtherValues Fail -* -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li tdt_host_interface.h: Header file where the interface declaration is located. -*/ -int32_t TdtInFeedDestroy(uint32_t deviceId); - -/** -* @ingroup TdtOutFeedDestroy -* @brief Notify TDT component to close related resources -* -* @par Function -* Notify TDT component to close related resources -* -* @param NA -* @retval 0 Success -* @retval OtherValues Fail -* -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. 
-* @li tdt_host_interface.h: Header file where the interface declaration is located. -*/ -int32_t TdtOutFeedDestroy(); - -/** -* @ingroup TdtInFeedData -* @brief Blocking queue. When the queue is full, the Push interface will block. -* -* @par Function -* Blocking queue. When the queue is full, the Push interface will block. -* -* @param channelName [IN] type #String. queue channel name -* @param items [IN] type #vector DataItem is defined in data_common.h. input data -* @retval 0 Success -* @retval OtherValues 0 Fail -* -* @par Dependency -* @li libtsdclient.so: Library to which the interface belongs. -* @li tdt_host_interface.h: Header file where the interface declaration is located. -* @li data_common.h: Header file where 'DataItem' defined -*/ -int32_t TdtInFeedData(const std::string &channelName, const std::vector &item, uint32_t deviceId); } // namespace tdt #ifdef __cplusplus } diff --git a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h index c8715041..4f216239 100644 --- a/third_party/fwkacllib/inc/toolchain/prof_acl_api.h +++ b/third_party/fwkacllib/inc/toolchain/prof_acl_api.h @@ -152,13 +152,4 @@ MSVP_PROF_API int32_t ProfStopProfiling(const ProfConfig *profStopCfg); */ MSVP_PROF_API int32_t ProfFinalize(); -/** - * @name ProfGetDataTypeConfig - * @brief get dataTypeConfig started with of one device - * @param deviceId [IN] deviceId to get dataTypeConfig - * @param dataTypeConfig [OUT] result get - * @return ProfErrorCode - */ -MSVP_PROF_API int32_t ProfGetDataTypeConfig(uint32_t deviceId, uint64_t &dataTypeConfig); - #endif // MSPROF_ENGINE_PROF_ACL_API_H_