diff --git a/dubhe-visual-server/backend/backend/api/utils.py b/dubhe-visual-server/backend/backend/api/utils.py
index 90d93c2..5f2d327 100644
--- a/dubhe-visual-server/backend/backend/api/utils.py
+++ b/dubhe-visual-server/backend/backend/api/utils.py
@@ -17,6 +17,7 @@
 """
 import re
 import time
+import urllib.parse
 from pathlib import Path
 from django.http import HttpResponseNotAllowed, HttpResponseBadRequest, \
     JsonResponse, HttpResponse
@@ -228,9 +229,17 @@ def response_wrapper(fn):
                 })
             return res
         except Exception as e:
+            _tb = e.__traceback__
+            _str_tb = ""
+            while _tb:
+                _st = "in {}, at line {} \n".format(
+                    _tb.tb_frame.f_code.co_filename, _tb.tb_lineno)
+                _str_tb += _st
+                _tb = _tb.tb_next
+            msg = "{}: Trace: {}".format(str(e), _str_tb)
             return JsonResponse({
                 'code': 500,
-                'msg': str(e),
+                'msg': msg,
                 'data': ""
             })
 
@@ -256,5 +265,9 @@ def get_api_params(request, params):
     params_key = res.keys()
     if 'uid' in params_key and 'trainJobName' in params_key:
         res['uid'] = res['uid'] + '_' + res['trainJobName']
-
-    return res.values()
+    ret = list(res.values())
+    for i, r in enumerate(ret):
+        ret[i] = urllib.parse.unquote(r)
+        if '%' in ret[i]:
+            ret[i] = urllib.parse.unquote(ret[i])
+    return ret
diff --git a/dubhe-visual-server/backend/backend/component/Graph/graph.py b/dubhe-visual-server/backend/backend/component/Graph/graph.py
index c83efce..984791b 100644
--- a/dubhe-visual-server/backend/backend/component/Graph/graph.py
+++ b/dubhe-visual-server/backend/backend/component/Graph/graph.py
@@ -287,11 +287,11 @@ class Proxy:
     def _get_links(self):
         links = defaultdict(list)
         for node in self._nodes:
-            to_name_without = get_source_name(node.name)
-            from_names = node.input
+            to_name_without = get_source_name(node["name"])
+            from_names = node["inputs"]
             if from_names:
                 for from_name in from_names:
-                    name = node.name
+                    name = node["name"]
                     from_name_without_ = get_source_name(from_name)
                     if "^" in from_name:
                         name = "^" + name
@@ -304,8 +304,8 @@ class Proxy:
     def _set_graph(self):
         G = Graph()
         for node in self._nodes:
-            tmp_node = Node(node.name, node.op)
-            links = self._links[node.name]
+            tmp_node = Node(node["name"], node["op"])
+            links = self._links[node["name"]]
             if links:
                 for f, t in links:
                     f_without = get_source_name(f)
@@ -313,7 +313,7 @@ class Proxy:
                         tmp_node.add_output(t)
                     else:
                         tmp_node.add_input(f)
-            tmp_node.set_attr(node.attr)
+            tmp_node.set_attr(node["attrs"])
             G.add_node(tmp_node)
         return G
 
diff --git a/dubhe-visual-server/backend/backend/component/Graph/graph_read.py b/dubhe-visual-server/backend/backend/component/Graph/graph_read.py
index 7c19732..830a34f 100644
--- a/dubhe-visual-server/backend/backend/component/Graph/graph_read.py
+++ b/dubhe-visual-server/backend/backend/component/Graph/graph_read.py
@@ -183,34 +183,8 @@ def edge_deal(node, edges_info, edges_info_num, edges_info_list, edges_control_i
                 edge_id = i + "__" + j
                 if sign == 0:
                     edges_info_temp[edge_id] = output_shapes[0]
-                    # 若该边存在多条,则以;的形式对边信息进行划分
-                    # 其中若边信息为空,则以()替代
-                    # if edge_id not in edges_info.keys():
-                    #     edges_info[edge_id] = output_shapes[0]
-                    #     edges_info_num[edge_id] = 1
-                    # else:
-                    #     if edges_info[edge_id] == '':
-                    #         edges_info[edge_id] = '()'
-                    #     if output_shapes[0] == '':
-                    #         edges_info[edge_id] = edges_info[edge_id] + ';' + '()'
-                    #     else:
-                    #         edges_info[edge_id] = edges_info[edge_id] + ';' + output_shapes[0]
-                    #     edges_info_num[edge_id] += 1
                 else:
                     edges_info_temp[edge_id] = cur2targets_edge_info[k]
-                    # 若该边存在多条,则以;的形式对边信息进行划分
-                    # 其中若边信息为空,则以()替代
-                    # if edge_id not in edges_info.keys():
-                    #     edges_info[edge_id] = cur2targets_edge_info[k]
-                    #     edges_info_num[edge_id] = 1
-                    # else:
-                    #     if edges_info[edge_id] == '':
-                    #         edges_info[edge_id] = '()'
-                    #     if cur2targets_edge_info[k] == '':
-                    #         edges_info[edge_id] = edges_info[edge_id] + ';' + '()'
-                    #     else:
-                    #         edges_info[edge_id] = edges_info[edge_id] + ';' + cur2targets_edge_info[k]
-                    #     edges_info_num[edge_id] += 1
 
                 # 构造和存储每条边的控制信息
                 # 若对于一条边既存在实边也存在控制边,则绘制为实边
@@ -342,24 +316,7 @@ def data_build(tree, graph, data, level, curr_path=None):
                     if (key != "_output_shapes") & (key != "shape"):
                         node_attr[key] = str(node_info.attr[key]).replace('\n', '')
                     elif key == "shape":
-                        # shape处理成{1,1,1}形式的字符串
-                        # 每个shape都是一个protobuf,通过.来获取子层结构
-                        raw_dim = node_info.attr[key].shape.dim
-                        raw_dim_length = len(raw_dim)
-                        new_dim = ""
-                        for j, dim in enumerate(raw_dim):
-                            str_dim = ""
-                            if dim.size == -1:
-                                str_dim = "?"
-                            else:
-                                str_dim = str(dim.size)
-                            if j == 0:
-                                new_dim = '{' + str_dim
-                            else:
-                                new_dim += ',' + str_dim
-                            if j == raw_dim_length - 1:
-                                new_dim += '}'
-                        node_attr[key] = new_dim
+                        node_attr[key] = node_info.attr[key]
                     elif key == "_output_shapes":
                         # 在_output_shapes中用一个list存储当前节点到其他节点边的维度信息,每一个是一个shape
                         shape_list = node_info.attr[key].list.shape
@@ -488,6 +445,3 @@ def get_data(graph):
     # TODO
     # 放回data
     return data
-    # f = open('data_v1.txt', 'w')
-    # f.write(str(data))
-    # f.close()
diff --git a/dubhe-visual-server/backend/backend/component/Graph/provider.py b/dubhe-visual-server/backend/backend/component/Graph/provider.py
index edba36e..9080002 100644
--- a/dubhe-visual-server/backend/backend/component/Graph/provider.py
+++ b/dubhe-visual-server/backend/backend/component/Graph/provider.py
@@ -18,8 +18,7 @@
 import json
 from utils.cache_io import CacheIO
 from utils.path_utils import get_file_path
-from .graph_read import get_data as graph_get_data
-from .s_graph_read import get_s_graph_data
+from .s_graph_read import get_s_graph_data, get_c_graph_data
 from .graph import graph_op
 from backend.api.utils import get_api_params
 
@@ -28,8 +27,8 @@ def graph_provider(file_path):
     res = CacheIO(file_path).get_cache()
     if res:
         return {
-            'net': get_s_graph_data(res) if isinstance(res, str)
-            else graph_get_data(res),
+            'net': get_s_graph_data(res) if "s_graph" in str(file_path)
+            else get_c_graph_data(res),
             'operator': graph_op
         }
     else:
diff --git a/dubhe-visual-server/backend/backend/component/Graph/s_graph_read.py b/dubhe-visual-server/backend/backend/component/Graph/s_graph_read.py
index e3e80f8..af2aa0a 100644
--- a/dubhe-visual-server/backend/backend/component/Graph/s_graph_read.py
+++ b/dubhe-visual-server/backend/backend/component/Graph/s_graph_read.py
@@ -18,18 +18,36 @@
 import json
 from backend.component.Graph.parse_json import Proxy_json
 from backend.component.Graph.graph import Node
+from backend.component.Graph.graph_read import get_data
 
 # Base_RATE = 16
 # nodes保留每个节点的单独信息,以节点全称为key进行索引
+
 nodes = {}
 # edges_info保留每条线的维度信息,以起始节点+‘_’+目标节点为key进行索引
 edges_info = {}
 
 
+class Graph:
+    def __init__(self):
+        self.nodes = []
+
+    @property
+    def node(self):
+        return self.nodes
+
+    def add_node(self, node):
+        node["name"] = node["uid"].replace("-", "/")
+        for i in range(len(node["inputs"])):
+            node["inputs"][i] = node["inputs"][i].replace("-", "/")
+        self.nodes.append(node)
+
+
 # 同根同支,以短边的尽头为分叉点
 # 同根不同支,
 # 不同根
-def diff_index_find(small_len, node_edge_list, target_edge_list, same_root, same_branch):
+def diff_index_find(small_len, node_edge_list, target_edge_list, same_root,
+                    same_branch):
     # 遍历寻找分叉点
 
     for i in range(small_len):
@@ -101,7 +119,8 @@ def edge_deal(node):
                 unique_sign = False
                 duplication_sign = False
                 if "_output_shapes" in nodes[target]["attrs"]:
-                    target_output_shapes = nodes[target]["attrs"]["_output_shapes"]
+                    target_output_shapes = nodes[target]["attrs"][
+                        "_output_shapes"]
                 else:
                     target_output_shapes = [""]
                 # 若有匹配
@@ -111,11 +130,13 @@ def edge_deal(node):
                     if duplication_sign:
                         break
                     for target_output_shape in target_output_shapes:
-                        if (output_shape == target_output_shape) & (unique_sign is False):
+                        if (output_shape == target_output_shape) & (
+                                unique_sign is False):
                             unique_sign = True
                             cur2targets_edge_info[i] = output_shape
                             break
-                        elif (output_shape == target_output_shape) & (unique_sign is True):
+                        elif (output_shape == target_output_shape) & (
+                                unique_sign is True):
                             duplication_sign = True
                             cur2targets_edge_info[i] = "{?}"
                             # candidate_list.append(target)
@@ -144,9 +165,13 @@ def edge_deal(node):
 
             # 寻找分叉点
             if node_edge_len < target_edge_len:
-                diff_index = diff_index_find(node_edge_len, node_edge_list, target_edge_list, same_root, same_branch)
+                diff_index = diff_index_find(node_edge_len, node_edge_list,
+                                             target_edge_list, same_root,
+                                             same_branch)
             else:
-                diff_index = diff_index_find(target_edge_len, node_edge_list, target_edge_list, same_root, same_branch)
+                diff_index = diff_index_find(target_edge_len, node_edge_list,
+                                             target_edge_list, same_root,
+                                             same_branch)
 
             # 构边与插入
             # 同支情况下由于展开父节点消失,故不进行边的构建
@@ -176,7 +201,7 @@ def edge_deal(node):
 # layer 节点当前所在的层级
 # targets 目标节点
 # attrs 属性
-def data_build(tree, graph, data, level, curr_path=None):
+def data_build(tree, graph, data, level, curr_path=None, Graph_=None):
     # parent用于存储父节点名称
     # curr_path用于存储当前路径,主要用于虚节点的构造上
     parent = curr_path
@@ -272,7 +297,9 @@ def data_build(tree, graph, data, level, curr_path=None):
                 node["op"] = node_info.op
                 node["layer"] = level + 1
                 node["attrs"] = node_attr
-
+                node["inputs"] = node_info.input
+                if Graph_ is not None:
+                    Graph_.add_node(node)
                 node2nodes = node.copy()
                 # nodes中node的边不重复,且仅含当前节点的信息,构建时为空,在处理后添加
                 node2nodes["targets"] = set()
@@ -282,10 +309,13 @@ def data_build(tree, graph, data, level, curr_path=None):
                 node["sub_net"] = []
                 if level == 0:
                     data.append(node)
-                    data_build(tree.child[node_name], graph, data[i], level + 1, curr_path)
+                    data_build(tree.child[node_name], graph, data[i],
+                               level + 1, curr_path, Graph_)
                 else:
                     data["sub_net"].append(node)
-                    data_build(tree.child[node_name], graph, data["sub_net"][i], level + 1, curr_path)
+                    data_build(tree.child[node_name], graph,
+                               data["sub_net"][i], level + 1, curr_path,
+                               Graph_)
 
 
 def data_search(data, level=1, build=True):
@@ -329,10 +359,11 @@ def data_search(data, level=1, build=True):
                 data_search(sub_data, level + 1, build)
 
 
-def get_s_graph_data(s_data):
+def preprocess(s_data):
     s_data = s_data.replace('\n', '')
     data = json.loads(s_data)
     res = []
+    g = Graph()
     for d in data:
         proxy = Proxy_json(d)
         tree = proxy.tree
@@ -340,12 +371,22 @@ def get_s_graph_data(s_data):
         _data = []
         level = 0
         graph = proxy.graph
-        data_build(tree, graph, _data, level)
-
+        data_build(tree, graph, _data, level, Graph_=g)
         # 边的重新构造,存入nodes中
         data_search(_data, build=True)
         # 从nodes中取出边,赋值回data中
         data_search(_data, build=False)
         if _data:
             res.append(_data)
+    return [res, g]
+
+
+def get_s_graph_data(s_data):
+    res, g = preprocess(s_data)
+    return res
+
+
+def get_c_graph_data(c_data):
+    res, g = preprocess(c_data)
+    res = get_data(g)
     return res
diff --git a/dubhe-visual-server/backend/backend/component/Projector/projector_reduction.py b/dubhe-visual-server/backend/backend/component/Projector/projector_reduction.py
index d11eede..f2abf78 100644
--- a/dubhe-visual-server/backend/backend/component/Projector/projector_reduction.py
+++ b/dubhe-visual-server/backend/backend/component/Projector/projector_reduction.py
@@ -50,7 +50,8 @@ class projector_reduction:
             raise ValueError('The dimension of the tsne method must be 2 or 3')
         _data = np.array(self.data)
         seed = np.random.RandomState(0)
-        data = bh_sne(_data, pca_d=True, d=self.dimension, perplexity=30, random_state=seed)
+        perplexity = _data.shape[0]//4 if _data.shape[0]-1 < 3*30 else 30
+        data = bh_sne(_data, pca_d=True, d=self.dimension, perplexity=perplexity, random_state=seed)
         return data.tolist()
 
 
diff --git a/dubhe-visual-server/parser_service/python_io/logfile_loader.py b/dubhe-visual-server/parser_service/python_io/logfile_loader.py
index 6032f9e..a6d78cf 100644
--- a/dubhe-visual-server/parser_service/python_io/logfile_loader.py
+++ b/dubhe-visual-server/parser_service/python_io/logfile_loader.py
@@ -17,6 +17,7 @@
 """
 import threading
 import time
+import json
 from io import BytesIO
 from pathlib import Path
 from tbparser import SummaryReader
@@ -168,12 +169,46 @@ class Trace_Thread(threading.Thread):
             self.set_redis_key(type="embedding", tag="sample_" + items.tag,
                                file_path=file_path)
 
+    def filter_graph(self, file):
+        variable_names = {}
+        graph = json.loads(file)
+        for sub_graph in graph:
+            cfg = sub_graph["config"]
+            # Iterate over a copy so layers can be removed from the original
+            cfg_copy = cfg["layers"].copy()
+            for layer in cfg_copy:
+                if layer["class_name"] == "variable":
+                    _name = layer["name"]
+                    variable_names[_name] = layer
+                    cfg["layers"].remove(layer)
+        # Second pass: remove `variable_names` entries from `inbound_nodes`
+        for sub_graph in graph:
+            cfg = sub_graph["config"]
+            for layer in cfg["layers"]:
+                in_nodes = layer["inbound_nodes"]
+                in_nodes_copy = in_nodes.copy()
+                for node in in_nodes_copy:
+                    # Remove the reference if it names a variable layer
+                    if node in variable_names:
+                        in_nodes.remove(node)
+        graph_str = json.dumps(graph)
+        return graph_str
+
     def load_model_file(self, file):
         with open(file, "r") as f:
-            _content = f.read()
-        file_path = path_parser(self.cache_path, self.runname,
-                                type="graph",
-                                tag="s_graph")
-        CacheIO(file_path).set_cache(data=_content)
+            # Structure graph content
+            _cg_content = f.read()
+        _sg_content = self.filter_graph(_cg_content)
+        # calculate_graph.json
+        sg_file_path = path_parser(self.cache_path, self.runname,
+                                   type="graph",
+                                   tag="s_graph")
+        cg_file_path = path_parser(self.cache_path, self.runname,
+                                   type="graph",
+                                   tag="c_graph")
+        CacheIO(sg_file_path).set_cache(data=_sg_content)
+        CacheIO(cg_file_path).set_cache(data=_cg_content)
         self.set_redis_key(type="graph", tag="s_graph",
-                           file_path=file_path)
+                           file_path=sg_file_path)
+        self.set_redis_key(type="graph", tag="c_graph",
+                           file_path=cg_file_path)
diff --git a/dubhe-visual-server/service_utils/utils/path_utils.py b/dubhe-visual-server/service_utils/utils/path_utils.py
index acb9918..d38d012 100644
--- a/dubhe-visual-server/service_utils/utils/path_utils.py
+++ b/dubhe-visual-server/service_utils/utils/path_utils.py
@@ -36,11 +36,12 @@ def id2logdir(uid, trainJobName):
 def get_file_path(uid, run, type, tag):
     _key = uid + '_' + run + '_' + type + '_' + tag
     try:
-        _path = Path(RedisInstance.get(_key))
+        _res = RedisInstance.get(_key)
+        _path = Path(_res)
     except TypeError:
         raise OSError('Redis key {} not found according to request '
-                      'parameters, please check the parameters'
-                      .format(_key))
+                      'parameters, please check the parameters\n _path={}'
+                      .format(_key, _res))
     return _path
 
 