diff --git a/gklearn/utils/utils.py b/gklearn/utils/utils.py
index 1a991bb..fca19dd 100644
--- a/gklearn/utils/utils.py
+++ b/gklearn/utils/utils.py
@@ -101,7 +101,7 @@ def get_shortest_paths(G, weight, directed):
 	# each edge walk is counted twice, starting from both its extreme nodes.
 	if not directed:
 		sp += [sptemp[::-1] for sptemp in spltemp]
-		
+
 	# add single nodes as length 0 paths.
 	sp += [[n] for n in G.nodes()]
 	return sp
@@ -233,7 +233,7 @@ def direct_product_graph(G1, G2, node_labels, edge_labels):
 		A list of node attributes used as labels.
 	edge_labels : list
 		A list of edge attributes used as labels.
-		
+
 	Return
 	------
 	gt : NetworkX graph
@@ -287,9 +287,9 @@ def direct_product_graph(G1, G2, node_labels, edge_labels):
 
 
 def graph_deepcopy(G):
-	"""Deep copy a graph, including deep copy of all nodes, edges and 
+	"""Deep copy a graph, including deep copy of all nodes, edges and
 	attributes of the graph, nodes and edges.
-	
+
 	Note
 	----
 	It is the same as the NetworkX function graph.copy(), as far as I know.
@@ -302,28 +302,28 @@ def graph_deepcopy(G):
 		G_copy = nx.DiGraph(**labels)
 	else:
 		G_copy = nx.Graph(**labels)
-		
-	# add nodes		
+
+	# add nodes
 	for nd, attrs in G.nodes(data=True):
 		labels = {}
 		for k, v in attrs.items():
 			labels[k] = deepcopy(v)
 		G_copy.add_node(nd, **labels)
-		
+
 	# add edges.
 	for nd1, nd2, attrs in G.edges(data=True):
 		labels = {}
 		for k, v in attrs.items():
 			labels[k] = deepcopy(v)
 		G_copy.add_edge(nd1, nd2, **labels)
-	
+
 	return G_copy
 
 
 def graph_isIdentical(G1, G2):
 	"""Check if two graphs are identical, including: same nodes, edges, node
 	labels/attributes, edge labels/attributes.
-	
+
 	Notes
 	-----
 	1. The type of graphs has to be the same.
@@ -341,7 +341,7 @@
 	if not elist1 == elist2:
 		return False
 	# check graph attributes.
-	
+
 	return True
 
 
@@ -363,7 +363,9 @@
 	return el
 
 
-def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}):
+def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}, **kwargs):
+	if len(kwargs) != 0:
+		kernel_options = kwargs
 	if name == 'Marginalized':
 		from gklearn.kernels import Marginalized
 		graph_kernel = Marginalized(node_labels=node_labels,
@@ -379,7 +381,7 @@
 	elif name == 'StructuralSP':
 		from gklearn.kernels import StructuralSP
 		graph_kernel = StructuralSP(node_labels=node_labels,
-								  edge_labels=edge_labels, 
+								  edge_labels=edge_labels,
 								  node_attrs=node_attrs,
 								  edge_attrs=edge_attrs,
 								  ds_infos=ds_infos,
@@ -417,7 +419,7 @@
 def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False):
 	import os
 	from gklearn.utils import Dataset, split_dataset_by_target
-	
+
 	# 1. get dataset.
 	print('1. getting dataset...')
 	dataset_all = Dataset()
@@ -427,20 +429,20 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d
 		dataset_all.remove_labels(**irrelevant_labels)
 	# dataset_all.cut_graphs(range(0, 10))
 	datasets = split_dataset_by_target(dataset_all)
-	
+
 	gram_matrix_unnorm_list = []
 	run_time_list = []
-	
+
 	print('start generating preimage for each class of target...')
 	for idx, dataset in enumerate(datasets):
 		target = dataset.targets[0]
 		print('\ntarget =', target, '\n')
-		
+
 		# 2. initialize graph kernel.
 		print('2. initializing graph kernel and setting parameters...')
-		graph_kernel = get_graph_kernel_by_name(kernel_options['name'], 
+		graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
 								  node_labels=dataset.node_labels,
-								  edge_labels=dataset.edge_labels, 
+								  edge_labels=dataset.edge_labels,
 								  node_attrs=dataset.node_attrs,
 								  edge_attrs=dataset.edge_attrs,
 								  ds_infos=dataset.get_dataset_infos(keys=['directed']),
@@ -450,24 +452,24 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d
 		print('3. computing gram matrix...')
 		gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
 		gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm
-		
+
 		gram_matrix_unnorm_list.append(gram_matrix_unnorm)
 		run_time_list.append(run_time)
-		
+
 	# 4. save results.
 	print()
 	print('4. saving results...')
 	if save_results:
 		os.makedirs(dir_save, exist_ok=True)
-		np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)	
+		np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)
+
+	print('\ncomplete.')
+
 
-	print('\ncomplete.')
-	
-	
 def find_paths(G, source_node, length):
-	"""Find all paths with a certain length those start from a source node. 
+	"""Find all paths with a certain length that start from a source node.
 	A recursive depth first search is applied.
-	
+
 	Parameters
 	----------
 	G : NetworkX graphs
@@ -476,7 +478,7 @@
 		The number of the node from where all paths start.
 	length : integer
 		The length of paths.
-		
+
 	Return
 	------
 	path : list of list
@@ -492,14 +494,14 @@
 def find_all_paths(G, length, is_directed):
 	"""Find all paths with a certain length in a graph. A recursive depth first
 	search is applied.
-	
+
 	Parameters
 	----------
 	G : NetworkX graphs
 		The graph in which paths are searched.
 	length : integer
 		The length of paths.
-	
+
 	Return
 	------
 	path : list of list
@@ -508,18 +510,18 @@
 	all_paths = []
 	for node in G:
 		all_paths.extend(find_paths(G, node, length))
-	
+
 	if not is_directed:
-		# For each path, two presentations are retrieved from its two extremities. 
+		# For each path, two representations are retrieved from its two extremities.
 		# Remove one of them.
-		all_paths_r = [path[::-1] for path in all_paths] 
+		all_paths_r = [path[::-1] for path in all_paths]
 		for idx, path in enumerate(all_paths[:-1]):
 			for path2 in all_paths_r[idx+1::]:
 				if path == path2:
 					all_paths[idx] = []
 					break
 		all_paths = list(filter(lambda a: a != [], all_paths))
-	
+
 	return all_paths
 
 
@@ -535,8 +537,8 @@ def get_mlti_dim_edge_attrs(G, attr_names):
 	for ed, attrs in G.edges(data=True):
 		attributes.append(tuple(attrs[aname] for aname in attr_names))
 	return attributes
-	
-	
+
+
 def normalize_gram_matrix(gram_matrix):
 	diag = gram_matrix.diagonal().copy()
 	for i in range(len(gram_matrix)):
@@ -544,8 +546,8 @@
 			gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
 			gram_matrix[j][i] = gram_matrix[i][j]
 	return gram_matrix
-	
-	
+
+
 def compute_distance_matrix(gram_matrix):
 	dis_mat = np.empty((len(gram_matrix), len(gram_matrix)))
 	for i in range(len(gram_matrix)):
@@ -573,9 +575,9 @@ def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]):
 	g1, g2 : NetworkX graph
 		The kernels bewteen pairs of vertices in these two graphs are computed.
 	node_kernels : dict
-		A dictionary of kernel functions for nodes, including 3 items: 'symb' 
-		for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix' 
-		for both labels. The first 2 functions take two node labels as 
+		A dictionary of kernel functions for nodes, including 3 items: 'symb'
+		for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
+		for both labels. The first 2 functions take two node labels as
 		parameters, and the 'mix' function takes 4 parameters, a symbolic and a
 		non-symbolic label for each the two nodes. Each label is in form of 2-D
 		dimension array (n_samples, n_features). Each function returns a number
@@ -590,18 +592,18 @@
 	-------
 	vk_dict : dict
 		Vertex kernels keyed by vertices.
-	
+
 	Notes
 	-----
-	This function is used by ``gklearn.kernels.FixedPoint'' and 
+	This function is used by ``gklearn.kernels.FixedPoint'' and
 	``gklearn.kernels.StructuralSP''. The method is borrowed from FCSP [1].
 
 	References
 	----------
-	.. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang. 
-	Parallelization of shortest path graph kernels on multi-core cpus and gpus. 
-	Proceedings of the Programmability Issues for Heterogeneous Multicores 
-	(MultiProg), Vienna, Austria, 2014. 
+	.. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang.
+	Parallelization of shortest path graph kernels on multi-core cpus and gpus.
+	Proceedings of the Programmability Issues for Heterogeneous Multicores
+	(MultiProg), Vienna, Austria, 2014.
 	"""
 	vk_dict = {}  # shortest path matrices dict
 	if len(node_labels) > 0:
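
Review note (not part of the patch) on the **kwargs fallback in get_graph_kernel_by_name: because kernel_options = kwargs replaces the dict wholesale, a stray keyword argument silently discards an explicitly passed kernel_options rather than merging with it. A minimal sketch of the two call styles the patch makes equivalent; the Marginalized option names (p_quit, n_iteration, remove_totters) and the label names below are illustrative and should be checked against the installed gklearn version:

    from gklearn.utils.utils import get_graph_kernel_by_name

    opts = {'p_quit': 0.5, 'n_iteration': 20, 'remove_totters': False}

    # Old style: options passed through the kernel_options keyword.
    gk1 = get_graph_kernel_by_name('Marginalized',
                                   node_labels=['atom'],
                                   edge_labels=['bond_type'],
                                   ds_infos={'directed': False},
                                   kernel_options=opts)

    # New style enabled by this patch: trailing keyword arguments are
    # collected into **kwargs and then used as kernel_options.
    gk2 = get_graph_kernel_by_name('Marginalized',
                                   node_labels=['atom'],
                                   edge_labels=['bond_type'],
                                   ds_infos={'directed': False},
                                   **opts)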
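
The comment corrected in find_all_paths ("two representations ... remove one of them") is easiest to see on a tiny graph. A sketch, assuming the module layout shown in this diff (both path helpers live in gklearn.utils.utils):

    import networkx as nx
    from gklearn.utils.utils import find_all_paths

    # Undirected path graph with edges 0-1 and 1-2. The DFS collects every
    # length-1 path from every start node: [0, 1], [1, 0], [1, 2], [2, 1].
    # Each undirected path is thus found once from each endpoint, and the
    # dedup step keeps a single representative per path.
    G = nx.path_graph(3)
    print(find_all_paths(G, 1, is_directed=False))
    # expected: one representative per edge, e.g. [[1, 0], [2, 1]]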
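
One side note on the unchanged context around the last hunks: normalize_gram_matrix divides entry (i, j) by the square root of the product of the i-th and j-th diagonal entries, in place, with a double loop. The same math has a short vectorized form; an untested sketch (unlike the original, it returns a new array instead of mutating its argument):

    import numpy as np

    def normalize_gram_matrix_vec(gram_matrix):
        # K[i, j] / sqrt(K[i, i] * K[j, j]), same math as the loop version.
        d = np.sqrt(gram_matrix.diagonal())
        return gram_matrix / np.outer(d, d)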