diff --git a/pygraph/kernels/untilHPathKernel.py b/pygraph/kernels/untilHPathKernel.py
index 50e5e19..8b80f99 100644
--- a/pygraph/kernels/untilHPathKernel.py
+++ b/pygraph/kernels/untilHPathKernel.py
@@ -96,7 +96,10 @@ def untilhpathkernel(*args,
     pool.join()
 
 #    for g in Gn:
-#        find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label)
+#        if compute_method == 'trie':
+#            find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label)
+#        else:
+#            find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label)
 
 ##        size = sys.getsizeof(all_paths)
 ##        for item in all_paths:
@@ -112,7 +115,7 @@ def untilhpathkernel(*args,
 ##            desc='getting paths', file=sys.stdout):
 ##        all_paths[i] = ps
 ##    print(time.time() - ttt)
-#
+
     if compute_method == 'trie':
         def init_worker(trie_toshare):
             global G_trie
@@ -127,24 +130,20 @@ def untilhpathkernel(*args,
         do_partial = partial(wrapper_uhpath_do_naive, k_func)
         parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, glbv=(all_paths,), n_jobs=n_jobs)
-#
-#
-##    # ---- direct running, normally use single CPU core. ----
-##    all_paths = [
-##        find_all_paths_until_length(
-##            Gn[i],
-##            depth,
-##            ds_attrs,
-##            node_label=node_label,
-##            edge_label=edge_label) for i in tqdm(
-##                range(0, len(Gn)), desc='getting paths', file=sys.stdout)
-##    ]
-##
-#    if compute_method == 'trie':
-#        # build generalized suffix tree of sets of paths for each graph.
-##        all_gstree = [paths2GSuffixTree(all_paths[i]) for i in tqdm(
-##            range(0, len(Gn)), desc='getting generalized suffix trees', file=sys.stdout)]
+
+
+#    # ---- direct running, normally use single CPU core. ----
+#    all_paths = [
+#        find_all_paths_until_length(
+#            Gn[i],
+#            depth,
+#            ds_attrs,
+#            node_label=node_label,
+#            edge_label=edge_label) for i in tqdm(
+#                range(0, len(Gn)), desc='getting paths', file=sys.stdout)
+#    ]
 #
+#    if compute_method == 'trie':
 #        pbar = tqdm(
 #            total=((len(Gn) + 1) * len(Gn) / 2),
 #            desc='calculating kernels',
 #            file=sys.stdout)
@@ -155,18 +154,18 @@ def untilhpathkernel(*args,
 #                    all_paths[j], k_func)
 #                Kmatrix[j][i] = Kmatrix[i][j]
 #                pbar.update(1)
-##    else:
-##        pbar = tqdm(
-##            total=((len(Gn) + 1) * len(Gn) / 2),
-##            desc='calculating kernels',
-##            file=sys.stdout)
-##        for i in range(0, len(Gn)):
-##            for j in range(i, len(Gn)):
-##                Kmatrix[i][j] = _untilhpathkernel_do_naive(all_paths[i], all_paths[j],
-##                                                           k_func)
-##                Kmatrix[j][i] = Kmatrix[i][j]
-##                pbar.update(1)
-#
+#    else:
+#        pbar = tqdm(
+#            total=((len(Gn) + 1) * len(Gn) / 2),
+#            desc='calculating kernels',
+#            file=sys.stdout)
+#        for i in range(0, len(Gn)):
+#            for j in range(i, len(Gn)):
+#                Kmatrix[i][j] = _untilhpathkernel_do_naive(all_paths[i], all_paths[j],
+#                                                           k_func)
+#                Kmatrix[j][i] = Kmatrix[i][j]
+#                pbar.update(1)
+
     run_time = time.time() - start_time
     print(
         "\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---"
@@ -197,8 +196,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func):
     # search is applied.
     def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
         for key, node in root['children'].items():
-            if node['isEndOfWord']:
-                pcurrent.append(key)
+            pcurrent.append(key)
+            if node['isEndOfWord']:
                 setlist[1] += 1
                 count2 = trie2.searchWord(pcurrent)
                 if count2 != 0:
@@ -215,8 +214,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func):
     # graph1. Deep-first search is applied.
     def traverseTrie2t(root, trie1, setlist, pcurrent=[]):
         for key, node in root['children'].items():
+            pcurrent.append(key)
             if node['isEndOfWord']:
-                pcurrent.append(key)
 #                print(node['count'])
                 count1 = trie1.searchWord(pcurrent)
                 if count1 == 0:
@@ -242,8 +241,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func):
     # search is applied.
     def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
         for key, node in root['children'].items():
+            pcurrent.append(key)
             if node['isEndOfWord']:
-                pcurrent.append(key)
 #                print(node['count'])
                 count1 = node['count']
                 count2 = trie2.searchWord(pcurrent)
@@ -260,8 +259,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func):
     # graph1. Deep-first search is applied.
     def traverseTrie2m(root, trie1, sumlist, pcurrent=[]):
         for key, node in root['children'].items():
-            if node['isEndOfWord']:
-                pcurrent.append(key)
+            pcurrent.append(key)
+            if node['isEndOfWord']:
 #                print(node['count'])
                 count1 = trie1.searchWord(pcurrent)
                 if count1 == 0:
@@ -405,6 +404,7 @@ def find_all_paths_until_length(G,
 #        all_paths.extend(new_paths)
 
     # consider labels
+#    print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label))
     return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)
 
@@ -479,7 +479,32 @@ def find_all_path_as_trie(G,
     for n in G.nodes:
         traverseGraph(n, ptrie, length, G, ds_attrs, node_label, edge_label,
                       pcurrent=[n])
+
+
+# def traverseGraph(root, all_paths, length, G, ds_attrs, node_label, edge_label,
+#                   pcurrent=[]):
+#     if len(pcurrent) < length + 1:
+#         for neighbor in G[root]:
+#             if neighbor not in pcurrent:
+#                 pcurrent.append(neighbor)
+#                 plstr = paths2labelseqs([pcurrent], G, ds_attrs,
+#                                         node_label, edge_label)
+#                 all_paths.append(pcurrent[:])
+#                 traverseGraph(neighbor, all_paths, length, G, ds_attrs,
+#                               node_label, edge_label, pcurrent)
+#                 del pcurrent[-1]
+#
+#
+# path_l = [[n] for n in G.nodes]  # paths of length l
+# all_paths = path_l[:]
+# path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label)
+## for p in path_l_str:
+##     ptrie.insertWord(p)
+# for n in G.nodes:
+#     traverseGraph(n, all_paths, length, G, ds_attrs, node_label, edge_label,
+#                   pcurrent=[n])
+# print(ptrie.root)
 
     return ptrie
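Note on the recurring change: in all four traversal helpers (`traverseTrie1t`, `traverseTrie2t`, `traverseTrie1m`, `traverseTrie2m`) the patch moves `pcurrent.append(key)` out of the `if node['isEndOfWord']:` guard, so the current key is added to the path prefix for every child visited during the depth-first walk, not only at nodes that terminate a stored path. Otherwise interior (non-terminal) labels never enter the prefix that deeper recursion and `searchWord` look up. The sketch below illustrates the corrected ordering on a minimal stand-in trie; the nested-dict layout (`children`, `isEndOfWord`, `count`) and the helper names are assumptions made for illustration, not the project's actual `Trie` class.

```python
# Minimal stand-in trie (assumed layout mirroring untilHPathKernel.py:
# each node is a dict with 'children', 'isEndOfWord' and 'count').
def make_node():
    return {'children': {}, 'isEndOfWord': False, 'count': 0}


def insert(root, word):
    node = root
    for key in word:
        node = node['children'].setdefault(key, make_node())
    node['isEndOfWord'] = True
    node['count'] += 1


def collect_paths(root, pcurrent=None, found=None):
    """Depth-first traversal in the corrected order: extend pcurrent for
    every child, then test isEndOfWord, then recurse, then backtrack."""
    if pcurrent is None:
        pcurrent = []
    if found is None:
        found = []
    for key, node in root['children'].items():
        pcurrent.append(key)             # always extend the prefix first
        if node['isEndOfWord']:          # record complete stored paths
            found.append(list(pcurrent))
        collect_paths(node, pcurrent, found)
        del pcurrent[-1]                 # backtrack
    return found


if __name__ == '__main__':
    trie = make_node()
    for p in (['A', 'B'], ['A', 'B', 'C'], ['A', 'D']):
        insert(trie, p)
    # With append() inside the isEndOfWord guard (the pre-patch order),
    # the prefix handed to deeper calls would miss interior labels such
    # as 'B' in ['A', 'B', 'C'].
    print(collect_paths(trie))
```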