|
@@ -96,7 +96,10 @@ def untilhpathkernel(*args, |
|
|
pool.join() |
|
|
pool.join() |
|
|
|
|
|
|
|
|
# for g in Gn: |
|
|
# for g in Gn: |
|
|
# find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) |
|
|
|
|
|
|
|
|
# if compute_method == 'trie': |
|
|
|
|
|
# find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) |
|
|
|
|
|
# else: |
|
|
|
|
|
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label) |
|
|
|
|
|
|
|
|
## size = sys.getsizeof(all_paths) |
|
|
## size = sys.getsizeof(all_paths) |
|
|
## for item in all_paths: |
|
|
## for item in all_paths: |
|
@@ -112,7 +115,7 @@ def untilhpathkernel(*args, |
|
|
## desc='getting paths', file=sys.stdout): |
|
|
## desc='getting paths', file=sys.stdout): |
|
|
## all_paths[i] = ps |
|
|
## all_paths[i] = ps |
|
|
## print(time.time() - ttt) |
|
|
## print(time.time() - ttt) |
|
|
# |
|
|
|
|
|
|
|
|
|
|
|
if compute_method == 'trie': |
|
|
if compute_method == 'trie': |
|
|
def init_worker(trie_toshare): |
|
|
def init_worker(trie_toshare): |
|
|
global G_trie |
|
|
global G_trie |
|
@@ -127,24 +130,20 @@ def untilhpathkernel(*args, |
|
|
do_partial = partial(wrapper_uhpath_do_naive, k_func) |
|
|
do_partial = partial(wrapper_uhpath_do_naive, k_func) |
|
|
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, |
|
|
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, |
|
|
glbv=(all_paths,), n_jobs=n_jobs) |
|
|
glbv=(all_paths,), n_jobs=n_jobs) |
|
|
# |
|
|
|
|
|
# |
|
|
|
|
|
## # ---- direct running, normally use single CPU core. ---- |
|
|
|
|
|
## all_paths = [ |
|
|
|
|
|
## find_all_paths_until_length( |
|
|
|
|
|
## Gn[i], |
|
|
|
|
|
## depth, |
|
|
|
|
|
## ds_attrs, |
|
|
|
|
|
## node_label=node_label, |
|
|
|
|
|
## edge_label=edge_label) for i in tqdm( |
|
|
|
|
|
## range(0, len(Gn)), desc='getting paths', file=sys.stdout) |
|
|
|
|
|
## ] |
|
|
|
|
|
## |
|
|
|
|
|
# if compute_method == 'trie': |
|
|
|
|
|
# # build generalized suffix tree of sets of paths for each graph. |
|
|
|
|
|
## all_gstree = [paths2GSuffixTree(all_paths[i]) for i in tqdm( |
|
|
|
|
|
## range(0, len(Gn)), desc='getting generalized suffix trees', file=sys.stdout)] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# # ---- direct running, normally use single CPU core. ---- |
|
|
|
|
|
# all_paths = [ |
|
|
|
|
|
# find_all_paths_until_length( |
|
|
|
|
|
# Gn[i], |
|
|
|
|
|
# depth, |
|
|
|
|
|
# ds_attrs, |
|
|
|
|
|
# node_label=node_label, |
|
|
|
|
|
# edge_label=edge_label) for i in tqdm( |
|
|
|
|
|
# range(0, len(Gn)), desc='getting paths', file=sys.stdout) |
|
|
|
|
|
# ] |
|
|
# |
|
|
# |
|
|
|
|
|
# if compute_method == 'trie': |
|
|
# pbar = tqdm( |
|
|
# pbar = tqdm( |
|
|
# total=((len(Gn) + 1) * len(Gn) / 2), |
|
|
# total=((len(Gn) + 1) * len(Gn) / 2), |
|
|
# desc='calculating kernels', |
|
|
# desc='calculating kernels', |
|
@@ -155,18 +154,18 @@ def untilhpathkernel(*args, |
|
|
# all_paths[j], k_func) |
|
|
# all_paths[j], k_func) |
|
|
# Kmatrix[j][i] = Kmatrix[i][j] |
|
|
# Kmatrix[j][i] = Kmatrix[i][j] |
|
|
# pbar.update(1) |
|
|
# pbar.update(1) |
|
|
## else: |
|
|
|
|
|
## pbar = tqdm( |
|
|
|
|
|
## total=((len(Gn) + 1) * len(Gn) / 2), |
|
|
|
|
|
## desc='calculating kernels', |
|
|
|
|
|
## file=sys.stdout) |
|
|
|
|
|
## for i in range(0, len(Gn)): |
|
|
|
|
|
## for j in range(i, len(Gn)): |
|
|
|
|
|
## Kmatrix[i][j] = _untilhpathkernel_do_naive(all_paths[i], all_paths[j], |
|
|
|
|
|
## k_func) |
|
|
|
|
|
## Kmatrix[j][i] = Kmatrix[i][j] |
|
|
|
|
|
## pbar.update(1) |
|
|
|
|
|
# |
|
|
|
|
|
|
|
|
# else: |
|
|
|
|
|
# pbar = tqdm( |
|
|
|
|
|
# total=((len(Gn) + 1) * len(Gn) / 2), |
|
|
|
|
|
# desc='calculating kernels', |
|
|
|
|
|
# file=sys.stdout) |
|
|
|
|
|
# for i in range(0, len(Gn)): |
|
|
|
|
|
# for j in range(i, len(Gn)): |
|
|
|
|
|
# Kmatrix[i][j] = _untilhpathkernel_do_naive(all_paths[i], all_paths[j], |
|
|
|
|
|
# k_func) |
|
|
|
|
|
# Kmatrix[j][i] = Kmatrix[i][j] |
|
|
|
|
|
# pbar.update(1) |
|
|
|
|
|
|
|
|
run_time = time.time() - start_time |
|
|
run_time = time.time() - start_time |
|
|
print( |
|
|
print( |
|
|
"\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---" |
|
|
"\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---" |
|
@@ -197,8 +196,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func): |
|
|
# search is applied. |
|
|
# search is applied. |
|
|
def traverseTrie1t(root, trie2, setlist, pcurrent=[]): |
|
|
def traverseTrie1t(root, trie2, setlist, pcurrent=[]): |
|
|
for key, node in root['children'].items(): |
|
|
for key, node in root['children'].items(): |
|
|
if node['isEndOfWord']: |
|
|
|
|
|
pcurrent.append(key) |
|
|
|
|
|
|
|
|
pcurrent.append(key) |
|
|
|
|
|
if node['isEndOfWord']: |
|
|
setlist[1] += 1 |
|
|
setlist[1] += 1 |
|
|
count2 = trie2.searchWord(pcurrent) |
|
|
count2 = trie2.searchWord(pcurrent) |
|
|
if count2 != 0: |
|
|
if count2 != 0: |
|
@@ -215,8 +214,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func): |
|
|
# graph1. Depth-first search is applied. |
|
|
# graph1. Depth-first search is applied. |
|
|
def traverseTrie2t(root, trie1, setlist, pcurrent=[]): |
|
|
def traverseTrie2t(root, trie1, setlist, pcurrent=[]): |
|
|
for key, node in root['children'].items(): |
|
|
for key, node in root['children'].items(): |
|
|
|
|
|
pcurrent.append(key) |
|
|
if node['isEndOfWord']: |
|
|
if node['isEndOfWord']: |
|
|
pcurrent.append(key) |
|
|
|
|
|
# print(node['count']) |
|
|
# print(node['count']) |
|
|
count1 = trie1.searchWord(pcurrent) |
|
|
count1 = trie1.searchWord(pcurrent) |
|
|
if count1 == 0: |
|
|
if count1 == 0: |
|
@@ -242,8 +241,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func): |
|
|
# search is applied. |
|
|
# search is applied. |
|
|
def traverseTrie1m(root, trie2, sumlist, pcurrent=[]): |
|
|
def traverseTrie1m(root, trie2, sumlist, pcurrent=[]): |
|
|
for key, node in root['children'].items(): |
|
|
for key, node in root['children'].items(): |
|
|
|
|
|
pcurrent.append(key) |
|
|
if node['isEndOfWord']: |
|
|
if node['isEndOfWord']: |
|
|
pcurrent.append(key) |
|
|
|
|
|
# print(node['count']) |
|
|
# print(node['count']) |
|
|
count1 = node['count'] |
|
|
count1 = node['count'] |
|
|
count2 = trie2.searchWord(pcurrent) |
|
|
count2 = trie2.searchWord(pcurrent) |
|
@@ -260,8 +259,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func): |
|
|
# graph1. Depth-first search is applied. |
|
|
# graph1. Depth-first search is applied. |
|
|
def traverseTrie2m(root, trie1, sumlist, pcurrent=[]): |
|
|
def traverseTrie2m(root, trie1, sumlist, pcurrent=[]): |
|
|
for key, node in root['children'].items(): |
|
|
for key, node in root['children'].items(): |
|
|
if node['isEndOfWord']: |
|
|
|
|
|
pcurrent.append(key) |
|
|
|
|
|
|
|
|
pcurrent.append(key) |
|
|
|
|
|
if node['isEndOfWord']: |
|
|
# print(node['count']) |
|
|
# print(node['count']) |
|
|
count1 = trie1.searchWord(pcurrent) |
|
|
count1 = trie1.searchWord(pcurrent) |
|
|
if count1 == 0: |
|
|
if count1 == 0: |
|
@@ -405,6 +404,7 @@ def find_all_paths_until_length(G, |
|
|
# all_paths.extend(new_paths) |
|
|
# all_paths.extend(new_paths) |
|
|
|
|
|
|
|
|
# consider labels |
|
|
# consider labels |
|
|
|
|
|
# print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)) |
|
|
return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label) |
|
|
return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label) |
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -479,7 +479,32 @@ def find_all_path_as_trie(G, |
|
|
for n in G.nodes: |
|
|
for n in G.nodes: |
|
|
traverseGraph(n, ptrie, length, G, ds_attrs, node_label, edge_label, |
|
|
traverseGraph(n, ptrie, length, G, ds_attrs, node_label, edge_label, |
|
|
pcurrent=[n]) |
|
|
pcurrent=[n]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def traverseGraph(root, all_paths, length, G, ds_attrs, node_label, edge_label, |
|
|
|
|
|
# pcurrent=[]): |
|
|
|
|
|
# if len(pcurrent) < length + 1: |
|
|
|
|
|
# for neighbor in G[root]: |
|
|
|
|
|
# if neighbor not in pcurrent: |
|
|
|
|
|
# pcurrent.append(neighbor) |
|
|
|
|
|
# plstr = paths2labelseqs([pcurrent], G, ds_attrs, |
|
|
|
|
|
# node_label, edge_label) |
|
|
|
|
|
# all_paths.append(pcurrent[:]) |
|
|
|
|
|
# traverseGraph(neighbor, all_paths, length, G, ds_attrs, |
|
|
|
|
|
# node_label, edge_label, pcurrent) |
|
|
|
|
|
# del pcurrent[-1] |
|
|
|
|
|
# |
|
|
|
|
|
# |
|
|
|
|
|
# path_l = [[n] for n in G.nodes] # paths of length l |
|
|
|
|
|
# all_paths = path_l[:] |
|
|
|
|
|
# path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label) |
|
|
|
|
|
## for p in path_l_str: |
|
|
|
|
|
## ptrie.insertWord(p) |
|
|
|
|
|
# for n in G.nodes: |
|
|
|
|
|
# traverseGraph(n, all_paths, length, G, ds_attrs, node_label, edge_label, |
|
|
|
|
|
# pcurrent=[n]) |
|
|
|
|
|
|
|
|
|
|
|
# print(ptrie.root) |
|
|
return ptrie |
|
|
return ptrie |
|
|
|
|
|
|
|
|
|
|
|
|
|
|