Browse Source

* fix the path kernel up to length h.

v0.1
jajupmochi 6 years ago
parent
commit
ccec961d2c
1 changed file with 62 additions and 37 deletions
  1. +62
    -37
      pygraph/kernels/untilHPathKernel.py

+ 62
- 37
pygraph/kernels/untilHPathKernel.py View File

@@ -96,7 +96,10 @@ def untilhpathkernel(*args,
pool.join() pool.join()
# for g in Gn: # for g in Gn:
# find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label)
# if compute_method == 'trie':
# find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label)
# else:
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label)
## size = sys.getsizeof(all_paths) ## size = sys.getsizeof(all_paths)
## for item in all_paths: ## for item in all_paths:
@@ -112,7 +115,7 @@ def untilhpathkernel(*args,
## desc='getting paths', file=sys.stdout): ## desc='getting paths', file=sys.stdout):
## all_paths[i] = ps ## all_paths[i] = ps
## print(time.time() - ttt) ## print(time.time() - ttt)
#
if compute_method == 'trie': if compute_method == 'trie':
def init_worker(trie_toshare): def init_worker(trie_toshare):
global G_trie global G_trie
@@ -127,24 +130,20 @@ def untilhpathkernel(*args,
do_partial = partial(wrapper_uhpath_do_naive, k_func) do_partial = partial(wrapper_uhpath_do_naive, k_func)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_paths,), n_jobs=n_jobs) glbv=(all_paths,), n_jobs=n_jobs)
#
#
## # ---- direct running, normally use single CPU core. ----
## all_paths = [
## find_all_paths_until_length(
## Gn[i],
## depth,
## ds_attrs,
## node_label=node_label,
## edge_label=edge_label) for i in tqdm(
## range(0, len(Gn)), desc='getting paths', file=sys.stdout)
## ]
##
# if compute_method == 'trie':
# # build generalized suffix tree of sets of paths for each graph.
## all_gstree = [paths2GSuffixTree(all_paths[i]) for i in tqdm(
## range(0, len(Gn)), desc='getting generalized suffix trees', file=sys.stdout)]
# # ---- direct running, normally use single CPU core. ----
# all_paths = [
# find_all_paths_until_length(
# Gn[i],
# depth,
# ds_attrs,
# node_label=node_label,
# edge_label=edge_label) for i in tqdm(
# range(0, len(Gn)), desc='getting paths', file=sys.stdout)
# ]
# #
# if compute_method == 'trie':
# pbar = tqdm( # pbar = tqdm(
# total=((len(Gn) + 1) * len(Gn) / 2), # total=((len(Gn) + 1) * len(Gn) / 2),
# desc='calculating kernels', # desc='calculating kernels',
@@ -155,18 +154,18 @@ def untilhpathkernel(*args,
# all_paths[j], k_func) # all_paths[j], k_func)
# Kmatrix[j][i] = Kmatrix[i][j] # Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1) # pbar.update(1)
## else:
## pbar = tqdm(
## total=((len(Gn) + 1) * len(Gn) / 2),
## desc='calculating kernels',
## file=sys.stdout)
## for i in range(0, len(Gn)):
## for j in range(i, len(Gn)):
## Kmatrix[i][j] = _untilhpathkernel_do_naive(all_paths[i], all_paths[j],
## k_func)
## Kmatrix[j][i] = Kmatrix[i][j]
## pbar.update(1)
#
# else:
# pbar = tqdm(
# total=((len(Gn) + 1) * len(Gn) / 2),
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# Kmatrix[i][j] = _untilhpathkernel_do_naive(all_paths[i], all_paths[j],
# k_func)
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
run_time = time.time() - start_time run_time = time.time() - start_time
print( print(
"\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---" "\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---"
@@ -197,8 +196,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func):
# search is applied. # search is applied.
def traverseTrie1t(root, trie2, setlist, pcurrent=[]): def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
for key, node in root['children'].items(): for key, node in root['children'].items():
if node['isEndOfWord']:
pcurrent.append(key)
pcurrent.append(key)
if node['isEndOfWord']:
setlist[1] += 1 setlist[1] += 1
count2 = trie2.searchWord(pcurrent) count2 = trie2.searchWord(pcurrent)
if count2 != 0: if count2 != 0:
@@ -215,8 +214,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func):
# graph1. Depth-first search is applied. # graph1. Depth-first search is applied.
def traverseTrie2t(root, trie1, setlist, pcurrent=[]): def traverseTrie2t(root, trie1, setlist, pcurrent=[]):
for key, node in root['children'].items(): for key, node in root['children'].items():
pcurrent.append(key)
if node['isEndOfWord']: if node['isEndOfWord']:
pcurrent.append(key)
# print(node['count']) # print(node['count'])
count1 = trie1.searchWord(pcurrent) count1 = trie1.searchWord(pcurrent)
if count1 == 0: if count1 == 0:
@@ -242,8 +241,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func):
# search is applied. # search is applied.
def traverseTrie1m(root, trie2, sumlist, pcurrent=[]): def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
for key, node in root['children'].items(): for key, node in root['children'].items():
pcurrent.append(key)
if node['isEndOfWord']: if node['isEndOfWord']:
pcurrent.append(key)
# print(node['count']) # print(node['count'])
count1 = node['count'] count1 = node['count']
count2 = trie2.searchWord(pcurrent) count2 = trie2.searchWord(pcurrent)
@@ -260,8 +259,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func):
# graph1. Depth-first search is applied. # graph1. Depth-first search is applied.
def traverseTrie2m(root, trie1, sumlist, pcurrent=[]): def traverseTrie2m(root, trie1, sumlist, pcurrent=[]):
for key, node in root['children'].items(): for key, node in root['children'].items():
if node['isEndOfWord']:
pcurrent.append(key)
pcurrent.append(key)
if node['isEndOfWord']:
# print(node['count']) # print(node['count'])
count1 = trie1.searchWord(pcurrent) count1 = trie1.searchWord(pcurrent)
if count1 == 0: if count1 == 0:
@@ -405,6 +404,7 @@ def find_all_paths_until_length(G,
# all_paths.extend(new_paths) # all_paths.extend(new_paths)


# consider labels # consider labels
# print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label))
return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label) return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)
@@ -479,7 +479,32 @@ def find_all_path_as_trie(G,
for n in G.nodes: for n in G.nodes:
traverseGraph(n, ptrie, length, G, ds_attrs, node_label, edge_label, traverseGraph(n, ptrie, length, G, ds_attrs, node_label, edge_label,
pcurrent=[n]) pcurrent=[n])
# def traverseGraph(root, all_paths, length, G, ds_attrs, node_label, edge_label,
# pcurrent=[]):
# if len(pcurrent) < length + 1:
# for neighbor in G[root]:
# if neighbor not in pcurrent:
# pcurrent.append(neighbor)
# plstr = paths2labelseqs([pcurrent], G, ds_attrs,
# node_label, edge_label)
# all_paths.append(pcurrent[:])
# traverseGraph(neighbor, all_paths, length, G, ds_attrs,
# node_label, edge_label, pcurrent)
# del pcurrent[-1]
#
#
# path_l = [[n] for n in G.nodes] # paths of length l
# all_paths = path_l[:]
# path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label)
## for p in path_l_str:
## ptrie.insertWord(p)
# for n in G.nodes:
# traverseGraph(n, all_paths, length, G, ds_attrs, node_label, edge_label,
# pcurrent=[n])
# print(ptrie.root)
return ptrie return ptrie






Loading…
Cancel
Save