Browse Source

New translations structural_sp.py (Chinese Simplified)

l10n_v0.2.x
linlin 4 years ago
parent
commit
4e6891e1d4
1 changed files with 62 additions and 95 deletions
  1. +62
    -95
      lang/zh/gklearn/kernels/structural_sp.py

+ 62
- 95
lang/zh/gklearn/kernels/structural_sp.py View File

@@ -18,7 +18,7 @@ from tqdm import tqdm
# import networkx as nx # import networkx as nx
import numpy as np import numpy as np
from gklearn.utils.parallel import parallel_gm, parallel_me from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import get_shortest_paths
from gklearn.utils.utils import get_shortest_paths, compute_vertex_kernels
from gklearn.kernels import GraphKernel from gklearn.kernels import GraphKernel




@@ -26,15 +26,15 @@ class StructuralSP(GraphKernel):
def __init__(self, **kwargs): def __init__(self, **kwargs):
GraphKernel.__init__(self) GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__node_attrs = kwargs.get('node_attrs', [])
self.__edge_attrs = kwargs.get('edge_attrs', [])
self.__edge_weight = kwargs.get('edge_weight', None)
self.__node_kernels = kwargs.get('node_kernels', None)
self.__edge_kernels = kwargs.get('edge_kernels', None)
self.__compute_method = kwargs.get('compute_method', 'naive')
self.__ds_infos = kwargs.get('ds_infos', {})
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._node_attrs = kwargs.get('node_attrs', [])
self._edge_attrs = kwargs.get('edge_attrs', [])
self._edge_weight = kwargs.get('edge_weight', None)
self._node_kernels = kwargs.get('node_kernels', None)
self._edge_kernels = kwargs.get('edge_kernels', None)
self._compute_method = kwargs.get('compute_method', 'naive')
self._ds_infos = kwargs.get('ds_infos', {})




def _compute_gm_series(self): def _compute_gm_series(self):
@@ -44,12 +44,12 @@ class StructuralSP(GraphKernel):
iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout) iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
else: else:
iterator = self._graphs iterator = self._graphs
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for g in iterator: for g in iterator:
splist.append(self.__get_sps_as_trie(g))
splist.append(self._get_sps_as_trie(g))
else: else:
for g in iterator: for g in iterator:
splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))
# compute Gram matrix. # compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -57,17 +57,17 @@ class StructuralSP(GraphKernel):
from itertools import combinations_with_replacement from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else: else:
iterator = itr iterator = itr
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for i, j in iterator: for i, j in iterator:
kernel = self.__ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
gram_matrix[i][j] = kernel gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel gram_matrix[j][i] = kernel
else: else:
for i, j in iterator: for i, j in iterator:
kernel = self.__ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
kernel = self._ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
# if(kernel > 1): # if(kernel > 1):
# print("error here ") # print("error here ")
gram_matrix[i][j] = kernel gram_matrix[i][j] = kernel
@@ -86,7 +86,7 @@ class StructuralSP(GraphKernel):
else: else:
chunksize = 100 chunksize = 100
# get shortest path graphs of self._graphs # get shortest path graphs of self._graphs
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
get_sps_fun = self._wrapper_get_sps_trie get_sps_fun = self._wrapper_get_sps_trie
else: else:
get_sps_fun = self._wrapper_get_sps_naive get_sps_fun = self._wrapper_get_sps_naive
@@ -107,8 +107,8 @@ class StructuralSP(GraphKernel):
global G_spl, G_gs global G_spl, G_gs
G_spl = spl_toshare G_spl = spl_toshare
G_gs = gs_toshare G_gs = gs_toshare
if self.__compute_method == 'trie':
do_fun = self.__wrapper_ssp_do_trie
if self._compute_method == 'trie':
do_fun = self._wrapper_ssp_do_trie
else: else:
do_fun = self._wrapper_ssp_do_naive do_fun = self._wrapper_ssp_do_naive
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
@@ -119,32 +119,32 @@ class StructuralSP(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list): def _compute_kernel_list_series(self, g1, g_list):
# get shortest paths of g1 and each graph in g_list. # get shortest paths of g1 and each graph in g_list.
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
splist = [] splist = []
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout) iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
else: else:
iterator = g_list iterator = g_list
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for g in iterator: for g in iterator:
splist.append(self.__get_sps_as_trie(g))
splist.append(self._get_sps_as_trie(g))
else: else:
for g in iterator: for g in iterator:
splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))
# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else: else:
iterator = range(len(g_list)) iterator = range(len(g_list))
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for i in iterator: for i in iterator:
kernel = self.__ssp_do_trie(g1, g_list[i], sp1, splist[i])
kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i])
kernel_list[i] = kernel kernel_list[i] = kernel
else: else:
for i in iterator: for i in iterator:
kernel = self.__ssp_do_naive(g1, g_list[i], sp1, splist[i])
kernel = self._ssp_do_naive(g1, g_list[i], sp1, splist[i])
kernel_list[i] = kernel kernel_list[i] = kernel
return kernel_list return kernel_list
@@ -152,7 +152,7 @@ class StructuralSP(GraphKernel):
def _compute_kernel_list_imap_unordered(self, g1, g_list): def _compute_kernel_list_imap_unordered(self, g1, g_list):
# get shortest paths of g1 and each graph in g_list. # get shortest paths of g1 and each graph in g_list.
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
splist = [None] * len(g_list) splist = [None] * len(g_list)
pool = Pool(self._n_jobs) pool = Pool(self._n_jobs)
itr = zip(g_list, range(0, len(g_list))) itr = zip(g_list, range(0, len(g_list)))
@@ -161,7 +161,7 @@ class StructuralSP(GraphKernel):
else: else:
chunksize = 100 chunksize = 100
# get shortest path graphs of g_list # get shortest path graphs of g_list
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
get_sps_fun = self._wrapper_get_sps_trie get_sps_fun = self._wrapper_get_sps_trie
else: else:
get_sps_fun = self._wrapper_get_sps_naive get_sps_fun = self._wrapper_get_sps_naive
@@ -184,8 +184,8 @@ class StructuralSP(GraphKernel):
G_spl = spl_toshare G_spl = spl_toshare
G_g1 = g1_toshare G_g1 = g1_toshare
G_gl = gl_toshare G_gl = gl_toshare
if self.__compute_method == 'trie':
do_fun = self.__wrapper_ssp_do_trie
if self._compute_method == 'trie':
do_fun = self._wrapper_ssp_do_trie
else: else:
do_fun = self._wrapper_kernel_list_do do_fun = self._wrapper_kernel_list_do
def func_assign(result, var_to_assign): def func_assign(result, var_to_assign):
@@ -193,42 +193,42 @@ class StructuralSP(GraphKernel):
itr = range(len(g_list)) itr = range(len(g_list))
len_itr = len(g_list) len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
return kernel_list return kernel_list
def _wrapper_kernel_list_do(self, itr): def _wrapper_kernel_list_do(self, itr):
return itr, self.__ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])
return itr, self._ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])


def _compute_single_kernel_series(self, g1, g2): def _compute_single_kernel_series(self, g1, g2):
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
sp2 = get_shortest_paths(g2, self.__edge_weight, self.__ds_infos['directed'])
if self.__compute_method == 'trie':
kernel = self.__ssp_do_trie(g1, g2, sp1, sp2)
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
sp2 = get_shortest_paths(g2, self._edge_weight, self._ds_infos['directed'])
if self._compute_method == 'trie':
kernel = self._ssp_do_trie(g1, g2, sp1, sp2)
else: else:
kernel = self.__ssp_do_naive(g1, g2, sp1, sp2)
kernel = self._ssp_do_naive(g1, g2, sp1, sp2)
return kernel return kernel
def _wrapper_get_sps_naive(self, itr_item): def _wrapper_get_sps_naive(self, itr_item):
g = itr_item[0] g = itr_item[0]
i = itr_item[1] i = itr_item[1]
return i, get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed'])
return i, get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])
def __ssp_do_naive(self, g1, g2, spl1, spl2):
def _ssp_do_naive(self, g1, g2, spl1, spl2):
kernel = 0 kernel = 0
# First, compute shortest path matrices, method borrowed from FCSP. # First, compute shortest path matrices, method borrowed from FCSP.
vk_dict = self.__get_all_node_kernels(g1, g2)
vk_dict = self._get_all_node_kernels(g1, g2)
# Then, compute kernels between all pairs of edges, which is an idea of # Then, compute kernels between all pairs of edges, which is an idea of
# extension of FCSP. It suits sparse graphs, which is the most case we # extension of FCSP. It suits sparse graphs, which is the most case we
# went though. For dense graphs, this would be slow. # went though. For dense graphs, this would be slow.
ek_dict = self.__get_all_edge_kernels(g1, g2)
ek_dict = self._get_all_edge_kernels(g1, g2)
# compute graph kernels # compute graph kernels
if vk_dict: if vk_dict:
@@ -273,7 +273,7 @@ class StructuralSP(GraphKernel):
if len(p1) == len(p2): if len(p1) == len(p2):
kernel += 1 kernel += 1
try: try:
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average
kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average
except ZeroDivisionError: except ZeroDivisionError:
print(spl1, spl2) print(spl1, spl2)
print(g1.nodes(data=True)) print(g1.nodes(data=True))
@@ -314,60 +314,27 @@ class StructuralSP(GraphKernel):
def _wrapper_ssp_do_naive(self, itr): def _wrapper_ssp_do_naive(self, itr):
i = itr[0] i = itr[0]
j = itr[1] j = itr[1]
return i, j, self.__ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
return i, j, self._ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
def __get_all_node_kernels(self, g1, g2):
# compute shortest path matrices, method borrowed from FCSP.
vk_dict = {} # shortest path matrices dict
if len(self.__node_labels) > 0:
# node symb and non-synb labeled
if len(self.__node_attrs) > 0:
kn = self.__node_kernels['mix']
for n1, n2 in product(g1.nodes(data=True), g2.nodes(data=True)):
n1_labels = [n1[1][nl] for nl in self.__node_labels]
n2_labels = [n2[1][nl] for nl in self.__node_labels]
n1_attrs = [n1[1][na] for na in self.__node_attrs]
n2_attrs = [n2[1][na] for na in self.__node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
# node symb labeled
else:
kn = self.__node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_labels = [n1[1][nl] for nl in self.__node_labels]
n2_labels = [n2[1][nl] for nl in self.__node_labels]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
else:
# node non-synb labeled
if len(self.__node_attrs) > 0:
kn = self.__node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_attrs = [n1[1][na] for na in self.__node_attrs]
n2_attrs = [n2[1][na] for na in self.__node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
# node unlabeled
else:
pass
return vk_dict
def _get_all_node_kernels(self, g1, g2):
return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)
def __get_all_edge_kernels(self, g1, g2):
def _get_all_edge_kernels(self, g1, g2):
# compute kernels between all pairs of edges, which is an idea of # compute kernels between all pairs of edges, which is an idea of
# extension of FCSP. It suits sparse graphs, which is the most case we # extension of FCSP. It suits sparse graphs, which is the most case we
# went though. For dense graphs, this would be slow. # went though. For dense graphs, this would be slow.
ek_dict = {} # dict of edge kernels ek_dict = {} # dict of edge kernels
if len(self.__edge_labels) > 0:
if len(self._edge_labels) > 0:
# edge symb and non-synb labeled # edge symb and non-synb labeled
if len(self.__edge_attrs) > 0:
ke = self.__edge_kernels['mix']
if len(self._edge_attrs) > 0:
ke = self._edge_kernels['mix']
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)): for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
e1_labels = [e1[2][el] for el in self.__edge_labels]
e2_labels = [e2[2][el] for el in self.__edge_labels]
e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs) ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -375,11 +342,11 @@ class StructuralSP(GraphKernel):
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
# edge symb labeled # edge symb labeled
else: else:
ke = self.__edge_kernels['symb']
ke = self._edge_kernels['symb']
for e1 in g1.edges(data=True): for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True): for e2 in g2.edges(data=True):
e1_labels = [e1[2][el] for el in self.__edge_labels]
e2_labels = [e2[2][el] for el in self.__edge_labels]
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
ek_temp = ke(e1_labels, e2_labels) ek_temp = ke(e1_labels, e2_labels)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -387,12 +354,12 @@ class StructuralSP(GraphKernel):
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
else: else:
# edge non-synb labeled # edge non-synb labeled
if len(self.__edge_attrs) > 0:
ke = self.__edge_kernels['nsymb']
if len(self._edge_attrs) > 0:
ke = self._edge_kernels['nsymb']
for e1 in g1.edges(data=True): for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True): for e2 in g2.edges(data=True):
e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
ek_temp = ke(e1_attrs, e2_attrs) ek_temp = ke(e1_attrs, e2_attrs)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp


Loading…
Cancel
Save