@@ -1,188 +0,0 @@
!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
!_TAG_FILE_SORTED 0 /0=unsorted, 1=sorted, 2=foldcase/
!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/
!_TAG_PROGRAM_NAME Exuberant Ctags //
!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/
!_TAG_PROGRAM_VERSION 5.9~svn20110310 //
commonwalkkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def commonwalkkernel(*args,$/;" function line:23
compute_method /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^    compute_method = compute_method.lower()$/;" variable line:67
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^    Gn = args[0] if len(args) == 1 else [args[0], args[1]]$/;" variable line:69
len_gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^    len_gn = len(Gn)$/;" variable line:72
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^    Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 1]$/;" variable line:73
idx /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^    idx = [G[0] for G in Gn]$/;" variable line:74
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^    Gn = [G[1] for G in Gn]$/;" variable line:75
ds_attrs /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^    ds_attrs = get_dataset_attributes($/;" variable line:81
attr_names /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^        attr_names=['node_labeled', 'edge_labeled', 'is_directed'],$/;" variable line:83
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^        Gn = [G.to_directed() for G in Gn]$/;" variable line:92
start_time /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^    start_time = time.time()$/;" variable line:94
Kmatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^    Kmatrix = np.zeros((len(Gn), len(Gn)))$/;" variable line:96
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^        def init_worker(gn_toshare):$/;" function line:99
run_time /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^    run_time = time.time() - start_time$/;" variable line:173
_commonwalkkernel_exp /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta):$/;" function line:181
wrapper_cw_exp /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def wrapper_cw_exp(node_label, edge_label, beta, itr):$/;" function line:249
_commonwalkkernel_geo /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma):$/;" function line:255
wrapper_cw_geo /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def wrapper_cw_geo(node_label, edge_label, gama, itr):$/;" function line:290
_commonwalkkernel_brute /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def _commonwalkkernel_brute(walks1,$/;" function line:296
find_all_walks_until_length /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def find_all_walks_until_length(G,$/;" function line:336
find_walks /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def find_walks(G, source_node, length):$/;" function line:388
find_all_walks /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def find_all_walks(G, length):$/;" function line:412
randomwalkkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def randomwalkkernel(*args,$/;" function line:27
_sylvester_equation /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs):$/;" function line:150
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^        def init_worker(Awl_toshare):$/;" function line:184 function:_sylvester_equation
wrapper_se_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def wrapper_se_do(lmda, itr):$/;" function line:214
_se_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _se_do(A_wave1, A_wave2, lmda):$/;" function line:220
_conjugate_gradient /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, $/;" function line:236
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^        def init_worker(gn_toshare):$/;" function line:280 function:_conjugate_gradient
wrapper_cg_unlabled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def wrapper_cg_unlabled_do(lmda, itr):$/;" function line:302
_cg_unlabled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _cg_unlabled_do(A_wave1, A_wave2, lmda):$/;" function line:308
wrapper_cg_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, $/;" function line:320
_cg_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, $/;" function line:328
_fixed_point /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, $/;" function line:351
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^        def init_worker(gn_toshare):$/;" function line:408 function:_fixed_point
wrapper_fp_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, $/;" function line:418
_fp_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label, $/;" function line:426
func_fp /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def func_fp(x, p_times, lmda, w_times):$/;" function line:448
_spectral_decomposition /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs):$/;" function line:456
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^            def init_worker(q_T_toshare, P_toshare, D_toshare):$/;" function line:492 function:_spectral_decomposition
wrapper_sd_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def wrapper_sd_do(weight, sub_kernel, itr):$/;" function line:516
_sd_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel): $/;" function line:523
_randomwalkkernel_kron /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _randomwalkkernel_kron(G1, G2, node_label, edge_label):$/;" function line:540
getLabels /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def getLabels(Gn, node_label, edge_label, directed):$/;" function line:561
filterGramMatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def filterGramMatrix(gmt, label_dict, label, directed):$/;" function line:581
computeVK /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def computeVK(g1, g2, ds_attrs, node_kernels, node_label):$/;" function line:593
computeW /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label):$/;" function line:627
spkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/sp_sym.py /^def spkernel(*args,$/;" function line:24
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/sp_sym.py /^        def init_worker(gn_toshare):$/;" function line:115 function:spkernel
spkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/sp_sym.py /^def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):$/;" function line:130
wrapper_sp_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/sp_sym.py /^def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr):$/;" function line:191
wrapper_getSPGraph /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/sp_sym.py /^def wrapper_getSPGraph(weight, itr_item):$/;" function line:197
structuralspkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^def structuralspkernel(*args,$/;" function line:25
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^        def init_worker(spl_toshare, gs_toshare):$/;" function line:177 function:structuralspkernel
structuralspkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,$/;" function line:265
wrapper_ssp_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^def wrapper_ssp_do(ds_attrs, node_label, edge_label, node_kernels, $/;" function line:417
get_shortest_paths /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^def get_shortest_paths(G, weight, directed):$/;" function line:426
wrapper_getSP /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^def wrapper_getSP(weight, directed, itr_item):$/;" function line:461
marginalizedkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/marginalizedKernel.py /^def marginalizedkernel(*args,$/;" function line:31
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/marginalizedKernel.py /^        def init_worker(gn_toshare):$/;" function line:114 function:marginalizedkernel
_marginalizedkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/marginalizedKernel.py /^def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):$/;" function line:144
wrapper_marg_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/marginalizedKernel.py /^def wrapper_marg_do(node_label, edge_label, p_quit, n_iteration, itr):$/;" function line:290
wrapper_untotter /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/marginalizedKernel.py /^def wrapper_untotter(Gn, node_label, edge_label, i):$/;" function line:296
randomwalkkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def randomwalkkernel(*args,$/;" function line:21
_sylvester_equation /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, verbose=True):$/;" function line:197
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^        def init_worker(Awl_toshare):$/;" function line:232 function:_sylvester_equation
wrapper_se_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def wrapper_se_do(lmda, itr):$/;" function line:262
_se_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _se_do(A_wave1, A_wave2, lmda):$/;" function line:268
_conjugate_gradient /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, $/;" function line:284
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^        def init_worker(gn_toshare):$/;" function line:328 function:_conjugate_gradient
wrapper_cg_unlabled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def wrapper_cg_unlabled_do(lmda, itr):$/;" function line:350
_cg_unlabled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _cg_unlabled_do(A_wave1, A_wave2, lmda):$/;" function line:356
wrapper_cg_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, $/;" function line:368
_cg_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, $/;" function line:376
_fixed_point /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, $/;" function line:399
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^        def init_worker(gn_toshare):$/;" function line:456 function:_fixed_point
wrapper_fp_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, $/;" function line:466
_fp_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label, $/;" function line:474
func_fp /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def func_fp(x, p_times, lmda, w_times):$/;" function line:496
_spectral_decomposition /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, verbose=True):$/;" function line:504
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^            def init_worker(q_T_toshare, P_toshare, D_toshare):$/;" function line:541 function:_spectral_decomposition
wrapper_sd_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def wrapper_sd_do(weight, sub_kernel, itr):$/;" function line:566
_sd_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel): $/;" function line:573
_randomwalkkernel_kron /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _randomwalkkernel_kron(G1, G2, node_label, edge_label):$/;" function line:590
getLabels /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def getLabels(Gn, node_label, edge_label, directed):$/;" function line:611
filterGramMatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def filterGramMatrix(gmt, label_dict, label, directed):$/;" function line:631
computeVK /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def computeVK(g1, g2, ds_attrs, node_kernels, node_label):$/;" function line:643
computeW /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label):$/;" function line:677
spkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py /^def spkernel(*args,$/;" function line:22
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py /^        def init_worker(gn_toshare):$/;" function line:157 function:spkernel
spkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py /^def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):$/;" function line:207
wrapper_sp_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py /^def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr):$/;" function line:297
wrapper_getSPGraph /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py /^def wrapper_getSPGraph(weight, itr_item):$/;" function line:310
structuralspkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def structuralspkernel(*args,$/;" function line:28
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^        def init_worker(spl_toshare, gs_toshare):$/;" function line:179 function:structuralspkernel
structuralspkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,$/;" function line:258
wrapper_ssp_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def wrapper_ssp_do(ds_attrs, node_label, edge_label, node_kernels, $/;" function line:346
ssp_do_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label,$/;" function line:355
wrapper_ssp_do_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def wrapper_ssp_do_trie(ds_attrs, node_label, edge_label, node_kernels, $/;" function line:463
getAllNodeKernels /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def getAllNodeKernels(g1, g2, node_kernels, node_label, ds_attrs):$/;" function line:471
getAllEdgeKernels /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def getAllEdgeKernels(g1, g2, edge_kernels, edge_label, ds_attrs):$/;" function line:505
traverseBothTriem /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseBothTriem(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:551
traverseTrie2m /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseTrie2m(root, p1, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:568
traverseBothTriev /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseBothTriev(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:592
traverseTrie2v /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseTrie2v(root, p1, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:609
traverseBothTriee /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseBothTriee(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:631
traverseTrie2e /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseTrie2e(root, p1, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:648
traverseBothTrieu /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseBothTrieu(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:673
traverseTrie2u /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseTrie2u(root, p1, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:690
get_shortest_paths /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def get_shortest_paths(G, weight, directed):$/;" function line:748
wrapper_getSP_naive /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def wrapper_getSP_naive(weight, directed, itr_item):$/;" function line:783
get_sps_as_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def get_sps_as_trie(G, weight, directed):$/;" function line:789
wrapper_getSP_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def wrapper_getSP_trie(weight, directed, itr_item):$/;" function line:830
treeletkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def treeletkernel(*args, $/;" function line:23
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^        def init_worker(canonkeys_toshare):$/;" function line:105 function:treeletkernel
_treeletkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def _treeletkernel_do(canonkey1, canonkey2, sub_kernel):$/;" function line:140
wrapper_treeletkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def wrapper_treeletkernel_do(sub_kernel, itr):$/;" function line:160
get_canonkeys /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def get_canonkeys(G, node_label, edge_label, labeled, is_directed):$/;" function line:166
wrapper_get_canonkeys /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def wrapper_get_canonkeys(node_label, edge_label, labeled, is_directed, itr_item):$/;" function line:418
find_paths /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def find_paths(G, source_node, length):$/;" function line:424
find_all_paths /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def find_all_paths(G, length, is_directed):$/;" function line:449
cyclicpatternkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/cyclicPatternKernel.py /^def cyclicpatternkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = None):$/;" function line:20
_cyclicpatternkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/cyclicPatternKernel.py /^def _cyclicpatternkernel_do(patterns1, patterns2):$/;" function line:63
get_patterns /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/cyclicPatternKernel.py /^def get_patterns(G, node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = None):$/;" function line:87
pathkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def pathkernel(*args, node_label='atom', edge_label='bond_type'):$/;" function line:20
_pathkernel_do_l /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def _pathkernel_do_l(G1, G2, sp1, sp2, node_label, edge_label):$/;" function line:107
_pathkernel_do_nl /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def _pathkernel_do_nl(G1, G2, sp1, sp2, node_label):$/;" function line:148
_pathkernel_do_el /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def _pathkernel_do_el(G1, G2, sp1, sp2, edge_label):$/;" function line:171
_pathkernel_do_unl /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def _pathkernel_do_unl(G1, G2, sp1, sp2):$/;" function line:196
get_shortest_paths /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def get_shortest_paths(G, weight):$/;" function line:211
treepatternkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/treePatternKernel.py /^def treepatternkernel(*args,$/;" function line:21
_treepatternkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/treePatternKernel.py /^def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type,$/;" function line:90
matchingset /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/treePatternKernel.py /^    def matchingset(n1, n2):$/;" function line:119 function:_treepatternkernel_do
mset_com /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/treePatternKernel.py /^        def mset_com(allpairs, length):$/;" function line:123 function:_treepatternkernel_do.matchingset
kernel_h /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/treePatternKernel.py /^    def kernel_h(h):$/;" function line:165 function:_treepatternkernel_do
weisfeilerlehmankernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/weisfeilerLehmanKernel.py /^def weisfeilerlehmankernel(*args, node_label = 'atom', edge_label = 'bond_type', height = 0, base_kernel = 'subtree'):$/;" function line:18
_wl_subtreekernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/weisfeilerLehmanKernel.py /^def _wl_subtreekernel_do(Gn, node_label, edge_label, height):$/;" function line:75
_wl_spkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/weisfeilerLehmanKernel.py /^def _wl_spkernel_do(Gn, node_label, edge_label, height):$/;" function line:183
_wl_edgekernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/weisfeilerLehmanKernel.py /^def _wl_edgekernel_do(Gn, node_label, edge_label, height):$/;" function line:264
_wl_userkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/weisfeilerLehmanKernel.py /^def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):$/;" function line:340
untilhpathkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def untilhpathkernel(*args,$/;" function line:25
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^        def init_worker(trie_toshare):$/;" function line:142 function:untilhpathkernel
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^        def init_worker(plist_toshare):$/;" function line:149 function:untilhpathkernel
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^        def init_worker(plist_toshare):$/;" function line:156 function:untilhpathkernel
_untilhpathkernel_do_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def _untilhpathkernel_do_trie(trie1, trie2, k_func):$/;" function line:207
traverseTrie1t /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^    def traverseTrie1t(root, trie2, setlist, pcurrent=[]):$/;" function line:226 function:_untilhpathkernel_do_trie
traverseTrie2t /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^    def traverseTrie2t(root, trie1, setlist, pcurrent=[]):$/;" function line:244 function:_untilhpathkernel_do_trie
traverseTrie1m /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^    def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):$/;" function line:271 function:_untilhpathkernel_do_trie
traverseTrie2m /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^    def traverseTrie2m(root, trie1, sumlist, pcurrent=[]):$/;" function line:289 function:_untilhpathkernel_do_trie
wrapper_uhpath_do_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def wrapper_uhpath_do_trie(k_func, itr):$/;" function line:316
_untilhpathkernel_do_naive /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def _untilhpathkernel_do_naive(paths1, paths2, k_func):$/;" function line:322
wrapper_uhpath_do_naive /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def wrapper_uhpath_do_naive(k_func, itr):$/;" function line:365
_untilhpathkernel_do_kernelless /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def _untilhpathkernel_do_kernelless(paths1, paths2, k_func):$/;" function line:371
wrapper_uhpath_do_kernelless /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def wrapper_uhpath_do_kernelless(k_func, itr):$/;" function line:414
find_all_paths_until_length /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def find_all_paths_until_length(G,$/;" function line:421
wrapper_find_all_paths_until_length /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def wrapper_find_all_paths_until_length(length, ds_attrs, node_label, $/;" function line:492
find_all_path_as_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def find_all_path_as_trie(G,$/;" function line:501
traverseGraph /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^    def traverseGraph(root, ptrie, length, G, ds_attrs, node_label, edge_label,$/;" function line:542 function:find_all_path_as_trie
wrapper_find_all_path_as_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def wrapper_find_all_path_as_trie(length, ds_attrs, node_label, $/;" function line:593
paths2labelseqs /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def paths2labelseqs(plist, G, ds_attrs, node_label, edge_label):$/;" function line:601
weisfeilerlehmankernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def weisfeilerlehmankernel(*args, $/;" function line:25
base_kernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^    base_kernel = base_kernel.lower()$/;" variable line:74
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^    Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list$/;" variable line:75
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^    Gn = [g.copy() for g in Gn]$/;" variable line:76
ds_attrs /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^    ds_attrs = get_dataset_attributes(Gn, attr_names=['node_labeled'], $/;" variable line:77
node_label /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^        node_label=node_label)$/;" variable line:78
start_time /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^    start_time = time.time()$/;" variable line:83
Kmatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^        Kmatrix = _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, verbose)$/;" variable line:87
Kmatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^        Kmatrix = _wl_spkernel_do(Gn, node_label, edge_label, height)$/;" variable line:91
Kmatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^        Kmatrix = _wl_edgekernel_do(Gn, node_label, edge_label, height)$/;" variable line:95
Kmatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^        Kmatrix = _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel)$/;" variable line:99
run_time /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^    run_time = time.time() - start_time$/;" variable line:101
_wl_kernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, verbose):$/;" function line:109
wl_iteration /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def wl_iteration(G, node_label):$/;" function line:256
wrapper_wl_iteration /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def wrapper_wl_iteration(node_label, itr_item):$/;" function line:293
compute_kernel_matrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, verbose):$/;" function line:300
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^        def init_worker(alllabels_toshare):$/;" function line:305 function:compute_kernel_matrix
compute_subtree_kernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def compute_subtree_kernel(num_of_each_label1, num_of_each_label2, kernel):$/;" function line:319
wrapper_compute_subtree_kernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def wrapper_compute_subtree_kernel(Kmatrix, itr):$/;" function line:333
_wl_spkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def _wl_spkernel_do(Gn, node_label, edge_label, height):$/;" function line:339
_wl_edgekernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def _wl_edgekernel_do(Gn, node_label, edge_label, height):$/;" function line:421
_wl_userkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):$/;" function line:498
@@ -1,842 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:53:57 2018

@author: ljia

@references: S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and
    Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research,
    11(Apr):1201–1242, 2010.
"""
import sys
sys.path.insert(0, "../")
import time
import warnings
from functools import partial

from tqdm import tqdm
import networkx as nx
import numpy as np
from scipy.sparse import identity, kron
from scipy.sparse.linalg import cg
from scipy.optimize import fixed_point

from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm


def randomwalkkernel(*args,
                     # params for all methods.
                     compute_method=None,
                     weight=1,
                     p=None,
                     q=None,
                     edge_weight=None,
                     # params for the conjugate and fp methods.
                     node_kernels=None,
                     edge_kernels=None,
                     node_label='atom',
                     edge_label='bond_type',
                     # params for the spectral method.
                     sub_kernel=None,
                     n_jobs=None):
"""Calculate random walk graph kernels. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
/ | |||
G1, G2 : NetworkX graphs | |||
2 graphs between which the kernel is calculated. | |||
node_label : string | |||
node attribute used as label. The default node label is atom. | |||
edge_label : string | |||
edge attribute used as label. The default edge label is bond_type. | |||
h : integer | |||
Longest length of walks. | |||
method : string | |||
Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'. | |||
Return | |||
------ | |||
Kmatrix : Numpy matrix | |||
Kernel matrix, each element of which is the path kernel up to d between 2 praphs. | |||
""" | |||
    compute_method = compute_method.lower()
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]

    eweight = None
    if edge_weight is None:
        print('\n No edge weight is specified. Set all weights to 1.\n')
    else:
        try:
            some_weight = list(
                nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
            if isinstance(some_weight, (float, int)):
                eweight = edge_weight
            else:
                print('\n Edge weight with name %s is not float or integer. '
                      'Set all weights to 1.\n' % edge_weight)
        except Exception:
            print('\n Edge weight with name "%s" is not found in the edge '
                  'attributes. Set all weights to 1.\n' % edge_weight)
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
                    'edge_attr_dim', 'is_directed'],
        node_label=node_label,
        edge_label=edge_label)
    ds_attrs['node_attr_dim'] = 0
    ds_attrs['edge_attr_dim'] = 0

    # remove graphs with no edges, as no walk can be found in their structures,
    # so the weight matrix between such a graph and itself might be zero.
    len_gn = len(Gn)
    Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
    idx = [G[0] for G in Gn]
    Gn = [G[1] for G in Gn]
    if len(Gn) != len_gn:
        print('\n %d graphs are removed as they don\'t contain edges.\n' %
              (len_gn - len(Gn)))
    start_time = time.time()

#    # get vertex and edge concatenated labels for each graph
#    label_list, d = getLabels(Gn, node_label, edge_label, ds_attrs['is_directed'])
#    gmf = filterGramMatrix(A_wave_list[0], label_list[0], ('C', '0', 'O'), ds_attrs['is_directed'])
    if compute_method == 'sylvester':
        warnings.warn('All labels are ignored.')
        Kmatrix = _sylvester_equation(Gn, weight, p, q, eweight, n_jobs)

    elif compute_method == 'conjugate':
        Kmatrix = _conjugate_gradient(Gn, weight, p, q, ds_attrs,
                                      node_kernels, edge_kernels,
                                      node_label, edge_label, eweight, n_jobs)

    elif compute_method == 'fp':
        Kmatrix = _fixed_point(Gn, weight, p, q, ds_attrs, node_kernels,
                               edge_kernels, node_label, edge_label,
                               eweight, n_jobs)

    elif compute_method == 'spectral':
        warnings.warn('All labels are ignored. Only works for undirected graphs.')
        Kmatrix = _spectral_decomposition(Gn, weight, p, q, sub_kernel,
                                          eweight, n_jobs)

    elif compute_method == 'kron':
        Kmatrix = np.zeros((len(Gn), len(Gn)))
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j],
                                                       node_label, edge_label)
                Kmatrix[j][i] = Kmatrix[i][j]
    else:
        raise ValueError(
            'compute method name incorrect. Available methods: "sylvester", '
            '"conjugate", "fp", "spectral" and "kron".')
    run_time = time.time() - start_time
    print("\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---"
          % (len(Gn), run_time))

    return Kmatrix, run_time, idx
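
# A minimal usage sketch (hypothetical input graphs; this block is an
# illustration, not part of the original module). The 'spectral' method
# ignores labels, so plain unlabeled undirected NetworkX graphs suffice; a
# small weight keeps the geometric series convergent:
#
#     Gn = [nx.path_graph(4), nx.cycle_graph(5), nx.star_graph(3)]
#     Kmatrix, run_time, idx = randomwalkkernel(
#         Gn, compute_method='spectral', weight=0.01, sub_kernel='geo',
#         n_jobs=1)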


###############################################################################
def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs):
    """Calculate random walk graph kernels between graphs in Gn using the
    Sylvester equation method.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    lmda : float
        Weight (decay) parameter of the walks.
    p, q : None or list
        Initial and stopping probability distributions; uniform if None.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the kernel between 2 graphs.
    """
    Kmatrix = np.zeros((len(Gn), len(Gn)))

    if q is None:
        # don't normalize adjacency matrices if q is a uniform vector. Note
        # A_wave_list actually contains the transposes of the adjacency matrices.
        A_wave_list = [
            nx.adjacency_matrix(G, eweight).todense().transpose() for G in tqdm(
                Gn, desc='compute adjacency matrices', file=sys.stdout)
        ]
#        # normalized adjacency matrices
#        A_wave_list = []
#        for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
#            A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose()
#            norm = A_tilde.sum(axis=0)
#            norm[norm == 0] = 1
#            A_wave_list.append(A_tilde / norm)
        if p is None:  # p is uniform distribution as default.
            def init_worker(Awl_toshare):
                global G_Awl
                G_Awl = Awl_toshare
            do_partial = partial(wrapper_se_do, lmda)
            parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
                        glbv=(A_wave_list,), n_jobs=n_jobs)
    return Kmatrix


def wrapper_se_do(lmda, itr):
    i = itr[0]
    j = itr[1]
    return i, j, _se_do(G_Awl[i], G_Awl[j], lmda)


def _se_do(A_wave1, A_wave2, lmda):
    # lazy import: the optional python-control package is only needed here.
    from control import dlyap
    S = lmda * A_wave2
    T_t = A_wave1
    # use uniform distribution if there is no prior knowledge.
    nb_pd = len(A_wave1) * len(A_wave2)
    p_times_uni = 1 / nb_pd
    M0 = np.full((len(A_wave2), len(A_wave1)), p_times_uni)
    X = dlyap(S, T_t, M0)
    X = np.reshape(X, (-1, 1), order='F')
    # use uniform distribution if there is no prior knowledge.
    q_times = np.full((1, nb_pd), p_times_uni)
    return np.dot(q_times, X)
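
# Why the Sylvester route works (a sketch, for the uniform p and q used above):
# an equation of the form X = lmda * A2 @ X @ A1.T + M0 is the matrix form of
# the linear system (I - lmda * kron(A1, A2)) vec(X) = vec(M0), so q^T vec(X)
# recovers the geometric random-walk kernel q^T (I - lmda * W)^{-1} p.
# A tiny numerical check of that identity on hypothetical matrices:
#
#     rng = np.random.default_rng(0)
#     A1, A2, lmda = rng.random((3, 3)), rng.random((2, 2)), 0.05
#     M0 = np.full((2, 3), 1 / 6)
#     x = np.linalg.solve(np.identity(6) - lmda * np.kron(A1, A2),
#                         M0.flatten(order='F'))
#     X = x.reshape((2, 3), order='F')
#     assert np.allclose(X, lmda * A2 @ X @ A1.T + M0)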


###############################################################################
def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
                        node_label, edge_label, eweight, n_jobs):
    """Calculate random walk graph kernels between graphs in Gn using the
    conjugate gradient method.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    node_label : string
        Node attribute used as label.
    edge_label : string
        Edge attribute used as label.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the kernel between 2 graphs.
    """
    Kmatrix = np.zeros((len(Gn), len(Gn)))

#    if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
#        not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1:
#        # this is faster for unlabeled graphs. @todo: why?
#        if q is None:
#            # don't normalize adjacency matrices if q is a uniform vector. Note
#            # A_wave_list actually contains the transposes of the adjacency matrices.
#            A_wave_list = [
#                nx.adjacency_matrix(G, eweight).todense().transpose() for G in
#                tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
#            ]
#            if p is None:  # p is uniform distribution as default.
#                def init_worker(Awl_toshare):
#                    global G_Awl
#                    G_Awl = Awl_toshare
#                do_partial = partial(wrapper_cg_unlabled_do, lmda)
#                parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
#                            glbv=(A_wave_list,), n_jobs=n_jobs)
#    else:
    # reindex nodes using consecutive integers for the convenience of kernel
    # computation.
    Gn = [nx.convert_node_labels_to_integers(
        g, first_label=0, label_attribute='label_original') for g in tqdm(
            Gn, desc='reindex vertices', file=sys.stdout)]

    if p is None and q is None:  # p and q are uniform distributions as default.
        def init_worker(gn_toshare):
            global G_gn
            G_gn = gn_toshare
        do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels,
                             node_label, edge_kernels, edge_label, lmda)
        parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
                    glbv=(Gn,), n_jobs=n_jobs)
    return Kmatrix


def wrapper_cg_unlabled_do(lmda, itr):
    i = itr[0]
    j = itr[1]
    return i, j, _cg_unlabled_do(G_Awl[i], G_Awl[j], lmda)


def _cg_unlabled_do(A_wave1, A_wave2, lmda):
    nb_pd = len(A_wave1) * len(A_wave2)
    p_times_uni = 1 / nb_pd
    w_times = kron(A_wave1, A_wave2).todense()
    A = identity(w_times.shape[0]) - w_times * lmda
    b = np.full((nb_pd, 1), p_times_uni)
    x, _ = cg(A, b)
    # use uniform distribution if there is no prior knowledge.
    q_times = np.full((1, nb_pd), p_times_uni)
    return np.dot(q_times, x)


def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
                         edge_label, lmda, itr):
    i = itr[0]
    j = itr[1]
    return i, j, _cg_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
                               node_label, edge_kernels, edge_label, lmda)


def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
                  edge_kernels, edge_label, lmda):
    # First, compute kernels between all pairs of nodes, using the method
    # borrowed from FCSP. It is faster than directly computing all edge
    # kernels when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of
    # the graphs compared, which covers most of the cases we encountered.
    # For very sparse graphs, this would be slow.
    vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)

    # Compute the weight matrix of the direct product graph.
    w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
                              edge_kernels, edge_label)
    # use uniform distribution if there is no prior knowledge.
    p_times_uni = 1 / w_dim
    A = identity(w_times.shape[0]) - w_times * lmda
    b = np.full((w_dim, 1), p_times_uni)
    x, _ = cg(A, b)
    # use uniform distribution if there is no prior knowledge.
    q_times = np.full((1, w_dim), p_times_uni)
    return np.dot(q_times, x)
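
# A sketch of the sub-kernel dicts that the 'conjugate' and 'fp' methods
# expect (the helper functions here are illustrative stand-ins; only the dict
# keys 'symb', 'nsymb' and 'mix' and the call signatures used by computeVK
# and computeW are taken from this module):
#
#     def deltakernel(x, y):
#         return float(x == y)  # symbolic labels
#
#     def gaussiankernel(x, y, gamma=1.0):
#         d = np.asarray(x, float) - np.asarray(y, float)
#         return np.exp(-gamma * np.dot(d, d))  # attribute vectors
#
#     sub_kernels = {'symb': deltakernel,
#                    'nsymb': gaussiankernel,
#                    'mix': lambda x1, x2, y1, y2:
#                        deltakernel(x1, x2) * gaussiankernel(y1, y2)}
#     # then: node_kernels=sub_kernels, edge_kernels=sub_kernels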


###############################################################################
def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
                 node_label, edge_label, eweight, n_jobs):
    """Calculate random walk graph kernels between graphs in Gn using the
    fixed-point method.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    node_label : string
        Node attribute used as label.
    edge_label : string
        Edge attribute used as label.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the kernel between 2 graphs.
    """
    Kmatrix = np.zeros((len(Gn), len(Gn)))

#    if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
#        not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1:
#        # this is faster for unlabeled graphs. @todo: why?
#        if q is None:
#            # don't normalize adjacency matrices if q is a uniform vector. Note
#            # A_wave_list actually contains the transposes of the adjacency matrices.
#            A_wave_list = [
#                nx.adjacency_matrix(G, eweight).todense().transpose() for G in
#                tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
#            ]
#            if p is None:  # p is uniform distribution as default.
#                pbar = tqdm(
#                    total=(1 + len(Gn)) * len(Gn) / 2,
#                    desc='calculating kernels',
#                    file=sys.stdout)
#                for i in range(0, len(Gn)):
#                    for j in range(i, len(Gn)):
#                        # use uniform distribution if there is no prior knowledge.
#                        nb_pd = len(A_wave_list[i]) * len(A_wave_list[j])
#                        p_times_uni = 1 / nb_pd
#                        w_times = kron(A_wave_list[i], A_wave_list[j]).todense()
#                        p_times = np.full((nb_pd, 1), p_times_uni)
#                        x = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times))
#                        # use uniform distribution if there is no prior knowledge.
#                        q_times = np.full((1, nb_pd), p_times_uni)
#                        Kmatrix[i][j] = np.dot(q_times, x)
#                        Kmatrix[j][i] = Kmatrix[i][j]
#                        pbar.update(1)
#    else:
    # reindex nodes using consecutive integers for the convenience of kernel
    # computation.
    Gn = [nx.convert_node_labels_to_integers(
        g, first_label=0, label_attribute='label_original') for g in tqdm(
            Gn, desc='reindex vertices', file=sys.stdout)]

    if p is None and q is None:  # p and q are uniform distributions as default.
        def init_worker(gn_toshare):
            global G_gn
            G_gn = gn_toshare
        do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels,
                             node_label, edge_kernels, edge_label, lmda)
        parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
                    glbv=(Gn,), n_jobs=n_jobs)
    return Kmatrix


def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
                         edge_label, lmda, itr):
    i = itr[0]
    j = itr[1]
    return i, j, _fp_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
                               node_label, edge_kernels, edge_label, lmda)


def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
                  edge_kernels, edge_label, lmda):
    # First, compute kernels between all pairs of nodes, using the method
    # borrowed from FCSP. It is faster than directly computing all edge
    # kernels when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of
    # the graphs compared, which covers most of the cases we encountered.
    # For very sparse graphs, this would be slow.
    vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)

    # Compute the weight matrix of the direct product graph.
    w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
                              edge_kernels, edge_label)
    # use uniform distribution if there is no prior knowledge.
    p_times_uni = 1 / w_dim
    p_times = np.full((w_dim, 1), p_times_uni)
    x = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times),
                    xtol=1e-06, maxiter=1000)
    # use uniform distribution if there is no prior knowledge.
    q_times = np.full((1, w_dim), p_times_uni)
    return np.dot(q_times, x)


def func_fp(x, p_times, lmda, w_times):
    return p_times + lmda * np.dot(w_times, x)
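
# The fixed point of func_fp solves the same linear system as the conjugate
# gradient method: x = p + lmda * W @ x is (I - lmda * W) x = p. A small
# sanity-check sketch on hypothetical data (convergence needs lmda times the
# spectral radius of W to be below 1):
#
#     rng = np.random.default_rng(1)
#     W, lmda = rng.random((4, 4)), 0.1
#     pt = np.full((4, 1), 1 / 4)
#     x_fp = fixed_point(func_fp, pt, args=(pt, lmda, W), xtol=1e-10,
#                        method='iteration')
#     assert np.allclose(x_fp, np.linalg.solve(np.identity(4) - lmda * W, pt))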


###############################################################################
def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs):
    """Calculate random walk graph kernels between unlabeled graphs in Gn
    using the spectral decomposition method. Labels will be ignored.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    weight : float
        Weight (decay) parameter of the walks.
    sub_kernel : string
        Sub-kernel applied to the eigenvalues, 'exp' or 'geo'.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the kernel between 2 graphs.
    """
    Kmatrix = np.zeros((len(Gn), len(Gn)))

    if q is None:
        # precompute the spectral decomposition of each graph.
        P_list = []
        D_list = []
        for G in tqdm(Gn, desc='spectral decompose', file=sys.stdout):
            # don't normalize adjacency matrices if q is a uniform vector. Note
            # A actually is the transpose of the adjacency matrix.
            A = nx.adjacency_matrix(G, eweight).todense().transpose()
            ew, ev = np.linalg.eig(A)
            D_list.append(ew)
            P_list.append(ev)
#        P_inv_list = [p.T for p in P_list]  # @todo: also works for directed graphs?

        if p is None:  # p is uniform distribution as default.
            q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn]
#            q_T_list = [q.T for q in q_list]
            def init_worker(q_T_toshare, P_toshare, D_toshare):
                global G_q_T, G_P, G_D
                G_q_T = q_T_toshare
                G_P = P_toshare
                G_D = D_toshare
            do_partial = partial(wrapper_sd_do, weight, sub_kernel)
            parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
                        glbv=(q_T_list, P_list, D_list), n_jobs=n_jobs)
    return Kmatrix


def wrapper_sd_do(weight, sub_kernel, itr):
    i = itr[0]
    j = itr[1]
    return i, j, _sd_do(G_q_T[i], G_q_T[j], G_P[i], G_P[j], G_D[i], G_D[j],
                        weight, sub_kernel)


def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
    # use uniform distribution if there is no prior knowledge.
    kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense()
    # @todo: this may not be needed when p = q (kr = kl.T) for undirected graphs.
#    kr = kron(np.dot(P_inv_list[i], q_list[i]), np.dot(P_inv_list[j], q_list[j])).todense()
    if sub_kernel == 'exp':
        D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
        kmiddle = np.diag(np.exp(weight * D_diag))
    elif sub_kernel == 'geo':
        D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
        kmiddle = np.identity(len(D_diag)) - weight * np.diag(D_diag)
        kmiddle = np.linalg.inv(kmiddle)
    else:
        raise ValueError('unknown sub kernel: "%s". Available sub kernels: '
                         '"exp" and "geo".' % sub_kernel)
    return np.dot(np.dot(kl, kmiddle), kl.T)[0, 0]
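
# The closed forms behind the two sub-kernels (a sketch): with each adjacency
# matrix diagonalized as A = P diag(D) P^{-1}, the product-graph weight matrix
# diagonalizes with eigenvalues d1 * d2 (the entries of D_diag), so the middle
# factor reduces to
#     'exp':  sum_k (weight * D_diag)^k / k!  =  diag(exp(weight * D_diag))
#     'geo':  sum_k (weight * D_diag)^k       =  diag(1 / (1 - weight * D_diag))
# evaluated between the projected start/stop distributions kl and kl.T.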


###############################################################################
def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
    """Calculate the random walk graph kernel between 2 graphs using the
    nearest Kronecker product approximation method.

    Parameters
    ----------
    G1, G2 : NetworkX graph
        Graphs between which the kernel is calculated.
    node_label : string
        Node attribute used as label.
    edge_label : string
        Edge attribute used as label.

    Return
    ------
    kernel : float
        Kernel between 2 graphs.
    """
    raise NotImplementedError('The nearest Kronecker product approximation '
                              'method is not implemented yet.')
############################################################################### | |||
def getLabels(Gn, node_label, edge_label, directed): | |||
"""Get symbolic labels of a graph dataset, where vertex labels are dealt | |||
with by concatenating them to the edge labels of adjacent edges. | |||
""" | |||
label_list = [] | |||
label_set = set() | |||
for g in Gn: | |||
label_g = {} | |||
for e in g.edges(data=True): | |||
nl1 = g.node[e[0]][node_label] | |||
nl2 = g.node[e[1]][node_label] | |||
if not directed and nl1 > nl2: | |||
nl1, nl2 = nl2, nl1 | |||
label = (nl1, e[2][edge_label], nl2) | |||
label_g[(e[0], e[1])] = label | |||
label_list.append(label_g) | |||
label_set = set([l for lg in label_list for l in lg.values()]) | |||
return label_list, len(label_set) | |||
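# For example (an illustration): an undirected edge between nodes labeled 'O'
# and 'C' with edge label 1 yields the canonical triple ('C', 1, 'O'), since
# the two node labels are swapped into sorted order to make the key
# independent of the edge direction.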
def filterGramMatrix(gmt, label_dict, label, directed): | |||
"""Compute (the transpose of) the Gram matrix filtered by a label. | |||
""" | |||
gmf = np.zeros(gmt.shape) | |||
for (n1, n2), l in label_dict.items(): | |||
if l == label: | |||
gmf[n2, n1] = gmt[n2, n1] | |||
if not directed: | |||
gmf[n1, n2] = gmt[n1, n2] | |||
return gmf | |||
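# Illustrative sketch (not part of the original library): filter a 3x3
# vertex-kernel matrix so that only the entries of edges carrying the
# hypothetical label 'a' survive; all other entries stay zero.
def _demo_filter_gram():
    gmt = np.arange(9, dtype=float).reshape(3, 3)
    label_dict = {(0, 1): 'a', (1, 2): 'b'}  # hypothetical edge -> label map
    gmf = filterGramMatrix(gmt, label_dict, 'a', False)
    return gmf  # only entries (1, 0) and (0, 1) are kept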
def computeVK(g1, g2, ds_attrs, node_kernels, node_label): | |||
'''Compute vertex kernels between vertices of two graphs. | |||
''' | |||
    vk_dict = {}  # vertex kernels dict
if ds_attrs['node_labeled']: | |||
        # node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0: | |||
kn = node_kernels['mix'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
vk_dict[(n1[0], n2[0])] = kn( | |||
n1[1][node_label], n2[1][node_label], | |||
n1[1]['attributes'], n2[1]['attributes']) | |||
# node symb labeled | |||
else: | |||
kn = node_kernels['symb'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label], | |||
n2[1][node_label]) | |||
else: | |||
        # node non-symb labeled
if ds_attrs['node_attr_dim'] > 0: | |||
kn = node_kernels['nsymb'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'], | |||
n2[1]['attributes']) | |||
# node unlabeled | |||
else: | |||
pass | |||
return vk_dict | |||
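# A minimal node_kernels dict of the shape computeVK expects (a sketch; the
# Kronecker delta and the Gaussian with gamma=1.0 are assumptions, not
# library defaults).
def _demo_node_kernels():
    def delta(l1, l2):  # for symbolic labels
        return 1. if l1 == l2 else 0.
    def gaussian(a1, a2, gamma=1.0):  # for non-symbolic attribute vectors
        a1, a2 = np.asarray(a1, float), np.asarray(a2, float)
        return np.exp(-gamma * np.sum((a1 - a2) ** 2))
    return {'symb': delta,
            'nsymb': gaussian,
            'mix': lambda l1, l2, a1, a2: delta(l1, l2) * gaussian(a1, a2)}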
def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label): | |||
'''Compute weight matrix of the direct product graph. | |||
''' | |||
w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2) | |||
w_times = np.zeros((w_dim, w_dim)) | |||
if vk_dict: # node labeled | |||
if ds_attrs['is_directed']: | |||
if ds_attrs['edge_labeled']: | |||
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0: | |||
ke = edge_kernels['mix'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label], | |||
e1[2]['attributes'], e2[2]['attributes']) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \ | |||
* ek_temp * vk_dict[(e1[1], e2[1])] | |||
# edge symb labeled | |||
else: | |||
ke = edge_kernels['symb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label]) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \ | |||
* ek_temp * vk_dict[(e1[1], e2[1])] | |||
else: | |||
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0: | |||
ke = edge_kernels['nsymb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes']) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \ | |||
* ek_temp * vk_dict[(e1[1], e2[1])] | |||
# edge unlabeled | |||
else: | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \ | |||
* vk_dict[(e1[1], e2[1])] | |||
else: # undirected | |||
if ds_attrs['edge_labeled']: | |||
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0: | |||
ke = edge_kernels['mix'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label], | |||
e1[2]['attributes'], e2[2]['attributes']) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \ | |||
* ek_temp * vk_dict[(e1[1], e2[1])] \ | |||
+ vk_dict[(e1[0], e2[1])] \ | |||
* ek_temp * vk_dict[(e1[1], e2[0])] | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], | |||
e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
# edge symb labeled | |||
else: | |||
ke = edge_kernels['symb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label]) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \ | |||
* ek_temp * vk_dict[(e1[1], e2[1])] \ | |||
+ vk_dict[(e1[0], e2[1])] \ | |||
* ek_temp * vk_dict[(e1[1], e2[0])] | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], | |||
e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
else: | |||
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0: | |||
ke = edge_kernels['nsymb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes']) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \ | |||
* ek_temp * vk_dict[(e1[1], e2[1])] \ | |||
+ vk_dict[(e1[0], e2[1])] \ | |||
* ek_temp * vk_dict[(e1[1], e2[0])] | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], | |||
e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
# edge unlabeled | |||
else: | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \ | |||
* vk_dict[(e1[1], e2[1])] \ | |||
+ vk_dict[(e1[0], e2[1])] \ | |||
* vk_dict[(e1[1], e2[0])] | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], | |||
e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
else: # node unlabeled | |||
if ds_attrs['is_directed']: | |||
if ds_attrs['edge_labeled']: | |||
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0: | |||
ke = edge_kernels['mix'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label], | |||
e1[2]['attributes'], e2[2]['attributes']) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = ek_temp | |||
# edge symb labeled | |||
else: | |||
ke = edge_kernels['symb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label]) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = ek_temp | |||
else: | |||
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0: | |||
ke = edge_kernels['nsymb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes']) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = ek_temp | |||
# edge unlabeled | |||
else: | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = 1 | |||
else: # undirected | |||
if ds_attrs['edge_labeled']: | |||
                # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0: | |||
ke = edge_kernels['mix'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label], | |||
e1[2]['attributes'], e2[2]['attributes']) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = ek_temp | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], | |||
e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
# edge symb labeled | |||
else: | |||
ke = edge_kernels['symb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label]) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = ek_temp | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], | |||
e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
else: | |||
                # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0: | |||
ke = edge_kernels['nsymb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes']) | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = ek_temp | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], | |||
e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
# edge unlabeled | |||
else: | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], | |||
e1[1] * nx.number_of_nodes(g2) + e2[1]) | |||
w_times[w_idx] = 1 | |||
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]] | |||
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], | |||
e1[1] * nx.number_of_nodes(g2) + e2[0]) | |||
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]] | |||
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]] | |||
return w_times, w_dim |
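# Sketch of the index convention used throughout computeW above: the node pair
# (u, v) of the direct product graph of g1 and g2 is flattened to the single
# index u * |V(g2)| + v, so an edge pair (e1, e2) addresses the entry below.
def _demo_product_index(g1, g2, e1, e2):
    n2 = nx.number_of_nodes(g2)
    return (e1[0] * n2 + e2[0], e1[1] * n2 + e2[1])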
@@ -1,200 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Fri Dec 21 18:02:00 2018 | |||
@author: ljia | |||
""" | |||
import sys | |||
import time | |||
from itertools import product | |||
from functools import partial | |||
from multiprocessing import Pool | |||
from tqdm import tqdm | |||
import networkx as nx | |||
import numpy as np | |||
from gklearn.utils.utils import getSPGraph | |||
from gklearn.utils.graphdataset import get_dataset_attributes | |||
from gklearn.utils.parallel import parallel_gm | |||
sys.path.insert(0, "../") | |||
def spkernel(*args, | |||
node_label='atom', | |||
edge_weight=None, | |||
node_kernels=None, | |||
n_jobs=None): | |||
"""Calculate shortest-path kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
/ | |||
G1, G2 : NetworkX graphs | |||
2 graphs between which the kernel is calculated. | |||
node_label : string | |||
node attribute used as label. The default node label is atom. | |||
edge_weight : string | |||
Edge attribute name corresponding to the edge weight. | |||
node_kernels: dict | |||
A dictionary of kernel functions for nodes, including 3 items: 'symb' | |||
for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix' | |||
for both labels. The first 2 functions take two node labels as | |||
parameters, and the 'mix' function takes 4 parameters, a symbolic and a | |||
        non-symbolic label for each of the two nodes. Each label is given as a
        2-D array of shape (n_samples, n_features). Each function returns a
        number as the kernel value. Ignored when nodes are unlabeled.
Return | |||
------ | |||
Kmatrix : Numpy matrix | |||
        Kernel matrix, each element of which is the sp kernel between 2 graphs.
""" | |||
# pre-process | |||
Gn = args[0] if len(args) == 1 else [args[0], args[1]] | |||
weight = None | |||
if edge_weight is None: | |||
        print('\n No edge weight specified. Set all weights to 1.\n')
else: | |||
try: | |||
some_weight = list( | |||
nx.get_edge_attributes(Gn[0], edge_weight).values())[0] | |||
if isinstance(some_weight, (float, int)): | |||
weight = edge_weight | |||
            else:
                print(
                    '\n Edge weight with name %s is not a float or integer. Set all weights to 1.\n'
                    % edge_weight)
        except Exception:
            print(
                '\n Edge weight with name "%s" is not found in the edge attributes. Set all weights to 1.\n'
                % edge_weight)
ds_attrs = get_dataset_attributes( | |||
Gn, | |||
attr_names=['node_labeled', 'node_attr_dim', 'is_directed'], | |||
node_label=node_label) | |||
ds_attrs['node_attr_dim'] = 0 | |||
# remove graphs with no edges, as no sp can be found in their structures, | |||
# so the kernel between such a graph and itself will be zero. | |||
len_gn = len(Gn) | |||
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0] | |||
idx = [G[0] for G in Gn] | |||
Gn = [G[1] for G in Gn] | |||
if len(Gn) != len_gn: | |||
print('\n %d graphs are removed as they don\'t contain edges.\n' % | |||
(len_gn - len(Gn))) | |||
start_time = time.time() | |||
pool = Pool(n_jobs) | |||
# get shortest path graphs of Gn | |||
getsp_partial = partial(wrapper_getSPGraph, weight) | |||
itr = zip(Gn, range(0, len(Gn))) | |||
if len(Gn) < 100 * n_jobs: | |||
# # use default chunksize as pool.map when iterable is less than 100 | |||
# chunksize, extra = divmod(len(Gn), n_jobs * 4) | |||
# if extra: | |||
# chunksize += 1 | |||
chunksize = int(len(Gn) / n_jobs) + 1 | |||
else: | |||
chunksize = 100 | |||
for i, g in tqdm( | |||
pool.imap_unordered(getsp_partial, itr, chunksize), | |||
desc='getting sp graphs', file=sys.stdout): | |||
Gn[i] = g | |||
pool.close() | |||
pool.join() | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
    # ---- use pool.imap_unordered to parallelize and track progress. ----
def init_worker(gn_toshare): | |||
global G_gn | |||
G_gn = gn_toshare | |||
do_partial = partial(wrapper_sp_do, ds_attrs, node_label, node_kernels) | |||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||
glbv=(Gn,), n_jobs=n_jobs) | |||
run_time = time.time() - start_time | |||
print( | |||
"\n --- shortest path kernel matrix of size %d built in %s seconds ---" | |||
% (len(Gn), run_time)) | |||
return Kmatrix, run_time, idx | |||
def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels): | |||
kernel = 0 | |||
    # compute vertex kernels first, with the method borrowed from FCSP.
    vk_dict = {}  # vertex kernels dict
if ds_attrs['node_labeled']: | |||
        # node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0: | |||
kn = node_kernels['mix'] | |||
for n1, n2 in product( | |||
g1.nodes(data=True), g2.nodes(data=True)): | |||
vk_dict[(n1[0], n2[0])] = kn( | |||
n1[1][node_label], n2[1][node_label], | |||
n1[1]['attributes'], n2[1]['attributes']) | |||
# node symb labeled | |||
else: | |||
kn = node_kernels['symb'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label], | |||
n2[1][node_label]) | |||
else: | |||
        # node non-symb labeled
if ds_attrs['node_attr_dim'] > 0: | |||
kn = node_kernels['nsymb'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'], | |||
n2[1]['attributes']) | |||
# node unlabeled | |||
else: | |||
for e1, e2 in product( | |||
g1.edges(data=True), g2.edges(data=True)): | |||
if e1[2]['cost'] == e2[2]['cost']: | |||
kernel += 1 | |||
return kernel | |||
# compute graph kernels | |||
if ds_attrs['is_directed']: | |||
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)): | |||
if e1[2]['cost'] == e2[2]['cost']: | |||
                nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1], e2[1])]
kn1 = nk11 * nk22 | |||
kernel += kn1 | |||
else: | |||
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)): | |||
if e1[2]['cost'] == e2[2]['cost']: | |||
# each edge walk is counted twice, starting from both its extreme nodes. | |||
                nk11, nk12 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[0], e2[1])]
                nk21, nk22 = vk_dict[(e1[1], e2[0])], vk_dict[(e1[1], e2[1])]
kn1 = nk11 * nk22 | |||
kn2 = nk12 * nk21 | |||
kernel += kn1 + kn2 | |||
return kernel | |||
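# In the undirected branch above, each pair of edges with equal shortest-path
# cost contributes vk(u1, v1) * vk(u2, v2) + vk(u1, v2) * vk(u2, v1), i.e. the
# walk along each shortest-path edge is counted in both orientations.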
def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, spkernel_do(G_gn[i], G_gn[j], ds_attrs, node_label, node_kernels) | |||
def wrapper_getSPGraph(weight, itr_item): | |||
g = itr_item[0] | |||
i = itr_item[1] | |||
return i, getSPGraph(g, edge_weight=weight) |
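# Illustrative call of spkernel on two tiny node-labeled graphs (a sketch: the
# demo graphs, the label value 'C' and the delta node kernel below are
# assumptions, not library defaults).
def _demo_delta_kernel(l1, l2):
    # Kronecker delta on symbolic labels; module-level so it can be pickled.
    return 1. if l1 == l2 else 0.
def _demo_spkernel():
    g1, g2 = nx.path_graph(3), nx.cycle_graph(3)
    for g in (g1, g2):
        for n in g.nodes():
            g.node[n]['atom'] = 'C'  # same old-networkx accessor as the rest of this code
    kernels = {'symb': _demo_delta_kernel, 'nsymb': None, 'mix': None}
    Kmatrix, run_time, idx = spkernel(g1, g2, node_label='atom',
                                      node_kernels=kernels, n_jobs=1)
    return Kmatrix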
@@ -1,464 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Sun Dec 23 16:42:48 2018 | |||
@author: ljia | |||
""" | |||
import sys | |||
import time | |||
from itertools import combinations, product | |||
from functools import partial | |||
from multiprocessing import Pool | |||
from tqdm import tqdm | |||
import networkx as nx | |||
import numpy as np | |||
from gklearn.utils.graphdataset import get_dataset_attributes | |||
from gklearn.utils.parallel import parallel_gm | |||
sys.path.insert(0, "../") | |||
def structuralspkernel(*args, | |||
node_label='atom', | |||
edge_weight=None, | |||
edge_label='bond_type', | |||
node_kernels=None, | |||
edge_kernels=None, | |||
n_jobs=None): | |||
"""Calculate mean average structural shortest path kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
/ | |||
G1, G2 : NetworkX graphs | |||
2 graphs between which the kernel is calculated. | |||
node_label : string | |||
node attribute used as label. The default node label is atom. | |||
edge_weight : string | |||
Edge attribute name corresponding to the edge weight. | |||
edge_label : string | |||
edge attribute used as label. The default edge label is bond_type. | |||
node_kernels: dict | |||
A dictionary of kernel functions for nodes, including 3 items: 'symb' | |||
for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix' | |||
for both labels. The first 2 functions take two node labels as | |||
parameters, and the 'mix' function takes 4 parameters, a symbolic and a | |||
        non-symbolic label for each of the two nodes. Each label is given as a
        2-D array of shape (n_samples, n_features). Each function returns a number
as the kernel value. Ignored when nodes are unlabeled. | |||
edge_kernels: dict | |||
A dictionary of kernel functions for edges, including 3 items: 'symb' | |||
for symbolic edge labels, 'nsymb' for non-symbolic edge labels, 'mix' | |||
for both labels. The first 2 functions take two edge labels as | |||
parameters, and the 'mix' function takes 4 parameters, a symbolic and a | |||
        non-symbolic label for each of the two edges. Each label is given as a
        2-D array of shape (n_samples, n_features). Each function returns a number
as the kernel value. Ignored when edges are unlabeled. | |||
Return | |||
------ | |||
Kmatrix : Numpy matrix | |||
Kernel matrix, each element of which is the mean average structural | |||
        shortest path kernel between 2 graphs.
""" | |||
# pre-process | |||
Gn = args[0] if len(args) == 1 else [args[0], args[1]] | |||
weight = None | |||
if edge_weight is None: | |||
        print('\n No edge weight specified. Set all weights to 1.\n')
else: | |||
try: | |||
some_weight = list( | |||
nx.get_edge_attributes(Gn[0], edge_weight).values())[0] | |||
if isinstance(some_weight, (float, int)): | |||
weight = edge_weight | |||
            else:
                print(
                    '\n Edge weight with name %s is not a float or integer. Set all weights to 1.\n'
                    % edge_weight)
        except Exception:
            print(
                '\n Edge weight with name "%s" is not found in the edge attributes. Set all weights to 1.\n'
                % edge_weight)
ds_attrs = get_dataset_attributes( | |||
Gn, | |||
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', | |||
'edge_attr_dim', 'is_directed'], | |||
node_label=node_label, edge_label=edge_label) | |||
ds_attrs['node_attr_dim'] = 0 | |||
ds_attrs['edge_attr_dim'] = 0 | |||
start_time = time.time() | |||
# get shortest paths of each graph in Gn | |||
splist = [None] * len(Gn) | |||
pool = Pool(n_jobs) | |||
# get shortest path graphs of Gn | |||
getsp_partial = partial(wrapper_getSP, weight, ds_attrs['is_directed']) | |||
itr = zip(Gn, range(0, len(Gn))) | |||
if len(Gn) < 100 * n_jobs: | |||
chunksize = int(len(Gn) / n_jobs) + 1 | |||
else: | |||
chunksize = 100 | |||
# chunksize = 300 # int(len(list(itr)) / n_jobs) | |||
for i, sp in tqdm( | |||
pool.imap_unordered(getsp_partial, itr, chunksize), | |||
desc='getting shortest paths', | |||
file=sys.stdout): | |||
splist[i] = sp | |||
# time.sleep(10) | |||
pool.close() | |||
pool.join() | |||
# # get shortest paths of each graph in Gn | |||
# splist = [[] for _ in range(len(Gn))] | |||
# # get shortest path graphs of Gn | |||
# getsp_partial = partial(wrapper_getSP, weight, ds_attrs['is_directed']) | |||
# itr = zip(Gn, range(0, len(Gn))) | |||
# if len(Gn) < 1000 * n_jobs: | |||
# chunksize = int(len(Gn) / n_jobs) + 1 | |||
# else: | |||
# chunksize = 1000 | |||
# # chunksize = 300 # int(len(list(itr)) / n_jobs) | |||
# from contextlib import closing | |||
# with closing(Pool(n_jobs)) as pool: | |||
## for i, sp in tqdm( | |||
# res = pool.imap_unordered(getsp_partial, itr, 10) | |||
## desc='getting shortest paths', | |||
## file=sys.stdout): | |||
## splist[i] = sp | |||
## time.sleep(10) | |||
# pool.close() | |||
# pool.join() | |||
# ss = 0 | |||
# ss += sys.getsizeof(splist) | |||
# for spss in splist: | |||
# ss += sys.getsizeof(spss) | |||
# for spp in spss: | |||
# ss += sys.getsizeof(spp) | |||
# time.sleep(20) | |||
# # ---- direct running, normally use single CPU core. ---- | |||
# splist = [] | |||
# for g in tqdm(Gn, desc='getting sp graphs', file=sys.stdout): | |||
# splist.append(get_shortest_paths(g, weight, ds_attrs['is_directed'])) | |||
# # ---- only for the Fast Computation of Shortest Path Kernel (FCSP) | |||
# sp_ml = [0] * len(Gn) # shortest path matrices | |||
# for i in result_sp: | |||
# sp_ml[i[0]] = i[1] | |||
# edge_x_g = [[] for i in range(len(sp_ml))] | |||
# edge_y_g = [[] for i in range(len(sp_ml))] | |||
# edge_w_g = [[] for i in range(len(sp_ml))] | |||
# for idx, item in enumerate(sp_ml): | |||
# for i1 in range(len(item)): | |||
# for i2 in range(i1 + 1, len(item)): | |||
# if item[i1, i2] != np.inf: | |||
# edge_x_g[idx].append(i1) | |||
# edge_y_g[idx].append(i2) | |||
# edge_w_g[idx].append(item[i1, i2]) | |||
# print(len(edge_x_g[0])) | |||
# print(len(edge_y_g[0])) | |||
# print(len(edge_w_g[0])) | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
    # ---- use pool.imap_unordered to parallelize and track progress. ----
def init_worker(spl_toshare, gs_toshare): | |||
global G_spl, G_gs | |||
G_spl = spl_toshare | |||
G_gs = gs_toshare | |||
do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, | |||
node_kernels, edge_kernels) | |||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||
glbv=(splist, Gn), n_jobs=n_jobs) | |||
# # ---- use pool.imap_unordered to parallel and track progress. ---- | |||
# pool = Pool(n_jobs) | |||
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, | |||
# node_kernels, edge_kernels) | |||
# itr = zip(combinations_with_replacement(Gn, 2), | |||
# combinations_with_replacement(splist, 2), | |||
# combinations_with_replacement(range(0, len(Gn)), 2)) | |||
# len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | |||
# if len_itr < 1000 * n_jobs: | |||
# chunksize = int(len_itr / n_jobs) + 1 | |||
# else: | |||
# chunksize = 1000 | |||
# for i, j, kernel in tqdm( | |||
# pool.imap_unordered(do_partial, itr, chunksize), | |||
# desc='calculating kernels', | |||
# file=sys.stdout): | |||
# Kmatrix[i][j] = kernel | |||
# Kmatrix[j][i] = kernel | |||
# pool.close() | |||
# pool.join() | |||
# # ---- use pool.map to parallel. ---- | |||
# pool = Pool(n_jobs) | |||
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, | |||
# node_kernels, edge_kernels) | |||
# itr = zip(combinations_with_replacement(Gn, 2), | |||
# combinations_with_replacement(splist, 2), | |||
# combinations_with_replacement(range(0, len(Gn)), 2)) | |||
# for i, j, kernel in tqdm( | |||
# pool.map(do_partial, itr), desc='calculating kernels', | |||
# file=sys.stdout): | |||
# Kmatrix[i][j] = kernel | |||
# Kmatrix[j][i] = kernel | |||
# pool.close() | |||
# pool.join() | |||
# # ---- use pool.imap_unordered to parallel and track progress. ---- | |||
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label, | |||
# node_kernels, edge_kernels) | |||
# itr = zip(combinations_with_replacement(Gn, 2), | |||
# combinations_with_replacement(splist, 2), | |||
# combinations_with_replacement(range(0, len(Gn)), 2)) | |||
# len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | |||
# if len_itr < 1000 * n_jobs: | |||
# chunksize = int(len_itr / n_jobs) + 1 | |||
# else: | |||
# chunksize = 1000 | |||
# from contextlib import closing | |||
# with closing(Pool(n_jobs)) as pool: | |||
# for i, j, kernel in tqdm( | |||
# pool.imap_unordered(do_partial, itr, 1000), | |||
# desc='calculating kernels', | |||
# file=sys.stdout): | |||
# Kmatrix[i][j] = kernel | |||
# Kmatrix[j][i] = kernel | |||
# pool.close() | |||
# pool.join() | |||
# # ---- direct running, normally use single CPU core. ---- | |||
# from itertools import combinations_with_replacement | |||
# itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): | |||
# kernel = structuralspkernel_do(Gn[i], Gn[j], splist[i], splist[j], | |||
# ds_attrs, node_label, edge_label, node_kernels, edge_kernels) | |||
## if(kernel > 1): | |||
## print("error here ") | |||
# Kmatrix[i][j] = kernel | |||
# Kmatrix[j][i] = kernel | |||
run_time = time.time() - start_time | |||
print( | |||
"\n --- shortest path kernel matrix of size %d built in %s seconds ---" | |||
% (len(Gn), run_time)) | |||
return Kmatrix, run_time | |||
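# Illustrative call of structuralspkernel (a sketch; the demo graphs, label
# values and the delta kernel below are assumptions, not library defaults).
def _demo_delta_kernel(l1, l2):
    # Kronecker delta on symbolic labels; module-level so it can be pickled.
    return 1. if l1 == l2 else 0.
def _demo_structuralspkernel():
    g1, g2 = nx.path_graph(3), nx.path_graph(3)
    for g in (g1, g2):
        for n in g.nodes():
            g.node[n]['atom'] = 'C'
        for u, v in g.edges():
            g[u][v]['bond_type'] = 0
    kernels = {'symb': _demo_delta_kernel, 'nsymb': None, 'mix': None}
    Kmatrix, run_time = structuralspkernel(g1, g2, node_kernels=kernels,
                                           edge_kernels=kernels, n_jobs=1)
    return Kmatrix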
def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label, | |||
node_kernels, edge_kernels): | |||
kernel = 0 | |||
    # First, compute vertex kernels, with the method borrowed from FCSP.
    vk_dict = {}  # vertex kernels dict
if ds_attrs['node_labeled']: | |||
        # node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0: | |||
kn = node_kernels['mix'] | |||
for n1, n2 in product( | |||
g1.nodes(data=True), g2.nodes(data=True)): | |||
vk_dict[(n1[0], n2[0])] = kn( | |||
n1[1][node_label], n2[1][node_label], | |||
n1[1]['attributes'], n2[1]['attributes']) | |||
# node symb labeled | |||
else: | |||
kn = node_kernels['symb'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label], | |||
n2[1][node_label]) | |||
else: | |||
        # node non-symb labeled
if ds_attrs['node_attr_dim'] > 0: | |||
kn = node_kernels['nsymb'] | |||
for n1 in g1.nodes(data=True): | |||
for n2 in g2.nodes(data=True): | |||
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'], | |||
n2[1]['attributes']) | |||
# node unlabeled | |||
else: | |||
pass | |||
    # Then, compute kernels between all pairs of edges, an idea which extends
    # FCSP. It suits sparse graphs, which covers most cases we encountered.
    # For dense graphs, this would be slow.
ek_dict = {} # dict of edge kernels | |||
if ds_attrs['edge_labeled']: | |||
        # edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0: | |||
ke = edge_kernels['mix'] | |||
for e1, e2 in product( | |||
g1.edges(data=True), g2.edges(data=True)): | |||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label], | |||
e1[2]['attributes'], e2[2]['attributes']) | |||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp | |||
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp | |||
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp | |||
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp | |||
# edge symb labeled | |||
else: | |||
ke = edge_kernels['symb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label]) | |||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp | |||
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp | |||
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp | |||
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp | |||
else: | |||
        # edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0: | |||
ke = edge_kernels['nsymb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
                    ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp | |||
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp | |||
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp | |||
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp | |||
# edge unlabeled | |||
else: | |||
pass | |||
# compute graph kernels | |||
if vk_dict: | |||
if ek_dict: | |||
for p1, p2 in product(spl1, spl2): | |||
if len(p1) == len(p2): | |||
kpath = vk_dict[(p1[0], p2[0])] | |||
if kpath: | |||
for idx in range(1, len(p1)): | |||
kpath *= vk_dict[(p1[idx], p2[idx])] * \ | |||
ek_dict[((p1[idx-1], p1[idx]), | |||
(p2[idx-1], p2[idx]))] | |||
if not kpath: | |||
break | |||
kernel += kpath # add up kernels of all paths | |||
else: | |||
for p1, p2 in product(spl1, spl2): | |||
if len(p1) == len(p2): | |||
kpath = vk_dict[(p1[0], p2[0])] | |||
if kpath: | |||
for idx in range(1, len(p1)): | |||
kpath *= vk_dict[(p1[idx], p2[idx])] | |||
if not kpath: | |||
break | |||
kernel += kpath # add up kernels of all paths | |||
else: | |||
if ek_dict: | |||
for p1, p2 in product(spl1, spl2): | |||
if len(p1) == len(p2): | |||
if len(p1) == 0: | |||
kernel += 1 | |||
else: | |||
kpath = 1 | |||
for idx in range(0, len(p1) - 1): | |||
kpath *= ek_dict[((p1[idx], p1[idx+1]), | |||
(p2[idx], p2[idx+1]))] | |||
if not kpath: | |||
break | |||
kernel += kpath # add up kernels of all paths | |||
else: | |||
for p1, p2 in product(spl1, spl2): | |||
if len(p1) == len(p2): | |||
kernel += 1 | |||
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average | |||
# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation | |||
# # compute vertex kernel matrix | |||
# try: | |||
# vk_mat = np.zeros((nx.number_of_nodes(g1), | |||
# nx.number_of_nodes(g2))) | |||
# g1nl = enumerate(g1.nodes(data=True)) | |||
# g2nl = enumerate(g2.nodes(data=True)) | |||
# for i1, n1 in g1nl: | |||
# for i2, n2 in g2nl: | |||
# vk_mat[i1][i2] = kn( | |||
# n1[1][node_label], n2[1][node_label], | |||
# [n1[1]['attributes']], [n2[1]['attributes']]) | |||
# range1 = range(0, len(edge_w_g[i])) | |||
# range2 = range(0, len(edge_w_g[j])) | |||
# for i1 in range1: | |||
# x1 = edge_x_g[i][i1] | |||
# y1 = edge_y_g[i][i1] | |||
# w1 = edge_w_g[i][i1] | |||
# for i2 in range2: | |||
# x2 = edge_x_g[j][i2] | |||
# y2 = edge_y_g[j][i2] | |||
# w2 = edge_w_g[j][i2] | |||
# ke = (w1 == w2) | |||
# if ke > 0: | |||
# kn1 = vk_mat[x1][x2] * vk_mat[y1][y2] | |||
# kn2 = vk_mat[x1][y2] * vk_mat[y1][x2] | |||
# Kmatrix += kn1 + kn2 | |||
return kernel | |||
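# For two equal-length paths p and q, the product accumulated above is
#     k(p, q) = vk(p_0, q_0) * prod_{i >= 1} vk(p_i, q_i) * ek((p_{i-1}, p_i), (q_{i-1}, q_i)),
# and the final kernel is the mean over all len(spl1) * len(spl2) path pairs.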
def wrapper_ssp_do(ds_attrs, node_label, edge_label, node_kernels, | |||
edge_kernels, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, structuralspkernel_do(G_gs[i], G_gs[j], G_spl[i], G_spl[j], | |||
ds_attrs, node_label, edge_label, | |||
node_kernels, edge_kernels) | |||
def get_shortest_paths(G, weight, directed): | |||
"""Get all shortest paths of a graph. | |||
Parameters | |||
---------- | |||
G : NetworkX graphs | |||
The graphs whose paths are calculated. | |||
weight : string/None | |||
edge attribute used as weight to calculate the shortest path. | |||
directed: boolean | |||
Whether graph is directed. | |||
Return | |||
------ | |||
sp : list of list | |||
List of shortest paths of the graph, where each path is represented by a list of nodes. | |||
""" | |||
sp = [] | |||
for n1, n2 in combinations(G.nodes(), 2): | |||
try: | |||
spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=weight)) | |||
except nx.NetworkXNoPath: # nodes not connected | |||
# sp.append([]) | |||
pass | |||
else: | |||
sp += spltemp | |||
            # each path is counted twice, traversed from both of its end nodes.
if not directed: | |||
sp += [sptemp[::-1] for sptemp in spltemp] | |||
# add single nodes as length 0 paths. | |||
sp += [[n] for n in G.nodes()] | |||
return sp | |||
def wrapper_getSP(weight, directed, itr_item): | |||
g = itr_item[0] | |||
i = itr_item[1] | |||
return i, get_shortest_paths(g, weight, directed) |
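# Sketch: the shortest paths of a 3-node path graph, unweighted and
# undirected, as enumerated by get_shortest_paths above.
def _demo_get_shortest_paths():
    sp = get_shortest_paths(nx.path_graph(3), None, False)
    # sp contains each shortest path in both directions plus the single-node
    # length-0 paths: [0, 1], [1, 0], [0, 1, 2], [2, 1, 0], [1, 2], [2, 1],
    # [0], [1], [2].
    return sp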
@@ -1,147 +0,0 @@ | |||
""" | |||
@author: linlin <jajupmochi@gmail.com> | |||
@references: | |||
[1] Tamás Horváth, Thomas Gärtner, and Stefan Wrobel. Cyclic pattern kernels for predictive graph mining. In Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining, pages 158–167. ACM, 2004. | |||
[2] Hopcroft, J.; Tarjan, R. (1973). “Efficient algorithms for graph manipulation”. Communications of the ACM 16: 372–378. doi:10.1145/362248.362272. | |||
[3] Finding all the elementary circuits of a directed graph. D. B. Johnson, SIAM Journal on Computing 4, no. 1, 77-84, 1975. http://dx.doi.org/10.1137/0204007 | |||
""" | |||
import sys | |||
import pathlib | |||
sys.path.insert(0, "../") | |||
import time | |||
import networkx as nx | |||
import numpy as np | |||
from tqdm import tqdm | |||
def cyclicpatternkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = None): | |||
"""Calculate cyclic pattern graph kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
/ | |||
G1, G2 : NetworkX graphs | |||
2 graphs between which the kernel is calculated. | |||
node_label : string | |||
node attribute used as label. The default node label is atom. | |||
edge_label : string | |||
edge attribute used as label. The default edge label is bond_type. | |||
labeled : boolean | |||
Whether the graphs are labeled. The default is True. | |||
    cycle_bound : integer
        Upper bound on the total number of simple cycles; when exceeded, an empty pattern list is returned.
Return | |||
------ | |||
Kmatrix : Numpy matrix | |||
        Kernel matrix, each element of which is the cyclic pattern kernel between 2 graphs.
""" | |||
Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
start_time = time.time() | |||
    # get all cyclic and tree patterns of all graphs before calculating kernels to save time, but this may consume a lot of memory for large datasets.
all_patterns = [ get_patterns(Gn[i], node_label=node_label, edge_label = edge_label, labeled = labeled, cycle_bound = cycle_bound) | |||
for i in tqdm(range(0, len(Gn)), desc='retrieve patterns', file=sys.stdout) ] | |||
for i in tqdm(range(0, len(Gn)), desc='calculate kernels', file=sys.stdout): | |||
for j in range(i, len(Gn)): | |||
Kmatrix[i][j] = _cyclicpatternkernel_do(all_patterns[i], all_patterns[j]) | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
run_time = time.time() - start_time | |||
print("\n --- kernel matrix of cyclic pattern kernel of size %d built in %s seconds ---" % (len(Gn), run_time)) | |||
return Kmatrix, run_time | |||
def _cyclicpatternkernel_do(patterns1, patterns2): | |||
"""Calculate path graph kernels up to depth d between 2 graphs. | |||
Parameters | |||
---------- | |||
paths1, paths2 : list | |||
List of paths in 2 graphs, where for unlabeled graphs, each path is represented by a list of nodes; while for labeled graphs, each path is represented by a string consists of labels of nodes and edges on that path. | |||
k_func : function | |||
A kernel function used using different notions of fingerprint similarity. | |||
node_label : string | |||
node attribute used as label. The default node label is atom. | |||
edge_label : string | |||
edge attribute used as label. The default edge label is bond_type. | |||
labeled : boolean | |||
Whether the graphs are labeled. The default is True. | |||
Return | |||
------ | |||
kernel : float | |||
Treelet Kernel between 2 graphs. | |||
""" | |||
return len(set(patterns1) & set(patterns2)) | |||
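# Example (an illustration): with patterns1 = ['CsC', 'CdO'] and
# patterns2 = ['CsC'], the kernel is |{'CsC'}| = 1.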
def get_patterns(G, node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = None): | |||
"""Find all cyclic and tree patterns in a graph. | |||
Parameters | |||
---------- | |||
G : NetworkX graphs | |||
The graph in which paths are searched. | |||
    cycle_bound : integer
        Upper bound on the total number of simple cycles; when exceeded, an empty pattern list is returned.
node_label : string | |||
node attribute used as label. The default node label is atom. | |||
edge_label : string | |||
edge attribute used as label. The default edge label is bond_type. | |||
labeled : boolean | |||
Whether the graphs are labeled. The default is True. | |||
Return | |||
------ | |||
    patterns : list
        List of canonical keys of the cyclic patterns retrieved, where each pattern is represented by a string consisting of the labels of the nodes and edges on the cycle.
""" | |||
number_simplecycles = 0 | |||
bridges = nx.Graph() | |||
patterns = [] | |||
    bicomponents = nx.biconnected_component_subgraphs(G)  # all biconnected components of G. This function uses the algorithm of reference [2], which (presumably) is slightly different from the one used in paper [1].
for subgraph in bicomponents: | |||
if nx.number_of_edges(subgraph) > 1: | |||
            simple_cycles = list(nx.simple_cycles(subgraph.to_directed()))  # all simple cycles in this biconnected component. This function uses the algorithm of reference [3], which has time complexity O((n+e)(N+1)) for n nodes, e edges and N simple cycles, and might be slower than the algorithm applied in paper [1].
            if cycle_bound is not None and len(simple_cycles) > cycle_bound - number_simplecycles:  # in paper [1], when applying another algorithm (subroutine RT), this becomes len(simple_cycles) == cycle_bound - number_simplecycles + 1; check again.
return [] | |||
else: | |||
# calculate canonical representation for each simple cycle | |||
all_canonkeys = [] | |||
for cycle in simple_cycles: | |||
canonlist = [ G.node[node][node_label] + G[node][cycle[cycle.index(node) + 1]][edge_label] for node in cycle[:-1] ] | |||
canonkey = ''.join(canonlist) | |||
canonkey = canonkey if canonkey < canonkey[::-1] else canonkey[::-1] | |||
for i in range(1, len(cycle[:-1])): | |||
canonlist = [ G.node[node][node_label] + G[node][cycle[cycle.index(node) + 1]][edge_label] for node in cycle[i:-1] + cycle[:i] ] | |||
canonkey_t = ''.join(canonlist) | |||
canonkey_t = canonkey_t if canonkey_t < canonkey_t[::-1] else canonkey_t[::-1] | |||
canonkey = canonkey if canonkey < canonkey_t else canonkey_t | |||
all_canonkeys.append(canonkey) | |||
patterns = list(set(patterns) | set(all_canonkeys)) | |||
number_simplecycles += len(simple_cycles) | |||
else: | |||
bridges.add_edges_from(subgraph.edges(data=True)) | |||
# calculate canonical representation for each connected component in bridge set | |||
components = list(nx.connected_component_subgraphs(bridges)) # all connected components in the bridge | |||
    tree_patterns = []
    for tree in components:
        break  # @todo: canonical representations of tree patterns are not implemented yet.
    # patterns += pi(bridges)
return patterns |
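# Sketch of the canonical-key idea used in get_patterns above: among all
# rotations of the concatenated label string of a cycle and their reversals,
# keep the lexicographically smallest one. The argument 'labels' is an assumed
# list of node-plus-edge label strings read along the cycle.
def _demo_canonkey(labels):
    keys = []
    for i in range(len(labels)):
        s = ''.join(labels[i:] + labels[:i])  # one rotation of the cycle
        keys.append(min(s, s[::-1]))  # a cycle can be read in both directions
    return min(keys)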
@@ -1,234 +0,0 @@ | |||
""" | |||
@author: linlin | |||
@references: Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. InESANN 2007 Apr 25 (pp. 355-360). | |||
""" | |||
import sys | |||
import pathlib | |||
sys.path.insert(0, "../") | |||
import time | |||
import itertools | |||
from tqdm import tqdm | |||
import networkx as nx | |||
import numpy as np | |||
from gklearn.kernels.deltaKernel import deltakernel | |||
from gklearn.utils.graphdataset import get_dataset_attributes | |||
def pathkernel(*args, node_label='atom', edge_label='bond_type'): | |||
"""Calculate mean average path kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
/ | |||
G1, G2 : NetworkX graphs | |||
2 graphs between which the kernel is calculated. | |||
node_label : string | |||
node attribute used as label. The default node label is atom. | |||
edge_label : string | |||
edge attribute used as label. The default edge label is bond_type. | |||
Return | |||
------ | |||
Kmatrix/kernel : Numpy matrix/float | |||
        Kernel matrix, each element of which is the path kernel between 2 graphs. / Path kernel between 2 graphs.
""" | |||
Gn = args[0] if len(args) == 1 else [args[0], args[1]] | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
ds_attrs = get_dataset_attributes( | |||
Gn, | |||
attr_names=['node_labeled', 'edge_labeled', 'is_directed'], | |||
node_label=node_label, | |||
edge_label=edge_label) | |||
try: | |||
some_weight = list(nx.get_edge_attributes(Gn[0], | |||
edge_label).values())[0] | |||
        weight = edge_label if isinstance(some_weight, (float, int)) else None
    except Exception:
weight = None | |||
start_time = time.time() | |||
splist = [ | |||
get_shortest_paths(Gn[i], weight) for i in tqdm( | |||
range(0, len(Gn)), desc='getting shortest paths', file=sys.stdout) | |||
] | |||
pbar = tqdm( | |||
total=((len(Gn) + 1) * len(Gn) / 2), | |||
desc='calculating kernels', | |||
file=sys.stdout) | |||
if ds_attrs['node_labeled']: | |||
if ds_attrs['edge_labeled']: | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
Kmatrix[i][j] = _pathkernel_do_l(Gn[i], Gn[j], splist[i], | |||
splist[j], node_label, | |||
edge_label) | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
pbar.update(1) | |||
else: | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
Kmatrix[i][j] = _pathkernel_do_nl(Gn[i], Gn[j], splist[i], | |||
splist[j], node_label) | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
pbar.update(1) | |||
else: | |||
if ds_attrs['edge_labeled']: | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
Kmatrix[i][j] = _pathkernel_do_el(Gn[i], Gn[j], splist[i], | |||
splist[j], edge_label) | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
pbar.update(1) | |||
else: | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
Kmatrix[i][j] = _pathkernel_do_unl(Gn[i], Gn[j], splist[i], | |||
splist[j]) | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
pbar.update(1) | |||
run_time = time.time() - start_time | |||
print( | |||
"\n --- mean average path kernel matrix of size %d built in %s seconds ---" | |||
% (len(Gn), run_time)) | |||
return Kmatrix, run_time | |||
def _pathkernel_do_l(G1, G2, sp1, sp2, node_label, edge_label): | |||
"""Calculate mean average path kernel between 2 fully-labeled graphs. | |||
Parameters | |||
---------- | |||
G1, G2 : NetworkX graphs | |||
2 graphs between which the kernel is calculated. | |||
sp1, sp2 : list of list | |||
List of shortest paths of 2 graphs, where each path is represented by a list of nodes. | |||
node_label : string | |||
node attribute used as label. The default node label is atom. | |||
edge_label : string | |||
edge attribute used as label. The default edge label is bond_type. | |||
Return | |||
------ | |||
kernel : float | |||
Path Kernel between 2 graphs. | |||
""" | |||
# calculate kernel | |||
kernel = 0 | |||
# if len(sp1) == 0 or len(sp2) == 0: | |||
# return 0 # @todo: should it be zero? | |||
for path1 in sp1: | |||
for path2 in sp2: | |||
if len(path1) == len(path2): | |||
                kernel_path = (G1.node[path1[0]][node_label] ==
                               G2.node[path2[0]][node_label])
                if kernel_path:
                    for i in range(1, len(path1)):
                        # kernel_path stays 1 only if all corresponding nodes and edges in the 2 paths have the same labels, otherwise it becomes 0.
                        if (G1[path1[i - 1]][path1[i]][edge_label] != G2[path2[i - 1]][path2[i]][edge_label]
                                or G1.node[path1[i]][node_label] != G2.node[path2[i]][node_label]):
kernel_path = 0 | |||
break | |||
kernel += kernel_path # add up kernels of all paths | |||
kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average | |||
return kernel | |||
def _pathkernel_do_nl(G1, G2, sp1, sp2, node_label): | |||
"""Calculate mean average path kernel between 2 node-labeled graphs. | |||
""" | |||
# calculate kernel | |||
kernel = 0 | |||
# if len(sp1) == 0 or len(sp2) == 0: | |||
# return 0 # @todo: should it be zero? | |||
for path1 in sp1: | |||
for path2 in sp2: | |||
if len(path1) == len(path2): | |||
kernel_path = 1 | |||
for i in range(0, len(path1)): | |||
                    # kernel_path stays 1 only if all corresponding nodes in the 2 paths have the same labels, otherwise it becomes 0.
if G1.node[path1[i]][node_label] != G2.node[path2[i]][node_label]: | |||
kernel_path = 0 | |||
break | |||
kernel += kernel_path | |||
kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average | |||
return kernel | |||
def _pathkernel_do_el(G1, G2, sp1, sp2, edge_label): | |||
"""Calculate mean average path kernel between 2 edge-labeled graphs. | |||
""" | |||
# calculate kernel | |||
kernel = 0 | |||
for path1 in sp1: | |||
for path2 in sp2: | |||
if len(path1) == len(path2): | |||
if len(path1) == 0: | |||
kernel += 1 | |||
else: | |||
kernel_path = 1 | |||
for i in range(0, len(path1) - 1): | |||
                        # kernel_path stays 1 only if all corresponding edges in the 2 paths have the same labels, otherwise it becomes 0.
                        if (G1[path1[i]][path1[i + 1]][edge_label] !=
                                G2[path2[i]][path2[i + 1]][edge_label]):
kernel_path = 0 | |||
break | |||
kernel += kernel_path | |||
kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average | |||
return kernel | |||
def _pathkernel_do_unl(G1, G2, sp1, sp2): | |||
"""Calculate mean average path kernel between 2 unlabeled graphs. | |||
""" | |||
# calculate kernel | |||
kernel = 0 | |||
for path1 in sp1: | |||
for path2 in sp2: | |||
if len(path1) == len(path2): | |||
kernel += 1 | |||
kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average | |||
return kernel | |||
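# Equivalently, writing c_G(l) for the number of shortest paths of length l in
# G, the unlabeled kernel above equals
#     sum_l c_G1(l) * c_G2(l) / (len(sp1) * len(sp2)).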
def get_shortest_paths(G, weight): | |||
"""Get all shortest paths of a graph. | |||
Parameters | |||
---------- | |||
G : NetworkX graphs | |||
The graphs whose paths are calculated. | |||
weight : string/None | |||
edge attribute used as weight to calculate the shortest path. | |||
Return | |||
------ | |||
sp : list of list | |||
List of shortest paths of the graph, where each path is represented by a list of nodes. | |||
""" | |||
sp = [] | |||
for n1, n2 in itertools.combinations(G.nodes(), 2): | |||
try: | |||
sp.append(nx.shortest_path(G, n1, n2, weight=weight)) | |||
except nx.NetworkXNoPath: # nodes not connected | |||
sp.append([]) | |||
# add single nodes as length 0 paths. | |||
sp += [[n] for n in G.nodes()] | |||
return sp |
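# Illustrative call of pathkernel on two tiny fully-labeled graphs (a sketch;
# the label values 'C' and bond_type 1 are assumptions).
def _demo_pathkernel():
    g1, g2 = nx.path_graph(3), nx.path_graph(3)
    for g in (g1, g2):
        for n in g.nodes():
            g.node[n]['atom'] = 'C'
        for u, v in g.edges():
            g[u][v]['bond_type'] = 1
    Kmatrix, run_time = pathkernel(g1, g2)
    return Kmatrix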
@@ -1,241 +0,0 @@ | |||
""" | |||
@author: linlin | |||
@references: Pierre Mahé and Jean-Philippe Vert. Graph kernels based on tree patterns for molecules. Machine learning, 75(1):3–35, 2009. | |||
""" | |||
import sys | |||
import pathlib | |||
sys.path.insert(0, "../") | |||
import time | |||
import networkx as nx | |||
import numpy as np | |||
from collections import Counter | |||
from tqdm import tqdm | |||
tqdm.monitor_interval = 0 | |||
from gklearn.utils.utils import untotterTransformation | |||
def treepatternkernel(*args, | |||
node_label='atom', | |||
edge_label='bond_type', | |||
labeled=True, | |||
kernel_type='untiln', | |||
lmda=1, | |||
h=1, | |||
remove_totters=True): | |||
"""Calculate tree pattern graph kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
/ | |||
G1, G2 : NetworkX graphs | |||
2 graphs between which the kernel is calculated. | |||
node_label : string | |||
node attribute used as label. The default node label is atom. | |||
edge_label : string | |||
edge attribute used as label. The default edge label is bond_type. | |||
labeled : boolean | |||
Whether the graphs are labeled. The default is True. | |||
kernel_type : string | |||
Type of tree pattern kernel, could be 'untiln', 'size' or 'branching'. | |||
lmda : float | |||
        Weight to decide whether linear patterns or tree patterns of increasing complexity are favored.
h : integer | |||
The upper bound of the height of tree patterns. | |||
remove_totters : boolean | |||
whether to remove totters. The default value is True. | |||
Return | |||
------ | |||
Kmatrix: Numpy matrix | |||
        Kernel matrix, each element of which is the tree pattern graph kernel between 2 graphs.
""" | |||
if h < 1: | |||
        raise Exception('h > 0 is required.')
kernel_type = kernel_type.lower() | |||
# arrange all graphs in a list | |||
Gn = args[0] if len(args) == 1 else [args[0], args[1]] | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
h = int(h) | |||
start_time = time.time() | |||
if remove_totters: | |||
Gn = [untotterTransformation(G, node_label, edge_label) for G in Gn] | |||
pbar = tqdm( | |||
total=(1 + len(Gn)) * len(Gn) / 2, | |||
desc='calculate kernels', | |||
file=sys.stdout) | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
Kmatrix[i][j] = _treepatternkernel_do(Gn[i], Gn[j], node_label, | |||
edge_label, labeled, | |||
kernel_type, lmda, h) | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
pbar.update(1) | |||
run_time = time.time() - start_time | |||
print( | |||
"\n --- kernel matrix of tree pattern kernel of size %d built in %s seconds ---" | |||
% (len(Gn), run_time)) | |||
return Kmatrix, run_time | |||
def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type, | |||
lmda, h): | |||
"""Calculate tree pattern graph kernels between 2 graphs. | |||
    Parameters
    ----------
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        node attribute used as label. The default node label is atom.
    edge_label : string
        edge attribute used as label. The default edge label is bond_type.
    labeled : boolean
        Whether the graphs are labeled. The default is True.
    kernel_type : string
        Type of tree pattern kernel, could be 'untiln', 'size' or 'branching'.
    lmda : float
        Weight to decide whether linear patterns or tree patterns of increasing complexity are favored.
    h : integer
        The upper bound of the height of tree patterns.
    Return
    ------
    kernel : float
        Tree pattern kernel between 2 graphs.
    """
def matchingset(n1, n2): | |||
"""Get neiborhood matching set of two nodes in two graphs. | |||
""" | |||
def mset_com(allpairs, length): | |||
"""Find all sets R of pairs by combination. | |||
""" | |||
if length == 1: | |||
mset = [[pair] for pair in allpairs] | |||
return mset, mset | |||
else: | |||
mset, mset_l = mset_com(allpairs, length - 1) | |||
mset_tmp = [] | |||
for pairset in mset_l: # for each pair set of length l-1 | |||
                    nodeset1 = [pair[0] for pair in pairset]  # nodes already in the set
                    nodeset2 = [pair[1] for pair in pairset]
for pair in allpairs: | |||
if (pair[0] not in nodeset1) and ( | |||
pair[1] not in nodeset2 | |||
): # nodes in R should be unique | |||
mset_tmp.append( | |||
pairset + [pair] | |||
) # add this pair to the pair set of length l-1, constructing a new set of length l | |||
nodeset1.append(pair[0]) | |||
nodeset2.append(pair[1]) | |||
mset.extend(mset_tmp) | |||
return mset, mset_tmp | |||
        allpairs = []  # all pairs that have the same node labels and edge labels
for neighbor1 in G1[n1]: | |||
for neighbor2 in G2[n2]: | |||
if G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label] \ | |||
and G1[n1][neighbor1][edge_label] == G2[n2][neighbor2][edge_label]: | |||
allpairs.append([neighbor1, neighbor2]) | |||
        if allpairs:
mset, _ = mset_com(allpairs, len(allpairs)) | |||
else: | |||
mset = [] | |||
return mset | |||
def kernel_h(h): | |||
"""Calculate kernel of h-th iteration. | |||
""" | |||
if kernel_type == 'untiln': | |||
            all_kh = { str(n1) + '.' + str(n2) : (G1.node[n1][node_label] == G2.node[n2][node_label]) \
                       for n1 in G1.nodes() for n2 in G2.nodes() }  # kernels between all pairs of nodes with h = 1
all_kh_tmp = all_kh.copy() | |||
for i in range(2, h + 1): | |||
for n1 in G1.nodes(): | |||
for n2 in G2.nodes(): | |||
kh = 0 | |||
mset = all_msets[str(n1) + '.' + str(n2)] | |||
for R in mset: | |||
kh_tmp = 1 | |||
for pair in R: | |||
kh_tmp *= lmda * all_kh[str(pair[0]) | |||
+ '.' + str(pair[1])] | |||
kh += 1 / lmda * kh_tmp | |||
kh = (G1.node[n1][node_label] == G2.node[n2][ | |||
node_label]) * (1 + kh) | |||
all_kh_tmp[str(n1) + '.' + str(n2)] = kh | |||
all_kh = all_kh_tmp.copy() | |||
elif kernel_type == 'size': | |||
            all_kh = { str(n1) + '.' + str(n2) : lmda * (G1.node[n1][node_label] == G2.node[n2][node_label]) \
                       for n1 in G1.nodes() for n2 in G2.nodes() }  # kernels between all pairs of nodes with h = 1
all_kh_tmp = all_kh.copy() | |||
for i in range(2, h + 1): | |||
for n1 in G1.nodes(): | |||
for n2 in G2.nodes(): | |||
kh = 0 | |||
mset = all_msets[str(n1) + '.' + str(n2)] | |||
for R in mset: | |||
kh_tmp = 1 | |||
for pair in R: | |||
kh_tmp *= lmda * all_kh[str(pair[0]) | |||
+ '.' + str(pair[1])] | |||
kh += kh_tmp | |||
kh *= lmda * ( | |||
G1.node[n1][node_label] == G2.node[n2][node_label]) | |||
all_kh_tmp[str(n1) + '.' + str(n2)] = kh | |||
all_kh = all_kh_tmp.copy() | |||
elif kernel_type == 'branching': | |||
            all_kh = { str(n1) + '.' + str(n2) : (G1.node[n1][node_label] == G2.node[n2][node_label]) \
                       for n1 in G1.nodes() for n2 in G2.nodes() }  # kernels between all pairs of nodes with h = 1
all_kh_tmp = all_kh.copy() | |||
for i in range(2, h + 1): | |||
for n1 in G1.nodes(): | |||
for n2 in G2.nodes(): | |||
kh = 0 | |||
mset = all_msets[str(n1) + '.' + str(n2)] | |||
for R in mset: | |||
kh_tmp = 1 | |||
for pair in R: | |||
kh_tmp *= lmda * all_kh[str(pair[0]) | |||
+ '.' + str(pair[1])] | |||
kh += 1 / lmda * kh_tmp | |||
kh *= ( | |||
G1.node[n1][node_label] == G2.node[n2][node_label]) | |||
all_kh_tmp[str(n1) + '.' + str(n2)] = kh | |||
all_kh = all_kh_tmp.copy() | |||
return all_kh | |||
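    # The 'untiln' recursion above implements
    #     k_h(u, v) = [l(u) == l(v)] * (1 + sum_{R in M(u, v)} (1 / lmda) *
    #                                   prod_{(u', v') in R} lmda * k_{h-1}(u', v')),
    # where M(u, v) is the neighborhood matching set built by matchingset().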
    # calculate the matching sets for every pair of nodes first, to avoid recalculating them in every iteration.
all_msets = ({ str(node1) + '.' + str(node2) : matchingset(node1, node2) for node1 in G1.nodes() \ | |||
for node2 in G2.nodes() } if h > 1 else {}) | |||
all_kh = kernel_h(h) | |||
kernel = sum(all_kh.values()) | |||
if kernel_type == 'size': | |||
kernel = kernel / (lmda**h) | |||
return kernel |
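# Illustrative call of treepatternkernel (a sketch; the demo graphs, labels
# and parameter values below are assumptions chosen for illustration).
def _demo_treepatternkernel():
    g1, g2 = nx.path_graph(3), nx.path_graph(3)
    for g in (g1, g2):
        for n in g.nodes():
            g.node[n]['atom'] = 'C'
        for u, v in g.edges():
            g[u][v]['bond_type'] = 1
    Kmatrix, run_time = treepatternkernel(g1, g2, kernel_type='untiln',
                                          lmda=1, h=2, remove_totters=False)
    return Kmatrix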
@@ -1,403 +0,0 @@ | |||
""" | |||
@author: linlin | |||
@references: | |||
[1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 2011;12(Sep):2539-61. | |||
""" | |||
import sys | |||
import pathlib | |||
from collections import Counter | |||
sys.path.insert(0, "../") | |||
import networkx as nx | |||
import numpy as np | |||
import time | |||
from gklearn.kernels.pathKernel import pathkernel | |||
def weisfeilerlehmankernel(*args, node_label = 'atom', edge_label = 'bond_type', height = 0, base_kernel = 'subtree'): | |||
"""Calculate Weisfeiler-Lehman kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
/ | |||
G1, G2 : NetworkX graphs | |||
2 graphs between which the kernel is calculated. | |||
node_label : string | |||
node attribute used as label. The default node label is atom. | |||
edge_label : string | |||
edge attribute used as label. The default edge label is bond_type. | |||
height : int | |||
subtree height | |||
base_kernel : string | |||
        base kernel used in each iteration of the WL kernel. The default base kernel is the subtree kernel. For a user-defined kernel, base_kernel is the base kernel function itself, applied in each iteration of the WL kernel; it must return a Numpy matrix, each element of which is the user-defined kernel between 2 graphs.
Return | |||
------ | |||
Kmatrix : Numpy matrix | |||
        Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
Notes | |||
----- | |||
    This function supports the WL subtree kernel, the WL shortest path kernel, the WL edge kernel and user-defined base kernels.
""" | |||
    if isinstance(base_kernel, str):
        base_kernel = base_kernel.lower()
Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
start_time = time.time() | |||
# for WL subtree kernel | |||
if base_kernel == 'subtree': | |||
Kmatrix = _wl_subtreekernel_do(args[0], node_label, edge_label, height) | |||
# for WL shortest path kernel | |||
elif base_kernel == 'sp': | |||
Kmatrix = _wl_spkernel_do(args[0], node_label, edge_label, height) | |||
# for WL edge kernel | |||
elif base_kernel == 'edge': | |||
Kmatrix = _wl_edgekernel_do(args[0], node_label, edge_label, height) | |||
# for user defined base kernel | |||
else: | |||
Kmatrix = _wl_userkernel_do(args[0], node_label, edge_label, height, base_kernel) | |||
run_time = time.time() - start_time | |||
print("\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---" % (base_kernel, len(args[0]), run_time)) | |||
return Kmatrix, run_time | |||
def _wl_subtreekernel_do(Gn, node_label, edge_label, height): | |||
"""Calculate Weisfeiler-Lehman subtree kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
edge attribute used as label. | |||
height : int | |||
subtree height. | |||
Return | |||
------ | |||
Kmatrix : Numpy matrix | |||
        Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
""" | |||
height = int(height) | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
    all_num_of_labels_occured = 0 # total number of distinct labels that have occurred as node labels in all graphs so far
# initial for height = 0 | |||
    all_labels_ori = set() # all unique original labels in all graphs in this iteration
    all_num_of_each_label = [] # number of occurrences of each label in each graph in this iteration
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration | |||
    num_of_labels_occured = all_num_of_labels_occured # number of distinct labels that have occurred as node labels in all graphs so far
# for each graph | |||
for G in Gn: | |||
# get the set of original labels | |||
labels_ori = list(nx.get_node_attributes(G, node_label).values()) | |||
all_labels_ori.update(labels_ori) | |||
        num_of_each_label = dict(Counter(labels_ori)) # number of occurrences of each label in the graph
all_num_of_each_label.append(num_of_each_label) | |||
num_of_labels = len(num_of_each_label) # number of all unique labels | |||
all_num_of_labels_occured += len(all_labels_ori) | |||
# calculate subtree kernel with the 0th iteration and add it to the final kernel | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
            labels = set(all_num_of_each_label[i]) | set(all_num_of_each_label[j])
            vector1 = np.array([all_num_of_each_label[i].get(label, 0) for label in labels])
            vector2 = np.array([all_num_of_each_label[j].get(label, 0) for label in labels])
            Kmatrix[i][j] += np.dot(vector1, vector2)
Kmatrix[j][i] = Kmatrix[i][j] | |||
# iterate each height | |||
for h in range(1, height + 1): | |||
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration | |||
        num_of_labels_occured = all_num_of_labels_occured # number of distinct labels that have occurred as node labels in all graphs so far
all_labels_ori = set() | |||
all_num_of_each_label = [] | |||
# for each graph | |||
for idx, G in enumerate(Gn): | |||
set_multisets = [] | |||
for node in G.nodes(data = True): | |||
# Multiset-label determination. | |||
                multiset = [ G.node[neighbor][node_label] for neighbor in G[node[0]] ]
# sorting each multiset | |||
multiset.sort() | |||
multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix | |||
set_multisets.append(multiset) | |||
# label compression | |||
set_unique = list(set(set_multisets)) # set of unique multiset labels | |||
# a dictionary mapping original labels to new ones. | |||
set_compressed = {} | |||
            # if a label occurred before, assign its former compressed label; else assign (number of labels occurred + 1) as the compressed label
for value in set_unique: | |||
if value in all_set_compressed.keys(): | |||
set_compressed.update({ value : all_set_compressed[value] }) | |||
else: | |||
set_compressed.update({ value : str(num_of_labels_occured + 1) }) | |||
num_of_labels_occured += 1 | |||
all_set_compressed.update(set_compressed) | |||
# relabel nodes | |||
            for idx_n, node in enumerate(G.nodes(data = True)):
                node[1][node_label] = set_compressed[set_multisets[idx_n]]
# get the set of compressed labels | |||
labels_comp = list(nx.get_node_attributes(G, node_label).values()) | |||
all_labels_ori.update(labels_comp) | |||
num_of_each_label = dict(Counter(labels_comp)) | |||
all_num_of_each_label.append(num_of_each_label) | |||
all_num_of_labels_occured += len(all_labels_ori) | |||
# calculate subtree kernel with h iterations and add it to the final kernel | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
            labels = set(all_num_of_each_label[i]) | set(all_num_of_each_label[j])
            vector1 = np.array([all_num_of_each_label[i].get(label, 0) for label in labels])
            vector2 = np.array([all_num_of_each_label[j].get(label, 0) for label in labels])
            Kmatrix[i][j] += np.dot(vector1, vector2)
Kmatrix[j][i] = Kmatrix[i][j] | |||
return Kmatrix | |||
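# Illustrative sketch (not part of the original module): the kernel increment
# above is an inner product of label-count histograms. With plain dicts:
def _label_histogram_dot(counts_i, counts_j):
    """Dot product of two label -> count histograms over their label union."""
    labels = set(counts_i) | set(counts_j)
    return sum(counts_i.get(lb, 0) * counts_j.get(lb, 0) for lb in labels)
# e.g. _label_histogram_dot({'C': 2, 'O': 1}, {'C': 3}) == 2 * 3 == 6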
def _wl_spkernel_do(Gn, node_label, edge_label, height): | |||
"""Calculate Weisfeiler-Lehman shortest path kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
edge attribute used as label. | |||
height : int | |||
subtree height. | |||
Return | |||
------ | |||
Kmatrix : Numpy matrix | |||
        Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
""" | |||
from gklearn.utils.utils import getSPGraph | |||
# init. | |||
height = int(height) | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel | |||
Gn = [ getSPGraph(G, edge_weight = edge_label) for G in Gn ] # get shortest path graphs of Gn | |||
# initial for height = 0 | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
for e1 in Gn[i].edges(data = True): | |||
for e2 in Gn[j].edges(data = True): | |||
if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): | |||
Kmatrix[i][j] += 1 | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
# iterate each height | |||
for h in range(1, height + 1): | |||
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration | |||
        num_of_labels_occured = 0 # number of distinct labels that have occurred as node labels in all graphs so far
for G in Gn: # for each graph | |||
set_multisets = [] | |||
for node in G.nodes(data = True): | |||
# Multiset-label determination. | |||
                multiset = [ G.node[neighbor][node_label] for neighbor in G[node[0]] ]
# sorting each multiset | |||
multiset.sort() | |||
multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix | |||
set_multisets.append(multiset) | |||
# label compression | |||
set_unique = list(set(set_multisets)) # set of unique multiset labels | |||
# a dictionary mapping original labels to new ones. | |||
set_compressed = {} | |||
            # if a label occurred before, assign its former compressed label; else assign (number of labels occurred + 1) as the compressed label
for value in set_unique: | |||
if value in all_set_compressed.keys(): | |||
set_compressed.update({ value : all_set_compressed[value] }) | |||
else: | |||
set_compressed.update({ value : str(num_of_labels_occured + 1) }) | |||
num_of_labels_occured += 1 | |||
all_set_compressed.update(set_compressed) | |||
# relabel nodes | |||
            for idx_n, node in enumerate(G.nodes(data = True)):
                node[1][node_label] = set_compressed[set_multisets[idx_n]]
        # calculate the shortest path kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
for e1 in Gn[i].edges(data = True): | |||
for e2 in Gn[j].edges(data = True): | |||
if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): | |||
Kmatrix[i][j] += 1 | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
return Kmatrix | |||
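# Illustrative sketch (not part of the original module): the edge-equality test
# in the double loops above treats edges as undirected; factored out it reads:
def _same_undirected_edge(e1, e2):
    """True if two (u, v, data) edge tuples connect the same node pair."""
    return ((e1[0] == e2[0] and e1[1] == e2[1])
            or (e1[0] == e2[1] and e1[1] == e2[0]))
# e.g. _same_undirected_edge((0, 1, {}), (1, 0, {})) is True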
def _wl_edgekernel_do(Gn, node_label, edge_label, height): | |||
"""Calculate Weisfeiler-Lehman edge kernels between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
edge attribute used as label. | |||
height : int | |||
subtree height. | |||
Return | |||
------ | |||
Kmatrix : Numpy matrix | |||
        Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
""" | |||
# init. | |||
height = int(height) | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel | |||
# initial for height = 0 | |||
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
for e1 in Gn[i].edges(data = True): | |||
for e2 in Gn[j].edges(data = True): | |||
if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): | |||
Kmatrix[i][j] += 1 | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
# iterate each height | |||
for h in range(1, height + 1): | |||
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration | |||
        num_of_labels_occured = 0 # number of distinct labels that have occurred as node labels in all graphs so far
for G in Gn: # for each graph | |||
set_multisets = [] | |||
for node in G.nodes(data = True): | |||
# Multiset-label determination. | |||
                multiset = [ G.node[neighbor][node_label] for neighbor in G[node[0]] ]
# sorting each multiset | |||
multiset.sort() | |||
multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix | |||
set_multisets.append(multiset) | |||
# label compression | |||
set_unique = list(set(set_multisets)) # set of unique multiset labels | |||
# a dictionary mapping original labels to new ones. | |||
set_compressed = {} | |||
            # if a label occurred before, assign its former compressed label; else assign (number of labels occurred + 1) as the compressed label
for value in set_unique: | |||
if value in all_set_compressed.keys(): | |||
set_compressed.update({ value : all_set_compressed[value] }) | |||
else: | |||
set_compressed.update({ value : str(num_of_labels_occured + 1) }) | |||
num_of_labels_occured += 1 | |||
all_set_compressed.update(set_compressed) | |||
# relabel nodes | |||
            for idx_n, node in enumerate(G.nodes(data = True)):
                node[1][node_label] = set_compressed[set_multisets[idx_n]]
        # calculate the edge kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)): | |||
for j in range(i, len(Gn)): | |||
for e1 in Gn[i].edges(data = True): | |||
for e2 in Gn[j].edges(data = True): | |||
if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])): | |||
Kmatrix[i][j] += 1 | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
return Kmatrix | |||
def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): | |||
"""Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs. | |||
Parameters | |||
---------- | |||
Gn : List of NetworkX graph | |||
List of graphs between which the kernels are calculated. | |||
node_label : string | |||
node attribute used as label. | |||
edge_label : string | |||
edge attribute used as label. | |||
height : int | |||
subtree height. | |||
    base_kernel : function
        The base kernel function applied in each iteration of the WL kernel. It returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 graphs.
Return | |||
------ | |||
Kmatrix : Numpy matrix | |||
        Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
""" | |||
# init. | |||
height = int(height) | |||
Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel | |||
# initial for height = 0 | |||
Kmatrix = base_kernel(Gn, node_label, edge_label) | |||
# iterate each height | |||
for h in range(1, height + 1): | |||
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration | |||
        num_of_labels_occured = 0 # number of distinct labels that have occurred as node labels in all graphs so far
for G in Gn: # for each graph | |||
set_multisets = [] | |||
for node in G.nodes(data = True): | |||
# Multiset-label determination. | |||
                multiset = [ G.node[neighbor][node_label] for neighbor in G[node[0]] ]
# sorting each multiset | |||
multiset.sort() | |||
multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix | |||
set_multisets.append(multiset) | |||
# label compression | |||
set_unique = list(set(set_multisets)) # set of unique multiset labels | |||
# a dictionary mapping original labels to new ones. | |||
set_compressed = {} | |||
            # if a label occurred before, assign its former compressed label; else assign (number of labels occurred + 1) as the compressed label
for value in set_unique: | |||
if value in all_set_compressed.keys(): | |||
set_compressed.update({ value : all_set_compressed[value] }) | |||
else: | |||
set_compressed.update({ value : str(num_of_labels_occured + 1) }) | |||
num_of_labels_occured += 1 | |||
all_set_compressed.update(set_compressed) | |||
# relabel nodes | |||
            for idx_n, node in enumerate(G.nodes(data = True)):
                node[1][node_label] = set_compressed[set_multisets[idx_n]]
# calculate kernel with h iterations and add it to the final kernel | |||
Kmatrix += base_kernel(Gn, node_label, edge_label) | |||
return Kmatrix |
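# A minimal usage sketch (not part of the original module), assuming an older
# NetworkX release where G.node is available (the helpers above rely on it)
# and nodes numbered 0..n-1 as the relabeling step expects:
if __name__ == '__main__':
    G1 = nx.Graph()
    G1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    G1.add_edge(0, 1, bond_type='1')
    G2 = nx.Graph()
    G2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'})])
    G2.add_edge(0, 1, bond_type='1')
    Kmatrix, run_time = weisfeilerlehmankernel([G1, G2], height=2,
                                               base_kernel='subtree')
    print(Kmatrix)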
@@ -1,16 +0,0 @@ | |||
""" Functions for python system. | |||
""" | |||
def isNotebook(): | |||
"""check if code is executed in the IPython notebook. | |||
""" | |||
try: | |||
shell = get_ipython().__class__.__name__ | |||
if shell == 'ZMQInteractiveShell': | |||
return True # Jupyter notebook or qtconsole | |||
elif shell == 'TerminalInteractiveShell': | |||
return False # Terminal running IPython | |||
else: | |||
return False # Other type (?) | |||
except NameError: | |||
return False # Probably standard Python interpreter |
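# Illustrative usage (not part of the original module):
if __name__ == '__main__':
    # Prints False in a plain interpreter or terminal IPython; True in Jupyter.
    print('running in a notebook:', isNotebook())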
@@ -1,27 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Fri Nov 8 14:21:25 2019 | |||
@author: ljia | |||
""" | |||
import sys | |||
import time | |||
class Logger(object): | |||
def __init__(self): | |||
self.terminal = sys.stdout | |||
self.log = open("log." + str(time.time()) + ".log", "a") | |||
def write(self, message): | |||
self.terminal.write(message) | |||
self.log.write(message) | |||
def flush(self): | |||
        # This flush method is needed for Python 3 compatibility.
        # It handles the flush command by doing nothing.
        # You might want to specify some extra behavior here.
pass | |||
sys.stdout = Logger() |
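# Illustrative usage (not part of the original module): importing this module
# replaces sys.stdout, so every print() is mirrored to the timestamped log file.
if __name__ == '__main__':
    print('this line goes to both the terminal and the log file')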
@@ -1,86 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Dec 19 15:31:01 2018 | |||
A script to set the thread number of OpenBLAS (if used). | |||
Some modules that use OpenBLAS (such as NumPy, SciPy and scikit-learn) perform
parallel computation automatically, which conflicts with other parallelization
modules such as multiprocessing.Pool and can highly increase the computing
time. By setting the thread number to 1, OpenBLAS is forced to use a single
thread/CPU, so this conflict can be avoided.
e.g.:
with num_threads(8): | |||
np.dot(x, y) | |||
@author: ali_m | |||
@Reference: `ali_m's answer <https://stackoverflow.com/a/29582987>`__, 2018.12 | |||
""" | |||
import contextlib | |||
import ctypes | |||
from ctypes.util import find_library | |||
import os | |||
# Prioritize hand-compiled OpenBLAS library over version in /usr/lib/ | |||
# from Ubuntu repos | |||
try_paths = ['/opt/OpenBLAS/lib/libopenblas.so', | |||
'/lib/libopenblas.so', | |||
'/usr/lib/libopenblas.so.0', | |||
find_library('openblas')] | |||
openblas_lib = None | |||
for libpath in try_paths: | |||
try: | |||
openblas_lib = ctypes.cdll.LoadLibrary(libpath) | |||
break | |||
except OSError: | |||
continue | |||
if openblas_lib is None: | |||
raise EnvironmentError('Could not locate an OpenBLAS shared library', 2) | |||
def set_num_threads(n): | |||
"""Set the current number of threads used by the OpenBLAS server.""" | |||
openblas_lib.openblas_set_num_threads(int(n)) | |||
# At the time of writing these symbols were very new: | |||
# https://github.com/xianyi/OpenBLAS/commit/65a847c | |||
try: | |||
openblas_lib.openblas_get_num_threads() | |||
def get_num_threads(): | |||
"""Get the current number of threads used by the OpenBLAS server.""" | |||
return openblas_lib.openblas_get_num_threads() | |||
except AttributeError: | |||
def get_num_threads(): | |||
"""Dummy function (symbol not present in %s), returns -1.""" | |||
return -1 | |||
pass | |||
try: | |||
len(os.sched_getaffinity(0)) | |||
def get_num_procs(): | |||
"""Get the total number of physical processors""" | |||
return len(os.sched_getaffinity(0)) | |||
except AttributeError: | |||
def get_num_procs(): | |||
"""Dummy function (symbol not present), returns -1.""" | |||
return -1 | |||
pass | |||
@contextlib.contextmanager | |||
def num_threads(n): | |||
"""Temporarily changes the number of OpenBLAS threads. | |||
Example usage: | |||
print("Before: {}".format(get_num_threads())) | |||
with num_threads(n): | |||
print("In thread context: {}".format(get_num_threads())) | |||
print("After: {}".format(get_num_threads())) | |||
""" | |||
old_n = get_num_threads() | |||
set_num_threads(n) | |||
try: | |||
yield | |||
finally: | |||
set_num_threads(old_n) |
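# Illustrative usage (not part of the original module): pin OpenBLAS to a
# single thread while multiprocessing.Pool supplies the parallelism, as the
# module docstring recommends.
if __name__ == '__main__':
    import multiprocessing
    import numpy as np
    with num_threads(1):
        with multiprocessing.Pool(processes=4) as pool:
            print(pool.map(np.sqrt, [1.0, 4.0, 9.0]))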
@@ -1,320 +0,0 @@ | |||
""" | |||
@author: linlin | |||
@references: | |||
[1] `ptrus/suffix-trees <https://github.com/ptrus/suffix-trees/blob/master/suffix_trees/STree.py>`__, 2018.6 | |||
""" | |||
import sys | |||
class STree(): | |||
"""Class representing the suffix tree. The generalized suffix tree is supported.""" | |||
def __init__(self, input=''): | |||
self.root = _SNode() | |||
self.root.depth = 0 | |||
self.root.idx = 0 | |||
self.root.parent = self.root | |||
self.root._add_suffix_link(self.root) | |||
if not input == '': | |||
self.build(input) | |||
def _check_input(self, input): | |||
"""Checks the validity of the input. | |||
        Raises ValueError in case of an invalid input.
""" | |||
if isinstance(input, str): | |||
return 'st' | |||
elif isinstance(input, list): | |||
if all(isinstance(item, str) for item in input): | |||
return 'gst' | |||
raise ValueError("String argument should be of type String or" | |||
" a list of strings") | |||
def build(self, x): | |||
"""Builds the Suffix tree on the given input. | |||
If the input is of type List of Strings: | |||
Generalized Suffix Tree is built. | |||
:param x: String or List of Strings | |||
""" | |||
        input_type = self._check_input(x)
        if input_type == 'st':
            x += next(self._terminalSymbolsGenerator())
            self._build(x)
        if input_type == 'gst':
            self._build_generalized(x)
def _build(self, x): | |||
"""Builds a Suffix tree.""" | |||
self.word = x | |||
self._build_McCreight(x) | |||
def _build_McCreight(self, x): | |||
"""Builds a Suffix tree using McCreight O(n) algorithm. | |||
Algorithm based on: | |||
McCreight, Edward M. "A space-economical suffix tree construction algorithm." - ACM, 1976. | |||
Implementation based on: | |||
UH CS - 58093 String Processing Algorithms Lecture Notes | |||
""" | |||
u = self.root | |||
d = 0 | |||
for i in range(len(x)): | |||
while u.depth == d and u._has_transition(x[d + i]): | |||
u = u._get_transition_link(x[d + i]) | |||
d = d + 1 | |||
while d < u.depth and x[u.idx + d] == x[i + d]: | |||
d = d + 1 | |||
if d < u.depth: | |||
u = self._create_node(x, u, d) | |||
self._create_leaf(x, i, u, d) | |||
if not u._get_suffix_link(): | |||
self._compute_slink(x, u) | |||
u = u._get_suffix_link() | |||
d = d - 1 | |||
if d < 0: | |||
d = 0 | |||
def _create_node(self, x, u, d): | |||
i = u.idx | |||
p = u.parent | |||
v = _SNode(idx=i, depth=d) | |||
v._add_transition_link(u, x[i + d]) | |||
u.parent = v | |||
p._add_transition_link(v, x[i + p.depth]) | |||
v.parent = p | |||
return v | |||
def _create_leaf(self, x, i, u, d): | |||
w = _SNode() | |||
w.idx = i | |||
w.depth = len(x) - i | |||
u._add_transition_link(w, x[i + d]) | |||
w.parent = u | |||
return w | |||
def _compute_slink(self, x, u): | |||
d = u.depth | |||
v = u.parent._get_suffix_link() | |||
while v.depth < d - 1: | |||
v = v._get_transition_link(x[u.idx + v.depth + 1]) | |||
if v.depth > d - 1: | |||
v = self._create_node(x, v, d - 1) | |||
u._add_suffix_link(v) | |||
def _build_Ukkonen(self, x): | |||
"""Builds a Suffix tree using Ukkonen's online O(n) algorithm. | |||
Algorithm based on: | |||
Ukkonen, Esko. "On-line construction of suffix trees." - Algorithmica, 1995. | |||
""" | |||
# TODO. | |||
raise NotImplementedError() | |||
def _build_generalized(self, xs): | |||
"""Builds a Generalized Suffix Tree (GST) from the array of strings provided. | |||
""" | |||
terminal_gen = self._terminalSymbolsGenerator() | |||
_xs = ''.join([x + next(terminal_gen) for x in xs]) | |||
self.word = _xs | |||
self._generalized_word_starts(xs) | |||
self._build(_xs) | |||
self.root._traverse(self._label_generalized) | |||
def _label_generalized(self, node): | |||
"""Helper method that labels the nodes of GST with indexes of strings | |||
found in their descendants. | |||
""" | |||
if node.is_leaf(): | |||
x = {self._get_word_start_index(node.idx)} | |||
else: | |||
x = { | |||
n | |||
for ns in node.transition_links for n in ns[0].generalized_idxs | |||
} | |||
node.generalized_idxs = x | |||
def _get_word_start_index(self, idx): | |||
"""Helper method that returns the index of the string based on node's | |||
starting index""" | |||
i = 0 | |||
for _idx in self.word_starts[1:]: | |||
if idx < _idx: | |||
return i | |||
else: | |||
i += 1 | |||
return i | |||
def lcs(self, stringIdxs=-1): | |||
"""Returns the Largest Common Substring of Strings provided in stringIdxs. | |||
If stringIdxs is not provided, the LCS of all strings is returned. | |||
::param stringIdxs: Optional: List of indexes of strings. | |||
""" | |||
if stringIdxs == -1 or not isinstance(stringIdxs, list): | |||
stringIdxs = set(range(len(self.word_starts))) | |||
else: | |||
stringIdxs = set(stringIdxs) | |||
deepestNode = self._find_lcs(self.root, stringIdxs) | |||
start = deepestNode.idx | |||
end = deepestNode.idx + deepestNode.depth | |||
return self.word[start:end] | |||
def _find_lcs(self, node, stringIdxs): | |||
"""Helper method that finds LCS by traversing the labeled GSD.""" | |||
nodes = [ | |||
self._find_lcs(n, stringIdxs) for (n, _) in node.transition_links | |||
if n.generalized_idxs.issuperset(stringIdxs) | |||
] | |||
if nodes == []: | |||
return node | |||
deepestNode = max(nodes, key=lambda n: n.depth) | |||
return deepestNode | |||
def _generalized_word_starts(self, xs): | |||
"""Helper method returns the starting indexes of strings in GST""" | |||
self.word_starts = [] | |||
i = 0 | |||
for n in range(len(xs)): | |||
self.word_starts.append(i) | |||
i += len(xs[n]) + 1 | |||
def find(self, y): | |||
"""Returns starting position of the substring y in the string used for | |||
building the Suffix tree. | |||
:param y: String | |||
:return: Index of the starting position of string y in the string used for building the Suffix tree | |||
-1 if y is not a substring. | |||
""" | |||
node = self.root | |||
while True: | |||
edge = self._edgeLabel(node, node.parent) | |||
if edge.startswith(y): | |||
return node.idx | |||
i = 0 | |||
while (i < len(edge) and edge[i] == y[0]): | |||
y = y[1:] | |||
i += 1 | |||
if i != 0: | |||
if i == len(edge) and y != '': | |||
pass | |||
else: | |||
return -1 | |||
node = node._get_transition_link(y[0]) | |||
if not node: | |||
return -1 | |||
def find_all(self, y): | |||
y_input = y | |||
node = self.root | |||
while True: | |||
edge = self._edgeLabel(node, node.parent) | |||
if edge.startswith(y): | |||
break | |||
i = 0 | |||
while (i < len(edge) and edge[i] == y[0]): | |||
y = y[1:] | |||
i += 1 | |||
if i != 0: | |||
if i == len(edge) and y != '': | |||
pass | |||
else: | |||
return [] | |||
node = node._get_transition_link(y[0]) | |||
if not node: | |||
return [] | |||
leaves = node._get_leaves() | |||
return [n.idx for n in leaves] | |||
def _edgeLabel(self, node, parent): | |||
"""Helper method, returns the edge label between a node and it's parent""" | |||
return self.word[node.idx + parent.depth:node.idx + node.depth] | |||
def _terminalSymbolsGenerator(self): | |||
"""Generator of unique terminal symbols used for building the Generalized Suffix Tree. | |||
        Unicode Private Use Areas (U+E000..U+F8FF and the two supplementary
        planes) are used to ensure that terminal symbols are not part of the input string.
""" | |||
py2 = sys.version[0] < '3' | |||
UPPAs = list( | |||
list(range(0xE000, 0xF8FF + 1)) + | |||
list(range(0xF0000, 0xFFFFD + 1)) + | |||
list(range(0x100000, 0x10FFFD + 1))) | |||
for i in UPPAs: | |||
if py2: | |||
yield (unichr(i)) | |||
else: | |||
yield (chr(i)) | |||
raise ValueError("To many input strings.") | |||
class _SNode(): | |||
"""Class representing a Node in the Suffix tree.""" | |||
def __init__(self, idx=-1, parentNode=None, depth=-1): | |||
# Links | |||
self._suffix_link = None | |||
self.transition_links = [] | |||
# Properties | |||
self.idx = idx | |||
self.depth = depth | |||
self.parent = parentNode | |||
self.generalized_idxs = {} | |||
def __str__(self): | |||
return ("SNode: idx:" + str(self.idx) + " depth:" + str(self.depth) + | |||
" transitons:" + str(self.transition_links)) | |||
def _add_suffix_link(self, snode): | |||
self._suffix_link = snode | |||
def _get_suffix_link(self): | |||
        if self._suffix_link is not None:
return self._suffix_link | |||
else: | |||
return False | |||
def _get_transition_link(self, suffix): | |||
for node, _suffix in self.transition_links: | |||
if _suffix == '__@__' or suffix == _suffix: | |||
return node | |||
return False | |||
def _add_transition_link(self, snode, suffix=''): | |||
tl = self._get_transition_link(suffix) | |||
        if tl: # TODO: improve this.
self.transition_links.remove((tl, suffix)) | |||
self.transition_links.append((snode, suffix)) | |||
def _has_transition(self, suffix): | |||
for node, _suffix in self.transition_links: | |||
if _suffix == '__@__' or suffix == _suffix: | |||
return True | |||
return False | |||
def is_leaf(self): | |||
return self.transition_links == [] | |||
def _traverse(self, f): | |||
for (node, _) in self.transition_links: | |||
node._traverse(f) | |||
f(self) | |||
def _get_leaves(self): | |||
if self.is_leaf(): | |||
return [self] | |||
else: | |||
return [ | |||
x for (n, _) in self.transition_links for x in n._get_leaves() | |||
] |
@@ -1,52 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Sun Dec 23 16:40:52 2018 | |||
@author: ljia | |||
""" | |||
import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
from gklearn.utils.model_selection_precomputed import compute_gram_matrices | |||
from gklearn.kernels.spKernel import spkernel | |||
from sklearn.model_selection import ParameterGrid | |||
from libs import * | |||
import multiprocessing | |||
import functools | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
if __name__ == "__main__": | |||
# load dataset. | |||
print('getting dataset and computing kernel distance matrix first...') | |||
ds_name = 'SYNTHETICnew' | |||
gkernel = 'spkernel' | |||
dataset = '../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
Gn, y_all = loadDataset(dataset) | |||
for G in Gn: | |||
G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | |||
# compute/read Gram matrix and pair distances. | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
Kmatrix, run_time, idx = spkernel(Gn, node_label=None, node_kernels= | |||
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||
n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
# normalization | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
for i in range(len(Kmatrix)): | |||
for j in range(i, len(Kmatrix)): | |||
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||
Kmatrix=Kmatrix, run_time=run_time) | |||
print('complete!') |
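# Illustrative note (not part of the original script): the loop above computes
# the cosine-normalized Gram matrix K_ij / sqrt(K_ii * K_jj); a vectorized
# equivalent would be:
#     d = np.sqrt(Kmatrix_diag)
#     Kmatrix = Kmatrix / np.outer(d, d)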
@@ -1,54 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Sun Dec 23 16:40:52 2018 | |||
@author: ljia | |||
""" | |||
import sys | |||
import numpy as np | |||
import networkx as nx | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
from gklearn.utils.model_selection_precomputed import compute_gram_matrices | |||
from gklearn.kernels.structuralspKernel import structuralspkernel | |||
from sklearn.model_selection import ParameterGrid | |||
from libs import * | |||
import multiprocessing | |||
import functools | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
if __name__ == "__main__": | |||
# load dataset. | |||
print('getting dataset and computing kernel distance matrix first...') | |||
ds_name = 'SYNTHETICnew' | |||
gkernel = 'structuralspkernel' | |||
dataset = '../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
Gn, y_all = loadDataset(dataset) | |||
for G in Gn: | |||
G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | |||
# compute/read Gram matrix and pair distances. | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
Kmatrix, run_time = structuralspkernel(Gn, node_label=None, edge_label=None, | |||
node_kernels=sub_kernels, edge_kernels=sub_kernels, | |||
parallel=None, # parallel='imap_unordered', | |||
n_jobs=multiprocessing.cpu_count(), | |||
verbose=True) | |||
# normalization | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
for i in range(len(Kmatrix)): | |||
for j in range(i, len(Kmatrix)): | |||
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||
Kmatrix=Kmatrix, run_time=run_time) | |||
print('complete!') |
@@ -1,19 +0,0 @@ | |||
#!/bin/bash | |||
#SBATCH --exclusive | |||
#SBATCH --job-name="graphkernels" | |||
#SBATCH --partition=tcourt | |||
#SBATCH --mail-type=ALL | |||
#SBATCH --mail-user=jajupmochi@gmail.com | |||
#SBATCH --output=output_graphkernels.txt | |||
#SBATCH --error=error_graphkernels.txt | |||
# | |||
#SBATCH --ntasks=1 | |||
#SBATCH --nodes=2 | |||
#SBATCH --cpus-per-task=56 | |||
#SBATCH --time=24:00:00 | |||
#SBATCH --mem-per-cpu=4000 | |||
srun hostname | |||
cd /home/2017018/ljia01/graphkit-learn/notebooks
srun python3 run_spkernel.py |
@@ -1,12 +0,0 @@ | |||
#!/bin/bash | |||
# | |||
#SBATCH --job-name=test | |||
#SBATCH --output=res.txt | |||
#SBATCH --partition=long | |||
# | |||
#SBATCH --ntasks=1 | |||
#SBATCH --time=10:00 | |||
#SBATCH --mem-per-cpu=100 | |||
srun hostname | |||
srun sleep 60 |
@@ -1,70 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Sun Dec 23 16:56:44 2018 | |||
@author: ljia | |||
""" | |||
import functools | |||
from libs import * | |||
import multiprocessing | |||
from gklearn.kernels.rwalk_sym import randomwalkkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
import numpy as np | |||
dslist = [ | |||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
estimator = randomwalkkernel | |||
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, | |||
{'alpha': np.logspace(-10, 10, num=41, base=10)}] | |||
for ds in dslist: | |||
print() | |||
print(ds['name']) | |||
for compute_method in ['conjugate', 'fp']: | |||
if compute_method == 'sylvester': | |||
param_grid_precomputed = {'compute_method': ['sylvester'], | |||
# 'weight': np.linspace(0.01, 0.10, 10)} | |||
'weight': np.logspace(-1, -10, num=10, base=10)} | |||
elif compute_method == 'conjugate': | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
param_grid_precomputed = {'compute_method': ['conjugate'], | |||
'node_kernels': | |||
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}], | |||
'edge_kernels': | |||
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}], | |||
'weight': np.logspace(-1, -10, num=10, base=10)} | |||
elif compute_method == 'fp': | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
param_grid_precomputed = {'compute_method': ['fp'], | |||
'node_kernels': | |||
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}], | |||
'edge_kernels': | |||
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}], | |||
'weight': np.logspace(-3, -10, num=8, base=10)} | |||
elif compute_method == 'spectral': | |||
param_grid_precomputed = {'compute_method': ['spectral'], | |||
'weight': np.logspace(-1, -10, num=10, base=10), | |||
'sub_kernel': ['geo', 'exp']} | |||
model_selection_for_precomputed_kernel( | |||
ds['dataset'], | |||
estimator, | |||
param_grid_precomputed, | |||
(param_grid[1] if ('task' in ds and ds['task'] | |||
== 'regression') else param_grid[0]), | |||
(ds['task'] if 'task' in ds else 'classification'), | |||
NUM_TRIALS=30, | |||
datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), | |||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | |||
ds_name=ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), | |||
read_gm_from_file=False) | |||
print() |
@@ -1,61 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Fri Dec 21 17:59:28 2018 | |||
@author: ljia | |||
""" | |||
import functools | |||
from libs import * | |||
import multiprocessing | |||
from gklearn.kernels.sp_sym import spkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
#from gklearn.utils.model_selection_precomputed import trial_do | |||
dslist = [ | |||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb | |||
# # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb | |||
# # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb | |||
# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'}, | |||
# | |||
# # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb | |||
# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb | |||
# # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb | |||
# # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb | |||
] | |||
estimator = spkernel | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
param_grid_precomputed = {'node_kernels': [ | |||
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]} | |||
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, | |||
{'alpha': np.logspace(-10, 10, num=41, base=10)}] | |||
for ds in dslist: | |||
print() | |||
print(ds['name']) | |||
model_selection_for_precomputed_kernel( | |||
ds['dataset'], | |||
estimator, | |||
param_grid_precomputed, | |||
(param_grid[1] if ('task' in ds and ds['task'] | |||
== 'regression') else param_grid[0]), | |||
(ds['task'] if 'task' in ds else 'classification'), | |||
NUM_TRIALS=30, | |||
datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), | |||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | |||
ds_name=ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), | |||
read_gm_from_file=False) | |||
print() |
@@ -1,47 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Sun Dec 23 16:40:52 2018 | |||
@author: ljia | |||
""" | |||
import functools | |||
from libs import * | |||
import multiprocessing | |||
from gklearn.kernels.ssp_sym import structuralspkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
dslist = [ | |||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# node nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node symb/nsymb | |||
] | |||
estimator = structuralspkernel | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
param_grid_precomputed = {'node_kernels': | |||
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}], | |||
'edge_kernels': | |||
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]} | |||
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, | |||
{'alpha': np.logspace(-10, 10, num=41, base=10)}] | |||
for ds in dslist: | |||
print() | |||
print(ds['name']) | |||
model_selection_for_precomputed_kernel( | |||
ds['dataset'], | |||
estimator, | |||
param_grid_precomputed, | |||
(param_grid[1] if ('task' in ds and ds['task'] | |||
== 'regression') else param_grid[0]), | |||
(ds['task'] if 'task' in ds else 'classification'), | |||
NUM_TRIALS=30, | |||
datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), | |||
extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | |||
ds_name=ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), | |||
read_gm_from_file=False) | |||
print() |
@@ -1,47 +0,0 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Test the Message Passing Interface (MPI) for cluster parallelization.
Created on Wed Nov 7 17:26:40 2018 | |||
@author: ljia | |||
""" | |||
from mpi4py import MPI | |||
comm = MPI.COMM_WORLD | |||
rank = comm.Get_rank() | |||
import numpy as np | |||
import time | |||
size = comm.Get_size() | |||
numDataPerRank = 10 | |||
data = None | |||
if rank == 0: | |||
data = np.linspace(1, size * numDataPerRank, size * numDataPerRank) | |||
recvbuf = np.empty(numDataPerRank, dtype='d') | |||
comm.Scatter(data, recvbuf, root=0) | |||
recvbuf += 1 | |||
print('Rank: ', rank, ', recvbuf received: ', recvbuf, ', size: ', size, ', time: ', time.time()) | |||
#if rank == 0: | |||
# data = {'key1' : [1,2, 3], | |||
# 'key2' : ( 'abc', 'xyz')} | |||
#else: | |||
# data = None | |||
# | |||
#data = comm.bcast(data, root=0) | |||
#print('Rank: ',rank,', data: ' ,data) | |||
#if rank == 0: | |||
# data = {'a': 7, 'b': 3.14} | |||
# comm.send(data, dest=1) | |||
#elif rank == 1: | |||
# data = comm.recv(source=0) | |||
# print('On process 1, data is ', data) | |||
#print('My rank is ', rank) | |||
#for i in range(0, 100000000): | |||
# print(i) |
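# Illustrative note (not part of the original script): Scatter requires the
# root's send buffer to hold exactly size * numDataPerRank doubles, so launch
# with a matching process count, e.g.:
#     mpirun -np 4 python3 <this script>
# Each of the 4 ranks then receives numDataPerRank (= 10) elements.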