
clear repo: remove useless files.

v0.1
jajupmochi, 5 years ago
commit f1d4a6e5e3
25 changed files with 0 additions and 17423 deletions
  1. +0  -188    gklearn/kernels/.tags
  2. +0  -842    gklearn/kernels/else/rwalk_sym.py
  3. +0  -200    gklearn/kernels/else/sp_sym.py
  4. +0  -464    gklearn/kernels/else/ssp_sym.py
  5. +0  -147    gklearn/kernels/unfinished/cyclicPatternKernel.py
  6. +0  -234    gklearn/kernels/unfinished/pathKernel.py
  7. +0  -241    gklearn/kernels/unfinished/treePatternKernel.py
  8. +0  -403    gklearn/kernels/unfinished/weisfeilerLehmanKernel.py
  9. +0  -16     gklearn/utils/isNotebook.py
  10. +0 -27     gklearn/utils/logger2file.py
  11. +0 -86     gklearn/utils/unfinished/openblassettings.py
  12. +0 -320    gklearn/utils/unused/suffix_tree.py
  13. +0 -52     notebooks/else/compute_spkernel_for_syntheticnew.py
  14. +0 -54     notebooks/else/compute_sspkernel_for_syntheticnew.py
  15. +0 -19     notebooks/else/job_graphkernels.sl
  16. +0 -12     notebooks/else/job_test.sl
  17. +0 -70     notebooks/else/run_rwalk_symonly.py
  18. +0 -61     notebooks/else/run_sp_symonly.py
  19. +0 -47     notebooks/else/run_ssp_symonly.py
  20. BIN        notebooks/preimage/results.gm.npz
  21. +0 -1329   notebooks/unfinished/run_cyclicpatternkernel.ipynb
  22. +0 -786    notebooks/unfinished/run_treeletkernel_acyclic.ipynb
  23. +0 -7966   notebooks/unfinished/run_treepatternkernel.ipynb
  24. +0 -3812   notebooks/unfinished/run_weisfeilerLehmankernel.ipynb
  25. +0 -47     notebooks/unfinished/test_mpi.py

gklearn/kernels/.tags (+0, -188)

@@ -1,188 +0,0 @@
!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
!_TAG_FILE_SORTED 0 /0=unsorted, 1=sorted, 2=foldcase/
!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/
!_TAG_PROGRAM_NAME Exuberant Ctags //
!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/
!_TAG_PROGRAM_VERSION 5.9~svn20110310 //
commonwalkkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def commonwalkkernel(*args,$/;" function line:23
compute_method /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ compute_method = compute_method.lower()$/;" variable line:67
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ Gn = args[0] if len(args) == 1 else [args[0], args[1]]$/;" variable line:69
len_gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ len_gn = len(Gn)$/;" variable line:72
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 1]$/;" variable line:73
idx /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ idx = [G[0] for G in Gn]$/;" variable line:74
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ Gn = [G[1] for G in Gn]$/;" variable line:75
ds_attrs /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ ds_attrs = get_dataset_attributes($/;" variable line:81
attr_names /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ attr_names=['node_labeled', 'edge_labeled', 'is_directed'],$/;" variable line:83
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ Gn = [G.to_directed() for G in Gn]$/;" variable line:92
start_time /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ start_time = time.time()$/;" variable line:94
Kmatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ Kmatrix = np.zeros((len(Gn), len(Gn)))$/;" variable line:96
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ def init_worker(gn_toshare):$/;" function line:99
run_time /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^ run_time = time.time() - start_time$/;" variable line:173
_commonwalkkernel_exp /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta):$/;" function line:181
wrapper_cw_exp /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def wrapper_cw_exp(node_label, edge_label, beta, itr):$/;" function line:249
_commonwalkkernel_geo /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma):$/;" function line:255
wrapper_cw_geo /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def wrapper_cw_geo(node_label, edge_label, gama, itr):$/;" function line:290
_commonwalkkernel_brute /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def _commonwalkkernel_brute(walks1,$/;" function line:296
find_all_walks_until_length /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def find_all_walks_until_length(G,$/;" function line:336
find_walks /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def find_walks(G, source_node, length):$/;" function line:388
find_all_walks /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/commonWalkKernel.py /^def find_all_walks(G, length):$/;" function line:412
randomwalkkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def randomwalkkernel(*args,$/;" function line:27
_sylvester_equation /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs):$/;" function line:150
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^ def init_worker(Awl_toshare):$/;" function line:184 function:_sylvester_equation
wrapper_se_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def wrapper_se_do(lmda, itr):$/;" function line:214
_se_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _se_do(A_wave1, A_wave2, lmda):$/;" function line:220
_conjugate_gradient /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, $/;" function line:236
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^ def init_worker(gn_toshare):$/;" function line:280 function:_conjugate_gradient
wrapper_cg_unlabled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def wrapper_cg_unlabled_do(lmda, itr):$/;" function line:302
_cg_unlabled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _cg_unlabled_do(A_wave1, A_wave2, lmda):$/;" function line:308
wrapper_cg_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, $/;" function line:320
_cg_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, $/;" function line:328
_fixed_point /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, $/;" function line:351
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^ def init_worker(gn_toshare):$/;" function line:408 function:_fixed_point
wrapper_fp_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, $/;" function line:418
_fp_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label, $/;" function line:426
func_fp /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def func_fp(x, p_times, lmda, w_times):$/;" function line:448
_spectral_decomposition /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs):$/;" function line:456
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^ def init_worker(q_T_toshare, P_toshare, D_toshare):$/;" function line:492 function:_spectral_decomposition
wrapper_sd_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def wrapper_sd_do(weight, sub_kernel, itr):$/;" function line:516
_sd_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel): $/;" function line:523
_randomwalkkernel_kron /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def _randomwalkkernel_kron(G1, G2, node_label, edge_label):$/;" function line:540
getLabels /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def getLabels(Gn, node_label, edge_label, directed):$/;" function line:561
filterGramMatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def filterGramMatrix(gmt, label_dict, label, directed):$/;" function line:581
computeVK /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def computeVK(g1, g2, ds_attrs, node_kernels, node_label):$/;" function line:593
computeW /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/rwalk_sym.py /^def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label):$/;" function line:627
spkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/sp_sym.py /^def spkernel(*args,$/;" function line:24
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/sp_sym.py /^ def init_worker(gn_toshare):$/;" function line:115 function:spkernel
spkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/sp_sym.py /^def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):$/;" function line:130
wrapper_sp_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/sp_sym.py /^def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr):$/;" function line:191
wrapper_getSPGraph /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/sp_sym.py /^def wrapper_getSPGraph(weight, itr_item):$/;" function line:197
structuralspkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^def structuralspkernel(*args,$/;" function line:25
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^ def init_worker(spl_toshare, gs_toshare):$/;" function line:177 function:structuralspkernel
structuralspkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,$/;" function line:265
wrapper_ssp_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^def wrapper_ssp_do(ds_attrs, node_label, edge_label, node_kernels, $/;" function line:417
get_shortest_paths /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^def get_shortest_paths(G, weight, directed):$/;" function line:426
wrapper_getSP /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/else/ssp_sym.py /^def wrapper_getSP(weight, directed, itr_item):$/;" function line:461
marginalizedkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/marginalizedKernel.py /^def marginalizedkernel(*args,$/;" function line:31
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/marginalizedKernel.py /^ def init_worker(gn_toshare):$/;" function line:114 function:marginalizedkernel
_marginalizedkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/marginalizedKernel.py /^def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):$/;" function line:144
wrapper_marg_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/marginalizedKernel.py /^def wrapper_marg_do(node_label, edge_label, p_quit, n_iteration, itr):$/;" function line:290
wrapper_untotter /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/marginalizedKernel.py /^def wrapper_untotter(Gn, node_label, edge_label, i):$/;" function line:296
randomwalkkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def randomwalkkernel(*args,$/;" function line:21
_sylvester_equation /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, verbose=True):$/;" function line:197
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^ def init_worker(Awl_toshare):$/;" function line:232 function:_sylvester_equation
wrapper_se_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def wrapper_se_do(lmda, itr):$/;" function line:262
_se_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _se_do(A_wave1, A_wave2, lmda):$/;" function line:268
_conjugate_gradient /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, $/;" function line:284
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^ def init_worker(gn_toshare):$/;" function line:328 function:_conjugate_gradient
wrapper_cg_unlabled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def wrapper_cg_unlabled_do(lmda, itr):$/;" function line:350
_cg_unlabled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _cg_unlabled_do(A_wave1, A_wave2, lmda):$/;" function line:356
wrapper_cg_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, $/;" function line:368
_cg_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, $/;" function line:376
_fixed_point /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, $/;" function line:399
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^ def init_worker(gn_toshare):$/;" function line:456 function:_fixed_point
wrapper_fp_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, $/;" function line:466
_fp_labled_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label, $/;" function line:474
func_fp /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def func_fp(x, p_times, lmda, w_times):$/;" function line:496
_spectral_decomposition /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, verbose=True):$/;" function line:504
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^ def init_worker(q_T_toshare, P_toshare, D_toshare):$/;" function line:541 function:_spectral_decomposition
wrapper_sd_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def wrapper_sd_do(weight, sub_kernel, itr):$/;" function line:566
_sd_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel): $/;" function line:573
_randomwalkkernel_kron /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def _randomwalkkernel_kron(G1, G2, node_label, edge_label):$/;" function line:590
getLabels /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def getLabels(Gn, node_label, edge_label, directed):$/;" function line:611
filterGramMatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def filterGramMatrix(gmt, label_dict, label, directed):$/;" function line:631
computeVK /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def computeVK(g1, g2, ds_attrs, node_kernels, node_label):$/;" function line:643
computeW /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/randomWalkKernel.py /^def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label):$/;" function line:677
spkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py /^def spkernel(*args,$/;" function line:22
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py /^ def init_worker(gn_toshare):$/;" function line:157 function:spkernel
spkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py /^def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):$/;" function line:207
wrapper_sp_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py /^def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr):$/;" function line:297
wrapper_getSPGraph /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py /^def wrapper_getSPGraph(weight, itr_item):$/;" function line:310
structuralspkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def structuralspkernel(*args,$/;" function line:28
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^ def init_worker(spl_toshare, gs_toshare):$/;" function line:179 function:structuralspkernel
structuralspkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,$/;" function line:258
wrapper_ssp_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def wrapper_ssp_do(ds_attrs, node_label, edge_label, node_kernels, $/;" function line:346
ssp_do_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label,$/;" function line:355
wrapper_ssp_do_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def wrapper_ssp_do_trie(ds_attrs, node_label, edge_label, node_kernels, $/;" function line:463
getAllNodeKernels /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def getAllNodeKernels(g1, g2, node_kernels, node_label, ds_attrs):$/;" function line:471
getAllEdgeKernels /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def getAllEdgeKernels(g1, g2, edge_kernels, edge_label, ds_attrs):$/;" function line:505
traverseBothTriem /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseBothTriem(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:551
traverseTrie2m /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseTrie2m(root, p1, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:568
traverseBothTriev /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseBothTriev(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:592
traverseTrie2v /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseTrie2v(root, p1, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:609
traverseBothTriee /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseBothTriee(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:631
traverseTrie2e /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseTrie2e(root, p1, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:648
traverseBothTrieu /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseBothTrieu(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:673
traverseTrie2u /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def traverseTrie2u(root, p1, kernel, vk_dict, ek_dict, pcurrent=[]):$/;" function line:690
get_shortest_paths /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def get_shortest_paths(G, weight, directed):$/;" function line:748
wrapper_getSP_naive /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def wrapper_getSP_naive(weight, directed, itr_item):$/;" function line:783
get_sps_as_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def get_sps_as_trie(G, weight, directed):$/;" function line:789
wrapper_getSP_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/structuralspKernel.py /^def wrapper_getSP_trie(weight, directed, itr_item):$/;" function line:830
treeletkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def treeletkernel(*args, $/;" function line:23
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^ def init_worker(canonkeys_toshare):$/;" function line:105 function:treeletkernel
_treeletkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def _treeletkernel_do(canonkey1, canonkey2, sub_kernel):$/;" function line:140
wrapper_treeletkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def wrapper_treeletkernel_do(sub_kernel, itr):$/;" function line:160
get_canonkeys /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def get_canonkeys(G, node_label, edge_label, labeled, is_directed):$/;" function line:166
wrapper_get_canonkeys /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def wrapper_get_canonkeys(node_label, edge_label, labeled, is_directed, itr_item):$/;" function line:418
find_paths /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def find_paths(G, source_node, length):$/;" function line:424
find_all_paths /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/treeletKernel.py /^def find_all_paths(G, length, is_directed):$/;" function line:449
cyclicpatternkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/cyclicPatternKernel.py /^def cyclicpatternkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = None):$/;" function line:20
_cyclicpatternkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/cyclicPatternKernel.py /^def _cyclicpatternkernel_do(patterns1, patterns2):$/;" function line:63
get_patterns /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/cyclicPatternKernel.py /^def get_patterns(G, node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = None):$/;" function line:87
pathkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def pathkernel(*args, node_label='atom', edge_label='bond_type'):$/;" function line:20
_pathkernel_do_l /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def _pathkernel_do_l(G1, G2, sp1, sp2, node_label, edge_label):$/;" function line:107
_pathkernel_do_nl /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def _pathkernel_do_nl(G1, G2, sp1, sp2, node_label):$/;" function line:148
_pathkernel_do_el /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def _pathkernel_do_el(G1, G2, sp1, sp2, edge_label):$/;" function line:171
_pathkernel_do_unl /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def _pathkernel_do_unl(G1, G2, sp1, sp2):$/;" function line:196
get_shortest_paths /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/pathKernel.py /^def get_shortest_paths(G, weight):$/;" function line:211
treepatternkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/treePatternKernel.py /^def treepatternkernel(*args,$/;" function line:21
_treepatternkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/treePatternKernel.py /^def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type,$/;" function line:90
matchingset /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/treePatternKernel.py /^ def matchingset(n1, n2):$/;" function line:119 function:_treepatternkernel_do
mset_com /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/treePatternKernel.py /^ def mset_com(allpairs, length):$/;" function line:123 function:_treepatternkernel_do.matchingset
kernel_h /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/treePatternKernel.py /^ def kernel_h(h):$/;" function line:165 function:_treepatternkernel_do
weisfeilerlehmankernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/weisfeilerLehmanKernel.py /^def weisfeilerlehmankernel(*args, node_label = 'atom', edge_label = 'bond_type', height = 0, base_kernel = 'subtree'):$/;" function line:18
_wl_subtreekernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/weisfeilerLehmanKernel.py /^def _wl_subtreekernel_do(Gn, node_label, edge_label, height):$/;" function line:75
_wl_spkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/weisfeilerLehmanKernel.py /^def _wl_spkernel_do(Gn, node_label, edge_label, height):$/;" function line:183
_wl_edgekernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/weisfeilerLehmanKernel.py /^def _wl_edgekernel_do(Gn, node_label, edge_label, height):$/;" function line:264
_wl_userkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/unfinished/weisfeilerLehmanKernel.py /^def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):$/;" function line:340
untilhpathkernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def untilhpathkernel(*args,$/;" function line:25
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^ def init_worker(trie_toshare):$/;" function line:142 function:untilhpathkernel
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^ def init_worker(plist_toshare):$/;" function line:149 function:untilhpathkernel
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^ def init_worker(plist_toshare):$/;" function line:156 function:untilhpathkernel
_untilhpathkernel_do_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def _untilhpathkernel_do_trie(trie1, trie2, k_func):$/;" function line:207
traverseTrie1t /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^ def traverseTrie1t(root, trie2, setlist, pcurrent=[]):$/;" function line:226 function:_untilhpathkernel_do_trie
traverseTrie2t /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^ def traverseTrie2t(root, trie1, setlist, pcurrent=[]):$/;" function line:244 function:_untilhpathkernel_do_trie
traverseTrie1m /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^ def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):$/;" function line:271 function:_untilhpathkernel_do_trie
traverseTrie2m /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^ def traverseTrie2m(root, trie1, sumlist, pcurrent=[]):$/;" function line:289 function:_untilhpathkernel_do_trie
wrapper_uhpath_do_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def wrapper_uhpath_do_trie(k_func, itr):$/;" function line:316
_untilhpathkernel_do_naive /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def _untilhpathkernel_do_naive(paths1, paths2, k_func):$/;" function line:322
wrapper_uhpath_do_naive /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def wrapper_uhpath_do_naive(k_func, itr):$/;" function line:365
_untilhpathkernel_do_kernelless /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def _untilhpathkernel_do_kernelless(paths1, paths2, k_func):$/;" function line:371
wrapper_uhpath_do_kernelless /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def wrapper_uhpath_do_kernelless(k_func, itr):$/;" function line:414
find_all_paths_until_length /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def find_all_paths_until_length(G,$/;" function line:421
wrapper_find_all_paths_until_length /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def wrapper_find_all_paths_until_length(length, ds_attrs, node_label, $/;" function line:492
find_all_path_as_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def find_all_path_as_trie(G,$/;" function line:501
traverseGraph /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^ def traverseGraph(root, ptrie, length, G, ds_attrs, node_label, edge_label,$/;" function line:542 function:find_all_path_as_trie
wrapper_find_all_path_as_trie /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def wrapper_find_all_path_as_trie(length, ds_attrs, node_label, $/;" function line:593
paths2labelseqs /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/untilHPathKernel.py /^def paths2labelseqs(plist, G, ds_attrs, node_label, edge_label):$/;" function line:601
weisfeilerlehmankernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def weisfeilerlehmankernel(*args, $/;" function line:25
base_kernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ base_kernel = base_kernel.lower()$/;" variable line:74
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list$/;" variable line:75
Gn /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ Gn = [g.copy() for g in Gn]$/;" variable line:76
ds_attrs /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ ds_attrs = get_dataset_attributes(Gn, attr_names=['node_labeled'], $/;" variable line:77
node_label /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ node_label=node_label)$/;" variable line:78
start_time /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ start_time = time.time()$/;" variable line:83
Kmatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ Kmatrix = _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, verbose)$/;" variable line:87
Kmatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ Kmatrix = _wl_spkernel_do(Gn, node_label, edge_label, height)$/;" variable line:91
Kmatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ Kmatrix = _wl_edgekernel_do(Gn, node_label, edge_label, height)$/;" variable line:95
Kmatrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ Kmatrix = _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel)$/;" variable line:99
run_time /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ run_time = time.time() - start_time$/;" variable line:101
_wl_kernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, verbose):$/;" function line:109
wl_iteration /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def wl_iteration(G, node_label):$/;" function line:256
wrapper_wl_iteration /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def wrapper_wl_iteration(node_label, itr_item):$/;" function line:293
compute_kernel_matrix /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, verbose):$/;" function line:300
init_worker /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^ def init_worker(alllabels_toshare):$/;" function line:305 function:compute_kernel_matrix
compute_subtree_kernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def compute_subtree_kernel(num_of_each_label1, num_of_each_label2, kernel):$/;" function line:319
wrapper_compute_subtree_kernel /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def wrapper_compute_subtree_kernel(Kmatrix, itr):$/;" function line:333
_wl_spkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def _wl_spkernel_do(Gn, node_label, edge_label, height):$/;" function line:339
_wl_edgekernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def _wl_edgekernel_do(Gn, node_label, edge_label, height):$/;" function line:421
_wl_userkernel_do /media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/weisfeilerLehmanKernel.py /^def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):$/;" function line:498

gklearn/kernels/else/rwalk_sym.py (+0, -842)

@@ -1,842 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:53:57 2018

@author: ljia
@references: S. V. N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and
Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning Research,
11(Apr):1201–1242, 2010.
"""

import sys
sys.path.insert(0, "../")
import time
from functools import partial
from tqdm import tqdm

import networkx as nx
import numpy as np
from scipy.sparse import identity, kron
from scipy.sparse.linalg import cg
from scipy.optimize import fixed_point

from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm

def randomwalkkernel(*args,
# params for all method.
compute_method=None,
weight=1,
p=None,
q=None,
edge_weight=None,
# params for conjugate and fp method.
node_kernels=None,
edge_kernels=None,
node_label='atom',
edge_label='bond_type',
# params for spectral method.
sub_kernel=None,
n_jobs=None):
"""Calculate random walk graph kernels.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
/
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
compute_method : string
Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'.
weight : float
Decay factor (lambda) of the walks.

Return
------
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the random walk kernel between 2 graphs.
"""
compute_method = compute_method.lower()
Gn = args[0] if len(args) == 1 else [args[0], args[1]]

eweight = None
if edge_weight == None:
print('\n No edge weight is specified. Set all weights to 1.\n')
else:
try:
some_weight = list(
nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
if isinstance(some_weight, float) or isinstance(some_weight, int):
eweight = edge_weight
else:
print(
'\n Edge weight with name %s is not float or integer. Set all weights to 1.\n'
% edge_weight)
except:
print(
'\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n'
% edge_weight)

ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
'edge_attr_dim', 'is_directed'],
node_label=node_label,
edge_label=edge_label)
ds_attrs['node_attr_dim'] = 0
ds_attrs['edge_attr_dim'] = 0
# remove graphs with no edges, as no walk can be found in their structures,
# so the weight matrix between such a graph and itself might be zero.
len_gn = len(Gn)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
if len(Gn) != len_gn:
print('\n %d graphs are removed as they don\'t contain edges.\n' %
(len_gn - len(Gn)))

start_time = time.time()
# # get vertex and edge concatenated labels for each graph
# label_list, d = getLabels(Gn, node_label, edge_label, ds_attrs['is_directed'])
# gmf = filterGramMatrix(A_wave_list[0], label_list[0], ('C', '0', 'O'), ds_attrs['is_directed'])

if compute_method == 'sylvester':
import warnings
warnings.warn('All labels are ignored.')
Kmatrix = _sylvester_equation(Gn, weight, p, q, eweight, n_jobs)

elif compute_method == 'conjugate':
Kmatrix = _conjugate_gradient(Gn, weight, p, q, ds_attrs,
node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs)
elif compute_method == 'fp':
Kmatrix = _fixed_point(Gn, weight, p, q, ds_attrs, node_kernels,
edge_kernels, node_label, edge_label,
eweight, n_jobs)

elif compute_method == 'spectral':
import warnings
warnings.warn('All labels are ignored. Only works for undirected graphs.')
Kmatrix = _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs)

elif compute_method == 'kron':
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j],
node_label, edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
else:
raise Exception(
'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".'
)

run_time = time.time() - start_time
print(
"\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---"
% (len(Gn), run_time))

return Kmatrix, run_time, idx
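
# ---------------------------------------------------------------------------
# Editor's illustrative sketch, not part of the deleted file: a minimal call
# of randomwalkkernel on two small, hypothetical unlabeled graphs, using the
# spectral decomposition method so that all labels are ignored. The guard is
# needed because parallel_gm uses multiprocessing; n_jobs=1 is assumed to keep
# it to a single worker. The graphs and parameter values are illustrative only.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    g1 = nx.cycle_graph(4)  # toy graphs with edges (edgeless graphs are removed above)
    g2 = nx.path_graph(5)
    Kmatrix, run_time, idx = randomwalkkernel(
        [g1, g2], compute_method='spectral', weight=1e-3,
        sub_kernel='exp', n_jobs=1)
    print(Kmatrix)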


###############################################################################
def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 graphs using Sylvester method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""
Kmatrix = np.zeros((len(Gn), len(Gn)))

if q == None:
# don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_list = [
nx.adjacency_matrix(G, eweight).todense().transpose() for G in tqdm(
Gn, desc='compute adjacency matrices', file=sys.stdout)
]
# # normalized adjacency matrices
# A_wave_list = []
# for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
# A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose()
# norm = A_tilde.sum(axis=0)
# norm[norm == 0] = 1
# A_wave_list.append(A_tilde / norm)
if p == None: # p is uniform distribution as default.
def init_worker(Awl_toshare):
global G_Awl
G_Awl = Awl_toshare
do_partial = partial(wrapper_se_do, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(A_wave_list,), n_jobs=n_jobs)
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# S = lmda * A_wave_list[j]
# T_t = A_wave_list[i]
# # use uniform distribution if there is no prior knowledge.
# nb_pd = len(A_wave_list[i]) * len(A_wave_list[j])
# p_times_uni = 1 / nb_pd
# M0 = np.full((len(A_wave_list[j]), len(A_wave_list[i])), p_times_uni)
# X = dlyap(S, T_t, M0)
# X = np.reshape(X, (-1, 1), order='F')
# # use uniform distribution if there is no prior knowledge.
# q_times = np.full((1, nb_pd), p_times_uni)
# Kmatrix[i][j] = np.dot(q_times, X)
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)

return Kmatrix


def wrapper_se_do(lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _se_do(G_Awl[i], G_Awl[j], lmda)


def _se_do(A_wave1, A_wave2, lmda):
from control import dlyap
S = lmda * A_wave2
T_t = A_wave1
# use uniform distribution if there is no prior knowledge.
nb_pd = len(A_wave1) * len(A_wave2)
p_times_uni = 1 / nb_pd
M0 = np.full((len(A_wave2), len(A_wave1)), p_times_uni)
X = dlyap(S, T_t, M0)
X = np.reshape(X, (-1, 1), order='F')
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, nb_pd), p_times_uni)
return np.dot(q_times, X)


###############################################################################
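# Editor's note (a sketch under assumptions, not part of the original file):
# the conjugate-gradient and fixed-point variants below both evaluate the
# generalized random walk kernel of Vishwanathan et al. (2010),
#     k(G1, G2) = q_x^T (I - lmda * W_x)^{-1} p_x,
# where W_x is the weight matrix of the direct product graph computed by
# computeW() and p_x, q_x are the (here uniform) starting and stopping
# distributions. A direct dense reference computation, feasible only for very
# small graphs, would look like the hypothetical helper below.
def _rw_kernel_dense_reference(w_times, lmda):
    """Hypothetical helper: invert (I - lmda * W_x) directly instead of using cg."""
    w_dim = w_times.shape[0]
    p_times = np.full((w_dim, 1), 1 / w_dim)  # uniform starting distribution
    q_times = np.full((1, w_dim), 1 / w_dim)  # uniform stopping distribution
    x = np.linalg.solve(np.identity(w_dim) - lmda * w_times, p_times)
    return float(np.dot(q_times, x))
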
def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 graphs using conjugate method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""
Kmatrix = np.zeros((len(Gn), len(Gn)))
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1:
# # this is faster from unlabeled graphs. @todo: why?
# if q == None:
# # don't normalize adjacency matrices if q is a uniform vector. Note
# # A_wave_list accually contains the transposes of the adjacency matrices.
# A_wave_list = [
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
# ]
# if p == None: # p is uniform distribution as default.
# def init_worker(Awl_toshare):
# global G_Awl
# G_Awl = Awl_toshare
# do_partial = partial(wrapper_cg_unlabled_do, lmda)
# parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
# glbv=(A_wave_list,), n_jobs=n_jobs)
# else:
# reindex nodes using consecutive integers for convenience of kernel calculation.
Gn = [nx.convert_node_labels_to_integers(
g, first_label=0, label_attribute='label_orignal') for g in tqdm(
Gn, desc='reindex vertices', file=sys.stdout)]
if p == None and q == None: # p and q are uniform distributions as default.
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# result = _cg_labled_do(Gn[i], Gn[j], ds_attrs, node_kernels,
# node_label, edge_kernels, edge_label, lmda)
# Kmatrix[i][j] = result
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
return Kmatrix


def wrapper_cg_unlabled_do(lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _cg_unlabled_do(G_Awl[i], G_Awl[j], lmda)


def _cg_unlabled_do(A_wave1, A_wave2, lmda):
nb_pd = len(A_wave1) * len(A_wave2)
p_times_uni = 1 / nb_pd
w_times = kron(A_wave1, A_wave2).todense()
A = identity(w_times.shape[0]) - w_times * lmda
b = np.full((nb_pd, 1), p_times_uni)
x, _ = cg(A, b)
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, nb_pd), p_times_uni)
return np.dot(q_times, x)


def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
edge_label, lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _cg_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)


def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
edge_kernels, edge_label, lmda):
# First, compute kernels between all pairs of nodes, a method borrowed
# from FCSP. It is faster than directly computing all edge kernels
# when $d_1 d_2 > 2$, where $d_1$ and $d_2$ are the vertex degrees of the
# graphs compared, which is the most common case. For very
# sparse graphs, this would be slow.
vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)
# Compute weight matrix of the direct product graph.
w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
edge_kernels, edge_label)
# use uniform distribution if there is no prior knowledge.
p_times_uni = 1 / w_dim
A = identity(w_times.shape[0]) - w_times * lmda
b = np.full((w_dim, 1), p_times_uni)
x, _ = cg(A, b)
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, w_dim), p_times_uni)
return np.dot(q_times, x)


###############################################################################
def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""

Kmatrix = np.zeros((len(Gn), len(Gn)))
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1:
# # this is faster from unlabeled graphs. @todo: why?
# if q == None:
# # don't normalize adjacency matrices if q is a uniform vector. Note
# # A_wave_list accually contains the transposes of the adjacency matrices.
# A_wave_list = [
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
# ]
# if p == None: # p is uniform distribution as default.
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# # use uniform distribution if there is no prior knowledge.
# nb_pd = len(A_wave_list[i]) * len(A_wave_list[j])
# p_times_uni = 1 / nb_pd
# w_times = kron(A_wave_list[i], A_wave_list[j]).todense()
# p_times = np.full((nb_pd, 1), p_times_uni)
# x = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times))
# # use uniform distribution if there is no prior knowledge.
# q_times = np.full((1, nb_pd), p_times_uni)
# Kmatrix[i][j] = np.dot(q_times, x)
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
# else:
# reindex nodes using consecutive integers for convenience of kernel calculation.
Gn = [nx.convert_node_labels_to_integers(
g, first_label=0, label_attribute='label_orignal') for g in tqdm(
Gn, desc='reindex vertices', file=sys.stdout)]
if p == None and q == None: # p and q are uniform distributions as default.
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)
return Kmatrix


def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
edge_label, lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _fp_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)


def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
edge_kernels, edge_label, lmda):
# First, compute kernels between all pairs of nodes, a method borrowed
# from FCSP. It is faster than directly computing all edge kernels
# when $d_1 d_2 > 2$, where $d_1$ and $d_2$ are the vertex degrees of the
# graphs compared, which is the most common case. For very
# sparse graphs, this would be slow.
vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)
# Compute weight matrix of the direct product graph.
w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
edge_kernels, edge_label)
# use uniform distribution if there is no prior knowledge.
p_times_uni = 1 / w_dim
p_times = np.full((w_dim, 1), p_times_uni)
x = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times),
xtol=1e-06, maxiter=1000)
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, w_dim), p_times_uni)
return np.dot(q_times, x)


def func_fp(x, p_times, lmda, w_times):
# Fixed-point map: x -> p + lambda * W x.
return p_times + lmda * np.dot(w_times, x)
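
# Editor's sketch (assumption, for illustration only): the fixed point of
# x = p_x + lmda * W_x x is exactly (I - lmda * W_x)^{-1} p_x, so _fixed_point
# computes the same kernel as the conjugate-gradient variant, only via
# scipy.optimize.fixed_point iteration. A self-contained numerical check on a
# random small system (all names below are hypothetical):
def _check_fixed_point_equals_solve(dim=6, lmda=0.05, seed=0):
    """Compare the fixed-point solution with a direct linear solve."""
    rng = np.random.default_rng(seed)
    w_times = rng.random((dim, dim))
    p_times = np.full((dim, 1), 1 / dim)
    x_fp = fixed_point(func_fp, p_times, args=(p_times, lmda, w_times),
                       xtol=1e-10, maxiter=10000)
    x_direct = np.linalg.solve(np.identity(dim) - lmda * w_times, p_times)
    return np.allclose(x_fp, x_direct, atol=1e-8)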


###############################################################################
def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs):
"""Calculate walk graph kernels up to n between 2 unlabeled graphs using
spectral decomposition method. Labels will be ignored.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""
Kmatrix = np.zeros((len(Gn), len(Gn)))

if q == None:
# precompute the spectral decomposition of each graph.
P_list = []
D_list = []
for G in tqdm(Gn, desc='spectral decompose', file=sys.stdout):
# don't normalize adjacency matrices if q is a uniform vector. Note
# A is actually the transpose of the adjacency matrix.
A = nx.adjacency_matrix(G, eweight).todense().transpose()
ew, ev = np.linalg.eig(A)
D_list.append(ew)
P_list.append(ev)
# P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs?

if p == None: # p is uniform distribution as default.
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn]
# q_T_list = [q.T for q in q_list]
def init_worker(q_T_toshare, P_toshare, D_toshare):
global G_q_T, G_P, G_D
G_q_T = q_T_toshare
G_P = P_toshare
G_D = D_toshare
do_partial = partial(wrapper_sd_do, weight, sub_kernel)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(q_T_list, P_list, D_list), n_jobs=n_jobs)
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# result = _sd_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j],
# D_list[i], D_list[j], weight, sub_kernel)
# Kmatrix[i][j] = result
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
return Kmatrix


def wrapper_sd_do(weight, sub_kernel, itr):
i = itr[0]
j = itr[1]
return i, j, _sd_do(G_q_T[i], G_q_T[j], G_P[i], G_P[j], G_D[i], G_D[j],
weight, sub_kernel)


def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
# use uniform distribution if there is no prior knowledge.
kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense()
# @todo: this is not needed when p = q (kr = kl.T) for undirected graphs
# kr = kron(np.dot(P_inv_list[i], q_list[i]), np.dot(P_inv_list[j], q_list[j])).todense()
if sub_kernel == 'exp':
D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
kmiddle = np.diag(np.exp(weight * D_diag))
elif sub_kernel == 'geo':
D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
kmiddle = np.diag(weight * D_diag)
kmiddle = np.identity(len(kmiddle)) - weight * kmiddle
kmiddle = np.linalg.inv(kmiddle)
return np.dot(np.dot(kl, kmiddle), kl.T)[0, 0]
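
# Editor's note (hedged, not part of the original file): _sd_do above relies on
# the eigendecomposition A_i = P_i D_i P_i^T of each symmetric (undirected)
# adjacency matrix, so that the direct product weight matrix
#     W_x = A_1 ⊗ A_2 = (P_1 ⊗ P_2)(D_1 ⊗ D_2)(P_1 ⊗ P_2)^T.
# The sub-kernel ('exp' or 'geo') is therefore applied only to the diagonal
# D_1 ⊗ D_2 (D_diag above) instead of to the full W_x, and with uniform p = q
# the left and right projection factors coincide, which is why only kl and its
# transpose appear in the final product.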


###############################################################################
def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
"""Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""
# Not implemented; raise explicitly instead of silently returning None.
raise NotImplementedError('The "kron" method of the random walk kernel is not implemented.')


###############################################################################
def getLabels(Gn, node_label, edge_label, directed):
"""Get symbolic labels of a graph dataset, where vertex labels are dealt
with by concatenating them to the edge labels of adjacent edges.
"""
label_list = []
label_set = set()
for g in Gn:
label_g = {}
for e in g.edges(data=True):
nl1 = g.node[e[0]][node_label]
nl2 = g.node[e[1]][node_label]
if not directed and nl1 > nl2:
nl1, nl2 = nl2, nl1
label = (nl1, e[2][edge_label], nl2)
label_g[(e[0], e[1])] = label
label_list.append(label_g)
label_set = set([l for lg in label_list for l in lg.values()])
return label_list, len(label_set)


def filterGramMatrix(gmt, label_dict, label, directed):
"""Compute (the transpose of) the Gram matrix filtered by a label.
"""
gmf = np.zeros(gmt.shape)
for (n1, n2), l in label_dict.items():
if l == label:
gmf[n2, n1] = gmt[n2, n1]
if not directed:
gmf[n1, n2] = gmt[n1, n2]
return gmf


def computeVK(g1, g2, ds_attrs, node_kernels, node_label):
'''Compute vertex kernels between vertices of two graphs.
'''
vk_dict = {} # vertex kernels dict
if ds_attrs['node_labeled']:
# node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
n1[1]['attributes'], n2[1]['attributes'])
# node symb labeled
else:
kn = node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
else:
# node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
n2[1]['attributes'])
# node unlabeled
else:
pass
return vk_dict
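
# Editor's sketch (assumption, not part of the original file): computeVK above
# expects node_kernels to be a dict with the keys 'symb', 'nsymb' and 'mix',
# each mapping to a callable; edge_kernels used in computeW below follows the
# same convention. A minimal hypothetical example using a Kronecker delta for
# symbolic labels and a Gaussian kernel for continuous attribute vectors:
def _example_node_kernels():
    """Hypothetical construction of a node_kernels dict."""
    delta = lambda l1, l2: 1.0 if l1 == l2 else 0.0
    gauss = lambda a1, a2: np.exp(-0.5 * np.sum((np.array(a1, dtype=float)
                                                 - np.array(a2, dtype=float)) ** 2))
    return {'symb': delta,
            'nsymb': gauss,
            'mix': lambda l1, l2, a1, a2: delta(l1, l2) * gauss(a1, a2)}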


def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label):
'''Compute weight matrix of the direct product graph.
'''
w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
w_times = np.zeros((w_dim, w_dim))
if vk_dict: # node labeled
if ds_attrs['is_directed']:
if ds_attrs['edge_labeled']:
# edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])]
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])]
else:
# edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])]
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* vk_dict[(e1[1], e2[1])]
else: # undirected
if ds_attrs['edge_labeled']:
# edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* ek_temp * vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* ek_temp * vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
else:
# edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* ek_temp * vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* ek_temp * vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = vk_dict[(e1[0], e2[0])] \
* vk_dict[(e1[1], e2[1])] \
+ vk_dict[(e1[0], e2[1])] \
* vk_dict[(e1[1], e2[0])]
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
else: # node unlabeled
if ds_attrs['is_directed']:
if ds_attrs['edge_labeled']:
# edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
else:
# edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = 1
else: # undirected
if ds_attrs['edge_labeled']:
# edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
else:
# edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = ek_temp
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
# edge unlabeled
else:
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0],
e1[1] * nx.number_of_nodes(g2) + e2[1])
w_times[w_idx] = 1
w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1],
e1[1] * nx.number_of_nodes(g2) + e2[0])
w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
return w_times, w_dim
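# A hedged, illustrative helper (not part of the original module): rows and columns of
# w_times index vertex pairs of the direct product graph. The pair (u1, u2), with u1
# from g1 and u2 from g2, is flattened to a single index exactly as in the w_idx
# computations above.
def _product_graph_index(u1, u2, g2):
    return u1 * nx.number_of_nodes(g2) + u2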

+ 0
- 200
gklearn/kernels/else/sp_sym.py View File

@@ -1,200 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 21 18:02:00 2018

@author: ljia
"""

import sys
import time
from itertools import product
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm

import networkx as nx
import numpy as np

from gklearn.utils.utils import getSPGraph
from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm
sys.path.insert(0, "../")

def spkernel(*args,
node_label='atom',
edge_weight=None,
node_kernels=None,
n_jobs=None):
"""Calculate shortest-path kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
/
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_weight : string
Edge attribute name corresponding to the edge weight.
node_kernels: dict
A dictionary of kernel functions for nodes, including 3 items: 'symb'
for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
for both label types. The first 2 functions take two node labels as
parameters, and the 'mix' function takes 4 parameters: a symbolic and a
non-symbolic label for each of the two nodes. Each label is given as a
2-D array of shape (n_samples, n_features). Each function returns a
number as the kernel value. Ignored when nodes are unlabeled.

Return
------
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the sp kernel between 2 graphs.
"""
# pre-process
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
weight = None
if edge_weight is None:
print('\n No edge weight specified. All edge weights are set to 1.\n')
else:
try:
some_weight = list(
nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
if isinstance(some_weight, (float, int)):
weight = edge_weight
else:
print(
'\n Edge weight with name %s is not float or integer. All edge weights are set to 1.\n'
% edge_weight)
except Exception:
print(
'\n Edge weight with name "%s" is not found in the edge attributes. All edge weights are set to 1.\n'
% edge_weight)
ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'node_attr_dim', 'is_directed'],
node_label=node_label)
ds_attrs['node_attr_dim'] = 0

# remove graphs with no edges, as no sp can be found in their structures,
# so the kernel between such a graph and itself will be zero.
len_gn = len(Gn)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
if len(Gn) != len_gn:
print('\n %d graphs are removed as they don\'t contain edges.\n' %
(len_gn - len(Gn)))

start_time = time.time()

pool = Pool(n_jobs)
# get shortest path graphs of Gn
getsp_partial = partial(wrapper_getSPGraph, weight)
itr = zip(Gn, range(0, len(Gn)))
if len(Gn) < 100 * n_jobs:
# # use default chunksize as pool.map when iterable is less than 100
# chunksize, extra = divmod(len(Gn), n_jobs * 4)
# if extra:
# chunksize += 1
chunksize = int(len(Gn) / n_jobs) + 1
else:
chunksize = 100
for i, g in tqdm(
pool.imap_unordered(getsp_partial, itr, chunksize),
desc='getting sp graphs', file=sys.stdout):
Gn[i] = g
pool.close()
pool.join()

Kmatrix = np.zeros((len(Gn), len(Gn)))

# ---- use pool.imap_unordered to parallel and track progress. ----
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_sp_do, ds_attrs, node_label, node_kernels)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs)

run_time = time.time() - start_time
print(
"\n --- shortest path kernel matrix of size %d built in %s seconds ---"
% (len(Gn), run_time))

return Kmatrix, run_time, idx
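# A minimal, hedged usage sketch (illustrative only, not part of the original module).
# It assumes toy graphs with an 'atom' node label and uses a simple delta kernel for
# the symbolic case; only the 'symb' entry is exercised here, since this variant forces
# node_attr_dim to 0.
def _demo_spkernel():
    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    g1.add_edge(0, 1)
    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'})])
    g2.add_edge(0, 1)
    node_kernels = {'symb': lambda a, b: 1.0 if a == b else 0.0,
                    'nsymb': None, 'mix': None}
    Kmatrix, run_time, idx = spkernel([g1, g2], node_label='atom',
                                      node_kernels=node_kernels, n_jobs=1)
    return Kmatrix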


def spkernel_do(g1, g2, ds_attrs, node_label, node_kernels):
kernel = 0

# compute vertex kernels first, following the scheme of FCSP (fast computation of shortest path kernel).
vk_dict = {} # vertex kernels dict
if ds_attrs['node_labeled']:
# node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
n1[1]['attributes'], n2[1]['attributes'])
# node symb labeled
else:
kn = node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
else:
# node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
n2[1]['attributes'])
# node unlabeled
else:
for e1, e2 in product(
g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kernel += 1
return kernel

# compute graph kernels
if ds_attrs['is_directed']:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1],
e2[1])]
kn1 = nk11 * nk22
kernel += kn1
else:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
# each edge walk is counted twice, starting from both its extreme nodes.
nk11, nk12, nk21, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(
e1[0], e2[1])], vk_dict[(e1[1],
e2[0])], vk_dict[(e1[1],
e2[1])]
kn1 = nk11 * nk22
kn2 = nk12 * nk21
kernel += kn1 + kn2

return kernel


def wrapper_sp_do(ds_attrs, node_label, node_kernels, itr):
i = itr[0]
j = itr[1]
return i, j, spkernel_do(G_gn[i], G_gn[j], ds_attrs, node_label, node_kernels)


def wrapper_getSPGraph(weight, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, getSPGraph(g, edge_weight=weight)

+ 0
- 464
gklearn/kernels/else/ssp_sym.py View File

@@ -1,464 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:42:48 2018

@author: ljia
"""

import sys
import time
from itertools import combinations, product
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm

import networkx as nx
import numpy as np

from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm

sys.path.insert(0, "../")


def structuralspkernel(*args,
node_label='atom',
edge_weight=None,
edge_label='bond_type',
node_kernels=None,
edge_kernels=None,
n_jobs=None):
"""Calculate mean average structural shortest path kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
/
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_weight : string
Edge attribute name corresponding to the edge weight.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
node_kernels: dict
A dictionary of kernel functions for nodes, including 3 items: 'symb'
for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix'
for both label types. The first 2 functions take two node labels as
parameters, and the 'mix' function takes 4 parameters: a symbolic and a
non-symbolic label for each of the two nodes. Each label is given as a
2-D array of shape (n_samples, n_features). Each function returns a number
as the kernel value. Ignored when nodes are unlabeled.
edge_kernels: dict
A dictionary of kernel functions for edges, including 3 items: 'symb'
for symbolic edge labels, 'nsymb' for non-symbolic edge labels, and 'mix'
for both label types. The first 2 functions take two edge labels as
parameters, and the 'mix' function takes 4 parameters: a symbolic and a
non-symbolic label for each of the two edges. Each label is given as a
2-D array of shape (n_samples, n_features). Each function returns a number
as the kernel value. Ignored when edges are unlabeled.

Return
------
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the mean average structural
shortest path kernel between 2 graphs.
"""
# pre-process
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
weight = None
if edge_weight is None:
print('\n No edge weight specified. All edge weights are set to 1.\n')
else:
try:
some_weight = list(
nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
if isinstance(some_weight, (float, int)):
weight = edge_weight
else:
print(
'\n Edge weight with name %s is not float or integer. All edge weights are set to 1.\n'
% edge_weight)
except Exception:
print(
'\n Edge weight with name "%s" is not found in the edge attributes. All edge weights are set to 1.\n'
% edge_weight)
ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
'edge_attr_dim', 'is_directed'],
node_label=node_label, edge_label=edge_label)
ds_attrs['node_attr_dim'] = 0
ds_attrs['edge_attr_dim'] = 0

start_time = time.time()

# get shortest paths of each graph in Gn
splist = [None] * len(Gn)
pool = Pool(n_jobs)
# get shortest path graphs of Gn
getsp_partial = partial(wrapper_getSP, weight, ds_attrs['is_directed'])
itr = zip(Gn, range(0, len(Gn)))
if len(Gn) < 100 * n_jobs:
chunksize = int(len(Gn) / n_jobs) + 1
else:
chunksize = 100
# chunksize = 300 # int(len(list(itr)) / n_jobs)
for i, sp in tqdm(
pool.imap_unordered(getsp_partial, itr, chunksize),
desc='getting shortest paths',
file=sys.stdout):
splist[i] = sp
# time.sleep(10)
pool.close()
pool.join()
# # get shortest paths of each graph in Gn
# splist = [[] for _ in range(len(Gn))]
# # get shortest path graphs of Gn
# getsp_partial = partial(wrapper_getSP, weight, ds_attrs['is_directed'])
# itr = zip(Gn, range(0, len(Gn)))
# if len(Gn) < 1000 * n_jobs:
# chunksize = int(len(Gn) / n_jobs) + 1
# else:
# chunksize = 1000
# # chunksize = 300 # int(len(list(itr)) / n_jobs)
# from contextlib import closing
# with closing(Pool(n_jobs)) as pool:
## for i, sp in tqdm(
# res = pool.imap_unordered(getsp_partial, itr, 10)
## desc='getting shortest paths',
## file=sys.stdout):
## splist[i] = sp
## time.sleep(10)
# pool.close()
# pool.join()
# ss = 0
# ss += sys.getsizeof(splist)
# for spss in splist:
# ss += sys.getsizeof(spss)
# for spp in spss:
# ss += sys.getsizeof(spp)
# time.sleep(20)
# # ---- direct running, normally use single CPU core. ----
# splist = []
# for g in tqdm(Gn, desc='getting sp graphs', file=sys.stdout):
# splist.append(get_shortest_paths(g, weight, ds_attrs['is_directed']))

# # ---- only for the Fast Computation of Shortest Path Kernel (FCSP)
# sp_ml = [0] * len(Gn) # shortest path matrices
# for i in result_sp:
# sp_ml[i[0]] = i[1]
# edge_x_g = [[] for i in range(len(sp_ml))]
# edge_y_g = [[] for i in range(len(sp_ml))]
# edge_w_g = [[] for i in range(len(sp_ml))]
# for idx, item in enumerate(sp_ml):
# for i1 in range(len(item)):
# for i2 in range(i1 + 1, len(item)):
# if item[i1, i2] != np.inf:
# edge_x_g[idx].append(i1)
# edge_y_g[idx].append(i2)
# edge_w_g[idx].append(item[i1, i2])
# print(len(edge_x_g[0]))
# print(len(edge_y_g[0]))
# print(len(edge_w_g[0]))

Kmatrix = np.zeros((len(Gn), len(Gn)))
# ---- use pool.imap_unordered to parallel and track progress. ----
def init_worker(spl_toshare, gs_toshare):
global G_spl, G_gs
G_spl = spl_toshare
G_gs = gs_toshare
do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
node_kernels, edge_kernels)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(splist, Gn), n_jobs=n_jobs)

# # ---- use pool.imap_unordered to parallel and track progress. ----
# pool = Pool(n_jobs)
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
# node_kernels, edge_kernels)
# itr = zip(combinations_with_replacement(Gn, 2),
# combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
# if len_itr < 1000 * n_jobs:
# chunksize = int(len_itr / n_jobs) + 1
# else:
# chunksize = 1000
# for i, j, kernel in tqdm(
# pool.imap_unordered(do_partial, itr, chunksize),
# desc='calculating kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# pool.close()
# pool.join()
# # ---- use pool.map to parallel. ----
# pool = Pool(n_jobs)
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
# node_kernels, edge_kernels)
# itr = zip(combinations_with_replacement(Gn, 2),
# combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# for i, j, kernel in tqdm(
# pool.map(do_partial, itr), desc='calculating kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# pool.close()
# pool.join()

# # ---- use pool.imap_unordered to parallel and track progress. ----
# do_partial = partial(wrapper_ssp_do, ds_attrs, node_label, edge_label,
# node_kernels, edge_kernels)
# itr = zip(combinations_with_replacement(Gn, 2),
# combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
# if len_itr < 1000 * n_jobs:
# chunksize = int(len_itr / n_jobs) + 1
# else:
# chunksize = 1000
# from contextlib import closing
# with closing(Pool(n_jobs)) as pool:
# for i, j, kernel in tqdm(
# pool.imap_unordered(do_partial, itr, 1000),
# desc='calculating kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
# pool.close()
# pool.join()


# # ---- direct running, normally use single CPU core. ----
# from itertools import combinations_with_replacement
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# kernel = structuralspkernel_do(Gn[i], Gn[j], splist[i], splist[j],
# ds_attrs, node_label, edge_label, node_kernels, edge_kernels)
## if(kernel > 1):
## print("error here ")
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel

run_time = time.time() - start_time
print(
"\n --- shortest path kernel matrix of size %d built in %s seconds ---"
% (len(Gn), run_time))

return Kmatrix, run_time
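# A minimal, hedged usage sketch (illustrative only, not part of the original module).
# The kernel dicts follow the structure documented in the docstring above; the delta
# kernel used for both nodes and edges is an illustrative choice, not the library's own.
# Only the 'symb' entries are exercised, since this variant forces the attribute
# dimensions to 0.
def _demo_structuralspkernel():
    def delta(a, b):
        return 1.0 if a == b else 0.0
    node_kernels = {'symb': delta, 'nsymb': None, 'mix': None}
    edge_kernels = {'symb': delta, 'nsymb': None, 'mix': None}
    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    g1.add_edge(0, 1, bond_type='1')
    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'})])
    g2.add_edge(0, 1, bond_type='1')
    Kmatrix, run_time = structuralspkernel([g1, g2], node_label='atom',
                                           edge_label='bond_type',
                                           node_kernels=node_kernels,
                                           edge_kernels=edge_kernels,
                                           n_jobs=1)
    return Kmatrix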


def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,
node_kernels, edge_kernels):
kernel = 0

# First, compute vertex kernels, following the scheme of FCSP (fast computation of shortest path kernel).
vk_dict = {} # vertex kernels dict
if ds_attrs['node_labeled']:
# node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
n1[1]['attributes'], n2[1]['attributes'])
# node symb labeled
else:
kn = node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
else:
# node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1]['attributes'],
n2[1]['attributes'])
# node unlabeled
else:
pass

# Then, compute kernels between all pairs of edges; the idea is an
# extension of FCSP. It suits sparse graphs, which covers most of the cases
# we encountered. For dense graphs, this would be slow.
ek_dict = {} # dict of edge kernels
if ds_attrs['edge_labeled']:
# edge symb and non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['mix']
for e1, e2 in product(
g1.edges(data=True), g2.edges(data=True)):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
e1[2]['attributes'], e2[2]['attributes'])
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
# edge symb labeled
else:
ke = edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
else:
# edge non-symb labeled
if ds_attrs['edge_attr_dim'] > 0:
ke = edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
ek_temp = ke(e1[2]['attributes'], e2[2]['attributes'])
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
# edge unlabeled
else:
pass

# compute graph kernels
if vk_dict:
if ek_dict:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
kpath = vk_dict[(p1[0], p2[0])]
if kpath:
for idx in range(1, len(p1)):
kpath *= vk_dict[(p1[idx], p2[idx])] * \
ek_dict[((p1[idx-1], p1[idx]),
(p2[idx-1], p2[idx]))]
if not kpath:
break
kernel += kpath # add up kernels of all paths
else:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
kpath = vk_dict[(p1[0], p2[0])]
if kpath:
for idx in range(1, len(p1)):
kpath *= vk_dict[(p1[idx], p2[idx])]
if not kpath:
break
kernel += kpath # add up kernels of all paths
else:
if ek_dict:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
if len(p1) == 0:
kernel += 1
else:
kpath = 1
for idx in range(0, len(p1) - 1):
kpath *= ek_dict[((p1[idx], p1[idx+1]),
(p2[idx], p2[idx+1]))]
if not kpath:
break
kernel += kpath # add up kernels of all paths
else:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
kernel += 1

kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average

# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
# # compute vertex kernel matrix
# try:
# vk_mat = np.zeros((nx.number_of_nodes(g1),
# nx.number_of_nodes(g2)))
# g1nl = enumerate(g1.nodes(data=True))
# g2nl = enumerate(g2.nodes(data=True))
# for i1, n1 in g1nl:
# for i2, n2 in g2nl:
# vk_mat[i1][i2] = kn(
# n1[1][node_label], n2[1][node_label],
# [n1[1]['attributes']], [n2[1]['attributes']])

# range1 = range(0, len(edge_w_g[i]))
# range2 = range(0, len(edge_w_g[j]))
# for i1 in range1:
# x1 = edge_x_g[i][i1]
# y1 = edge_y_g[i][i1]
# w1 = edge_w_g[i][i1]
# for i2 in range2:
# x2 = edge_x_g[j][i2]
# y2 = edge_y_g[j][i2]
# w2 = edge_w_g[j][i2]
# ke = (w1 == w2)
# if ke > 0:
# kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
# kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
# Kmatrix += kn1 + kn2
return kernel


def wrapper_ssp_do(ds_attrs, node_label, edge_label, node_kernels,
edge_kernels, itr):
i = itr[0]
j = itr[1]
return i, j, structuralspkernel_do(G_gs[i], G_gs[j], G_spl[i], G_spl[j],
ds_attrs, node_label, edge_label,
node_kernels, edge_kernels)


def get_shortest_paths(G, weight, directed):
"""Get all shortest paths of a graph.

Parameters
----------
G : NetworkX graphs
The graphs whose paths are calculated.
weight : string/None
edge attribute used as weight to calculate the shortest path.
directed: boolean
Whether graph is directed.

Return
------
sp : list of list
List of shortest paths of the graph, where each path is represented by a list of nodes.
"""
sp = []
for n1, n2 in combinations(G.nodes(), 2):
try:
spltemp = list(nx.all_shortest_paths(G, n1, n2, weight=weight))
except nx.NetworkXNoPath: # nodes not connected
# sp.append([])
pass
else:
sp += spltemp
# each edge walk is counted twice, starting from both its extreme nodes.
if not directed:
sp += [sptemp[::-1] for sptemp in spltemp]
# add single nodes as length 0 paths.
sp += [[n] for n in G.nodes()]
return sp
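# A small, hedged worked example (illustrative only, not part of the original module).
# For an undirected path graph 0-1-2 the function above returns every shortest path in
# both directions plus the single-node paths:
def _demo_get_shortest_paths():
    g = nx.path_graph(3)
    return get_shortest_paths(g, None, directed=False)
    # -> [[0, 1], [1, 0], [0, 1, 2], [2, 1, 0], [1, 2], [2, 1], [0], [1], [2]]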


def wrapper_getSP(weight, directed, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, get_shortest_paths(g, weight, directed)

+ 0
- 147
gklearn/kernels/unfinished/cyclicPatternKernel.py View File

@@ -1,147 +0,0 @@
"""
@author: linlin <jajupmochi@gmail.com>
@references:
[1] Tamás Horváth, Thomas Gärtner, and Stefan Wrobel. Cyclic pattern kernels for predictive graph mining. In Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining, pages 158–167. ACM, 2004.
[2] Hopcroft, J.; Tarjan, R. (1973). “Efficient algorithms for graph manipulation”. Communications of the ACM 16: 372–378. doi:10.1145/362248.362272.
[3] Finding all the elementary circuits of a directed graph. D. B. Johnson, SIAM Journal on Computing 4, no. 1, 77-84, 1975. http://dx.doi.org/10.1137/0204007
"""

import sys
import pathlib
sys.path.insert(0, "../")
import time

import networkx as nx
import numpy as np

from tqdm import tqdm


def cyclicpatternkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = None):
"""Calculate cyclic pattern graph kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
/
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
labeled : boolean
Whether the graphs are labeled. The default is True.
cycle_bound : integer
Upper bound on the total number of simple cycles searched for; if it is exceeded, an empty pattern list is returned. The default (None) means no bound.

Return
------
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the cyclic pattern kernel between 2 graphs.
"""
Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list
Kmatrix = np.zeros((len(Gn), len(Gn)))

start_time = time.time()

# get all cyclic and tree patterns of all graphs before calculating kernels to save time, but this may consume a lot of memory for large dataset.
all_patterns = [ get_patterns(Gn[i], node_label=node_label, edge_label = edge_label, labeled = labeled, cycle_bound = cycle_bound)
for i in tqdm(range(0, len(Gn)), desc='retrieve patterns', file=sys.stdout) ]

for i in tqdm(range(0, len(Gn)), desc='calculate kernels', file=sys.stdout):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _cyclicpatternkernel_do(all_patterns[i], all_patterns[j])
Kmatrix[j][i] = Kmatrix[i][j]

run_time = time.time() - start_time
print("\n --- kernel matrix of cyclic pattern kernel of size %d built in %s seconds ---" % (len(Gn), run_time))

return Kmatrix, run_time


def _cyclicpatternkernel_do(patterns1, patterns2):
"""Calculate path graph kernels up to depth d between 2 graphs.

Parameters
----------
paths1, paths2 : list
List of paths in 2 graphs, where for unlabeled graphs, each path is represented by a list of nodes; while for labeled graphs, each path is represented by a string consists of labels of nodes and edges on that path.
k_func : function
A kernel function used using different notions of fingerprint similarity.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
labeled : boolean
Whether the graphs are labeled. The default is True.

Return
------
kernel : float
Treelet Kernel between 2 graphs.
"""
return len(set(patterns1) & set(patterns2))


def get_patterns(G, node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = None):
"""Find all cyclic and tree patterns in a graph.

Parameters
----------
G : NetworkX graph
The graph in which patterns are searched.
cycle_bound : integer
Upper bound on the total number of simple cycles searched for; if it is exceeded, an empty pattern list is returned. The default (None) means no bound.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
labeled : boolean
Whether the graphs are labeled. The default is True.

Return
------
path : list
List of paths retrieved, where for unlabeled graphs, each path is represented by a list of nodes; while for labeled graphs, each path is represented by a string consists of labels of nodes and edges on that path.
"""
number_simplecycles = 0
bridges = nx.Graph()
patterns = []

bicomponents = nx.biconnected_component_subgraphs(G) # all biconnected components of G. This function uses the algorithm in reference [2], which (I guess) is slightly different from the one used in paper [1].
for subgraph in bicomponents:
if nx.number_of_edges(subgraph) > 1:
simple_cycles = list(nx.simple_cycles(G.to_directed())) # all simple cycles in biconnected components. This function uses the algorithm in reference [3], which has time complexity O((n+e)(N+1)) for n nodes, e edges and N simple cycles, and might be slower than the algorithm applied in paper [1].
if cycle_bound is not None and len(simple_cycles) > cycle_bound - number_simplecycles: # in paper [1], when applying another algorithm (subroutine RT), this becomes len(simple_cycles) == cycle_bound - number_simplecycles + 1, check again.
return []
else:

# calculate canonical representation for each simple cycle
all_canonkeys = []
for cycle in simple_cycles:
canonlist = [ G.node[node][node_label] + G[node][cycle[cycle.index(node) + 1]][edge_label] for node in cycle[:-1] ]
canonkey = ''.join(canonlist)
canonkey = canonkey if canonkey < canonkey[::-1] else canonkey[::-1]
for i in range(1, len(cycle[:-1])):
canonlist = [ G.node[node][node_label] + G[node][cycle[cycle.index(node) + 1]][edge_label] for node in cycle[i:-1] + cycle[:i] ]
canonkey_t = ''.join(canonlist)
canonkey_t = canonkey_t if canonkey_t < canonkey_t[::-1] else canonkey_t[::-1]
canonkey = canonkey if canonkey < canonkey_t else canonkey_t
all_canonkeys.append(canonkey)

patterns = list(set(patterns) | set(all_canonkeys))
number_simplecycles += len(simple_cycles)
else:
bridges.add_edges_from(subgraph.edges(data=True))

# calculate canonical representation for each connected component in bridge set
components = list(nx.connected_component_subgraphs(bridges)) # all connected components in the bridge
tree_patterns = []
for tree in components:
break  # @todo: canonical keys for tree patterns (the subroutine pi of paper [1]) are not implemented yet.



# patterns += pi(bridges)
return patterns

+ 0
- 234
gklearn/kernels/unfinished/pathKernel.py View File

@@ -1,234 +0,0 @@
"""
@author: linlin
@references: Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. InESANN 2007 Apr 25 (pp. 355-360).
"""

import sys
import pathlib
sys.path.insert(0, "../")
import time
import itertools
from tqdm import tqdm

import networkx as nx
import numpy as np

from gklearn.kernels.deltaKernel import deltakernel
from gklearn.utils.graphdataset import get_dataset_attributes


def pathkernel(*args, node_label='atom', edge_label='bond_type'):
"""Calculate mean average path kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
/
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.

Return
------
Kmatrix/kernel : Numpy matrix/float
Kernel matrix, each element of which is the path kernel between 2 graphs. / Path kernel between 2 graphs.
"""
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
Kmatrix = np.zeros((len(Gn), len(Gn)))
ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
node_label=node_label,
edge_label=edge_label)
try:
some_weight = list(nx.get_edge_attributes(Gn[0],
edge_label).values())[0]
weight = edge_label if isinstance(some_weight, float) or isinstance(
some_weight, int) else None
except Exception:
weight = None

start_time = time.time()

splist = [
get_shortest_paths(Gn[i], weight) for i in tqdm(
range(0, len(Gn)), desc='getting shortest paths', file=sys.stdout)
]

pbar = tqdm(
total=((len(Gn) + 1) * len(Gn) / 2),
desc='calculating kernels',
file=sys.stdout)
if ds_attrs['node_labeled']:
if ds_attrs['edge_labeled']:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _pathkernel_do_l(Gn[i], Gn[j], splist[i],
splist[j], node_label,
edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
else:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _pathkernel_do_nl(Gn[i], Gn[j], splist[i],
splist[j], node_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)

else:
if ds_attrs['edge_labeled']:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _pathkernel_do_el(Gn[i], Gn[j], splist[i],
splist[j], edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
else:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _pathkernel_do_unl(Gn[i], Gn[j], splist[i],
splist[j])
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)

run_time = time.time() - start_time
print(
"\n --- mean average path kernel matrix of size %d built in %s seconds ---"
% (len(Gn), run_time))

return Kmatrix, run_time
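# A minimal, hedged usage sketch (illustrative only, not part of the original module).
# It assumes toy graphs carrying the default 'atom' and 'bond_type' labels; since the
# 'bond_type' values are strings, no edge weight is used. Like the rest of this module,
# it relies on a NetworkX version that still exposes the `G.node` accessor.
def _demo_pathkernel():
    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    g1.add_edge(0, 1, bond_type='1')
    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'})])
    g2.add_edge(0, 1, bond_type='1')
    Kmatrix, run_time = pathkernel([g1, g2], node_label='atom',
                                   edge_label='bond_type')
    return Kmatrix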


def _pathkernel_do_l(G1, G2, sp1, sp2, node_label, edge_label):
"""Calculate mean average path kernel between 2 fully-labeled graphs.

Parameters
----------
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
sp1, sp2 : list of list
List of shortest paths of 2 graphs, where each path is represented by a list of nodes.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.

Return
------
kernel : float
Path Kernel between 2 graphs.
"""
# calculate kernel
kernel = 0
# if len(sp1) == 0 or len(sp2) == 0:
# return 0 # @todo: should it be zero?
for path1 in sp1:
for path2 in sp2:
if len(path1) == len(path2):
kernel_path = (G1.node[path1[0]][node_label] == G2.node[path2[
0]][node_label])
if kernel_path:
for i in range(1, len(path1)):
# kernel = 1 if all corresponding nodes and edges in the 2 paths have same labels, otherwise 0
if G1[path1[i - 1]][path1[i]][edge_label] != G2[path2[i - 1]][path2[i]][edge_label] or G1.node[path1[i]][node_label] != G2.node[path2[i]][node_label]:
kernel_path = 0
break
kernel += kernel_path # add up kernels of all paths

kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average

return kernel


def _pathkernel_do_nl(G1, G2, sp1, sp2, node_label):
"""Calculate mean average path kernel between 2 node-labeled graphs.
"""
# calculate kernel
kernel = 0
# if len(sp1) == 0 or len(sp2) == 0:
# return 0 # @todo: should it be zero?
for path1 in sp1:
for path2 in sp2:
if len(path1) == len(path2):
kernel_path = 1
for i in range(0, len(path1)):
# kernel = 1 if all corresponding nodes in the 2 paths have same labels, otherwise 0
if G1.node[path1[i]][node_label] != G2.node[path2[i]][node_label]:
kernel_path = 0
break
kernel += kernel_path

kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average

return kernel


def _pathkernel_do_el(G1, G2, sp1, sp2, edge_label):
"""Calculate mean average path kernel between 2 edge-labeled graphs.
"""
# calculate kernel
kernel = 0
for path1 in sp1:
for path2 in sp2:
if len(path1) == len(path2):
if len(path1) == 0:
kernel += 1
else:
kernel_path = 1
for i in range(0, len(path1) - 1):
# kernel = 1 if all corresponding edges in the 2 paths have same labels, otherwise 0
if G1[path1[i]][path1[i + 1]][edge_label] != G2[path2[
i]][path2[i + 1]][edge_label]:
kernel_path = 0
break
kernel += kernel_path

kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average

return kernel


def _pathkernel_do_unl(G1, G2, sp1, sp2):
"""Calculate mean average path kernel between 2 unlabeled graphs.
"""
# calculate kernel
kernel = 0
for path1 in sp1:
for path2 in sp2:
if len(path1) == len(path2):
kernel += 1

kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average

return kernel


def get_shortest_paths(G, weight):
"""Get all shortest paths of a graph.

Parameters
----------
G : NetworkX graphs
The graphs whose paths are calculated.
weight : string/None
edge attribute used as weight to calculate the shortest path.

Return
------
sp : list of list
List of shortest paths of the graph, where each path is represented by a list of nodes.
"""
sp = []
for n1, n2 in itertools.combinations(G.nodes(), 2):
try:
sp.append(nx.shortest_path(G, n1, n2, weight=weight))
except nx.NetworkXNoPath: # nodes not connected
sp.append([])
# add single nodes as length 0 paths.
sp += [[n] for n in G.nodes()]
return sp
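# A small, hedged worked example (illustrative only, not part of the original module).
# For a path graph 0-1-2 the function above returns one shortest path per node pair
# plus the single-node paths:
def _demo_get_shortest_paths():
    g = nx.path_graph(3)
    return get_shortest_paths(g, None)
    # -> [[0, 1], [0, 1, 2], [1, 2], [0], [1], [2]]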

+ 0
- 241
gklearn/kernels/unfinished/treePatternKernel.py View File

@@ -1,241 +0,0 @@
"""
@author: linlin
@references: Pierre Mahé and Jean-Philippe Vert. Graph kernels based on tree patterns for molecules. Machine learning, 75(1):3–35, 2009.
"""

import sys
import pathlib
sys.path.insert(0, "../")
import time

import networkx as nx
import numpy as np

from collections import Counter
from tqdm import tqdm
tqdm.monitor_interval = 0

from gklearn.utils.utils import untotterTransformation


def treepatternkernel(*args,
node_label='atom',
edge_label='bond_type',
labeled=True,
kernel_type='untiln',
lmda=1,
h=1,
remove_totters=True):
"""Calculate tree pattern graph kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
/
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
labeled : boolean
Whether the graphs are labeled. The default is True.
kernel_type : string
Type of tree pattern kernel, could be 'untiln', 'size' or 'branching'.
lmda : float
Weight deciding whether linear patterns or tree patterns of increasing complexity are favored.
h : integer
The upper bound of the height of tree patterns.
remove_totters : boolean
whether to remove totters. The default value is True.

Return
------
Kmatrix: Numpy matrix
Kernel matrix, each element of which is the tree pattern graph kernel between 2 graphs.
"""
if h < 1:
raise ValueError('h must be an integer no smaller than 1.')
kernel_type = kernel_type.lower()
# arrange all graphs in a list
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
Kmatrix = np.zeros((len(Gn), len(Gn)))
h = int(h)

start_time = time.time()

if remove_totters:
Gn = [untotterTransformation(G, node_label, edge_label) for G in Gn]

pbar = tqdm(
total=(1 + len(Gn)) * len(Gn) / 2,
desc='calculate kernels',
file=sys.stdout)
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _treepatternkernel_do(Gn[i], Gn[j], node_label,
edge_label, labeled,
kernel_type, lmda, h)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)

run_time = time.time() - start_time
print(
"\n --- kernel matrix of tree pattern kernel of size %d built in %s seconds ---"
% (len(Gn), run_time))

return Kmatrix, run_time
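# A minimal, hedged usage sketch (illustrative only, not part of the original module).
# remove_totters is switched off to keep the example small; like the rest of this
# module, it relies on a NetworkX version that still exposes the `G.node` accessor.
def _demo_treepatternkernel():
    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    g1.add_edge(0, 1, bond_type='1')
    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'})])
    g2.add_edge(0, 1, bond_type='1')
    Kmatrix, run_time = treepatternkernel([g1, g2], node_label='atom',
                                          edge_label='bond_type',
                                          kernel_type='untiln', lmda=0.5, h=2,
                                          remove_totters=False)
    return Kmatrix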


def _treepatternkernel_do(G1, G2, node_label, edge_label, labeled, kernel_type,
lmda, h):
"""Calculate tree pattern graph kernels between 2 graphs.

Parameters
----------
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
labeled : boolean
Whether the graphs are labeled. The default is True.
kernel_type : string
Type of tree pattern kernel, could be 'untiln', 'size' or 'branching'.
lmda : float
Weight deciding whether linear patterns or tree patterns of increasing complexity are favored.
h : integer
The upper bound of the height of tree patterns.

Return
------
kernel : float
Tree pattern kernel between 2 graphs.
"""

def matchingset(n1, n2):
"""Get neiborhood matching set of two nodes in two graphs.
"""

def mset_com(allpairs, length):
"""Find all sets R of pairs by combination.
"""
if length == 1:
mset = [[pair] for pair in allpairs]
return mset, mset
else:
mset, mset_l = mset_com(allpairs, length - 1)
mset_tmp = []
for pairset in mset_l: # for each pair set of length l-1
nodeset1 = [pair[0] for pair in pairset
] # nodes already in the set
nodeset2 = [pair[1] for pair in pairset]
for pair in allpairs:
if (pair[0] not in nodeset1) and (
pair[1] not in nodeset2
): # nodes in R should be unique
mset_tmp.append(
pairset + [pair]
) # add this pair to the pair set of length l-1, constructing a new set of length l
nodeset1.append(pair[0])
nodeset2.append(pair[1])

mset.extend(mset_tmp)

return mset, mset_tmp

allpairs = [
] # all pairs that have the same node labels and edge labels
for neighbor1 in G1[n1]:
for neighbor2 in G2[n2]:
if G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label] \
and G1[n1][neighbor1][edge_label] == G2[n2][neighbor2][edge_label]:
allpairs.append([neighbor1, neighbor2])

if allpairs != []:
mset, _ = mset_com(allpairs, len(allpairs))
else:
mset = []

return mset

def kernel_h(h):
"""Calculate kernel of h-th iteration.
"""

if kernel_type == 'untiln':
all_kh = { str(n1) + '.' + str(n2) : (G1.node[n1][node_label] == G2.node[n2][node_label]) \
for n1 in G1.nodes() for n2 in G2.nodes() } # kernels between all pairs of nodes with h = 1
all_kh_tmp = all_kh.copy()
for i in range(2, h + 1):
for n1 in G1.nodes():
for n2 in G2.nodes():
kh = 0
mset = all_msets[str(n1) + '.' + str(n2)]
for R in mset:
kh_tmp = 1
for pair in R:
kh_tmp *= lmda * all_kh[str(pair[0])
+ '.' + str(pair[1])]
kh += 1 / lmda * kh_tmp
kh = (G1.node[n1][node_label] == G2.node[n2][
node_label]) * (1 + kh)
all_kh_tmp[str(n1) + '.' + str(n2)] = kh
all_kh = all_kh_tmp.copy()

elif kernel_type == 'size':
all_kh = { str(n1) + '.' + str(n2) : lmda * (G1.node[n1][node_label] == G2.node[n2][node_label]) \
for n1 in G1.nodes() for n2 in G2.nodes() } # kernels between all pairs of nodes with h = 1
all_kh_tmp = all_kh.copy()
for i in range(2, h + 1):
for n1 in G1.nodes():
for n2 in G2.nodes():
kh = 0
mset = all_msets[str(n1) + '.' + str(n2)]
for R in mset:
kh_tmp = 1
for pair in R:
kh_tmp *= lmda * all_kh[str(pair[0])
+ '.' + str(pair[1])]
kh += kh_tmp
kh *= lmda * (
G1.node[n1][node_label] == G2.node[n2][node_label])
all_kh_tmp[str(n1) + '.' + str(n2)] = kh
all_kh = all_kh_tmp.copy()

elif kernel_type == 'branching':
all_kh = { str(n1) + '.' + str(n2) : (G1.node[n1][node_label] == G2.node[n2][node_label]) \
for n1 in G1.nodes() for n2 in G2.nodes() } # kernels between all pairs of nodes with h = 1
all_kh_tmp = all_kh.copy()
for i in range(2, h + 1):
for n1 in G1.nodes():
for n2 in G2.nodes():
kh = 0
mset = all_msets[str(n1) + '.' + str(n2)]
for R in mset:
kh_tmp = 1
for pair in R:
kh_tmp *= lmda * all_kh[str(pair[0])
+ '.' + str(pair[1])]
kh += 1 / lmda * kh_tmp
kh *= (
G1.node[n1][node_label] == G2.node[n2][node_label])
all_kh_tmp[str(n1) + '.' + str(n2)] = kh
all_kh = all_kh_tmp.copy()

return all_kh

# calculate matching sets for every pair of nodes at first to avoid calculating in every iteration.
all_msets = ({ str(node1) + '.' + str(node2) : matchingset(node1, node2) for node1 in G1.nodes() \
for node2 in G2.nodes() } if h > 1 else {})

all_kh = kernel_h(h)
kernel = sum(all_kh.values())

if kernel_type == 'size':
kernel = kernel / (lmda**h)

return kernel

+ 0
- 403
gklearn/kernels/unfinished/weisfeilerLehmanKernel.py View File

@@ -1,403 +0,0 @@
"""
@author: linlin
@references:
[1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 2011;12(Sep):2539-61.
"""

import sys
import pathlib
from collections import Counter
sys.path.insert(0, "../")

import networkx as nx
import numpy as np
import time

from gklearn.kernels.pathKernel import pathkernel

def weisfeilerlehmankernel(*args, node_label = 'atom', edge_label = 'bond_type', height = 0, base_kernel = 'subtree'):
"""Calculate Weisfeiler-Lehman kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
/
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
height : int
subtree height
base_kernel : string
base kernel used in each iteration of the WL kernel. The default base kernel is the subtree kernel. For a user-defined kernel, base_kernel is the name of the base kernel function used in each iteration of the WL kernel. This function returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 graphs.

Return
------
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.

Notes
-----
This function now supports WL subtree kernel, WL shortest path kernel and WL edge kernel.
"""
base_kernel = base_kernel.lower()
Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list
Kmatrix = np.zeros((len(Gn), len(Gn)))

start_time = time.time()

# for WL subtree kernel
if base_kernel == 'subtree':
Kmatrix = _wl_subtreekernel_do(args[0], node_label, edge_label, height)

# for WL shortest path kernel
elif base_kernel == 'sp':
Kmatrix = _wl_spkernel_do(args[0], node_label, edge_label, height)

# for WL edge kernel
elif base_kernel == 'edge':
Kmatrix = _wl_edgekernel_do(args[0], node_label, edge_label, height)

# for user defined base kernel
else:
Kmatrix = _wl_userkernel_do(args[0], node_label, edge_label, height, base_kernel)

run_time = time.time() - start_time
print("\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---" % (base_kernel, len(args[0]), run_time))

return Kmatrix, run_time
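# A minimal, hedged usage sketch (illustrative only, not part of the original module).
# Node ids are kept as consecutive integers starting at 0, because the relabelling step
# in the subtree kernel below indexes the multiset list by node id; the NetworkX version
# this module targets (still exposing `G.node`) is assumed.
def _demo_weisfeilerlehmankernel():
    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
    g1.add_edges_from([(0, 1), (1, 2)])
    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    g2.add_edge(0, 1)
    Kmatrix, run_time = weisfeilerlehmankernel([g1, g2], node_label='atom',
                                               height=1, base_kernel='subtree')
    return Kmatrix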



def _wl_subtreekernel_do(Gn, node_label, edge_label, height):
"""Calculate Weisfeiler-Lehman subtree kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.
height : int
subtree height.

Return
------
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
"""
height = int(height)
Kmatrix = np.zeros((len(Gn), len(Gn)))
all_num_of_labels_occured = 0 # number of distinct labels that have occurred as node labels at least once across all graphs

# initial for height = 0
all_labels_ori = set() # all unique original labels in all graphs in this iteration
all_num_of_each_label = [] # number of occurrences of each label in each graph in this iteration
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
num_of_labels_occured = all_num_of_labels_occured # number of distinct labels that have occurred as node labels at least once across all graphs

# for each graph
for G in Gn:
# get the set of original labels
labels_ori = list(nx.get_node_attributes(G, node_label).values())
all_labels_ori.update(labels_ori)
num_of_each_label = dict(Counter(labels_ori)) # number of occurrences of each label in the graph
all_num_of_each_label.append(num_of_each_label)
num_of_labels = len(num_of_each_label) # number of all unique labels

all_labels_ori.update(labels_ori)

all_num_of_labels_occured += len(all_labels_ori)

# calculate subtree kernel with the 0th iteration and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))
vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])
vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])
Kmatrix[i][j] += np.dot(vector1, vector2.transpose())
Kmatrix[j][i] = Kmatrix[i][j]

# iterate each height
for h in range(1, height + 1):
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
num_of_labels_occured = all_num_of_labels_occured # number of distinct labels that have occurred as node labels at least once across all graphs
all_labels_ori = set()
all_num_of_each_label = []

# for each graph
for idx, G in enumerate(Gn):

set_multisets = []
for node in G.nodes(data = True):
# Multiset-label determination.
multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
# sorting each multiset
multiset.sort()
multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
set_multisets.append(multiset)

# label compression
set_unique = list(set(set_multisets)) # set of unique multiset labels
# a dictionary mapping original labels to new ones.
set_compressed = {}
# if a label occurred before, assign its former compressed label; otherwise assign (number of labels occurred so far + 1) as the new compressed label
for value in set_unique:
if value in all_set_compressed.keys():
set_compressed.update({ value : all_set_compressed[value] })
else:
set_compressed.update({ value : str(num_of_labels_occured + 1) })
num_of_labels_occured += 1

all_set_compressed.update(set_compressed)

# relabel nodes
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]

# get the set of compressed labels
labels_comp = list(nx.get_node_attributes(G, node_label).values())
all_labels_ori.update(labels_comp)
num_of_each_label = dict(Counter(labels_comp))
all_num_of_each_label.append(num_of_each_label)

all_num_of_labels_occured += len(all_labels_ori)

# calculate subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))
vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])
vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])
Kmatrix[i][j] += np.dot(vector1, vector2.transpose())
Kmatrix[j][i] = Kmatrix[i][j]

return Kmatrix


def _wl_spkernel_do(Gn, node_label, edge_label, height):
"""Calculate Weisfeiler-Lehman shortest path kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.
height : int
subtree height.
Return
------
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
"""
from gklearn.utils.utils import getSPGraph
# init.
height = int(height)
Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel

Gn = [ getSPGraph(G, edge_weight = edge_label) for G in Gn ] # get shortest path graphs of Gn
# initial for height = 0
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):
for e2 in Gn[j].edges(data = True):
if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
Kmatrix[i][j] += 1
Kmatrix[j][i] = Kmatrix[i][j]
# iterate each height
for h in range(1, height + 1):
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
num_of_labels_occured = 0 # number of distinct labels that have occurred as node labels at least once across all graphs
for G in Gn: # for each graph
set_multisets = []
for node in G.nodes(data = True):
# Multiset-label determination.
multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
# sorting each multiset
multiset.sort()
multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
set_multisets.append(multiset)

# label compression
set_unique = list(set(set_multisets)) # set of unique multiset labels
# a dictionary mapping original labels to new ones.
set_compressed = {}
# if a label occurred before, assign its former compressed label; otherwise assign (number of labels occurred so far + 1) as the new compressed label
for value in set_unique:
if value in all_set_compressed.keys():
set_compressed.update({ value : all_set_compressed[value] })
else:
set_compressed.update({ value : str(num_of_labels_occured + 1) })
num_of_labels_occured += 1

all_set_compressed.update(set_compressed)
# relabel nodes
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
# calculate subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):
for e2 in Gn[j].edges(data = True):
if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
Kmatrix[i][j] += 1
Kmatrix[j][i] = Kmatrix[i][j]
return Kmatrix



def _wl_edgekernel_do(Gn, node_label, edge_label, height):
"""Calculate Weisfeiler-Lehman edge kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.
height : int
subtree height.
Return
------
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
"""
# init.
height = int(height)
Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel
# initial for height = 0
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):
for e2 in Gn[j].edges(data = True):
if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
Kmatrix[i][j] += 1
Kmatrix[j][i] = Kmatrix[i][j]
# iterate each height
for h in range(1, height + 1):
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
num_of_labels_occured = 0 # number of distinct labels that have occurred as node labels at least once across all graphs
for G in Gn: # for each graph
set_multisets = []
for node in G.nodes(data = True):
# Multiset-label determination.
multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
# sorting each multiset
multiset.sort()
multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
set_multisets.append(multiset)

# label compression
set_unique = list(set(set_multisets)) # set of unique multiset labels
# a dictionary mapping original labels to new ones.
set_compressed = {}
# if a label occurred before, assign its former compressed label; otherwise assign (number of labels that have occurred + 1) as the compressed label
for value in set_unique:
if value in all_set_compressed.keys():
set_compressed.update({ value : all_set_compressed[value] })
else:
set_compressed.update({ value : str(num_of_labels_occured + 1) })
num_of_labels_occured += 1

all_set_compressed.update(set_compressed)
# relabel nodes
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
# calculate subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):
for e2 in Gn[j].edges(data = True):
if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
Kmatrix[i][j] += 1
Kmatrix[j][i] = Kmatrix[i][j]
return Kmatrix


def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
"""Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.
height : int
subtree height.
base_kernel : callable
Base kernel function applied at each WL iteration. It is called as base_kernel(Gn, node_label, edge_label) and returns a Numpy matrix, each element of which is the base kernel between two graphs.
Return
------
Kmatrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between two graphs.
"""
# init.
height = int(height)
Kmatrix = np.zeros((len(Gn), len(Gn))) # init kernel
# initialization for height = 0
Kmatrix = base_kernel(Gn, node_label, edge_label)
# iterate each height
for h in range(1, height + 1):
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
num_of_labels_occured = 0 # number of distinct compressed labels assigned so far across all graphs
for G in Gn: # for each graph
set_multisets = []
for node in G.nodes(data = True):
# Multiset-label determination.
multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
# sorting each multiset
multiset.sort()
multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
set_multisets.append(multiset)

# label compression
set_unique = list(set(set_multisets)) # set of unique multiset labels
# a dictionary mapping original labels to new ones.
set_compressed = {}
# if a label occurred before, assign its former compressed label; otherwise assign (number of labels that have occurred + 1) as the compressed label
for value in set_unique:
if value in all_set_compressed.keys():
set_compressed.update({ value : all_set_compressed[value] })
else:
set_compressed.update({ value : str(num_of_labels_occured + 1) })
num_of_labels_occured += 1

all_set_compressed.update(set_compressed)
# relabel nodes
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
# calculate kernel with h iterations and add it to the final kernel
Kmatrix += base_kernel(Gn, node_label, edge_label)
return Kmatrix
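As an illustration of how these WL helpers are meant to be driven (the same pattern applies to _wl_edgekernel_do above), here is a minimal sketch that feeds _wl_userkernel_do a toy base kernel. The graphs, the 'atom' label and toy_base_kernel are made up for the example, and it assumes a NetworkX version old enough that G.node, used in the relabelling loop above, still exists.

import networkx as nx
import numpy as np

def toy_base_kernel(Gn, node_label, edge_label):
    # Toy base kernel: count pairs of nodes with identical labels between two graphs.
    K = np.zeros((len(Gn), len(Gn)))
    for i in range(len(Gn)):
        for j in range(i, len(Gn)):
            li = [d[node_label] for _, d in Gn[i].nodes(data=True)]
            lj = [d[node_label] for _, d in Gn[j].nodes(data=True)]
            K[i][j] = sum(1 for a in li for b in lj if a == b)
            K[j][i] = K[i][j]
    return K

G1 = nx.Graph()
G1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
G1.add_edges_from([(0, 1), (1, 2)])
G2 = nx.Graph()
G2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'O'})])
G2.add_edges_from([(0, 1), (1, 2), (2, 0)])

K = _wl_userkernel_do([G1, G2], node_label='atom', edge_label=None,
                      height=2, base_kernel=toy_base_kernel)
print(K)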

+ 0
- 16
gklearn/utils/isNotebook.py View File

@@ -1,16 +0,0 @@
""" Functions for python system.
"""

def isNotebook():
"""check if code is executed in the IPython notebook.
"""
try:
shell = get_ipython().__class__.__name__
if shell == 'ZMQInteractiveShell':
return True # Jupyter notebook or qtconsole
elif shell == 'TerminalInteractiveShell':
return False # Terminal running IPython
else:
return False # Other type (?)
except NameError:
return False # Probably standard Python interpreter

+ 0
- 27
gklearn/utils/logger2file.py View File

@@ -1,27 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 8 14:21:25 2019

@author: ljia
"""

import sys
import time

class Logger(object):
def __init__(self):
self.terminal = sys.stdout
self.log = open("log." + str(time.time()) + ".log", "a")

def write(self, message):
self.terminal.write(message)
self.log.write(message)

def flush(self):
#this flush method is needed for python 3 compatibility.
#this handles the flush command by doing nothing.
#you might want to specify some extra behavior here.
pass

sys.stdout = Logger()
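Note that simply importing this module has a side effect: the assignment above replaces sys.stdout, so every subsequent print() is mirrored into a time-stamped log file. A minimal usage sketch, using the module path as it existed before this commit:

import gklearn.utils.logger2file   # importing installs the Logger on sys.stdout
print('written to the terminal and appended to log.<timestamp>.log')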

+ 0
- 86
gklearn/utils/unfinished/openblassettings.py View File

@@ -1,86 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 19 15:31:01 2018
A script to set the thread number of OpenBLAS (if used).
Some modules (such as Numpy, Scipy and sklearn) that use OpenBLAS parallelize
computations automatically. This conflicts with other parallelization mechanisms
such as multiprocessing.Pool and can greatly increase the computing time. Setting
the thread count to 1 forces OpenBLAS to use a single thread/CPU, so this conflict
is avoided.
e.g.:
with num_threads(8):
np.dot(x, y)
@author: ali_m
@Reference: `ali_m's answer <https://stackoverflow.com/a/29582987>`__, 2018.12
"""

import contextlib
import ctypes
from ctypes.util import find_library
import os

# Prioritize hand-compiled OpenBLAS library over version in /usr/lib/
# from Ubuntu repos
try_paths = ['/opt/OpenBLAS/lib/libopenblas.so',
'/lib/libopenblas.so',
'/usr/lib/libopenblas.so.0',
find_library('openblas')]
openblas_lib = None
for libpath in try_paths:
try:
openblas_lib = ctypes.cdll.LoadLibrary(libpath)
break
except OSError:
continue
if openblas_lib is None:
raise EnvironmentError('Could not locate an OpenBLAS shared library', 2)


def set_num_threads(n):
"""Set the current number of threads used by the OpenBLAS server."""
openblas_lib.openblas_set_num_threads(int(n))


# At the time of writing these symbols were very new:
# https://github.com/xianyi/OpenBLAS/commit/65a847c
try:
openblas_lib.openblas_get_num_threads()
def get_num_threads():
"""Get the current number of threads used by the OpenBLAS server."""
return openblas_lib.openblas_get_num_threads()
except AttributeError:
def get_num_threads():
"""Dummy function (symbol not present in %s), returns -1."""
return -1
pass

try:
len(os.sched_getaffinity(0))
def get_num_procs():
"""Get the total number of physical processors"""
return len(os.sched_getaffinity(0))
except AttributeError:
def get_num_procs():
"""Dummy function (symbol not present), returns -1."""
return -1
pass


@contextlib.contextmanager
def num_threads(n):
"""Temporarily changes the number of OpenBLAS threads.

Example usage:

print("Before: {}".format(get_num_threads()))
with num_threads(n):
print("In thread context: {}".format(get_num_threads()))
print("After: {}".format(get_num_threads()))
"""
old_n = get_num_threads()
set_num_threads(n)
try:
yield
finally:
set_num_threads(old_n)
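As the module docstring suggests, num_threads() is meant to wrap BLAS-heavy calls that already run under another parallelisation layer. A minimal sketch, assuming OpenBLAS is present (the module raises at import time otherwise) and using the file's pre-removal import path:

import numpy as np
from multiprocessing import Pool
from gklearn.utils.unfinished.openblassettings import num_threads

def job(seed):
    x = np.random.RandomState(seed).rand(300, 300)
    with num_threads(1):   # keep OpenBLAS single-threaded inside each worker
        return float(np.linalg.norm(x.dot(x.T)))

if __name__ == '__main__':
    with Pool(4) as p:
        print(p.map(job, range(8)))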

+ 0
- 320
gklearn/utils/unused/suffix_tree.py View File

@@ -1,320 +0,0 @@
"""
@author: linlin
@references:
[1] `ptrus/suffix-trees <https://github.com/ptrus/suffix-trees/blob/master/suffix_trees/STree.py>`__, 2018.6
"""

import sys


class STree():
"""Class representing the suffix tree. The generalized suffix tree is supported."""

def __init__(self, input=''):
self.root = _SNode()
self.root.depth = 0
self.root.idx = 0
self.root.parent = self.root
self.root._add_suffix_link(self.root)

if not input == '':
self.build(input)

def _check_input(self, input):
"""Checks the validity of the input.
Raises ValueError in case of an invalid input.
"""
if isinstance(input, str):
return 'st'
elif isinstance(input, list):
if all(isinstance(item, str) for item in input):
return 'gst'

raise ValueError("String argument should be of type String or"
" a list of strings")

def build(self, x):
"""Builds the Suffix tree on the given input.
If the input is of type List of Strings:
Generalized Suffix Tree is built.
:param x: String or List of Strings
"""
type = self._check_input(x)

if type == 'st':
x += next(self._terminalSymbolsGenerator())
self._build(x)
if type == 'gst':
self._build_generalized(x)

def _build(self, x):
"""Builds a Suffix tree."""
self.word = x
self._build_McCreight(x)

def _build_McCreight(self, x):
"""Builds a Suffix tree using McCreight O(n) algorithm.
Algorithm based on:
McCreight, Edward M. "A space-economical suffix tree construction algorithm." - ACM, 1976.
Implementation based on:
UH CS - 58093 String Processing Algorithms Lecture Notes
"""
u = self.root
d = 0
for i in range(len(x)):
while u.depth == d and u._has_transition(x[d + i]):
u = u._get_transition_link(x[d + i])
d = d + 1
while d < u.depth and x[u.idx + d] == x[i + d]:
d = d + 1
if d < u.depth:
u = self._create_node(x, u, d)
self._create_leaf(x, i, u, d)
if not u._get_suffix_link():
self._compute_slink(x, u)
u = u._get_suffix_link()
d = d - 1
if d < 0:
d = 0

def _create_node(self, x, u, d):
i = u.idx
p = u.parent
v = _SNode(idx=i, depth=d)
v._add_transition_link(u, x[i + d])
u.parent = v
p._add_transition_link(v, x[i + p.depth])
v.parent = p
return v

def _create_leaf(self, x, i, u, d):
w = _SNode()
w.idx = i
w.depth = len(x) - i
u._add_transition_link(w, x[i + d])
w.parent = u
return w

def _compute_slink(self, x, u):
d = u.depth
v = u.parent._get_suffix_link()
while v.depth < d - 1:
v = v._get_transition_link(x[u.idx + v.depth + 1])
if v.depth > d - 1:
v = self._create_node(x, v, d - 1)
u._add_suffix_link(v)

def _build_Ukkonen(self, x):
"""Builds a Suffix tree using Ukkonen's online O(n) algorithm.
Algorithm based on:
Ukkonen, Esko. "On-line construction of suffix trees." - Algorithmica, 1995.
"""
# TODO.
raise NotImplementedError()

def _build_generalized(self, xs):
"""Builds a Generalized Suffix Tree (GST) from the array of strings provided.
"""
terminal_gen = self._terminalSymbolsGenerator()

_xs = ''.join([x + next(terminal_gen) for x in xs])
self.word = _xs
self._generalized_word_starts(xs)
self._build(_xs)
self.root._traverse(self._label_generalized)

def _label_generalized(self, node):
"""Helper method that labels the nodes of GST with indexes of strings
found in their descendants.
"""
if node.is_leaf():
x = {self._get_word_start_index(node.idx)}
else:
x = {
n
for ns in node.transition_links for n in ns[0].generalized_idxs
}
node.generalized_idxs = x

def _get_word_start_index(self, idx):
"""Helper method that returns the index of the string based on node's
starting index"""
i = 0
for _idx in self.word_starts[1:]:
if idx < _idx:
return i
else:
i += 1
return i

def lcs(self, stringIdxs=-1):
"""Returns the Largest Common Substring of Strings provided in stringIdxs.
If stringIdxs is not provided, the LCS of all strings is returned.
::param stringIdxs: Optional: List of indexes of strings.
"""
if stringIdxs == -1 or not isinstance(stringIdxs, list):
stringIdxs = set(range(len(self.word_starts)))
else:
stringIdxs = set(stringIdxs)

deepestNode = self._find_lcs(self.root, stringIdxs)
start = deepestNode.idx
end = deepestNode.idx + deepestNode.depth
return self.word[start:end]

def _find_lcs(self, node, stringIdxs):
"""Helper method that finds LCS by traversing the labeled GSD."""
nodes = [
self._find_lcs(n, stringIdxs) for (n, _) in node.transition_links
if n.generalized_idxs.issuperset(stringIdxs)
]

if nodes == []:
return node

deepestNode = max(nodes, key=lambda n: n.depth)
return deepestNode

def _generalized_word_starts(self, xs):
"""Helper method returns the starting indexes of strings in GST"""
self.word_starts = []
i = 0
for n in range(len(xs)):
self.word_starts.append(i)
i += len(xs[n]) + 1

def find(self, y):
"""Returns starting position of the substring y in the string used for
building the Suffix tree.
:param y: String
:return: Index of the starting position of string y in the string used for building the Suffix tree
-1 if y is not a substring.
"""
node = self.root
while True:
edge = self._edgeLabel(node, node.parent)
if edge.startswith(y):
return node.idx

i = 0
while (i < len(edge) and edge[i] == y[0]):
y = y[1:]
i += 1

if i != 0:
if i == len(edge) and y != '':
pass
else:
return -1

node = node._get_transition_link(y[0])
if not node:
return -1

def find_all(self, y):
y_input = y
node = self.root
while True:
edge = self._edgeLabel(node, node.parent)
if edge.startswith(y):
break

i = 0
while (i < len(edge) and edge[i] == y[0]):
y = y[1:]
i += 1

if i != 0:
if i == len(edge) and y != '':
pass
else:
return []

node = node._get_transition_link(y[0])
if not node:
return []

leaves = node._get_leaves()
return [n.idx for n in leaves]

def _edgeLabel(self, node, parent):
"""Helper method, returns the edge label between a node and it's parent"""
return self.word[node.idx + parent.depth:node.idx + node.depth]

def _terminalSymbolsGenerator(self):
"""Generator of unique terminal symbols used for building the Generalized Suffix Tree.
Unicode Private Use Areas (U+E000..U+F8FF, U+F0000..U+FFFFD, U+100000..U+10FFFD) are used
to ensure that terminal symbols are not part of the input strings.
"""
py2 = sys.version[0] < '3'
UPPAs = list(
list(range(0xE000, 0xF8FF + 1)) +
list(range(0xF0000, 0xFFFFD + 1)) +
list(range(0x100000, 0x10FFFD + 1)))
for i in UPPAs:
if py2:
yield (unichr(i))
else:
yield (chr(i))
raise ValueError("To many input strings.")


class _SNode():
"""Class representing a Node in the Suffix tree."""

def __init__(self, idx=-1, parentNode=None, depth=-1):
# Links
self._suffix_link = None
self.transition_links = []
# Properties
self.idx = idx
self.depth = depth
self.parent = parentNode
self.generalized_idxs = {}

def __str__(self):
return ("SNode: idx:" + str(self.idx) + " depth:" + str(self.depth) +
" transitons:" + str(self.transition_links))

def _add_suffix_link(self, snode):
self._suffix_link = snode

def _get_suffix_link(self):
if self._suffix_link != None:
return self._suffix_link
else:
return False

def _get_transition_link(self, suffix):
for node, _suffix in self.transition_links:
if _suffix == '__@__' or suffix == _suffix:
return node
return False

def _add_transition_link(self, snode, suffix=''):
tl = self._get_transition_link(suffix)
if tl: # TODO: improve this.
self.transition_links.remove((tl, suffix))
self.transition_links.append((snode, suffix))

def _has_transition(self, suffix):
for node, _suffix in self.transition_links:
if _suffix == '__@__' or suffix == _suffix:
return True
return False

def is_leaf(self):
return self.transition_links == []

def _traverse(self, f):
for (node, _) in self.transition_links:
node._traverse(f)
f(self)

def _get_leaves(self):
if self.is_leaf():
return [self]
else:
return [
x for (n, _) in self.transition_links for x in n._get_leaves()
]
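A minimal usage sketch of the class above (the input strings are made up):

# Generalized suffix tree over several strings.
st = STree(['abcdefgh', 'xxcdexx', 'qqcdeq'])
print(st.lcs())          # longest common substring of all inputs: 'cde'
print(st.lcs([0, 1]))    # LCS restricted to the first two strings

# Plain suffix tree over a single string.
st2 = STree('abracadabra')
print(st2.find('abra'))      # starting index of one occurrence, -1 if absent
print(st2.find_all('abra'))  # starting indices of all occurrences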

+ 0
- 52
notebooks/else/compute_spkernel_for_syntheticnew.py View File

@@ -1,52 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:40:52 2018

@author: ljia
"""
import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from gklearn.utils.model_selection_precomputed import compute_gram_matrices
from gklearn.kernels.spKernel import spkernel
from sklearn.model_selection import ParameterGrid

from libs import *
import multiprocessing
import functools
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct


if __name__ == "__main__":
# load dataset.
print('getting dataset and computing kernel distance matrix first...')
ds_name = 'SYNTHETICnew'
gkernel = 'spkernel'
dataset = '../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
Gn, y_all = loadDataset(dataset)

for G in Gn:
G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
# compute/read Gram matrix and pair distances.
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
Kmatrix = np.empty((len(Gn), len(Gn)))
Kmatrix, run_time, idx = spkernel(Gn, node_label=None, node_kernels=
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
n_jobs=multiprocessing.cpu_count(), verbose=True)
# normalization
Kmatrix_diag = Kmatrix.diagonal().copy()
for i in range(len(Kmatrix)):
for j in range(i, len(Kmatrix)):
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
Kmatrix[j][i] = Kmatrix[i][j]
np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
Kmatrix=Kmatrix, run_time=run_time)
print('complete!')
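The double loop above is a cosine normalisation of the Gram matrix, K[i][j] / sqrt(K[i][i] * K[j][j]); the same pattern reappears in the structural shortest-path script below. A vectorised equivalent, as a sketch (the function name is mine):

import numpy as np

def normalize_gram(K):
    # K'[i, j] = K[i, j] / sqrt(K[i, i] * K[j, j])
    d = np.sqrt(np.diag(K))
    return K / np.outer(d, d)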

+ 0
- 54
notebooks/else/compute_sspkernel_for_syntheticnew.py View File

@@ -1,54 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:40:52 2018

@author: ljia
"""
import sys
import numpy as np
import networkx as nx

sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from gklearn.utils.model_selection_precomputed import compute_gram_matrices
from gklearn.kernels.structuralspKernel import structuralspkernel
from sklearn.model_selection import ParameterGrid

from libs import *
import multiprocessing
import functools
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct


if __name__ == "__main__":
# load dataset.
print('getting dataset and computing kernel distance matrix first...')
ds_name = 'SYNTHETICnew'
gkernel = 'structuralspkernel'
dataset = '../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
Gn, y_all = loadDataset(dataset)

for G in Gn:
G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
# compute/read Gram matrix and pair distances.
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
Kmatrix, run_time = structuralspkernel(Gn, node_label=None, edge_label=None,
node_kernels=sub_kernels, edge_kernels=sub_kernels,
parallel=None, # parallel='imap_unordered',
n_jobs=multiprocessing.cpu_count(),
verbose=True)
# normalization
Kmatrix_diag = Kmatrix.diagonal().copy()
for i in range(len(Kmatrix)):
for j in range(i, len(Kmatrix)):
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
Kmatrix[j][i] = Kmatrix[i][j]
np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
Kmatrix=Kmatrix, run_time=run_time)
print('complete!')

+ 0
- 19
notebooks/else/job_graphkernels.sl View File

@@ -1,19 +0,0 @@
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="graphkernels"
#SBATCH --partition=tcourt
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output=output_graphkernels.txt
#SBATCH --error=error_graphkernels.txt
#
#SBATCH --ntasks=1
#SBATCH --nodes=2
#SBATCH --cpus-per-task=56
#SBATCH --time=24:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2017018/ljia01/graphkit-learn/notebooks
srun python3 run_spkernel.py

+ 0
- 12
notebooks/else/job_test.sl View File

@@ -1,12 +0,0 @@
#!/bin/bash
#
#SBATCH --job-name=test
#SBATCH --output=res.txt
#SBATCH --partition=long
#
#SBATCH --ntasks=1
#SBATCH --time=10:00
#SBATCH --mem-per-cpu=100

srun hostname
srun sleep 60

+ 0
- 70
notebooks/else/run_rwalk_symonly.py View File

@@ -1,70 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:56:44 2018

@author: ljia
"""

import functools
from libs import *
import multiprocessing

from gklearn.kernels.rwalk_sym import randomwalkkernel
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct

import numpy as np


dslist = [
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]
estimator = randomwalkkernel
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

for ds in dslist:
print()
print(ds['name'])
for compute_method in ['conjugate', 'fp']:
if compute_method == 'sylvester':
param_grid_precomputed = {'compute_method': ['sylvester'],
# 'weight': np.linspace(0.01, 0.10, 10)}
'weight': np.logspace(-1, -10, num=10, base=10)}
elif compute_method == 'conjugate':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'compute_method': ['conjugate'],
'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'weight': np.logspace(-1, -10, num=10, base=10)}
elif compute_method == 'fp':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'compute_method': ['fp'],
'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'weight': np.logspace(-3, -10, num=8, base=10)}
elif compute_method == 'spectral':
param_grid_precomputed = {'compute_method': ['spectral'],
'weight': np.logspace(-1, -10, num=10, base=10),
'sub_kernel': ['geo', 'exp']}
model_selection_for_precomputed_kernel(
ds['dataset'],
estimator,
param_grid_precomputed,
(param_grid[1] if ('task' in ds and ds['task']
== 'regression') else param_grid[0]),
(ds['task'] if 'task' in ds else 'classification'),
NUM_TRIALS=30,
datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
ds_name=ds['name'],
n_jobs=multiprocessing.cpu_count(),
read_gm_from_file=False)
print()

+ 0
- 61
notebooks/else/run_sp_symonly.py View File

@@ -1,61 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 21 17:59:28 2018

@author: ljia
"""

import functools
from libs import *
import multiprocessing

from gklearn.kernels.sp_sym import spkernel
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
#from gklearn.utils.model_selection_precomputed import trial_do

dslist = [
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb

# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
# # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
# # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb
# {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
#
# # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb
# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb
# # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb

# # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb
# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb
]
estimator = spkernel
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels': [
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

for ds in dslist:
print()
print(ds['name'])
model_selection_for_precomputed_kernel(
ds['dataset'],
estimator,
param_grid_precomputed,
(param_grid[1] if ('task' in ds and ds['task']
== 'regression') else param_grid[0]),
(ds['task'] if 'task' in ds else 'classification'),
NUM_TRIALS=30,
datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
ds_name=ds['name'],
n_jobs=multiprocessing.cpu_count(),
read_gm_from_file=False)
print()

+ 0
- 47
notebooks/else/run_ssp_symonly.py View File

@@ -1,47 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 23 16:40:52 2018

@author: ljia
"""

import functools
from libs import *
import multiprocessing

from gklearn.kernels.ssp_sym import structuralspkernel
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct

dslist = [
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
# node nsymb
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
# node symb/nsymb
]
estimator = structuralspkernel
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
param_grid_precomputed = {'node_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
'edge_kernels':
[{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]}
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
{'alpha': np.logspace(-10, 10, num=41, base=10)}]

for ds in dslist:
print()
print(ds['name'])
model_selection_for_precomputed_kernel(
ds['dataset'],
estimator,
param_grid_precomputed,
(param_grid[1] if ('task' in ds and ds['task']
== 'regression') else param_grid[0]),
(ds['task'] if 'task' in ds else 'classification'),
NUM_TRIALS=30,
datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
extra_params=(ds['extra_params'] if 'extra_params' in ds else None),
ds_name=ds['name'],
n_jobs=multiprocessing.cpu_count(),
read_gm_from_file=False)
print()

BIN
notebooks/preimage/results.gm.npz View File


+ 0
- 1329
notebooks/unfinished/run_cyclicpatternkernel.ipynb
File diff suppressed because it is too large
View File


+ 0
- 786
notebooks/unfinished/run_treeletkernel_acyclic.ipynb
File diff suppressed because it is too large
View File


+ 0
- 7966
notebooks/unfinished/run_treepatternkernel.ipynb
File diff suppressed because it is too large
View File


+ 0
- 3812
notebooks/unfinished/run_weisfeilerLehmankernel.ipynb
File diff suppressed because it is too large
View File


+ 0
- 47
notebooks/unfinished/test_mpi.py View File

@@ -1,47 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test the Message Passing Interface (MPI) for cluster parallelization.
Created on Wed Nov 7 17:26:40 2018

@author: ljia
"""

from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

import numpy as np
import time
size = comm.Get_size()
numDataPerRank = 10
data = None
if rank == 0:
data = np.linspace(1, size * numDataPerRank, size * numDataPerRank)
recvbuf = np.empty(numDataPerRank, dtype='d')
comm.Scatter(data, recvbuf, root=0)
recvbuf += 1
print('Rank: ', rank, ', recvbuf received: ', recvbuf, ', size: ', size, ', time: ', time.time())

#if rank == 0:
# data = {'key1' : [1,2, 3],
# 'key2' : ( 'abc', 'xyz')}
#else:
# data = None
#
#data = comm.bcast(data, root=0)
#print('Rank: ',rank,', data: ' ,data)

#if rank == 0:
# data = {'a': 7, 'b': 3.14}
# comm.send(data, dest=1)
#elif rank == 1:
# data = comm.recv(source=0)
# print('On process 1, data is ', data)

#print('My rank is ', rank)

#for i in range(0, 100000000):
# print(i)
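The active part of the script scatters numDataPerRank numbers to every rank and increments them; a natural follow-up, sketched below with the same variable names, gathers the chunks back on rank 0 (the script itself would typically be launched with something like mpiexec -n 4 python3 test_mpi.py):

# Gather the incremented chunks back on rank 0 (shapes match the Scatter above).
gathered = None
if rank == 0:
    gathered = np.empty(size * numDataPerRank, dtype='d')
comm.Gather(recvbuf, gathered, root=0)
if rank == 0:
    print('Rank 0 gathered: ', gathered)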
