From 713edf2794abfb65200aeda9872852bc9315ae16 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Wed, 17 Jan 2018 17:06:57 +0100 Subject: [PATCH] ADD comments to code of treelet kernel. --- README.md | 15 +- .../run_treeletkernel_acyclic-checkpoint.ipynb | 4 +- notebooks/run_treeletkernel_acyclic.ipynb | 4 +- .../__pycache__/treeletKernel.cpython-35.pyc | Bin 10032 -> 13388 bytes pygraph/kernels/deltaKernel.py | 4 +- pygraph/kernels/marginalizedKernel.py | 16 +- pygraph/kernels/pathKernel.py | 8 +- pygraph/kernels/spKernel.py | 4 +- pygraph/kernels/treeletKernel.py | 578 ++++++++++++--------- pygraph/kernels/weisfeilerLehmanKernel.py | 14 +- 10 files changed, 380 insertions(+), 267 deletions(-) diff --git a/README.md b/README.md index bee38d2..bd582cc 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,20 @@ For prediction we randomly divide the data in train and test subset, where 90% of * The targets of training data are normalized before calculating *path kernel* and *treelet kernel*. * See detailed results in [results.md](pygraph/kernels/results.md). +## References +[1] K. M. Borgwardt and H.-P. Kriegel. Shortest-path kernels on graphs. In Proceedings of the International Conference on Data Mining, pages 74-81, 2005. + +[2] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003. + +[3] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. In ESANN 2007 Apr 25 (pp. 355-360). + +[4] N. Shervashidze, P. Schweitzer, E. J. van Leeuwen, K. Mehlhorn, and K. M. Borgwardt. Weisfeiler-Lehman graph kernels. Journal of Machine Learning Research, 12:2539-2561, 2011. + +[5] Gaüzère B, Brun L, Villemin D. Two new graphs kernels in chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47. + ## Updates +### 2018.01.17 +* ADD comments to code of treelet kernel. - linlin ### 2018.01.16 * ADD *treelet kernel* and its result on dataset Acyclic. - linlin * MOD the way to calculate WL subtree kernel, correct its results. - linlin @@ -55,4 +68,4 @@ For prediction we randomly divide the data in train and test subset, where 90% of * ADD *marginalized kernel* and its result. - linlin * ADD list required python packages in file README.md. - linlin ### 2017.11.24 -* ADD *shortest path kernel* and its result. - linlin \ No newline at end of file +* ADD *shortest path kernel* and its result. 
- linlin diff --git a/notebooks/.ipynb_checkpoints/run_treeletkernel_acyclic-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/run_treeletkernel_acyclic-checkpoint.ipynb index 425930a..c25eb36 100644 --- a/notebooks/.ipynb_checkpoints/run_treeletkernel_acyclic-checkpoint.ipynb +++ b/notebooks/.ipynb_checkpoints/run_treeletkernel_acyclic-checkpoint.ipynb @@ -101,8 +101,8 @@ "\n", "kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = True)\n", "\n", - "# %lprun -f spkernel \\\n", - "# kernel_train_test(datafile, kernel_file_path, spkernel, kernel_para, normalize = False)" + "# %lprun -f treeletkernel \\\n", + "# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)" ] }, { diff --git a/notebooks/run_treeletkernel_acyclic.ipynb b/notebooks/run_treeletkernel_acyclic.ipynb index 425930a..c25eb36 100644 --- a/notebooks/run_treeletkernel_acyclic.ipynb +++ b/notebooks/run_treeletkernel_acyclic.ipynb @@ -101,8 +101,8 @@ "\n", "kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = True)\n", "\n", - "# %lprun -f spkernel \\\n", - "# kernel_train_test(datafile, kernel_file_path, spkernel, kernel_para, normalize = False)" + "# %lprun -f treeletkernel \\\n", + "# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)" ] }, { diff --git a/pygraph/kernels/__pycache__/treeletKernel.cpython-35.pyc b/pygraph/kernels/__pycache__/treeletKernel.cpython-35.pyc index eb8890b9cc3a57dfb77492d34577c7f582ab09ae..7e648db8045e372a4bc081c60e9c16690ebb170a 100644
pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i] - for neighborx in G[pattern[i]]: - if neighborx != pattern[0]: - new_pattern = pattern_t + [ neighborx ] - patterns['7'].append(new_pattern) - canonkey['7'] = len(patterns['7']) + Return + ------ + canonkey/canonkey_l : dict + For unlabeled graphs, canonkey is a dictionary which records amount of every tree pattern. For labeled graphs, canonkey_l is one which keeps track of amount of every treelet. - # pattern 11 - patterns['11'] = [] - for pattern in patterns['4star']: + References + ---------- + [1] Gaüzère B, Brun L, Villemin D. Two new graphs kernels in chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47. + """ + patterns = {} # a dictionary which consists of lists of patterns for all graphlet. + canonkey = {} # canonical key, a dictionary which records amount of every tree pattern. + + ### structural analysis ### + ### In this section, a list of patterns is generated for each graphlet, where every pattern is represented by nodes ordered by + ### Morgan's extended labeling. + # linear patterns + patterns['0'] = G.nodes() + canonkey['0'] = nx.number_of_nodes(G) + for i in range(1, 6): + patterns[str(i)] = find_all_paths(G, i) + canonkey[str(i)] = len(patterns[str(i)]) + + # n-star patterns + patterns['3star'] = [ [node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3 ] + patterns['4star'] = [ [node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4 ] + patterns['5star'] = [ [node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5 ] + # n-star patterns + canonkey['6'] = len(patterns['3star']) + canonkey['8'] = len(patterns['4star']) + canonkey['d'] = len(patterns['5star']) + + # pattern 7 + patterns['7'] = [] # the 1st line of Table 1 in Ref [1] + for pattern in patterns['3star']: + for i in range(1, len(pattern)): # for each neighbor of node 0 + if G.degree(pattern[i]) >= 2: + pattern_t = pattern[:] + pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i] # set the node with degree >= 2 as the 4th node + for neighborx in G[pattern[i]]: + if neighborx != pattern[0]: + new_pattern = pattern_t + [ neighborx ] + patterns['7'].append(new_pattern) + canonkey['7'] = len(patterns['7']) + + # pattern 11 + patterns['11'] = [] # the 4th line of Table 1 in Ref [1] + for pattern in patterns['4star']: + for i in range(1, len(pattern)): + if G.degree(pattern[i]) >= 2: + pattern_t = pattern[:] + pattern_t[i], pattern_t[4] = pattern_t[4], pattern_t[i] + for neighborx in G[pattern[i]]: + if neighborx != pattern[0]: + new_pattern = pattern_t + [ neighborx ] + patterns['11'].append(new_pattern) + canonkey['b'] = len(patterns['11']) + + # pattern 12 + patterns['12'] = [] # the 5th line of Table 1 in Ref [1] + rootlist = [] # a list of root nodes, whose extended labels are 3 + for pattern in patterns['3star']: + if pattern[0] not in rootlist: # prevent to count the same pattern twice from each of the two root nodes + rootlist.append(pattern[0]) for i in range(1, len(pattern)): - if G.degree(pattern[i]) >= 2: + if G.degree(pattern[i]) >= 3: + rootlist.append(pattern[i]) pattern_t = pattern[:] - pattern_t[i], pattern_t[4] = pattern_t[4], pattern_t[i] - for neighborx in G[pattern[i]]: - if neighborx != pattern[0]: - new_pattern = pattern_t + [ neighborx ] - patterns['11'].append(new_pattern) - canonkey['b'] = len(patterns['11']) - - # pattern 12 - patterns['12'] = [] - rootlist = [] - for pattern in patterns['3star']: - if pattern[0] not 
in rootlist: - rootlist.append(pattern[0]) - for i in range(1, len(pattern)): - if G.degree(pattern[i]) >= 3: - rootlist.append(pattern[i]) + pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i] + for neighborx1 in G[pattern[i]]: + if neighborx1 != pattern[0]: + for neighborx2 in G[pattern[i]]: + if neighborx1 > neighborx2 and neighborx2 != pattern[0]: + new_pattern = pattern_t + [neighborx1] + [neighborx2] +# new_patterns = [ pattern + [neighborx1] + [neighborx2] for neighborx1 in G[pattern[i]] if neighborx1 != pattern[0] for neighborx2 in G[pattern[i]] if (neighborx1 > neighborx2 and neighborx2 != pattern[0]) ] + patterns['12'].append(new_pattern) + canonkey['c'] = int(len(patterns['12']) / 2) + + # pattern 9 + patterns['9'] = [] # the 2nd line of Table 1 in Ref [1] + for pattern in patterns['3star']: + for pairs in [ [neighbor1, neighbor2] for neighbor1 in G[pattern[0]] if G.degree(neighbor1) >= 2 \ + for neighbor2 in G[pattern[0]] if G.degree(neighbor2) >= 2 if neighbor1 > neighbor2 ]: + pattern_t = pattern[:] + # move nodes with extended labels 4 to specific position to correspond to their children + pattern_t[pattern_t.index(pairs[0])], pattern_t[2] = pattern_t[2], pattern_t[pattern_t.index(pairs[0])] + pattern_t[pattern_t.index(pairs[1])], pattern_t[3] = pattern_t[3], pattern_t[pattern_t.index(pairs[1])] + for neighborx1 in G[pairs[0]]: + if neighborx1 != pattern[0]: + for neighborx2 in G[pairs[1]]: + if neighborx2 != pattern[0]: + new_pattern = pattern_t + [neighborx1] + [neighborx2] + patterns['9'].append(new_pattern) + canonkey['9'] = len(patterns['9']) + + # pattern 10 + patterns['10'] = [] # the 3rd line of Table 1 in Ref [1] + for pattern in patterns['3star']: + for i in range(1, len(pattern)): + if G.degree(pattern[i]) >= 2: + for neighborx in G[pattern[i]]: + if neighborx != pattern[0] and G.degree(neighborx) >= 2: pattern_t = pattern[:] pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i] - for neighborx1 in G[pattern[i]]: - if neighborx1 != pattern[0]: - for neighborx2 in G[pattern[i]]: - if neighborx1 > neighborx2 and neighborx2 != pattern[0]: - new_pattern = pattern_t + [neighborx1] + [neighborx2] -# new_patterns = [ pattern + [neighborx1] + [neighborx2] for neighborx1 in G[pattern[i]] if neighborx1 != pattern[0] for neighborx2 in G[pattern[i]] if (neighborx1 > neighborx2 and neighborx2 != pattern[0]) ] - patterns['12'].append(new_pattern) - canonkey['c'] = int(len(patterns['12']) / 2) - - # pattern 9 - patterns['9'] = [] - for pattern in patterns['3star']: - for pairs in [ [neighbor1, neighbor2] for neighbor1 in G[pattern[0]] if G.degree(neighbor1) >= 2 \ - for neighbor2 in G[pattern[0]] if G.degree(neighbor2) >= 2 if neighbor1 > neighbor2 ]: - pattern_t = pattern[:] - pattern_t[pattern_t.index(pairs[0])], pattern_t[2] = pattern_t[2], pattern_t[pattern_t.index(pairs[0])] - pattern_t[pattern_t.index(pairs[1])], pattern_t[3] = pattern_t[3], pattern_t[pattern_t.index(pairs[1])] - for neighborx1 in G[pairs[0]]: - if neighborx1 != pattern[0]: - for neighborx2 in G[pairs[1]]: - if neighborx2 != pattern[0]: - new_pattern = pattern_t + [neighborx1] + [neighborx2] - patterns['9'].append(new_pattern) - canonkey['9'] = len(patterns['9']) - - # pattern 10 - patterns['10'] = [] - for pattern in patterns['3star']: - for i in range(1, len(pattern)): - if G.degree(pattern[i]) >= 2: - for neighborx in G[pattern[i]]: - if neighborx != pattern[0] and G.degree(neighborx) >= 2: - pattern_t = pattern[:] - pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i] - new_patterns = [ 
pattern_t + [neighborx] + [neighborxx] for neighborxx in G[neighborx] if neighborxx != pattern[i] ] - patterns['10'].extend(new_patterns) - canonkey['a'] = len(patterns['10']) - - ### labeling information ### - if labeled == True: - canonkey_l = {} - - # linear patterns - canonkey_t = Counter(list(nx.get_node_attributes(G, node_label).values())) - for key in canonkey_t: - canonkey_l['0' + key] = canonkey_t[key] - - for i in range(1, 6): - treelet = [] - for pattern in patterns[str(i)]: - canonlist = list(chain.from_iterable((G.node[node][node_label], \ - G[node][pattern[idx+1]][edge_label]) for idx, node in enumerate(pattern[:-1]))) - canonlist.append(G.node[pattern[-1]][node_label]) - canonkey_t = ''.join(canonlist) - canonkey_t = canonkey_t if canonkey_t < canonkey_t[::-1] else canonkey_t[::-1] - treelet.append(str(i) + canonkey_t) - canonkey_l.update(Counter(treelet)) - - # n-star patterns - for i in range(3, 6): - treelet = [] - for pattern in patterns[str(i) + 'star']: - canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:] ] - canonlist.sort() - canonkey_t = ('d' if i == 5 else str(i * 2)) + G.node[pattern[0]][node_label] + ''.join(canonlist) - treelet.append(canonkey_t) - canonkey_l.update(Counter(treelet)) - - # pattern 7 - treelet = [] - for pattern in patterns['7']: - canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ] - canonlist.sort() - canonkey_t = '7' + G.node[pattern[0]][node_label] + ''.join(canonlist) \ - + G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] \ - + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[3]][edge_label] - treelet.append(canonkey_t) - canonkey_l.update(Counter(treelet)) - - # pattern 11 + new_patterns = [ pattern_t + [neighborx] + [neighborxx] for neighborxx in G[neighborx] if neighborxx != pattern[i] ] + patterns['10'].extend(new_patterns) + canonkey['a'] = len(patterns['10']) + + ### labeling information ### + ### In this section, a list of canonical keys is generated for every pattern obtained in the structural analysis + ### section above, which is a string corresponding to a unique treelet. A dictionary is built to keep track of + ### the amount of every treelet. + if labeled == True: + canonkey_l = {} # canonical key, a dictionary which keeps track of amount of every treelet. 
+ + # linear patterns + canonkey_t = Counter(list(nx.get_node_attributes(G, node_label).values())) + for key in canonkey_t: + canonkey_l['0' + key] = canonkey_t[key] + + for i in range(1, 6): treelet = [] - for pattern in patterns['11']: - canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:4] ] - canonlist.sort() - canonkey_t = 'b' + G.node[pattern[0]][node_label] + ''.join(canonlist) \ - + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[0]][edge_label] \ - + G.node[pattern[5]][node_label] + G[pattern[5]][pattern[4]][edge_label] - treelet.append(canonkey_t) + for pattern in patterns[str(i)]: + canonlist = list(chain.from_iterable((G.node[node][node_label], \ + G[node][pattern[idx+1]][edge_label]) for idx, node in enumerate(pattern[:-1]))) + canonlist.append(G.node[pattern[-1]][node_label]) + canonkey_t = ''.join(canonlist) + canonkey_t = canonkey_t if canonkey_t < canonkey_t[::-1] else canonkey_t[::-1] + treelet.append(str(i) + canonkey_t) canonkey_l.update(Counter(treelet)) - # pattern 10 + # n-star patterns + for i in range(3, 6): treelet = [] - for pattern in patterns['10']: - canonkey4 = G.node[pattern[5]][node_label] + G[pattern[5]][pattern[4]][edge_label] - canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ] + for pattern in patterns[str(i) + 'star']: + canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:] ] canonlist.sort() - canonkey0 = ''.join(canonlist) - canonkey_t = 'a' + G.node[pattern[3]][node_label] \ - + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[3]][edge_label] \ - + G.node[pattern[0]][node_label] + G[pattern[0]][pattern[3]][edge_label] \ - + canonkey4 + canonkey0 + canonkey_t = ('d' if i == 5 else str(i * 2)) + G.node[pattern[0]][node_label] + ''.join(canonlist) treelet.append(canonkey_t) canonkey_l.update(Counter(treelet)) + + # pattern 7 + treelet = [] + for pattern in patterns['7']: + canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ] + canonlist.sort() + canonkey_t = '7' + G.node[pattern[0]][node_label] + ''.join(canonlist) \ + + G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] \ + + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[3]][edge_label] + treelet.append(canonkey_t) + canonkey_l.update(Counter(treelet)) + + # pattern 11 + treelet = [] + for pattern in patterns['11']: + canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:4] ] + canonlist.sort() + canonkey_t = 'b' + G.node[pattern[0]][node_label] + ''.join(canonlist) \ + + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[0]][edge_label] \ + + G.node[pattern[5]][node_label] + G[pattern[5]][pattern[4]][edge_label] + treelet.append(canonkey_t) + canonkey_l.update(Counter(treelet)) + + # pattern 10 + treelet = [] + for pattern in patterns['10']: + canonkey4 = G.node[pattern[5]][node_label] + G[pattern[5]][pattern[4]][edge_label] + canonlist = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ] + canonlist.sort() + canonkey0 = ''.join(canonlist) + canonkey_t = 'a' + G.node[pattern[3]][node_label] \ + + G.node[pattern[4]][node_label] + G[pattern[4]][pattern[3]][edge_label] \ + + G.node[pattern[0]][node_label] + G[pattern[0]][pattern[3]][edge_label] \ + + canonkey4 + canonkey0 + treelet.append(canonkey_t) + canonkey_l.update(Counter(treelet)) + + # pattern 12 + treelet = [] + for pattern in patterns['12']: 
+ canonlist0 = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ] + canonlist0.sort() + canonlist3 = [ G.node[leaf][node_label] + G[leaf][pattern[3]][edge_label] for leaf in pattern[4:6] ] + canonlist3.sort() - # pattern 12 - treelet = [] - for pattern in patterns['12']: - canonlist0 = [ G.node[leaf][node_label] + G[leaf][pattern[0]][edge_label] for leaf in pattern[1:3] ] - canonlist0.sort() - canonlist3 = [ G.node[leaf][node_label] + G[leaf][pattern[3]][edge_label] for leaf in pattern[4:6] ] - canonlist3.sort() - canonkey_t1 = 'c' + G.node[pattern[0]][node_label] \ - + ''.join(canonlist0) \ - + G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] \ - + ''.join(canonlist3) - - canonkey_t2 = 'c' + G.node[pattern[3]][node_label] \ - + ''.join(canonlist3) \ - + G.node[pattern[0]][node_label] + G[pattern[0]][pattern[3]][edge_label] \ - + ''.join(canonlist0) - - treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) - canonkey_l.update(Counter(treelet)) - - # pattern 9 - treelet = [] - for pattern in patterns['9']: - canonkey2 = G.node[pattern[4]][node_label] + G[pattern[4]][pattern[2]][edge_label] - canonkey3 = G.node[pattern[5]][node_label] + G[pattern[5]][pattern[3]][edge_label] - prekey2 = G.node[pattern[2]][node_label] + G[pattern[2]][pattern[0]][edge_label] - prekey3 = G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] - if prekey2 + canonkey2 < prekey3 + canonkey3: - canonkey_t = G.node[pattern[1]][node_label] + G[pattern[1]][pattern[0]][edge_label] \ - + prekey2 + prekey3 + canonkey2 + canonkey3 - else: - canonkey_t = G.node[pattern[1]][node_label] + G[pattern[1]][pattern[0]][edge_label] \ - + prekey3 + prekey2 + canonkey3 + canonkey2 - treelet.append('9' + G.node[pattern[0]][node_label] + canonkey_t) - canonkey_l.update(Counter(treelet)) - - return canonkey_l - - return canonkey - + # 2 possible key can be generated from 2 nodes with extended label 3, select the one with lower lexicographic order. 
+ canonkey_t1 = 'c' + G.node[pattern[0]][node_label] \ + + ''.join(canonlist0) \ + + G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] \ + + ''.join(canonlist3) -def treeletkernel(*args, node_label = 'atom', edge_label = 'bond_type', labeled = True): - if len(args) == 1: # for a list of graphs - Gn = args[0] - Kmatrix = np.zeros((len(Gn), len(Gn))) + canonkey_t2 = 'c' + G.node[pattern[3]][node_label] \ + + ''.join(canonlist3) \ + + G.node[pattern[0]][node_label] + G[pattern[0]][pattern[3]][edge_label] \ + + ''.join(canonlist0) - start_time = time.time() - - for i in range(0, len(Gn)): - for j in range(i, len(Gn)): - Kmatrix[i][j] = treeletkernel(Gn[i], Gn[j], labeled = labeled, node_label = node_label, edge_label = edge_label) - Kmatrix[j][i] = Kmatrix[i][j] + treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) + canonkey_l.update(Counter(treelet)) - run_time = time.time() - start_time - print("\n --- treelet kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time)) - - return Kmatrix, run_time + # pattern 9 + treelet = [] + for pattern in patterns['9']: + canonkey2 = G.node[pattern[4]][node_label] + G[pattern[4]][pattern[2]][edge_label] + canonkey3 = G.node[pattern[5]][node_label] + G[pattern[5]][pattern[3]][edge_label] + prekey2 = G.node[pattern[2]][node_label] + G[pattern[2]][pattern[0]][edge_label] + prekey3 = G.node[pattern[3]][node_label] + G[pattern[3]][pattern[0]][edge_label] + if prekey2 + canonkey2 < prekey3 + canonkey3: + canonkey_t = G.node[pattern[1]][node_label] + G[pattern[1]][pattern[0]][edge_label] \ + + prekey2 + prekey3 + canonkey2 + canonkey3 + else: + canonkey_t = G.node[pattern[1]][node_label] + G[pattern[1]][pattern[0]][edge_label] \ + + prekey3 + prekey2 + canonkey3 + canonkey2 + treelet.append('9' + G.node[pattern[0]][node_label] + canonkey_t) + canonkey_l.update(Counter(treelet)) + + return canonkey_l + + return canonkey - else: # for only 2 graphs - - G1 = args[0] - G = args[1] - kernel = 0 - -# start_time = time.time() - - canonkey2 = get_canonkey(G, node_label = node_label, edge_label = edge_label, labeled = labeled) - canonkey1 = get_canonkey(G1, node_label = node_label, edge_label = edge_label, labeled = labeled) - - keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs - vector1 = np.matrix([ (canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys ]) - vector2 = np.matrix([ (canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys ]) - kernel = np.sum(np.exp(- np.square(vector1 - vector2) / 2)) + +def find_paths(G, source_node, length): + """Find all paths with a certain length those start from a source node. A recursive depth first search is applied. + + Parameters + ---------- + G : NetworkX graphs + The graph in which paths are searched. + source_node : integer + The number of the node from where all paths start. + length : integer + The length of paths. -# run_time = time.time() - start_time -# print("\n --- treelet kernel built in %s seconds ---" % (run_time)) + Return + ------ + path : list of list + List of paths retrieved, where each path is represented by a list of nodes. + """ + if length == 0: + return [[source_node]] + path = [ [source_node] + path for neighbor in G[source_node] \ + for path in find_paths(G, neighbor, length - 1) if source_node not in path ] + return path - return kernel#, run_time \ No newline at end of file + +def find_all_paths(G, length): + """Find all paths with a certain length in a graph. 
A recursive depth first search is applied. + + Parameters + ---------- + G : NetworkX graphs + The graph in which paths are searched. + length : integer + The length of paths. + + Return + ------ + path : list of list + List of paths retrieved, where each path is represented by a list of nodes. + """ + all_paths = [] + for node in G: + all_paths.extend(find_paths(G, node, length)) + all_paths_r = [ path[::-1] for path in all_paths ] + + # For each path, two representations are retrieved from its two extremities. Remove one of them. + for idx, path in enumerate(all_paths[:-1]): + for path2 in all_paths_r[idx+1::]: + if path == path2: + all_paths[idx] = [] + break + + return list(filter(lambda a: a != [], all_paths)) \ No newline at end of file diff --git a/pygraph/kernels/weisfeilerLehmanKernel.py b/pygraph/kernels/weisfeilerLehmanKernel.py index 264ce21..e2d2bd2 100644 --- a/pygraph/kernels/weisfeilerLehmanKernel.py +++ b/pygraph/kernels/weisfeilerLehmanKernel.py @@ -9,8 +9,6 @@ import time from pygraph.kernels.spkernel import spkernel from pygraph.kernels.pathKernel import pathkernel -# test of WL subtree kernel on many graphs - import sys import pathlib from collections import Counter @@ -44,8 +42,8 @@ def weisfeilerlehmankernel(*args, node_label = 'atom', edge_label = 'bond_type', Return ------ - Kmatrix/Kernel : Numpy matrix/int - Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs. / Weisfeiler-Lehman Kernel between 2 graphs. + Kmatrix/kernel : Numpy matrix/float + Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs. / Weisfeiler-Lehman kernel between 2 graphs. Notes ----- @@ -125,7 +123,7 @@ def _wl_subtreekernel_do(*args, node_label = 'atom', edge_label = 'bond_type', h Return ------ - Kmatrix/Kernel : Numpy matrix/int + Kmatrix/kernel : Numpy matrix/float Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs. @@ -229,8 +227,8 @@ def _weisfeilerlehmankernel_do(G1, G2, height = 0): Return ------ - Kernel : int - Weisfeiler-Lehman Kernel between 2 graphs. + kernel : float + Weisfeiler-Lehman kernel between 2 graphs. """ # init. @@ -298,4 +296,4 @@ def relabel(G): # get the set of compressed labels labels_comp = list(nx.get_node_attributes(G, 'label').values()) - num_of_each_label.update(dict(Counter(labels_comp))) \ No newline at end of file + num_of_each_label.update(dict(Counter(labels_comp)))
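
As shown in the treeletKernel.py hunk above, the treelet kernel compares two graphs through the canonical-key dictionaries produced by `get_canonkey`: only treelets present in both graphs contribute, and each contribution is a Gaussian of the difference of their counts. The sketch below reproduces just that comparison step; the canonical keys and counts are invented for illustration and do not come from a real `get_canonkey` run.

```python
import numpy as np

# Hypothetical canonical-key dictionaries, in the format get_canonkey returns for
# two small labeled graphs (the keys and counts below are made up for the example).
canonkey1 = {'0C': 3, '0O': 1, '1C1C': 2, '1C1O': 1, '2C1C1O': 1}
canonkey2 = {'0C': 2, '0O': 1, '1C1C': 1, '1C1O': 1, '2C1C1C': 1}

# Only canonical keys (treelets) found in both graphs contribute to the kernel.
keys = set(canonkey1.keys()) & set(canonkey2.keys())
vector1 = np.array([canonkey1[key] for key in keys])
vector2 = np.array([canonkey2[key] for key in keys])

# Gaussian of the count differences, summed over the common treelets,
# mirroring the comparison step of treeletkernel in this patch.
kernel = np.sum(np.exp(-np.square(vector1 - vector2) / 2))
print(kernel)
```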
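The `find_paths` / `find_all_paths` helpers at the end of treeletKernel.py enumerate each undirected path of a given length exactly once, discarding the reversed duplicate found from the other extremity. The snippet below is a lightly condensed copy of those two helpers, applied to networkx's built-in `path_graph` as a quick sanity check; the exact node ordering of each reported path depends on traversal order.

```python
import networkx as nx

def find_paths(G, source_node, length):
    # Recursive DFS: all paths with `length` edges starting from source_node.
    if length == 0:
        return [[source_node]]
    return [[source_node] + path for neighbor in G[source_node]
            for path in find_paths(G, neighbor, length - 1) if source_node not in path]

def find_all_paths(G, length):
    # Collect paths from every node, then drop one of the two reversed
    # representations of each undirected path.
    all_paths = []
    for node in G:
        all_paths.extend(find_paths(G, node, length))
    all_paths_r = [path[::-1] for path in all_paths]
    for idx, path in enumerate(all_paths[:-1]):
        for path2 in all_paths_r[idx + 1:]:
            if path == path2:
                all_paths[idx] = []
                break
    return [path for path in all_paths if path != []]

G = nx.path_graph(4)         # 0 - 1 - 2 - 3
print(find_all_paths(G, 2))  # the two undirected 2-edge paths (0-1-2 and 1-2-3), each listed once
```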