You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

run_weisfeilerLehmankernel_acyclic-checkpoint.ipynb 89 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [
  8. {
  9. "name": "stdout",
  10. "output_type": "stream",
  11. "text": [
  12. "\n",
  13. " --- This is a regression problem ---\n",
  14. "\n",
  15. "\n",
  16. " #--- calculating kernel matrix when height = 0.0 ---#\n",
  17. "\n",
  18. " Loading dataset from file...\n",
  19. "\n",
  20. " Calculating kernel matrix, this could take a while...\n",
  21. "\n",
  22. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.3646550178527832 seconds ---\n",
  23. "[[ 5. 6. 4. ... 20. 20. 20.]\n",
  24. " [ 6. 8. 4. ... 20. 20. 20.]\n",
  25. " [ 4. 4. 5. ... 21. 21. 21.]\n",
  26. " ...\n",
  27. " [ 20. 20. 21. ... 101. 101. 101.]\n",
  28. " [ 20. 20. 21. ... 101. 101. 101.]\n",
  29. " [ 20. 20. 21. ... 101. 101. 101.]]\n",
  30. "\n",
  31. " Starting calculate accuracy/rmse...\n",
  32. "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 664.77it/s]\n",
  33. " Mean performance on train set: 17.681582\n",
  34. "With standard deviation: 0.713183\n",
  35. "\n",
  36. " Mean performance on test set: 15.685879\n",
  37. "With standard deviation: 4.139197\n",
  38. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 681.36it/s]\n",
  39. "\n",
  40. "\n",
  41. " #--- calculating kernel matrix when height = 1.0 ---#\n",
  42. "\n",
  43. " Loading dataset from file...\n",
  44. "\n",
  45. " Calculating kernel matrix, this could take a while...\n",
  46. "\n",
  47. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 0.7535510063171387 seconds ---\n",
  48. "[[ 10. 10. 4. ... 20. 20. 20.]\n",
  49. " [ 10. 16. 4. ... 20. 20. 20.]\n",
  50. " [ 4. 4. 10. ... 22. 22. 24.]\n",
  51. " ...\n",
  52. " [ 20. 20. 22. ... 130. 130. 122.]\n",
  53. " [ 20. 20. 22. ... 130. 130. 122.]\n",
  54. " [ 20. 20. 24. ... 122. 122. 154.]]\n",
  55. "\n",
  56. " Starting calculate accuracy/rmse...\n",
  57. "calculate performance: 94%|█████████▍| 945/1000 [00:01<00:00, 713.00it/s]\n",
  58. " Mean performance on train set: 6.270014\n",
  59. "With standard deviation: 0.654734\n",
  60. "\n",
  61. " Mean performance on test set: 7.550458\n",
  62. "With standard deviation: 2.331786\n",
  63. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 719.46it/s]\n",
  64. "\n",
  65. "\n",
  66. " #--- calculating kernel matrix when height = 2.0 ---#\n",
  67. "\n",
  68. " Loading dataset from file...\n",
  69. "\n",
  70. " Calculating kernel matrix, this could take a while...\n",
  71. "\n",
  72. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.3278343677520752 seconds ---\n",
  73. "[[ 15. 10. 4. ... 20. 20. 20.]\n",
  74. " [ 10. 24. 4. ... 20. 20. 20.]\n",
  75. " [ 4. 4. 15. ... 22. 22. 26.]\n",
  76. " ...\n",
  77. " [ 20. 20. 22. ... 159. 151. 124.]\n",
  78. " [ 20. 20. 22. ... 151. 153. 124.]\n",
  79. " [ 20. 20. 26. ... 124. 124. 185.]]\n",
  80. "\n",
  81. " Starting calculate accuracy/rmse...\n",
  82. "calculate performance: 95%|█████████▍| 949/1000 [00:01<00:00, 736.38it/s]\n",
  83. " Mean performance on train set: 4.450682\n",
  84. "With standard deviation: 0.882129\n",
  85. "\n",
  86. " Mean performance on test set: 9.728466\n",
  87. "With standard deviation: 2.057669\n",
  88. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 709.22it/s]\n",
  89. "\n",
  90. "\n",
  91. " #--- calculating kernel matrix when height = 3.0 ---#\n",
  92. "\n",
  93. " Loading dataset from file...\n",
  94. "\n",
  95. " Calculating kernel matrix, this could take a while...\n",
  96. "\n",
  97. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 1.7653727531433105 seconds ---\n",
  98. "[[ 20. 10. 4. ... 20. 20. 20.]\n",
  99. " [ 10. 32. 4. ... 20. 20. 20.]\n",
  100. " [ 4. 4. 20. ... 22. 22. 26.]\n",
  101. " ...\n",
  102. " [ 20. 20. 22. ... 188. 159. 124.]\n",
  103. " [ 20. 20. 22. ... 159. 168. 124.]\n",
  104. " [ 20. 20. 26. ... 124. 124. 202.]]\n",
  105. "\n",
  106. " Starting calculate accuracy/rmse...\n",
  107. "calculate performance: 96%|█████████▌| 959/1000 [00:01<00:00, 724.60it/s]\n",
  108. " Mean performance on train set: 2.270586\n",
  109. "With standard deviation: 0.481516\n",
  110. "\n",
  111. " Mean performance on test set: 11.296110\n",
  112. "With standard deviation: 2.799944\n",
  113. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 670.29it/s]\n",
  114. "\n",
  115. "\n",
  116. " #--- calculating kernel matrix when height = 4.0 ---#\n",
  117. "\n",
  118. " Loading dataset from file...\n",
  119. "\n",
  120. " Calculating kernel matrix, this could take a while...\n",
  121. "\n",
  122. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.2821996212005615 seconds ---\n",
  123. "[[ 25. 10. 4. ... 20. 20. 20.]\n",
  124. " [ 10. 40. 4. ... 20. 20. 20.]\n",
  125. " [ 4. 4. 25. ... 22. 22. 26.]\n",
  126. " ...\n",
  127. " [ 20. 20. 22. ... 217. 159. 124.]\n",
  128. " [ 20. 20. 22. ... 159. 183. 124.]\n",
  129. " [ 20. 20. 26. ... 124. 124. 213.]]\n",
  130. "\n",
  131. " Starting calculate accuracy/rmse...\n",
  132. "calculate performance: 98%|█████████▊| 983/1000 [00:01<00:00, 709.28it/s]\n",
  133. " Mean performance on train set: 1.074035\n",
  134. "With standard deviation: 0.637823\n",
  135. "\n",
  136. " Mean performance on test set: 12.808303\n",
  137. "With standard deviation: 3.446939\n",
  138. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 646.12it/s]\n",
  139. "\n",
  140. "\n",
  141. " #--- calculating kernel matrix when height = 5.0 ---#\n",
  142. "\n",
  143. " Loading dataset from file...\n",
  144. "\n",
  145. " Calculating kernel matrix, this could take a while...\n",
  146. "\n",
  147. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 2.706934928894043 seconds ---\n",
  148. "[[ 30. 10. 4. ... 20. 20. 20.]\n",
  149. " [ 10. 48. 4. ... 20. 20. 20.]\n",
  150. " [ 4. 4. 30. ... 22. 22. 26.]\n",
  151. " ...\n",
  152. " [ 20. 20. 22. ... 246. 159. 124.]\n",
  153. " [ 20. 20. 22. ... 159. 198. 124.]\n",
  154. " [ 20. 20. 26. ... 124. 124. 224.]]\n",
  155. "\n",
  156. " Starting calculate accuracy/rmse...\n",
  157. "calculate performance: 95%|█████████▌| 953/1000 [00:01<00:00, 553.49it/s]\n",
  158. " Mean performance on train set: 0.700602\n",
  159. "With standard deviation: 0.572640\n",
  160. "\n",
  161. " Mean performance on test set: 14.017923\n",
  162. "With standard deviation: 3.675042\n",
  163. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 621.01it/s]\n",
  164. "\n",
  165. "\n",
  166. " #--- calculating kernel matrix when height = 6.0 ---#\n",
  167. "\n",
  168. " Loading dataset from file...\n",
  169. "\n",
  170. " Calculating kernel matrix, this could take a while...\n",
  171. "\n",
  172. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.1140964031219482 seconds ---\n",
  173. "[[ 35. 10. 4. ... 20. 20. 20.]\n",
  174. " [ 10. 56. 4. ... 20. 20. 20.]\n",
  175. " [ 4. 4. 35. ... 22. 22. 26.]\n",
  176. " ...\n",
  177. " [ 20. 20. 22. ... 275. 159. 124.]\n",
  178. " [ 20. 20. 22. ... 159. 213. 124.]\n",
  179. " [ 20. 20. 26. ... 124. 124. 235.]]\n",
  180. "\n",
  181. " Starting calculate accuracy/rmse...\n",
  182. "calculate performance: 100%|█████████▉| 997/1000 [00:01<00:00, 595.50it/s]\n",
  183. " Mean performance on train set: 0.691515\n",
  184. "With standard deviation: 0.564620\n",
  185. "\n",
  186. " Mean performance on test set: 14.918434\n",
  187. "With standard deviation: 3.805352\n",
  188. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 586.05it/s]\n",
  189. "\n",
  190. "\n",
  191. " #--- calculating kernel matrix when height = 7.0 ---#\n",
  192. "\n",
  193. " Loading dataset from file...\n",
  194. "\n",
  195. " Calculating kernel matrix, this could take a while...\n",
  196. "\n",
  197. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 3.5894455909729004 seconds ---\n",
  198. "[[ 40. 10. 4. ... 20. 20. 20.]\n",
  199. " [ 10. 64. 4. ... 20. 20. 20.]\n",
  200. " [ 4. 4. 40. ... 22. 22. 26.]\n",
  201. " ...\n",
  202. " [ 20. 20. 22. ... 304. 159. 124.]\n",
  203. " [ 20. 20. 22. ... 159. 228. 124.]\n",
  204. " [ 20. 20. 26. ... 124. 124. 246.]]\n",
  205. "\n",
  206. " Starting calculate accuracy/rmse...\n",
  207. "calculate performance: 99%|█████████▉| 991/1000 [00:01<00:00, 663.55it/s]\n",
  208. " Mean performance on train set: 0.691516\n",
  209. "With standard deviation: 0.564620\n",
  210. "\n",
  211. " Mean performance on test set: 15.629476\n",
  212. "With standard deviation: 3.865387\n",
  213. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 627.59it/s]\n",
  214. "\n",
  215. "\n",
  216. " #--- calculating kernel matrix when height = 8.0 ---#\n",
  217. "\n",
  218. " Loading dataset from file...\n",
  219. "\n",
  220. " Calculating kernel matrix, this could take a while...\n",
  221. "\n",
  222. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.081295967102051 seconds ---\n",
  223. "[[ 45. 10. 4. ... 20. 20. 20.]\n",
  224. " [ 10. 72. 4. ... 20. 20. 20.]\n",
  225. " [ 4. 4. 45. ... 22. 22. 26.]\n",
  226. " ...\n",
  227. " [ 20. 20. 22. ... 333. 159. 124.]\n",
  228. " [ 20. 20. 22. ... 159. 243. 124.]\n",
  229. " [ 20. 20. 26. ... 124. 124. 257.]]\n",
  230. "\n",
  231. " Starting calculate accuracy/rmse...\n",
  232. "calculate performance: 96%|█████████▌| 961/1000 [00:01<00:00, 601.33it/s]\n",
  233. " Mean performance on train set: 0.691515\n",
  234. "With standard deviation: 0.564620\n",
  235. "\n",
  236. " Mean performance on test set: 16.214369\n",
  237. "With standard deviation: 3.928756\n",
  238. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 603.90it/s]\n",
  239. "\n",
  240. "\n",
  241. " #--- calculating kernel matrix when height = 9.0 ---#\n",
  242. "\n",
  243. " Loading dataset from file...\n",
  244. "\n",
  245. " Calculating kernel matrix, this could take a while...\n",
  246. "\n",
  247. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.497286796569824 seconds ---\n",
  248. "[[ 50. 10. 4. ... 20. 20. 20.]\n",
  249. " [ 10. 80. 4. ... 20. 20. 20.]\n",
  250. " [ 4. 4. 50. ... 22. 22. 26.]\n",
  251. " ...\n",
  252. " [ 20. 20. 22. ... 362. 159. 124.]\n",
  253. " [ 20. 20. 22. ... 159. 258. 124.]\n",
  254. " [ 20. 20. 26. ... 124. 124. 268.]]\n",
  255. "\n",
  256. " Starting calculate accuracy/rmse...\n",
  257. "calculate performance: 93%|█████████▎| 931/1000 [00:01<00:00, 511.55it/s]\n",
  258. " Mean performance on train set: 0.691515\n",
  259. "With standard deviation: 0.564620\n",
  260. "\n",
  261. " Mean performance on test set: 16.725744\n",
  262. "With standard deviation: 3.993095\n",
  263. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 550.66it/s]\n",
  264. "\n",
  265. "\n",
  266. " #--- calculating kernel matrix when height = 10.0 ---#\n",
  267. "\n",
  268. " Loading dataset from file...\n",
  269. "\n",
  270. " Calculating kernel matrix, this could take a while...\n"
  271. ]
  272. },
  273. {
  274. "name": "stdout",
  275. "output_type": "stream",
  276. "text": [
  277. "\n",
  278. " --- Weisfeiler-Lehman subtree kernel matrix of size 185 built in 4.984841585159302 seconds ---\n",
  279. "[[ 55. 10. 4. ... 20. 20. 20.]\n",
  280. " [ 10. 88. 4. ... 20. 20. 20.]\n",
  281. " [ 4. 4. 55. ... 22. 22. 26.]\n",
  282. " ...\n",
  283. " [ 20. 20. 22. ... 391. 159. 124.]\n",
  284. " [ 20. 20. 22. ... 159. 273. 124.]\n",
  285. " [ 20. 20. 26. ... 124. 124. 279.]]\n",
  286. "\n",
  287. " Starting calculate accuracy/rmse...\n",
  288. "calculate performance: 94%|█████████▍| 942/1000 [00:01<00:00, 708.78it/s]\n",
  289. " Mean performance on train set: 0.691516\n",
  290. "With standard deviation: 0.564621\n",
  291. "\n",
  292. " Mean performance on test set: 17.186401\n",
  293. "With standard deviation: 4.056724\n",
  294. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 711.43it/s]\n",
  295. "\n",
  296. "\n",
  297. " height rmse_test std_test rmse_train std_train k_time\n",
  298. "-------- ----------- ---------- ------------ ----------- --------\n",
  299. " 0 15.6859 4.1392 17.6816 0.713183 0.364655\n",
  300. " 1 7.55046 2.33179 6.27001 0.654734 0.753551\n",
  301. " 2 9.72847 2.05767 4.45068 0.882129 1.32783\n",
  302. " 3 11.2961 2.79994 2.27059 0.481516 1.76537\n",
  303. " 4 12.8083 3.44694 1.07403 0.637823 2.2822\n",
  304. " 5 14.0179 3.67504 0.700602 0.57264 2.70693\n",
  305. " 6 14.9184 3.80535 0.691515 0.56462 3.1141\n",
  306. " 7 15.6295 3.86539 0.691516 0.56462 3.58945\n",
  307. " 8 16.2144 3.92876 0.691515 0.56462 4.0813\n",
  308. " 9 16.7257 3.9931 0.691515 0.56462 4.49729\n",
  309. " 10 17.1864 4.05672 0.691516 0.564621 4.98484\n"
  310. ]
  311. }
  312. ],
  313. "source": [
  314. "# wl subtree kernel\n",
  315. "%load_ext line_profiler\n",
  316. "\n",
  317. "import numpy as np\n",
  318. "import sys\n",
  319. "sys.path.insert(0, \"../\")\n",
  320. "from pygraph.utils.utils import kernel_train_test\n",
  321. "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n",
  322. "\n",
  323. "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n",
  324. "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n",
  325. "\n",
  326. "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type')\n",
  327. "\n",
  328. "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n",
  329. " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n",
  330. "\n",
  331. "# %lprun -f _wl_subtreekernel_do \\\n",
  332. "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n",
  333. "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)"
  334. ]
  335. },
  336. {
  337. "cell_type": "code",
  338. "execution_count": 1,
  339. "metadata": {},
  340. "outputs": [
  341. {
  342. "name": "stdout",
  343. "output_type": "stream",
  344. "text": [
  345. "\n",
  346. " --- This is a regression problem ---\n",
  347. "\n",
  348. "\n",
  349. " #--- calculating kernel matrix when height = 0.0 ---#\n",
  350. "\n",
  351. " Loading dataset from file...\n",
  352. "\n",
  353. " Calculating kernel matrix, this could take a while...\n",
  354. "\n",
  355. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 13.504083633422852 seconds ---\n",
  356. "[[ 3. 1. 3. ... 1. 1. 1.]\n",
  357. " [ 1. 6. 1. ... 0. 0. 3.]\n",
  358. " [ 3. 1. 3. ... 1. 1. 1.]\n",
  359. " ...\n",
  360. " [ 1. 0. 1. ... 55. 21. 7.]\n",
  361. " [ 1. 0. 1. ... 21. 55. 7.]\n",
  362. " [ 1. 3. 1. ... 7. 7. 55.]]\n",
  363. "\n",
  364. " Starting calculate accuracy/rmse...\n",
  365. "calculate performance: 98%|█████████▊| 980/1000 [00:01<00:00, 773.79it/s]\n",
  366. " Mean performance on train set: 28.360361\n",
  367. "With standard deviation: 1.357183\n",
  368. "\n",
  369. " Mean performance on test set: 35.191954\n",
  370. "With standard deviation: 4.495767\n",
  371. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 743.82it/s]\n",
  372. "\n",
  373. "\n",
  374. " #--- calculating kernel matrix when height = 1.0 ---#\n",
  375. "\n",
  376. " Loading dataset from file...\n",
  377. "\n",
  378. " Calculating kernel matrix, this could take a while...\n",
  379. "\n",
  380. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 26.82917618751526 seconds ---\n",
  381. "[[ 6. 2. 6. ... 2. 2. 2.]\n",
  382. " [ 2. 12. 2. ... 0. 0. 6.]\n",
  383. " [ 6. 2. 6. ... 2. 2. 2.]\n",
  384. " ...\n",
  385. " [ 2. 0. 2. ... 110. 42. 14.]\n",
  386. " [ 2. 0. 2. ... 42. 110. 14.]\n",
  387. " [ 2. 6. 2. ... 14. 14. 110.]]\n",
  388. "\n",
  389. " Starting calculate accuracy/rmse...\n",
  390. "calculate performance: 98%|█████████▊| 983/1000 [00:01<00:00, 751.78it/s]\n",
  391. " Mean performance on train set: 27.933534\n",
  392. "With standard deviation: 1.448359\n",
  393. "\n",
  394. " Mean performance on test set: 35.180815\n",
  395. "With standard deviation: 4.500453\n",
  396. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 744.44it/s]\n",
  397. "\n",
  398. "\n",
  399. " #--- calculating kernel matrix when height = 2.0 ---#\n",
  400. "\n",
  401. " Loading dataset from file...\n",
  402. "\n",
  403. " Calculating kernel matrix, this could take a while...\n",
  404. "\n",
  405. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 40.235626220703125 seconds ---\n",
  406. "[[ 9. 3. 9. ... 3. 3. 3.]\n",
  407. " [ 3. 18. 3. ... 0. 0. 9.]\n",
  408. " [ 9. 3. 9. ... 3. 3. 3.]\n",
  409. " ...\n",
  410. " [ 3. 0. 3. ... 165. 63. 21.]\n",
  411. " [ 3. 0. 3. ... 63. 165. 21.]\n",
  412. " [ 3. 9. 3. ... 21. 21. 165.]]\n",
  413. "\n",
  414. " Starting calculate accuracy/rmse...\n",
  415. "calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 694.10it/s]\n",
  416. " Mean performance on train set: 28.111311\n",
  417. "With standard deviation: 1.508915\n",
  418. "\n",
  419. " Mean performance on test set: 35.163150\n",
  420. "With standard deviation: 4.502054\n",
  421. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 695.02it/s]\n",
  422. "\n",
  423. "\n",
  424. " #--- calculating kernel matrix when height = 3.0 ---#\n",
  425. "\n",
  426. " Loading dataset from file...\n",
  427. "\n",
  428. " Calculating kernel matrix, this could take a while...\n",
  429. "\n",
  430. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 54.67040753364563 seconds ---\n",
  431. "[[ 12. 4. 12. ... 4. 4. 4.]\n",
  432. " [ 4. 24. 4. ... 0. 0. 12.]\n",
  433. " [ 12. 4. 12. ... 4. 4. 4.]\n",
  434. " ...\n",
  435. " [ 4. 0. 4. ... 220. 84. 28.]\n",
  436. " [ 4. 0. 4. ... 84. 220. 28.]\n",
  437. " [ 4. 12. 4. ... 28. 28. 220.]]\n",
  438. "\n",
  439. " Starting calculate accuracy/rmse...\n",
  440. "calculate performance: 95%|█████████▌| 954/1000 [00:01<00:00, 748.03it/s]\n",
  441. " Mean performance on train set: 28.390274\n",
  442. "With standard deviation: 1.365711\n",
  443. "\n",
  444. " Mean performance on test set: 35.194634\n",
  445. "With standard deviation: 4.498007\n",
  446. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 726.68it/s]\n",
  447. "\n",
  448. "\n",
  449. " #--- calculating kernel matrix when height = 4.0 ---#\n",
  450. "\n",
  451. " Loading dataset from file...\n",
  452. "\n",
  453. " Calculating kernel matrix, this could take a while...\n",
  454. "\n",
  455. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 67.15217232704163 seconds ---\n",
  456. "[[ 15. 5. 15. ... 5. 5. 5.]\n",
  457. " [ 5. 30. 5. ... 0. 0. 15.]\n",
  458. " [ 15. 5. 15. ... 5. 5. 5.]\n",
  459. " ...\n",
  460. " [ 5. 0. 5. ... 275. 105. 35.]\n",
  461. " [ 5. 0. 5. ... 105. 275. 35.]\n",
  462. " [ 5. 15. 5. ... 35. 35. 275.]]\n",
  463. "\n",
  464. " Starting calculate accuracy/rmse...\n",
  465. "calculate performance: 95%|█████████▌| 950/1000 [00:01<00:00, 737.07it/s]\n",
  466. " Mean performance on train set: 27.974611\n",
  467. "With standard deviation: 1.462223\n",
  468. "\n",
  469. " Mean performance on test set: 35.175314\n",
  470. "With standard deviation: 4.501113\n",
  471. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 719.71it/s]\n",
  472. "\n",
  473. "\n",
  474. " #--- calculating kernel matrix when height = 5.0 ---#\n",
  475. "\n",
  476. " Loading dataset from file...\n",
  477. "\n",
  478. " Calculating kernel matrix, this could take a while...\n",
  479. "\n",
  480. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 80.08806300163269 seconds ---\n",
  481. "[[ 18. 6. 18. ... 6. 6. 6.]\n",
  482. " [ 6. 36. 6. ... 0. 0. 18.]\n",
  483. " [ 18. 6. 18. ... 6. 6. 6.]\n",
  484. " ...\n",
  485. " [ 6. 0. 6. ... 330. 126. 42.]\n",
  486. " [ 6. 0. 6. ... 126. 330. 42.]\n",
  487. " [ 6. 18. 6. ... 42. 42. 330.]]\n",
  488. "\n",
  489. " Starting calculate accuracy/rmse...\n",
  490. "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 735.71it/s]\n",
  491. " Mean performance on train set: 28.018415\n",
  492. "With standard deviation: 1.455644\n",
  493. "\n",
  494. " Mean performance on test set: 35.199713\n",
  495. "With standard deviation: 4.507104\n",
  496. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 738.55it/s]\n",
  497. "\n",
  498. "\n",
  499. " #--- calculating kernel matrix when height = 6.0 ---#\n",
  500. "\n",
  501. " Loading dataset from file...\n",
  502. "\n",
  503. " Calculating kernel matrix, this could take a while...\n",
  504. "\n",
  505. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 92.19254112243652 seconds ---\n",
  506. "[[ 21. 7. 21. ... 7. 7. 7.]\n",
  507. " [ 7. 42. 7. ... 0. 0. 21.]\n",
  508. " [ 21. 7. 21. ... 7. 7. 7.]\n",
  509. " ...\n",
  510. " [ 7. 0. 7. ... 385. 147. 49.]\n",
  511. " [ 7. 0. 7. ... 147. 385. 49.]\n",
  512. " [ 7. 21. 7. ... 49. 49. 385.]]\n",
  513. "\n",
  514. " Starting calculate accuracy/rmse...\n",
  515. "calculate performance: 98%|█████████▊| 975/1000 [00:01<00:00, 721.42it/s]\n",
  516. " Mean performance on train set: 28.373079\n",
  517. "With standard deviation: 1.600565\n",
  518. "\n",
  519. " Mean performance on test set: 35.164471\n",
  520. "With standard deviation: 4.498487\n",
  521. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 727.58it/s]\n",
  522. "\n",
  523. "\n",
  524. " #--- calculating kernel matrix when height = 7.0 ---#\n",
  525. "\n",
  526. " Loading dataset from file...\n",
  527. "\n",
  528. " Calculating kernel matrix, this could take a while...\n",
  529. "\n",
  530. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 105.81170415878296 seconds ---\n",
  531. "[[ 24. 8. 24. ... 8. 8. 8.]\n",
  532. " [ 8. 48. 8. ... 0. 0. 24.]\n",
  533. " [ 24. 8. 24. ... 8. 8. 8.]\n",
  534. " ...\n",
  535. " [ 8. 0. 8. ... 440. 168. 56.]\n",
  536. " [ 8. 0. 8. ... 168. 440. 56.]\n",
  537. " [ 8. 24. 8. ... 56. 56. 440.]]\n",
  538. "\n",
  539. " Starting calculate accuracy/rmse...\n",
  540. "calculate performance: 97%|█████████▋| 968/1000 [00:01<00:00, 739.67it/s]\n",
  541. " Mean performance on train set: 27.960421\n",
  542. "With standard deviation: 1.457425\n",
  543. "\n",
  544. " Mean performance on test set: 35.177115\n",
  545. "With standard deviation: 4.500904\n",
  546. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 733.61it/s]\n",
  547. "\n",
  548. "\n",
  549. " #--- calculating kernel matrix when height = 8.0 ---#\n",
  550. "\n",
  551. " Loading dataset from file...\n",
  552. "\n",
  553. " Calculating kernel matrix, this could take a while...\n",
  554. "\n",
  555. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 119.0216612815857 seconds ---\n",
  556. "[[ 27. 9. 27. ... 9. 9. 9.]\n",
  557. " [ 9. 54. 9. ... 0. 0. 27.]\n",
  558. " [ 27. 9. 27. ... 9. 9. 9.]\n",
  559. " ...\n",
  560. " [ 9. 0. 9. ... 495. 189. 63.]\n",
  561. " [ 9. 0. 9. ... 189. 495. 63.]\n",
  562. " [ 9. 27. 9. ... 63. 63. 495.]]\n",
  563. "\n",
  564. " Starting calculate accuracy/rmse...\n",
  565. "calculate performance: 93%|█████████▎| 931/1000 [00:01<00:00, 752.10it/s]\n",
  566. " Mean performance on train set: 28.199059\n",
  567. "With standard deviation: 1.514897\n",
  568. "\n",
  569. " Mean performance on test set: 35.196848\n",
  570. "With standard deviation: 4.505256\n",
  571. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 768.54it/s]\n",
  572. "\n",
  573. "\n",
  574. " #--- calculating kernel matrix when height = 9.0 ---#\n",
  575. "\n",
  576. " Loading dataset from file...\n",
  577. "\n",
  578. " Calculating kernel matrix, this could take a while...\n",
  579. "\n",
  580. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 131.22810459136963 seconds ---\n",
  581. "[[ 30. 10. 30. ... 10. 10. 10.]\n",
  582. " [ 10. 60. 10. ... 0. 0. 30.]\n",
  583. " [ 30. 10. 30. ... 10. 10. 10.]\n",
  584. " ...\n",
  585. " [ 10. 0. 10. ... 550. 210. 70.]\n",
  586. " [ 10. 0. 10. ... 210. 550. 70.]\n",
  587. " [ 10. 30. 10. ... 70. 70. 550.]]\n",
  588. "\n",
  589. " Starting calculate accuracy/rmse...\n",
  590. "calculate performance: 93%|█████████▎| 932/1000 [00:01<00:00, 763.55it/s]\n",
  591. " Mean performance on train set: 28.266520\n",
  592. "With standard deviation: 1.307686\n",
  593. "\n",
  594. " Mean performance on test set: 35.195635\n",
  595. "With standard deviation: 4.501972\n",
  596. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 764.12it/s]\n",
  597. "\n",
  598. "\n",
  599. " #--- calculating kernel matrix when height = 10.0 ---#\n",
  600. "\n",
  601. " Loading dataset from file...\n",
  602. "\n",
  603. " Calculating kernel matrix, this could take a while...\n"
  604. ]
  605. },
  606. {
  607. "name": "stdout",
  608. "output_type": "stream",
  609. "text": [
  610. "\n",
  611. " --- Weisfeiler-Lehman sp kernel matrix of size 185 built in 144.96362161636353 seconds ---\n",
  612. "[[ 33. 11. 33. ... 11. 11. 11.]\n",
  613. " [ 11. 66. 11. ... 0. 0. 33.]\n",
  614. " [ 33. 11. 33. ... 11. 11. 11.]\n",
  615. " ...\n",
  616. " [ 11. 0. 11. ... 605. 231. 77.]\n",
  617. " [ 11. 0. 11. ... 231. 605. 77.]\n",
  618. " [ 11. 33. 11. ... 77. 77. 605.]]\n",
  619. "\n",
  620. " Starting calculate accuracy/rmse...\n",
  621. "calculate performance: 100%|█████████▉| 996/1000 [00:01<00:00, 820.73it/s]\n",
  622. " Mean performance on train set: 28.416280\n",
  623. "With standard deviation: 1.615957\n",
  624. "\n",
  625. " Mean performance on test set: 35.167588\n",
  626. "With standard deviation: 4.497227\n",
  627. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 822.53it/s]\n",
  628. "\n",
  629. "\n",
  630. " height rmse_test std_test rmse_train std_train k_time\n",
  631. "-------- ----------- ---------- ------------ ----------- --------\n",
  632. " 0 35.192 4.49577 28.3604 1.35718 13.5041\n",
  633. " 1 35.1808 4.50045 27.9335 1.44836 26.8292\n",
  634. " 2 35.1632 4.50205 28.1113 1.50891 40.2356\n",
  635. " 3 35.1946 4.49801 28.3903 1.36571 54.6704\n",
  636. " 4 35.1753 4.50111 27.9746 1.46222 67.1522\n",
  637. " 5 35.1997 4.5071 28.0184 1.45564 80.0881\n",
  638. " 6 35.1645 4.49849 28.3731 1.60057 92.1925\n",
  639. " 7 35.1771 4.5009 27.9604 1.45742 105.812\n",
  640. " 8 35.1968 4.50526 28.1991 1.5149 119.022\n",
  641. " 9 35.1956 4.50197 28.2665 1.30769 131.228\n",
  642. " 10 35.1676 4.49723 28.4163 1.61596 144.964\n"
  643. ]
  644. }
  645. ],
  646. "source": [
  647. "# WL sp kernel\n",
  648. "%load_ext line_profiler\n",
  649. "\n",
  650. "import numpy as np\n",
  651. "import sys\n",
  652. "sys.path.insert(0, \"../\")\n",
  653. "from pygraph.utils.utils import kernel_train_test\n",
  654. "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n",
  655. "\n",
  656. "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n",
  657. "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n",
  658. "\n",
  659. "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', base_kernel = 'sp')\n",
  660. "\n",
  661. "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n",
  662. " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n",
  663. "\n",
  664. "# %lprun -f _wl_subtreekernel_do \\\n",
  665. "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n",
  666. "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)"
  667. ]
  668. },
  669. {
  670. "cell_type": "code",
  671. "execution_count": 2,
  672. "metadata": {},
  673. "outputs": [
  674. {
  675. "name": "stdout",
  676. "output_type": "stream",
  677. "text": [
  678. "The line_profiler extension is already loaded. To reload it, use:\n",
  679. " %reload_ext line_profiler\n",
  680. "\n",
  681. " --- This is a regression problem ---\n",
  682. "\n",
  683. "\n",
  684. " #--- calculating kernel matrix when height = 0.0 ---#\n",
  685. "\n",
  686. " Loading dataset from file...\n",
  687. "\n",
  688. " Calculating kernel matrix, this could take a while...\n",
  689. "\n",
  690. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 0.8530018329620361 seconds ---\n",
  691. "[[ 2. 1. 2. ... 0. 0. 1.]\n",
  692. " [ 1. 3. 1. ... 0. 0. 2.]\n",
  693. " [ 2. 1. 2. ... 0. 0. 1.]\n",
  694. " ...\n",
  695. " [ 0. 0. 0. ... 10. 7. 0.]\n",
  696. " [ 0. 0. 0. ... 7. 10. 1.]\n",
  697. " [ 1. 2. 1. ... 0. 1. 10.]]\n",
  698. "\n",
  699. " Starting calculate accuracy/rmse...\n",
  700. "calculate performance: 95%|█████████▍| 947/1000 [00:01<00:00, 719.29it/s]\n",
  701. " Mean performance on train set: 29.997498\n",
  702. "With standard deviation: 0.902340\n",
  703. "\n",
  704. " Mean performance on test set: 33.407740\n",
  705. "With standard deviation: 4.732717\n",
  706. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 653.54it/s]\n",
  707. "\n",
  708. "\n",
  709. " #--- calculating kernel matrix when height = 1.0 ---#\n",
  710. "\n",
  711. " Loading dataset from file...\n",
  712. "\n",
  713. " Calculating kernel matrix, this could take a while...\n",
  714. "\n",
  715. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 1.717505931854248 seconds ---\n",
  716. "[[ 4. 2. 4. ... 0. 0. 2.]\n",
  717. " [ 2. 6. 2. ... 0. 0. 4.]\n",
  718. " [ 4. 2. 4. ... 0. 0. 2.]\n",
  719. " ...\n",
  720. " [ 0. 0. 0. ... 20. 14. 0.]\n",
  721. " [ 0. 0. 0. ... 14. 20. 2.]\n",
  722. " [ 2. 4. 2. ... 0. 2. 20.]]\n",
  723. "\n",
  724. " Starting calculate accuracy/rmse...\n",
  725. "calculate performance: 96%|█████████▌| 956/1000 [00:01<00:00, 721.27it/s]\n",
  726. " Mean performance on train set: 30.160338\n",
  727. "With standard deviation: 1.094235\n",
  728. "\n",
  729. " Mean performance on test set: 33.423458\n",
  730. "With standard deviation: 4.721311\n",
  731. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 723.53it/s]\n",
  732. "\n",
  733. "\n",
  734. " #--- calculating kernel matrix when height = 2.0 ---#\n",
  735. "\n",
  736. " Loading dataset from file...\n",
  737. "\n",
  738. " Calculating kernel matrix, this could take a while...\n",
  739. "\n",
  740. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 2.6603214740753174 seconds ---\n",
  741. "[[ 6. 3. 6. ... 0. 0. 3.]\n",
  742. " [ 3. 9. 3. ... 0. 0. 6.]\n",
  743. " [ 6. 3. 6. ... 0. 0. 3.]\n",
  744. " ...\n",
  745. " [ 0. 0. 0. ... 30. 21. 0.]\n",
  746. " [ 0. 0. 0. ... 21. 30. 3.]\n",
  747. " [ 3. 6. 3. ... 0. 3. 30.]]\n",
  748. "\n",
  749. " Starting calculate accuracy/rmse...\n",
  750. "calculate performance: 94%|█████████▍| 944/1000 [00:01<00:00, 650.98it/s]\n",
  751. " Mean performance on train set: 29.928570\n",
  752. "With standard deviation: 0.787941\n",
  753. "\n",
  754. " Mean performance on test set: 33.433014\n",
  755. "With standard deviation: 4.724408\n",
  756. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 688.71it/s]\n",
  757. "\n",
  758. "\n",
  759. " #--- calculating kernel matrix when height = 3.0 ---#\n",
  760. "\n",
  761. " Loading dataset from file...\n",
  762. "\n",
  763. " Calculating kernel matrix, this could take a while...\n",
  764. "\n",
  765. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 3.477631092071533 seconds ---\n",
  766. "[[ 8. 4. 8. ... 0. 0. 4.]\n",
  767. " [ 4. 12. 4. ... 0. 0. 8.]\n",
  768. " [ 8. 4. 8. ... 0. 0. 4.]\n",
  769. " ...\n",
  770. " [ 0. 0. 0. ... 40. 28. 0.]\n",
  771. " [ 0. 0. 0. ... 28. 40. 4.]\n",
  772. " [ 4. 8. 4. ... 0. 4. 40.]]\n",
  773. "\n",
  774. " Starting calculate accuracy/rmse...\n",
  775. "calculate performance: 95%|█████████▌| 954/1000 [00:01<00:00, 725.15it/s]\n",
  776. " Mean performance on train set: 30.011409\n",
  777. "With standard deviation: 0.909674\n",
  778. "\n",
  779. " Mean performance on test set: 33.407319\n",
  780. "With standard deviation: 4.732434\n",
  781. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 720.71it/s]\n",
  782. "\n",
  783. "\n",
  784. " #--- calculating kernel matrix when height = 4.0 ---#\n",
  785. "\n",
  786. " Loading dataset from file...\n",
  787. "\n",
  788. " Calculating kernel matrix, this could take a while...\n",
  789. "\n",
  790. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 4.5436692237854 seconds ---\n",
  791. "[[10. 5. 10. ... 0. 0. 5.]\n",
  792. " [ 5. 15. 5. ... 0. 0. 10.]\n",
  793. " [10. 5. 10. ... 0. 0. 5.]\n",
  794. " ...\n",
  795. " [ 0. 0. 0. ... 50. 35. 0.]\n",
  796. " [ 0. 0. 0. ... 35. 50. 5.]\n",
  797. " [ 5. 10. 5. ... 0. 5. 50.]]\n",
  798. "\n",
  799. " Starting calculate accuracy/rmse...\n",
  800. "calculate performance: 94%|█████████▎| 936/1000 [00:01<00:00, 568.04it/s]\n",
  801. " Mean performance on train set: 30.184162\n",
  802. "With standard deviation: 1.108902\n",
  803. "\n",
  804. " Mean performance on test set: 33.425625\n",
  805. "With standard deviation: 4.721660\n",
  806. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 564.24it/s]\n",
  807. "\n",
  808. "\n",
  809. " #--- calculating kernel matrix when height = 5.0 ---#\n",
  810. "\n",
  811. " Loading dataset from file...\n",
  812. "\n",
  813. " Calculating kernel matrix, this could take a while...\n",
  814. "\n",
  815. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 5.6617820262908936 seconds ---\n",
  816. "[[12. 6. 12. ... 0. 0. 6.]\n",
  817. " [ 6. 18. 6. ... 0. 0. 12.]\n",
  818. " [12. 6. 12. ... 0. 0. 6.]\n",
  819. " ...\n",
  820. " [ 0. 0. 0. ... 60. 42. 0.]\n",
  821. " [ 0. 0. 0. ... 42. 60. 6.]\n",
  822. " [ 6. 12. 6. ... 0. 6. 60.]]\n",
  823. "\n",
  824. " Starting calculate accuracy/rmse...\n",
  825. "calculate performance: 99%|█████████▉| 993/1000 [00:01<00:00, 519.25it/s]\n",
  826. " Mean performance on train set: 30.041068\n",
  827. "With standard deviation: 1.018451\n",
  828. "\n",
  829. " Mean performance on test set: 33.406717\n",
  830. "With standard deviation: 4.726409\n",
  831. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 548.91it/s]\n",
  832. "\n",
  833. "\n",
  834. " #--- calculating kernel matrix when height = 6.0 ---#\n",
  835. "\n",
  836. " Loading dataset from file...\n",
  837. "\n",
  838. " Calculating kernel matrix, this could take a while...\n",
  839. "\n",
  840. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 6.148027420043945 seconds ---\n",
  841. "[[14. 7. 14. ... 0. 0. 7.]\n",
  842. " [ 7. 21. 7. ... 0. 0. 14.]\n",
  843. " [14. 7. 14. ... 0. 0. 7.]\n",
  844. " ...\n",
  845. " [ 0. 0. 0. ... 70. 49. 0.]\n",
  846. " [ 0. 0. 0. ... 49. 70. 7.]\n",
  847. " [ 7. 14. 7. ... 0. 7. 70.]]\n",
  848. "\n",
  849. " Starting calculate accuracy/rmse...\n",
  850. "calculate performance: 98%|█████████▊| 985/1000 [00:01<00:00, 498.31it/s]\n",
  851. " Mean performance on train set: 29.905596\n",
  852. "With standard deviation: 0.782179\n",
  853. "\n",
  854. " Mean performance on test set: 33.418992\n",
  855. "With standard deviation: 4.730753\n",
  856. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 534.86it/s]\n",
  857. "\n",
  858. "\n",
  859. " #--- calculating kernel matrix when height = 7.0 ---#\n",
  860. "\n",
  861. " Loading dataset from file...\n",
  862. "\n",
  863. " Calculating kernel matrix, this could take a while...\n",
  864. "\n",
  865. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 7.603543519973755 seconds ---\n",
  866. "[[16. 8. 16. ... 0. 0. 8.]\n",
  867. " [ 8. 24. 8. ... 0. 0. 16.]\n",
  868. " [16. 8. 16. ... 0. 0. 8.]\n",
  869. " ...\n",
  870. " [ 0. 0. 0. ... 80. 56. 0.]\n",
  871. " [ 0. 0. 0. ... 56. 80. 8.]\n",
  872. " [ 8. 16. 8. ... 0. 8. 80.]]\n",
  873. "\n",
  874. " Starting calculate accuracy/rmse...\n",
  875. "calculate performance: 95%|█████████▌| 953/1000 [00:01<00:00, 586.15it/s]\n",
  876. " Mean performance on train set: 30.175921\n",
  877. "With standard deviation: 1.103820\n",
  878. "\n",
  879. " Mean performance on test set: 33.424820\n",
  880. "With standard deviation: 4.721550\n",
  881. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 546.00it/s]\n",
  882. "\n",
  883. "\n",
  884. " #--- calculating kernel matrix when height = 8.0 ---#\n",
  885. "\n",
  886. " Loading dataset from file...\n",
  887. "\n",
  888. " Calculating kernel matrix, this could take a while...\n",
  889. "\n",
  890. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 7.972221612930298 seconds ---\n",
  891. "[[18. 9. 18. ... 0. 0. 9.]\n",
  892. " [ 9. 27. 9. ... 0. 0. 18.]\n",
  893. " [18. 9. 18. ... 0. 0. 9.]\n",
  894. " ...\n",
  895. " [ 0. 0. 0. ... 90. 63. 0.]\n",
  896. " [ 0. 0. 0. ... 63. 90. 9.]\n",
  897. " [ 9. 18. 9. ... 0. 9. 90.]]\n",
  898. "\n",
  899. " Starting calculate accuracy/rmse...\n",
  900. "calculate performance: 98%|█████████▊| 980/1000 [00:01<00:00, 490.30it/s]\n",
  901. " Mean performance on train set: 30.136537\n",
  902. "With standard deviation: 1.074854\n",
  903. "\n",
  904. " Mean performance on test set: 33.412196\n",
  905. "With standard deviation: 4.715539\n",
  906. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 536.66it/s]\n",
  907. "\n",
  908. "\n",
  909. " #--- calculating kernel matrix when height = 9.0 ---#\n",
  910. "\n",
  911. " Loading dataset from file...\n",
  912. "\n",
  913. " Calculating kernel matrix, this could take a while...\n",
  914. "\n",
  915. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 9.070842504501343 seconds ---\n",
  916. "[[ 20. 10. 20. ... 0. 0. 10.]\n",
  917. " [ 10. 30. 10. ... 0. 0. 20.]\n",
  918. " [ 20. 10. 20. ... 0. 0. 10.]\n",
  919. " ...\n",
  920. " [ 0. 0. 0. ... 100. 70. 0.]\n",
  921. " [ 0. 0. 0. ... 70. 100. 10.]\n",
  922. " [ 10. 20. 10. ... 0. 10. 100.]]\n",
  923. "\n",
  924. " Starting calculate accuracy/rmse...\n",
  925. "calculate performance: 98%|█████████▊| 975/1000 [00:01<00:00, 527.13it/s]\n",
  926. " Mean performance on train set: 30.032887\n",
  927. "With standard deviation: 0.921065\n",
  928. "\n",
  929. " Mean performance on test set: 33.407050\n",
  930. "With standard deviation: 4.731928\n",
  931. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 600.62it/s]\n",
  932. "\n",
  933. "\n",
  934. " #--- calculating kernel matrix when height = 10.0 ---#\n",
  935. "\n",
  936. " Loading dataset from file...\n",
  937. "\n",
  938. " Calculating kernel matrix, this could take a while...\n",
  939. "\n",
  940. " --- Weisfeiler-Lehman edge kernel matrix of size 185 built in 10.02536916732788 seconds ---\n",
  941. "[[ 22. 11. 22. ... 0. 0. 11.]\n",
  942. " [ 11. 33. 11. ... 0. 0. 22.]\n",
  943. " [ 22. 11. 22. ... 0. 0. 11.]\n",
  944. " ...\n",
  945. " [ 0. 0. 0. ... 110. 77. 0.]\n",
  946. " [ 0. 0. 0. ... 77. 110. 11.]\n",
  947. " [ 11. 22. 11. ... 0. 11. 110.]]\n",
  948. "\n",
  949. " Starting calculate accuracy/rmse...\n"
  950. ]
  951. },
  952. {
  953. "name": "stdout",
  954. "output_type": "stream",
  955. "text": [
  956. "calculate performance: 97%|█████████▋| 970/1000 [00:01<00:00, 694.38it/s]\n",
  957. " Mean performance on train set: 29.924232\n",
  958. "With standard deviation: 0.790843\n",
  959. "\n",
  960. " Mean performance on test set: 33.416469\n",
  961. "With standard deviation: 4.731694\n",
  962. "calculate performance: 100%|██████████| 1000/1000 [00:01<00:00, 678.72it/s]\n",
  963. "\n",
  964. "\n",
  965. " height rmse_test std_test rmse_train std_train k_time\n",
  966. "-------- ----------- ---------- ------------ ----------- ---------\n",
  967. " 0 33.4077 4.73272 29.9975 0.90234 0.853002\n",
  968. " 1 33.4235 4.72131 30.1603 1.09423 1.71751\n",
  969. " 2 33.433 4.72441 29.9286 0.787941 2.66032\n",
  970. " 3 33.4073 4.73243 30.0114 0.909674 3.47763\n",
  971. " 4 33.4256 4.72166 30.1842 1.1089 4.54367\n",
  972. " 5 33.4067 4.72641 30.0411 1.01845 5.66178\n",
  973. " 6 33.419 4.73075 29.9056 0.782179 6.14803\n",
  974. " 7 33.4248 4.72155 30.1759 1.10382 7.60354\n",
  975. " 8 33.4122 4.71554 30.1365 1.07485 7.97222\n",
  976. " 9 33.4071 4.73193 30.0329 0.921065 9.07084\n",
  977. " 10 33.4165 4.73169 29.9242 0.790843 10.0254\n"
  978. ]
  979. }
  980. ],
  981. "source": [
  982. "# WL edge kernel\n",
  983. "%load_ext line_profiler\n",
  984. "\n",
  985. "import numpy as np\n",
  986. "import sys\n",
  987. "sys.path.insert(0, \"../\")\n",
  988. "from pygraph.utils.utils import kernel_train_test\n",
  989. "from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel, _wl_subtreekernel_do\n",
  990. "\n",
  991. "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n",
  992. "kernel_file_path = 'kernelmatrices_weisfeilerlehman_subtree_acyclic/'\n",
  993. "\n",
  994. "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', base_kernel = 'edge')\n",
  995. "\n",
  996. "kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n",
  997. " hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)\n",
  998. "\n",
  999. "# %lprun -f _wl_subtreekernel_do \\\n",
  1000. "# kernel_train_test(datafile, kernel_file_path, weisfeilerlehmankernel, kernel_para, \\\n",
  1001. "# hyper_name = 'height', hyper_range = np.linspace(0, 10, 11), normalize = False)"
  1002. ]
  1003. },
  1004. {
  1005. "cell_type": "code",
  1006. "execution_count": null,
  1007. "metadata": {},
  1008. "outputs": [],
  1009. "source": [
  1010. "# results\n",
  1011. "\n",
  1012. "# subtree with y normalization\n",
  1013. " height RMSE_test std_test RMSE_train std_train k_time\n",
  1014. "-------- ----------- ---------- ------------ ----------- --------\n",
  1015. " 0 36.2108 7.33179 38.6059 1.57064 0.379475\n",
  1016. " 1 9.00098 6.37145 6.76379 1.96568 0.844898\n",
  1017. " 2 19.8113 4.04911 5.28757 1.81899 1.35308\n",
  1018. " 3 25.0455 4.94276 2.3274 0.805733 1.81136\n",
  1019. " 4 28.2255 6.5212 0.85156 0.423465 2.23098\n",
  1020. " 5 30.6354 6.73647 3.35947 8.17561 2.71575\n",
  1021. " 6 32.1027 6.85601 3.54105 8.71922 3.11459\n",
  1022. " 7 32.9709 6.89606 6.94372 9.94045 3.55571\n",
  1023. " 8 33.5112 6.90753 6.97339 9.76975 3.79657\n",
  1024. " 9 33.8502 6.91427 11.8345 11.6213 4.41555\n",
  1025. " 10 34.0963 6.93115 11.4257 11.2624 4.94888\n",
  1026. "\n",
  1027. "# subtree without y normalization\n",
  1028. " height RMSE_test std_test RMSE_train std_train k_time\n",
  1029. "-------- ----------- ---------- ------------ ----------- --------\n",
  1030. " 0 15.6859 4.1392 17.6816 0.713183 0.360443\n",
  1031. " 1 7.55046 2.33179 6.27001 0.654734 0.837389\n",
  1032. " 2 9.72847 2.05767 4.45068 0.882129 1.25317\n",
  1033. " 3 11.2961 2.79994 2.27059 0.481516 1.79971\n",
  1034. " 4 12.8083 3.44694 1.07403 0.637823 2.35346\n",
  1035. " 5 14.0179 3.67504 0.700602 0.57264 2.78285\n",
  1036. " 6 14.9184 3.80535 0.691515 0.56462 3.20764\n",
  1037. " 7 15.6295 3.86539 0.691516 0.56462 3.71648\n",
  1038. " 8 16.2144 3.92876 0.691515 0.56462 3.99213\n",
  1039. " 9 16.7257 3.9931 0.691515 0.56462 4.26315\n",
  1040. " 10 17.1864 4.05672 0.691516 0.564621 5.00918\n",
  1041. " \n",
  1042. "# sp\n",
  1043. " height rmse_test std_test rmse_train std_train k_time\n",
  1044. "-------- ----------- ---------- ------------ ----------- --------\n",
  1045. " 0 35.192 4.49577 28.3604 1.35718 13.5041\n",
  1046. " 1 35.1808 4.50045 27.9335 1.44836 26.8292\n",
  1047. " 2 35.1632 4.50205 28.1113 1.50891 40.2356\n",
  1048. " 3 35.1946 4.49801 28.3903 1.36571 54.6704\n",
  1049. " 4 35.1753 4.50111 27.9746 1.46222 67.1522\n",
  1050. " 5 35.1997 4.5071 28.0184 1.45564 80.0881\n",
  1051. " 6 35.1645 4.49849 28.3731 1.60057 92.1925\n",
  1052. " 7 35.1771 4.5009 27.9604 1.45742 105.812\n",
  1053. " 8 35.1968 4.50526 28.1991 1.5149 119.022\n",
  1054. " 9 35.1956 4.50197 28.2665 1.30769 131.228\n",
  1055. " 10 35.1676 4.49723 28.4163 1.61596 144.964\n",
  1056. " \n",
  1057. "# edge\n",
  1058. " height rmse_test std_test rmse_train std_train k_time\n",
  1059. "-------- ----------- ---------- ------------ ----------- ---------\n",
  1060. " 0 33.4077 4.73272 29.9975 0.90234 0.853002\n",
  1061. " 1 33.4235 4.72131 30.1603 1.09423 1.71751\n",
  1062. " 2 33.433 4.72441 29.9286 0.787941 2.66032\n",
  1063. " 3 33.4073 4.73243 30.0114 0.909674 3.47763\n",
  1064. " 4 33.4256 4.72166 30.1842 1.1089 4.54367\n",
  1065. " 5 33.4067 4.72641 30.0411 1.01845 5.66178\n",
  1066. " 6 33.419 4.73075 29.9056 0.782179 6.14803\n",
  1067. " 7 33.4248 4.72155 30.1759 1.10382 7.60354\n",
  1068. " 8 33.4122 4.71554 30.1365 1.07485 7.97222\n",
  1069. " 9 33.4071 4.73193 30.0329 0.921065 9.07084\n",
  1070. " 10 33.4165 4.73169 29.9242 0.790843 10.0254"
  1071. ]
  1072. },
  1073. {
  1074. "cell_type": "code",
  1075. "execution_count": 4,
  1076. "metadata": {
  1077. "scrolled": true
  1078. },
  1079. "outputs": [
  1080. {
  1081. "name": "stdout",
  1082. "output_type": "stream",
  1083. "text": [
  1084. "{'O', 'C'}\n",
  1085. "{'O', 'C'}\n"
  1086. ]
  1087. },
  1088. {
  1089. "ename": "TypeError",
  1090. "evalue": "'int' object is not iterable",
  1091. "output_type": "error",
  1092. "traceback": [
  1093. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  1094. "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
  1095. "\u001b[0;32m<ipython-input-4-e54963002171>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabelset1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabelset2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m \u001b[0mkernel\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mspkernel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mG2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 69\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkernel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  1096. "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py\u001b[0m in \u001b[0;36mspkernel\u001b[0;34m(edge_weight, *args)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mGn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mgetSPGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mG\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m]\u001b[0m \u001b[0;31m# get shortest path graphs of Gn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1097. "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/kernels/spKernel.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 41\u001b[0;31m \u001b[0mGn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m \u001b[0mgetSPGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mG\u001b[0m \u001b[0;32min\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m]\u001b[0m \u001b[0;31m# get shortest path graphs of Gn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 42\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1098. "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/utils.py\u001b[0m in \u001b[0;36mgetSPGraph\u001b[0;34m(G, edge_weight)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0mBorgwardt\u001b[0m \u001b[0mKM\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mKriegel\u001b[0m \u001b[0mHP\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mShortest\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpath\u001b[0m \u001b[0mkernels\u001b[0m \u001b[0mon\u001b[0m \u001b[0mgraphs\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mInData\u001b[0m \u001b[0mMining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFifth\u001b[0m \u001b[0mIEEE\u001b[0m \u001b[0mInternational\u001b[0m \u001b[0mConference\u001b[0m \u001b[0mon\u001b[0m \u001b[0;36m2005\u001b[0m \u001b[0mNov\u001b[0m \u001b[0;36m27\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mIEEE\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \"\"\"\n\u001b[0;32m---> 37\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfloydTransformation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 38\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfloydTransformation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medge_weight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'bond_type'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1099. "\u001b[0;32m/media/ljia/DATA/research-repo/codes/Linlin/py-graph/pygraph/utils/utils.py\u001b[0m in \u001b[0;36mfloydTransformation\u001b[0;34m(G, edge_weight)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0mBorgwardt\u001b[0m \u001b[0mKM\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mKriegel\u001b[0m \u001b[0mHP\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mShortest\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpath\u001b[0m \u001b[0mkernels\u001b[0m \u001b[0mon\u001b[0m \u001b[0mgraphs\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mInData\u001b[0m \u001b[0mMining\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFifth\u001b[0m \u001b[0mIEEE\u001b[0m \u001b[0mInternational\u001b[0m \u001b[0mConference\u001b[0m \u001b[0mon\u001b[0m \u001b[0;36m2005\u001b[0m \u001b[0mNov\u001b[0m \u001b[0;36m27\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0;36m8\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mpp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mIEEE\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 57\u001b[0m \"\"\"\n\u001b[0;32m---> 58\u001b[0;31m \u001b[0mspMatrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloyd_warshall_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0medge_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 59\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGraph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m 
\u001b[0mS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_nodes_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnodes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1100. "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/algorithms/shortest_paths/dense.py\u001b[0m in \u001b[0;36mfloyd_warshall_numpy\u001b[0;34m(G, nodelist, weight)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;31m# nonedges are not given the value 0 as well.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m A = nx.to_numpy_matrix(G, nodelist=nodelist, multigraph_weight=min,\n\u001b[0;32m---> 54\u001b[0;31m weight=weight, nonedge=np.inf)\n\u001b[0m\u001b[1;32m 55\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mA\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0mI\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0midentity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1101. "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/convert_matrix.py\u001b[0m in \u001b[0;36mto_numpy_matrix\u001b[0;34m(G, nodelist, dtype, order, multigraph_weight, weight, nonedge)\u001b[0m\n\u001b[1;32m 446\u001b[0m A = to_numpy_array(G, nodelist=nodelist, dtype=dtype, order=order,\n\u001b[1;32m 447\u001b[0m \u001b[0mmultigraph_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmultigraph_weight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 448\u001b[0;31m nonedge=nonedge)\n\u001b[0m\u001b[1;32m 449\u001b[0m \u001b[0mM\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masmatrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mM\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1102. "\u001b[0;32m/usr/local/lib/python3.5/dist-packages/networkx/convert_matrix.py\u001b[0m in \u001b[0;36mto_numpy_array\u001b[0;34m(G, nodelist, dtype, order, multigraph_weight, weight, nonedge)\u001b[0m\n\u001b[1;32m 1061\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1062\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnodelist\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1063\u001b[0;31m \u001b[0mnodelist\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mG\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1064\u001b[0m \u001b[0mnodeset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodelist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1065\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodelist\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnodeset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  1103. "\u001b[0;31mTypeError\u001b[0m: 'int' object is not iterable"
  1104. ]
  1105. }
  1106. ],
  1107. "source": [
  1108. "import sys\n",
  1109. "import networkx as nx\n",
  1110. "sys.path.insert(0, \"../\")\n",
  1111. "from pygraph.utils.graphfiles import loadDataset\n",
  1112. "from pygraph.kernels.spkernel import spkernel\n",
  1113. "\n",
  1114. "import matplotlib.pyplot as plt\n",
  1115. "\n",
  1116. "\n",
  1117. "def weisfeilerlehman_test(G):\n",
  1118. " '''\n",
  1119. " Weisfeiler-Lehman test of graph isomorphism.\n",
  1120. " '''\n",
  1121. "\n",
  1122. " nx.draw_networkx(G)\n",
  1123. " plt.show()\n",
  1124. " nx.draw_networkx_labels(G, nx.spring_layout(G), labels = nx.get_node_attributes(G,'label'))\n",
  1125. " print(G.nodes(data = True))\n",
  1126. " \n",
  1127. " set_multisets = []\n",
  1128. " for node in G.nodes(data = True):\n",
  1129. " # Multiset-label determination.\n",
  1130. " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n",
  1131. " # sorting each multiset\n",
  1132. " multiset.sort()\n",
  1133. " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n",
  1134. " set_multisets.append(multiset)\n",
  1135. " \n",
  1136. " # label compression\n",
  1137. "# set_multisets.sort() # this is unnecessary\n",
  1138. " set_unique = list(set(set_multisets)) # set of unique multiset labels\n",
  1139. " set_compressed = { value : str(set_unique.index(value)) for value in set_unique } # assign indices as the new labels\n",
  1140. "# print(set_compressed)\n",
  1141. "# print(set_multisets)\n",
  1142. " \n",
  1143. " # relabel nodes with multisets\n",
  1144. " for node in G.nodes(data = True):\n",
  1145. " node[1]['label'] = set_multisets[node[0]]\n",
  1146. " print(' -> ')\n",
  1147. " nx.draw_networkx(G)\n",
  1148. " plt.show()\n",
  1149. " print(G.nodes(data = True))\n",
  1150. "\n",
  1151. " \n",
  1152. " # relabel nodes\n",
  1153. " for node in G.nodes(data = True):\n",
  1154. " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n",
  1155. " \n",
  1156. " print(' -> ')\n",
  1157. " nx.draw_networkx(G)\n",
  1158. " plt.show()\n",
  1159. " print(G.nodes(data = True))\n",
  1160. "\n",
  1161. "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n",
  1162. "G1 = dataset[12]\n",
  1163. "G2 = dataset[55]\n",
  1164. "\n",
  1165. "# init.\n",
  1166. "kernel = 0 # init kernel\n",
  1167. "num_nodes1 = G1.number_of_nodes()\n",
  1168. "num_nodes2 = G2.number_of_nodes()\n",
  1169. "\n",
  1170. "# the first iteration.\n",
  1171. "labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n",
  1172. "labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n",
  1173. "print(labelset1)\n",
  1174. "print(labelset2)\n",
  1175. "kernel += spkernel(G1, G2)\n",
  1176. "print(kernel)\n",
  1177. "\n",
  1178. "\n",
  1179. "\n",
  1180. "for height in range(0, min(num_nodes1, num_nodes2)): #Q how to determine the upper bound of the height?\n",
  1181. " if labelset1 != labelset2:\n",
  1182. " break\n",
  1183. " \n",
  1184. " # Weisfeiler-Lehman test of graph isomorphism.\n",
  1185. " weisfeilerlehman_test(G1)\n",
  1186. " weisfeilerlehman_test(G2)\n",
  1187. " \n",
  1188. " # calculate kernel\n",
  1189. " kernel += spkernel(G1, G2)\n",
  1190. " \n",
  1191. " # get label sets of both graphs\n",
  1192. " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n",
  1193. " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n",
  1194. "# print(labelset1)\n",
  1195. "# print(labelset2)\n",
  1196. "\n",
  1197. "print(kernel)"
  1198. ]
  1199. },
  1200. {
  1201. "cell_type": "code",
  1202. "execution_count": 20,
  1203. "metadata": {
  1204. "scrolled": false
  1205. },
  1206. "outputs": [
  1207. {
  1208. "name": "stdout",
  1209. "output_type": "stream",
  1210. "text": [
  1211. "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'O', 6: 'O'}\n",
  1212. "{0: 'C', 1: 'C', 2: 'C', 3: 'C', 4: 'C', 5: 'C', 6: 'S', 7: 'S'}\n",
  1213. "\n",
  1214. " --- height = 0 --- \n",
  1215. "\n",
  1216. " --- for graph 0 --- \n",
  1217. "\n",
  1218. "labels_ori: ['C', 'C', 'C', 'C', 'C', 'O', 'O']\n",
  1219. "all_labels_ori: {'C', 'O'}\n",
  1220. "num_of_each_label: {'C': 5, 'O': 2}\n",
  1221. "all_num_of_each_label: [{'C': 5, 'O': 2}]\n",
  1222. "num_of_labels: 2\n",
  1223. "all_labels_ori: {'C', 'O'}\n",
  1224. "\n",
  1225. " --- for graph 1 --- \n",
  1226. "\n",
  1227. "labels_ori: ['C', 'C', 'C', 'C', 'C', 'C', 'S', 'S']\n",
  1228. "all_labels_ori: {'C', 'O', 'S'}\n",
  1229. "num_of_each_label: {'C': 6, 'S': 2}\n",
  1230. "all_num_of_each_label: [{'C': 5, 'O': 2}, {'C': 6, 'S': 2}]\n",
  1231. "num_of_labels: 2\n",
  1232. "all_labels_ori: {'C', 'O', 'S'}\n",
  1233. "\n",
  1234. " all_num_of_labels_occured: 3\n",
  1235. "\n",
  1236. " --- calculating kernel matrix ---\n",
  1237. "\n",
  1238. " labels: {'C', 'O'}\n",
  1239. "vector1: [[5 2]]\n",
  1240. "vector2: [[5 2]]\n",
  1241. "Kmatrix: [[ 29. 0.]\n",
  1242. " [ 0. 0.]]\n",
  1243. "\n",
  1244. " labels: {'C', 'O', 'S'}\n",
  1245. "vector1: [[5 2 0]]\n",
  1246. "vector2: [[6 0 2]]\n",
  1247. "Kmatrix: [[ 29. 30.]\n",
  1248. " [ 30. 0.]]\n",
  1249. "\n",
  1250. " labels: {'C', 'S'}\n",
  1251. "vector1: [[6 2]]\n",
  1252. "vector2: [[6 2]]\n",
  1253. "Kmatrix: [[ 29. 30.]\n",
  1254. " [ 30. 40.]]\n",
  1255. "\n",
  1256. " --- height = 1 --- \n",
  1257. "\n",
  1258. " --- for graph 0 --- \n",
  1259. "\n",
  1260. "multiset: ['CC', 'CC', 'CCO', 'CCO', 'COO', 'OCC', 'OCC']\n",
  1261. "set_unique: ['OCC', 'COO', 'CCO', 'CC']\n",
  1262. "set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n",
  1263. "all_set_compressed: {'OCC': '4', 'COO': '5', 'CCO': '6', 'CC': '7'}\n",
  1264. "num_of_labels_occured: 7\n",
  1265. "\n",
  1266. " compressed labels: {0: '7', 1: '7', 2: '6', 3: '6', 4: '5', 5: '4', 6: '4'}\n",
  1267. "labels_comp: ['7', '7', '6', '6', '5', '4', '4']\n",
  1268. "all_labels_ori: {'5', '4', '6', '7'}\n",
  1269. "num_of_each_label: {'5': 1, '4': 2, '6': 2, '7': 2}\n",
  1270. "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}]\n",
  1271. "\n",
  1272. " --- for graph 1 --- \n",
  1273. "\n",
  1274. "multiset: ['CC', 'CC', 'CC', 'CCS', 'CCS', 'CCSS', 'SCC', 'SCC']\n",
  1275. "set_unique: ['SCC', 'CC', 'CCS', 'CCSS']\n",
  1276. "set_compressed: {'SCC': '8', 'CC': '7', 'CCS': '9', 'CCSS': '10'}\n",
  1277. "all_set_compressed: {'SCC': '8', 'COO': '5', 'CCS': '9', 'OCC': '4', 'CCO': '6', 'CCSS': '10', 'CC': '7'}\n",
  1278. "num_of_labels_occured: 10\n",
  1279. "\n",
  1280. " compressed labels: {0: '7', 1: '7', 2: '7', 3: '9', 4: '9', 5: '10', 6: '8', 7: '8'}\n",
  1281. "labels_comp: ['7', '7', '7', '9', '9', '10', '8', '8']\n",
  1282. "all_labels_ori: {'10', '4', '7', '9', '6', '5', '8'}\n",
  1283. "num_of_each_label: {'10': 1, '9': 2, '7': 3, '8': 2}\n",
  1284. "all_num_of_each_label: [{'5': 1, '4': 2, '6': 2, '7': 2}, {'10': 1, '9': 2, '7': 3, '8': 2}]\n",
  1285. "\n",
  1286. " all_num_of_labels_occured: 10\n",
  1287. "\n",
  1288. " --- calculating kernel matrix ---\n",
  1289. "\n",
  1290. " labels: {'5', '4', '6', '7'}\n",
  1291. "vector1: [[1 2 2 2]]\n",
  1292. "vector2: [[1 2 2 2]]\n",
  1293. "\n",
  1294. " labels: {'10', '4', '7', '9', '6', '5', '8'}\n",
  1295. "vector1: [[0 2 2 0 2 1 0]]\n",
  1296. "vector2: [[1 0 3 2 0 0 2]]\n",
  1297. "\n",
  1298. " labels: {'8', '10', '7', '9'}\n",
  1299. "vector1: [[2 1 3 2]]\n",
  1300. "vector2: [[2 1 3 2]]\n",
  1301. "\n",
  1302. " Kmatrix: [[ 42. 36.]\n",
  1303. " [ 36. 58.]]\n",
  1304. "\n",
  1305. " --- height = 2 --- \n",
  1306. "\n",
  1307. " --- for graph 0 --- \n",
  1308. "\n",
  1309. "multiset: ['76', '76', '647', '647', '544', '456', '456']\n",
  1310. "set_unique: ['647', '76', '456', '544']\n",
  1311. "set_compressed: {'647': '11', '76': '12', '544': '14', '456': '13'}\n",
  1312. "all_set_compressed: {'647': '11', '76': '12', '456': '13', '544': '14'}\n",
  1313. "num_of_labels_occured: 14\n",
  1314. "\n",
  1315. " compressed labels: {0: '12', 1: '12', 2: '11', 3: '11', 4: '14', 5: '13', 6: '13'}\n",
  1316. "labels_comp: ['12', '12', '11', '11', '14', '13', '13']\n",
  1317. "all_labels_ori: {'14', '12', '11', '13'}\n",
  1318. "num_of_each_label: {'14': 1, '13': 2, '12': 2, '11': 2}\n",
  1319. "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}]\n",
  1320. "\n",
  1321. " --- for graph 1 --- \n",
  1322. "\n",
  1323. "multiset: ['79', '79', '710', '978', '978', '10788', '8109', '8109']\n",
  1324. "set_unique: ['710', '8109', '79', '10788', '978']\n",
  1325. "set_compressed: {'710': '15', '79': '17', '8109': '16', '978': '19', '10788': '18'}\n",
  1326. "all_set_compressed: {'710': '15', '79': '17', '978': '19', '10788': '18', '8109': '16', '456': '13', '544': '14', '647': '11', '76': '12'}\n",
  1327. "num_of_labels_occured: 19\n",
  1328. "\n",
  1329. " compressed labels: {0: '17', 1: '17', 2: '15', 3: '19', 4: '19', 5: '18', 6: '16', 7: '16'}\n",
  1330. "labels_comp: ['17', '17', '15', '19', '19', '18', '16', '16']\n",
  1331. "all_labels_ori: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n",
  1332. "num_of_each_label: {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}\n",
  1333. "all_num_of_each_label: [{'14': 1, '13': 2, '12': 2, '11': 2}, {'15': 1, '17': 2, '19': 2, '16': 2, '18': 1}]\n",
  1334. "\n",
  1335. " all_num_of_labels_occured: 19\n",
  1336. "\n",
  1337. " --- calculating kernel matrix ---\n",
  1338. "\n",
  1339. " labels: {'14', '12', '11', '13'}\n",
  1340. "vector1: [[1 2 2 2]]\n",
  1341. "vector2: [[1 2 2 2]]\n",
  1342. "\n",
  1343. " labels: {'18', '19', '12', '13', '17', '11', '14', '16', '15'}\n",
  1344. "vector1: [[0 0 2 2 0 2 1 0 0]]\n",
  1345. "vector2: [[1 2 0 0 2 0 0 2 1]]\n",
  1346. "\n",
  1347. " labels: {'18', '17', '15', '16', '19'}\n",
  1348. "vector1: [[1 2 1 2 2]]\n",
  1349. "vector2: [[1 2 1 2 2]]\n",
  1350. "\n",
  1351. " Kmatrix: [[ 55. 36.]\n",
  1352. " [ 36. 72.]]\n",
  1353. "\n",
  1354. " --- Weisfeiler-Lehman subtree kernel built in 0.0034377574920654297 seconds ---\n"
  1355. ]
  1356. },
  1357. {
  1358. "data": {
  1359. "text/plain": [
  1360. "array([[ 55., 36.],\n",
  1361. " [ 36., 72.]])"
  1362. ]
  1363. },
  1364. "execution_count": 20,
  1365. "metadata": {},
  1366. "output_type": "execute_result"
  1367. }
  1368. ],
  1369. "source": [
  1370. "# test of WL subtree kernel on many graphs\n",
  1371. "\n",
  1372. "import sys\n",
  1373. "import pathlib\n",
  1374. "from collections import Counter\n",
  1375. "sys.path.insert(0, \"../\")\n",
  1376. "\n",
  1377. "import networkx as nx\n",
  1378. "import numpy as np\n",
  1379. "import time\n",
  1380. "\n",
  1381. "from pygraph.kernels.spkernel import spkernel\n",
  1382. "from pygraph.kernels.pathKernel import pathkernel\n",
  1383. "\n",
  1384. "def weisfeilerlehmankernel(*args, height = 0, base_kernel = 'subtree'):\n",
  1385. " \"\"\"Calculate Weisfeiler-Lehman kernels between graphs.\n",
  1386. " \n",
  1387. " Parameters\n",
  1388. " ----------\n",
  1389. " Gn : List of NetworkX graph\n",
  1390. " List of graphs between which the kernels are calculated.\n",
  1391. " /\n",
  1392. " G1, G2 : NetworkX graphs\n",
  1393. " 2 graphs between which the kernel is calculated.\n",
  1394. " \n",
  1395. " height : subtree height\n",
  1396. " \n",
  1397. " base_kernel : base kernel used in each iteration of WL kernel\n",
  1398. " the default base kernel is subtree kernel\n",
  1399. " \n",
  1400. " Return\n",
  1401. " ------\n",
  1402. " Kmatrix/Kernel : Numpy matrix/int\n",
  1403. " Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs. / Weisfeiler-Lehman Kernel between 2 graphs.\n",
  1404. " \n",
  1405. " Notes\n",
  1406. " -----\n",
  1407. " This function now supports WL subtree kernel and WL shortest path kernel.\n",
  1408. " \n",
  1409. " References\n",
  1410. " ----------\n",
  1411. " [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 2011;12(Sep):2539-61.\n",
  1412. " \"\"\"\n",
  1413. " if len(args) == 1: # for a list of graphs\n",
  1414. "\n",
  1415. "# print(args)\n",
  1416. " start_time = time.time()\n",
  1417. " \n",
  1418. " # for WL subtree kernel\n",
  1419. " if base_kernel == 'subtree': \n",
  1420. " Kmatrix = _wl_subtreekernel_do(args[0], height = height, base_kernel = 'subtree')\n",
  1421. " \n",
  1422. " # for WL edge kernel\n",
  1423. " elif base_kernel == 'edge':\n",
  1424. " print('edge')\n",
  1425. " \n",
  1426. " # for WL shortest path kernel\n",
  1427. " elif base_kernel == 'sp':\n",
  1428. " Gn = args[0]\n",
  1429. " Kmatrix = np.zeros((len(Gn), len(Gn)))\n",
  1430. " \n",
  1431. " for i in range(0, len(Gn)):\n",
  1432. " for j in range(i, len(Gn)):\n",
  1433. " Kmatrix[i][j] = _weisfeilerlehmankernel_do(Gn[i], Gn[j])\n",
  1434. " Kmatrix[j][i] = Kmatrix[i][j]\n",
  1435. "\n",
  1436. " print(\"\\n --- Weisfeiler-Lehman %s kernel matrix of size %d built in %s seconds ---\" % (base_kernel, len(args[0]), (time.time() - start_time)))\n",
  1437. " \n",
  1438. " return Kmatrix\n",
  1439. " \n",
  1440. " else: # for only 2 graphs\n",
  1441. " \n",
  1442. " start_time = time.time()\n",
  1443. " \n",
  1444. " # for WL subtree kernel\n",
  1445. " if base_kernel == 'subtree':\n",
  1446. " \n",
  1447. " args = [args[0], args[1]]\n",
  1448. "# print(args)\n",
  1449. " kernel = _wl_subtreekernel_do(args, height = height, base_kernel = 'subtree')\n",
  1450. " \n",
  1451. " # for WL edge kernel\n",
  1452. " elif base_kernel == 'edge':\n",
  1453. " print('edge')\n",
  1454. " \n",
  1455. " # for WL shortest path kernel\n",
  1456. " elif base_kernel == 'sp':\n",
  1457. " \n",
  1458. "\n",
  1459. " kernel = _pathkernel_do(args[0], args[1])\n",
  1460. "\n",
  1461. " print(\"\\n --- Weisfeiler-Lehman %s kernel built in %s seconds ---\" % (base_kernel, time.time() - start_time))\n",
  1462. " \n",
  1463. " return kernel\n",
  1464. " \n",
  1465. " \n",
  1466. "def _weisfeilerlehmankernel_do(G1, G2):\n",
  1467. " \"\"\"Calculate Weisfeiler-Lehman kernels between 2 graphs. This kernel uses the shortest path kernel to calculate the kernel between two graphs in each iteration.\n",
  1468. " \n",
  1469. " Parameters\n",
  1470. " ----------\n",
  1471. " G1, G2 : NetworkX graphs\n",
  1472. " 2 graphs between which the kernel is calculated.\n",
  1473. " \n",
  1474. " Return\n",
  1475. " ------\n",
  1476. " Kernel : int\n",
  1477. " Weisfeiler-Lehman Kernel between 2 graphs.\n",
  1478. " \"\"\"\n",
  1479. " \n",
  1480. " # init.\n",
  1481. " kernel = 0 # init kernel\n",
  1482. " num_nodes1 = G1.number_of_nodes()\n",
  1483. " num_nodes2 = G2.number_of_nodes()\n",
  1484. " height = 12 #min(num_nodes1, num_nodes2)) #Q how to determine the upper bound of the height?\n",
  1485. " \n",
  1486. " # the first iteration.\n",
  1487. " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n",
  1488. " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n",
  1489. " kernel += pathkernel(G1, G2) # change your base kernel here (and one more below)\n",
  1490. " \n",
  1491. " for h in range(0, height):\n",
  1492. "# if labelset1 != labelset2:\n",
  1493. "# break\n",
  1494. "\n",
  1495. " # Weisfeiler-Lehman test of graph isomorphism.\n",
  1496. " relabel(G1)\n",
  1497. " relabel(G2)\n",
  1498. "\n",
  1499. " # calculate kernel\n",
  1500. " kernel += pathkernel(G1, G2) # change your base kernel here (and one more before)\n",
  1501. "\n",
  1502. " # get label sets of both graphs\n",
  1503. " labelset1 = { G1.nodes(data = True)[i]['label'] for i in range(num_nodes1) }\n",
  1504. " labelset2 = { G2.nodes(data = True)[i]['label'] for i in range(num_nodes2) }\n",
  1505. " \n",
  1506. " return kernel\n",
  1507. "\n",
  1508. "\n",
  1509. "def relabel(G):\n",
  1510. " '''\n",
  1511. " Relabel nodes in graph G in one iteration of the 1-dim. WL test of graph isomorphism.\n",
  1512. " \n",
  1513. " Parameters\n",
  1514. " ----------\n",
  1515. " G : NetworkX graph\n",
  1516. " The graph whose nodes are relabeled.\n",
  1517. " '''\n",
  1518. " \n",
  1519. " # get the set of original labels\n",
  1520. " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n",
  1521. " print(labels_ori)\n",
  1522. " num_of_each_label = dict(Counter(labels_ori))\n",
  1523. " print(num_of_each_label)\n",
  1524. " num_of_labels = len(num_of_each_label)\n",
  1525. " print(num_of_labels)\n",
  1526. " \n",
  1527. " set_multisets = []\n",
  1528. " for node in G.nodes(data = True):\n",
  1529. " # Multiset-label determination.\n",
  1530. " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n",
  1531. " # sorting each multiset\n",
  1532. " multiset.sort()\n",
  1533. " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n",
  1534. " set_multisets.append(multiset)\n",
  1535. " print(set_multisets)\n",
  1536. " \n",
  1537. " # label compression\n",
  1538. "# set_multisets.sort() # this is unnecessary\n",
  1539. " set_unique = list(set(set_multisets)) # set of unique multiset labels\n",
  1540. " print(set_unique)\n",
  1541. " set_compressed = { value : str(set_unique.index(value) + num_of_labels + 1) for value in set_unique } # assign new labels\n",
  1542. " print(set_compressed)\n",
  1543. " \n",
  1544. " # relabel nodes\n",
  1545. "# nx.relabel_nodes(G, set_compressed, copy = False)\n",
  1546. " for node in G.nodes(data = True):\n",
  1547. " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n",
  1548. " print(nx.get_node_attributes(G, 'label'))\n",
  1549. "\n",
  1550. " # get the set of compressed labels\n",
  1551. " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n",
  1552. " print(labels_comp)\n",
  1553. " num_of_each_label.update(dict(Counter(labels_comp)))\n",
  1554. " print(num_of_each_label)\n",
  1555. " \n",
  1556. " \n",
  1557. "def _wl_subtreekernel_do(*args, height = 0, base_kernel = 'subtree'):\n",
  1558. " \"\"\"Calculate Weisfeiler-Lehman subtree kernels between graphs.\n",
  1559. " \n",
  1560. " Parameters\n",
  1561. " ----------\n",
  1562. " Gn : List of NetworkX graph\n",
  1563. " List of graphs between which the kernels are calculated.\n",
  1564. " \n",
  1565. " Return\n",
  1566. " ------\n",
  1567. " Kmatrix/Kernel : Numpy matrix/int\n",
  1568. " Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.\n",
  1569. " \"\"\"\n",
  1570. " \n",
  1571. "# print(args)\n",
  1572. " Gn = args[0]\n",
  1573. "# print(Gn)\n",
  1574. "\n",
  1575. " Kmatrix = np.zeros((len(Gn), len(Gn)))\n",
  1576. " all_num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs\n",
  1577. " \n",
  1578. " # initial for height = 0\n",
  1579. " print('\\n --- height = 0 --- ')\n",
  1580. " all_labels_ori = set() # all unique original labels in all graphs in this iteration\n",
  1581. " all_num_of_each_label = [] # number of occurrences of each label in each graph in this iteration\n",
  1582. " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n",
  1583. " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n",
  1584. "\n",
  1585. " # for each graph\n",
  1586. " for idx, G in enumerate(Gn):\n",
  1587. " # get the set of original labels\n",
  1588. " print('\\n --- for graph %d --- \\n' % (idx))\n",
  1589. " labels_ori = list(nx.get_node_attributes(G, 'label').values())\n",
  1590. " print('labels_ori: %s' % (labels_ori))\n",
  1591. " all_labels_ori.update(labels_ori)\n",
  1592. " print('all_labels_ori: %s' % (all_labels_ori))\n",
  1593. " num_of_each_label = dict(Counter(labels_ori)) # number of occurrences of each label in the graph\n",
  1594. " print('num_of_each_label: %s' % (num_of_each_label))\n",
  1595. " all_num_of_each_label.append(num_of_each_label)\n",
  1596. " print('all_num_of_each_label: %s' % (all_num_of_each_label))\n",
  1597. " num_of_labels = len(num_of_each_label) # number of all unique labels\n",
  1598. " print('num_of_labels: %s' % (num_of_labels))\n",
  1599. " \n",
  1600. "\n",
  1601. " all_labels_ori.update(labels_ori)\n",
  1602. " print('all_labels_ori: %s' % (all_labels_ori))\n",
  1603. " \n",
  1604. " all_num_of_labels_occured += len(all_labels_ori)\n",
  1605. " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n",
  1606. " \n",
  1607. " # calculate subtree kernel with the 0th iteration and add it to the final kernel\n",
  1608. " print('\\n --- calculating kernel matrix ---')\n",
  1609. " for i in range(0, len(Gn)):\n",
  1610. " for j in range(i, len(Gn)):\n",
  1611. " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n",
  1612. " print('\\n labels: %s' % (labels))\n",
  1613. " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n",
  1614. " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n",
  1615. " print('vector1: %s' % (vector1))\n",
  1616. " print('vector2: %s' % (vector2))\n",
  1617. " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n",
  1618. " Kmatrix[j][i] = Kmatrix[i][j]\n",
  1619. " print('Kmatrix: %s' % (Kmatrix))\n",
  1620. "\n",
  1621. " \n",
  1622. " # iterate each height\n",
  1623. " for h in range(1, height + 1):\n",
  1624. " print('\\n --- height = %d --- ' % (h))\n",
  1625. " all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration\n",
  1626. " num_of_labels_occured = all_num_of_labels_occured # number of the set of letters that occur before as node labels at least once in all graphs\n",
  1627. " all_labels_ori = set()\n",
  1628. " all_num_of_each_label = []\n",
  1629. " \n",
  1630. " # for each graph\n",
  1631. " for idx, G in enumerate(Gn):\n",
  1632. "# # get the set of original labels\n",
  1633. " print('\\n --- for graph %d --- \\n' % (idx))\n",
  1634. "# labels_ori = list(nx.get_node_attributes(G, 'label').values())\n",
  1635. "# print('labels_ori: %s' % (labels_ori))\n",
  1636. "# num_of_each_label = dict(Counter(labels_ori)) # number of occurence of each label in graph\n",
  1637. "# print('num_of_each_label: %s' % (num_of_each_label))\n",
  1638. "# num_of_labels = len(num_of_each_label) # number of all unique labels\n",
  1639. "# print('num_of_labels: %s' % (num_of_labels))\n",
  1640. " \n",
  1641. "# all_labels_ori.update(labels_ori)\n",
  1642. "# print('all_labels_ori: %s' % (all_labels_ori))\n",
  1643. "# # num_of_labels_occured += num_of_labels #@todo not precise\n",
  1644. "# num_of_labels_occured = all_num_of_labels_occured + len(all_labels_ori) + len(all_set_compressed)\n",
  1645. "# print('num_of_labels_occured: %s' % (num_of_labels_occured))\n",
  1646. " \n",
  1647. " set_multisets = []\n",
  1648. " for node in G.nodes(data = True):\n",
  1649. " # Multiset-label determination.\n",
  1650. " multiset = [ G.node[neighbors]['label'] for neighbors in G[node[0]] ]\n",
  1651. " # sorting each multiset\n",
  1652. " multiset.sort()\n",
  1653. " multiset = node[1]['label'] + ''.join(multiset) # concatenate to a string and add the prefix \n",
  1654. " set_multisets.append(multiset)\n",
  1655. " print('multiset: %s' % (set_multisets))\n",
  1656. "\n",
  1657. " # label compression\n",
  1658. " # set_multisets.sort() # this is unnecessary\n",
  1659. " set_unique = list(set(set_multisets)) # set of unique multiset labels\n",
  1660. " print('set_unique: %s' % (set_unique))\n",
  1661. " # a dictionary mapping original labels to new ones. \n",
  1662. " set_compressed = {}\n",
  1663. " # if a label occurred before, assign its former compressed label; otherwise assign the number of labels that have occurred + 1 as the compressed label \n",
  1664. " for value in set_unique:\n",
  1665. " if value in all_set_compressed.keys():\n",
  1666. " set_compressed.update({ value : all_set_compressed[value] })\n",
  1667. " else:\n",
  1668. " set_compressed.update({ value : str(num_of_labels_occured + 1) })\n",
  1669. " num_of_labels_occured += 1\n",
  1670. "# set_compressed = { value : (all_set_compressed[value] if value in all_set_compressed.keys() else str(set_unique.index(value) + num_of_labels_occured + 1)) for value in set_unique }\n",
  1671. " print('set_compressed: %s' % (set_compressed))\n",
  1672. " \n",
  1673. " all_set_compressed.update(set_compressed)\n",
  1674. " print('all_set_compressed: %s' % (all_set_compressed))\n",
  1675. "# num_of_labels_occured += len(set_compressed) #@todo not precise\n",
  1676. " print('num_of_labels_occured: %s' % (num_of_labels_occured))\n",
  1677. " \n",
  1678. " # relabel nodes\n",
  1679. " # nx.relabel_nodes(G, set_compressed, copy = False)\n",
  1680. " for node in G.nodes(data = True):\n",
  1681. " node[1]['label'] = set_compressed[set_multisets[node[0]]]\n",
  1682. " print('\\n compressed labels: %s' % (nx.get_node_attributes(G, 'label')))\n",
  1683. "\n",
  1684. " # get the set of compressed labels\n",
  1685. " labels_comp = list(nx.get_node_attributes(G, 'label').values())\n",
  1686. " print('labels_comp: %s' % (labels_comp))\n",
  1687. " all_labels_ori.update(labels_comp)\n",
  1688. " print('all_labels_ori: %s' % (all_labels_ori))\n",
  1689. " num_of_each_label = dict(Counter(labels_comp))\n",
  1690. " print('num_of_each_label: %s' % (num_of_each_label))\n",
  1691. " all_num_of_each_label.append(num_of_each_label)\n",
  1692. " print('all_num_of_each_label: %s' % (all_num_of_each_label))\n",
  1693. " \n",
  1694. " all_num_of_labels_occured += len(all_labels_ori)\n",
  1695. " print('\\n all_num_of_labels_occured: %s' % (all_num_of_labels_occured))\n",
  1696. " \n",
  1697. " # calculate subtree kernel with h iterations and add it to the final kernel\n",
  1698. " print('\\n --- calculating kernel matrix ---')\n",
  1699. " for i in range(0, len(Gn)):\n",
  1700. " for j in range(i, len(Gn)):\n",
  1701. " labels = set(list(all_num_of_each_label[i].keys()) + list(all_num_of_each_label[j].keys()))\n",
  1702. " print('\\n labels: %s' % (labels))\n",
  1703. " vector1 = np.matrix([ (all_num_of_each_label[i][label] if (label in all_num_of_each_label[i].keys()) else 0) for label in labels ])\n",
  1704. " vector2 = np.matrix([ (all_num_of_each_label[j][label] if (label in all_num_of_each_label[j].keys()) else 0) for label in labels ])\n",
  1705. " print('vector1: %s' % (vector1))\n",
  1706. " print('vector2: %s' % (vector2))\n",
  1707. " Kmatrix[i][j] += np.dot(vector1, vector2.transpose())\n",
  1708. " Kmatrix[j][i] = Kmatrix[i][j]\n",
  1709. " \n",
  1710. " print('\\n Kmatrix: %s' % (Kmatrix))\n",
  1711. "\n",
  1712. " return Kmatrix\n",
  1713. "\n",
  1714. " \n",
  1715. "# main\n",
  1716. "import sys\n",
  1717. "from collections import Counter\n",
  1718. "import networkx as nx\n",
  1719. "sys.path.insert(0, \"../\")\n",
  1720. "from pygraph.utils.graphfiles import loadDataset\n",
  1721. "from pygraph.kernels.spkernel import spkernel\n",
  1722. "\n",
  1723. "dataset, y = loadDataset(\"../../../../datasets/acyclic/Acyclic/dataset_bps.ds\")\n",
  1724. "G1 = dataset[15]\n",
  1725. "print(nx.get_node_attributes(G1, 'label'))\n",
  1726. "G2 = dataset[80]\n",
  1727. "print(nx.get_node_attributes(G2, 'label'))\n",
  1728. "\n",
  1729. "weisfeilerlehmankernel(G1, G2, height = 2)\n",
  1730. "# Kmatrix = weisfeilerlehmankernel(G1, G2)"
  1731. ]
  1732. }
  1733. ],
  1734. "metadata": {
  1735. "kernelspec": {
  1736. "display_name": "Python 3",
  1737. "language": "python",
  1738. "name": "python3"
  1739. },
  1740. "language_info": {
  1741. "codemirror_mode": {
  1742. "name": "ipython",
  1743. "version": 3
  1744. },
  1745. "file_extension": ".py",
  1746. "mimetype": "text/x-python",
  1747. "name": "python",
  1748. "nbconvert_exporter": "python",
  1749. "pygments_lexer": "ipython3",
  1750. "version": "3.5.2"
  1751. }
  1752. },
  1753. "nbformat": 4,
  1754. "nbformat_minor": 2
  1755. }

A Python package for graph kernels, graph edit distances and the graph pre-image problem.