You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

run_cyclicpatternkernel.ipynb 116 kB


  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [
  8. {
  9. "name": "stdout",
  10. "output_type": "stream",
  11. "text": [
  12. "\n",
  13. " --- This is a classification problem ---\n",
  14. "\n",
  15. "\n",
  16. " #--- calculating kernel matrix when cycle_bound = 0.0 ---#\n",
  17. "\n",
  18. " Loading dataset from file...\n",
  19. "\n",
  20. " Calculating kernel matrix, this could take a while...\n",
  21. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 373.39it/s]\n",
  22. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 26367.08it/s]\n",
  23. "\n",
  24. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.18705153465270996 seconds ---\n",
  25. "[[0. 0. 0. ... 0. 0. 0.]\n",
  26. " [0. 0. 0. ... 0. 0. 0.]\n",
  27. " [0. 0. 0. ... 0. 0. 0.]\n",
  28. " ...\n",
  29. " [0. 0. 0. ... 0. 0. 0.]\n",
  30. " [0. 0. 0. ... 0. 0. 0.]\n",
  31. " [0. 0. 0. ... 0. 0. 0.]]\n",
  32. "\n",
  33. " Starting calculate accuracy/rmse...\n",
  34. "calculate performance: 83%|████████▎ | 834/1000 [00:00<00:00, 2077.02it/s]\n",
  35. " Mean performance on train set: 0.549180\n",
  36. "With standard deviation: 0.016798\n",
  37. "\n",
  38. " Mean performance on test set: 0.642857\n",
  39. "With standard deviation: 0.146385\n",
  40. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2083.52it/s]\n",
  41. "\n",
  42. "\n",
  43. " #--- calculating kernel matrix when cycle_bound = 50.0 ---#\n",
  44. "\n",
  45. " Loading dataset from file...\n",
  46. "\n",
  47. " Calculating kernel matrix, this could take a while...\n",
  48. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 231.33it/s]\n",
  49. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 15078.65it/s]\n",
  50. "\n",
  51. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3006291389465332 seconds ---\n",
  52. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  53. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  54. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  55. " ...\n",
  56. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  57. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  58. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  59. "\n",
  60. " Starting calculate accuracy/rmse...\n",
  61. "calculate performance: 81%|████████ | 808/1000 [00:00<00:00, 2005.12it/s]\n",
  62. " Mean performance on train set: 0.698361\n",
  63. "With standard deviation: 0.116889\n",
  64. "\n",
  65. " Mean performance on test set: 0.871429\n",
  66. "With standard deviation: 0.100000\n",
  67. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2024.59it/s]\n",
  68. "\n",
  69. "\n",
  70. " #--- calculating kernel matrix when cycle_bound = 100.0 ---#\n",
  71. "\n",
  72. " Loading dataset from file...\n",
  73. "\n",
  74. " Calculating kernel matrix, this could take a while...\n",
  75. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 224.68it/s]\n",
  76. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13144.65it/s]\n",
  77. "\n",
  78. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.30983662605285645 seconds ---\n",
  79. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  80. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  81. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  82. " ...\n",
  83. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  84. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  85. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  86. "\n",
  87. " Starting calculate accuracy/rmse...\n",
  88. "calculate performance: 82%|████████▏ | 821/1000 [00:00<00:00, 2050.17it/s]\n",
  89. " Mean performance on train set: 0.732787\n",
  90. "With standard deviation: 0.082637\n",
  91. "\n",
  92. " Mean performance on test set: 0.900000\n",
  93. "With standard deviation: 0.111575\n",
  94. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2050.63it/s]\n",
  95. "\n",
  96. "\n",
  97. " #--- calculating kernel matrix when cycle_bound = 150.0 ---#\n",
  98. "\n",
  99. " Loading dataset from file...\n",
  100. "\n",
  101. " Calculating kernel matrix, this could take a while...\n",
  102. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 219.10it/s]\n",
  103. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12644.09it/s]\n",
  104. "\n",
  105. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31808018684387207 seconds ---\n",
  106. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  107. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  108. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  109. " ...\n",
  110. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  111. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  112. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  113. "\n",
  114. " Starting calculate accuracy/rmse...\n",
  115. "calculate performance: 99%|█████████▉| 993/1000 [00:00<00:00, 1993.90it/s]\n",
  116. " Mean performance on train set: 0.732787\n",
  117. "With standard deviation: 0.082637\n",
  118. "\n",
  119. " Mean performance on test set: 0.900000\n",
  120. "With standard deviation: 0.111575\n",
  121. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1977.95it/s]\n",
  122. "\n",
  123. "\n",
  124. " #--- calculating kernel matrix when cycle_bound = 200.0 ---#\n",
  125. "\n",
  126. " Loading dataset from file...\n",
  127. "\n",
  128. " Calculating kernel matrix, this could take a while...\n",
  129. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 219.08it/s]\n",
  130. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14177.69it/s]\n",
  131. "\n",
  132. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31757450103759766 seconds ---\n",
  133. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  134. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  135. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  136. " ...\n",
  137. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  138. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  139. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  140. "\n",
  141. " Starting calculate accuracy/rmse...\n",
  142. "calculate performance: 98%|█████████▊| 980/1000 [00:00<00:00, 1969.03it/s]\n",
  143. " Mean performance on train set: 0.732787\n",
  144. "With standard deviation: 0.082637\n",
  145. "\n",
  146. " Mean performance on test set: 0.900000\n",
  147. "With standard deviation: 0.111575\n",
  148. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1951.39it/s]\n",
  149. "\n",
  150. "\n",
  151. " #--- calculating kernel matrix when cycle_bound = 250.0 ---#\n",
  152. "\n",
  153. " Loading dataset from file...\n",
  154. "\n",
  155. " Calculating kernel matrix, this could take a while...\n",
  156. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 218.22it/s]\n",
  157. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12697.56it/s]\n",
  158. "\n",
  159. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3192298412322998 seconds ---\n",
  160. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  161. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  162. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  163. " ...\n",
  164. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  165. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  166. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  167. "\n",
  168. " Starting calculate accuracy/rmse...\n",
  169. "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1878.10it/s]\n",
  170. " Mean performance on train set: 0.732787\n",
  171. "With standard deviation: 0.082637\n",
  172. "\n",
  173. " Mean performance on test set: 0.900000\n",
  174. "With standard deviation: 0.111575\n",
  175. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1875.67it/s]\n",
  176. "\n",
  177. "\n",
  178. " #--- calculating kernel matrix when cycle_bound = 300.0 ---#\n",
  179. "\n",
  180. " Loading dataset from file...\n",
  181. "\n",
  182. " Calculating kernel matrix, this could take a while...\n",
  183. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 206.81it/s]\n",
  184. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12364.00it/s]\n",
  185. "\n",
  186. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.33614420890808105 seconds ---\n",
  187. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  188. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  189. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  190. " ...\n",
  191. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  192. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  193. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  194. "\n",
  195. " Starting calculate accuracy/rmse...\n",
  196. "calculate performance: 97%|█████████▋| 970/1000 [00:00<00:00, 1947.13it/s]\n",
  197. " Mean performance on train set: 0.732787\n",
  198. "With standard deviation: 0.082637\n",
  199. "\n",
  200. " Mean performance on test set: 0.900000\n",
  201. "With standard deviation: 0.111575\n",
  202. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1934.26it/s]\n",
  203. "\n",
  204. "\n",
  205. " #--- calculating kernel matrix when cycle_bound = 350.0 ---#\n",
  206. "\n",
  207. " Loading dataset from file...\n",
  208. "\n",
  209. " Calculating kernel matrix, this could take a while...\n",
  210. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 189.65it/s]\n",
  211. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13989.93it/s]\n",
  212. "\n",
  213. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3654501438140869 seconds ---\n",
  214. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  215. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  216. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  217. " ...\n",
  218. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  219. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  220. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  221. "\n",
  222. " Starting calculate accuracy/rmse...\n",
  223. "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1875.81it/s]\n",
  224. " Mean performance on train set: 0.732787\n",
  225. "With standard deviation: 0.082637\n",
  226. "\n",
  227. " Mean performance on test set: 0.900000\n",
  228. "With standard deviation: 0.111575\n",
  229. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1881.94it/s]\n",
  230. "\n",
  231. "\n",
  232. " #--- calculating kernel matrix when cycle_bound = 400.0 ---#\n",
  233. "\n",
  234. " Loading dataset from file...\n",
  235. "\n",
  236. " Calculating kernel matrix, this could take a while...\n",
  237. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 220.95it/s]\n",
  238. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14281.34it/s]\n",
  239. "\n",
  240. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3142852783203125 seconds ---\n",
  241. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  242. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  243. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  244. " ...\n",
  245. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  246. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  247. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  248. "\n",
  249. " Starting calculate accuracy/rmse...\n",
  250. "calculate performance: 95%|█████████▌| 952/1000 [00:00<00:00, 1900.77it/s]\n",
  251. " Mean performance on train set: 0.732787\n",
  252. "With standard deviation: 0.082637\n",
  253. "\n",
  254. " Mean performance on test set: 0.900000\n",
  255. "With standard deviation: 0.111575\n"
  256. ]
  257. },
  258. {
  259. "name": "stdout",
  260. "output_type": "stream",
  261. "text": [
  262. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1900.46it/s]\n",
  263. "\n",
  264. "\n",
  265. " #--- calculating kernel matrix when cycle_bound = 450.0 ---#\n",
  266. "\n",
  267. " Loading dataset from file...\n",
  268. "\n",
  269. " Calculating kernel matrix, this could take a while...\n",
  270. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 212.09it/s]\n",
  271. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 11357.62it/s]\n",
  272. "\n",
  273. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3287320137023926 seconds ---\n",
  274. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  275. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  276. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  277. " ...\n",
  278. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  279. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  280. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  281. "\n",
  282. " Starting calculate accuracy/rmse...\n",
  283. "calculate performance: 98%|█████████▊| 981/1000 [00:00<00:00, 1956.30it/s]\n",
  284. " Mean performance on train set: 0.732787\n",
  285. "With standard deviation: 0.082637\n",
  286. "\n",
  287. " Mean performance on test set: 0.900000\n",
  288. "With standard deviation: 0.111575\n",
  289. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1952.54it/s]\n",
  290. "\n",
  291. "\n",
  292. " #--- calculating kernel matrix when cycle_bound = 500.0 ---#\n",
  293. "\n",
  294. " Loading dataset from file...\n",
  295. "\n",
  296. " Calculating kernel matrix, this could take a while...\n",
  297. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.14it/s]\n",
  298. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12536.27it/s]\n",
  299. "\n",
  300. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3347315788269043 seconds ---\n",
  301. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  302. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  303. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  304. " ...\n",
  305. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  306. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  307. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  308. "\n",
  309. " Starting calculate accuracy/rmse...\n",
  310. "calculate performance: 98%|█████████▊| 979/1000 [00:00<00:00, 1970.30it/s]\n",
  311. " Mean performance on train set: 0.732787\n",
  312. "With standard deviation: 0.082637\n",
  313. "\n",
  314. " Mean performance on test set: 0.900000\n",
  315. "With standard deviation: 0.111575\n",
  316. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1950.19it/s]\n",
  317. "\n",
  318. "\n",
  319. " #--- calculating kernel matrix when cycle_bound = 550.0 ---#\n",
  320. "\n",
  321. " Loading dataset from file...\n",
  322. "\n",
  323. " Calculating kernel matrix, this could take a while...\n",
  324. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.06it/s]\n",
  325. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13816.44it/s]\n",
  326. "\n",
  327. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3341798782348633 seconds ---\n",
  328. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  329. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  330. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  331. " ...\n",
  332. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  333. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  334. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  335. "\n",
  336. " Starting calculate accuracy/rmse...\n",
  337. "calculate performance: 97%|█████████▋| 974/1000 [00:00<00:00, 1930.44it/s]\n",
  338. " Mean performance on train set: 0.732787\n",
  339. "With standard deviation: 0.082637\n",
  340. "\n",
  341. " Mean performance on test set: 0.900000\n",
  342. "With standard deviation: 0.111575\n",
  343. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1937.89it/s]\n",
  344. "\n",
  345. "\n",
  346. " #--- calculating kernel matrix when cycle_bound = 600.0 ---#\n",
  347. "\n",
  348. " Loading dataset from file...\n",
  349. "\n",
  350. " Calculating kernel matrix, this could take a while...\n",
  351. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 213.56it/s]\n",
  352. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13048.43it/s]\n",
  353. "\n",
  354. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.32569050788879395 seconds ---\n",
  355. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  356. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  357. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  358. " ...\n",
  359. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  360. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  361. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  362. "\n",
  363. " Starting calculate accuracy/rmse...\n",
  364. "calculate performance: 97%|█████████▋| 972/1000 [00:00<00:00, 1924.82it/s]\n",
  365. " Mean performance on train set: 0.732787\n",
  366. "With standard deviation: 0.082637\n",
  367. "\n",
  368. " Mean performance on test set: 0.900000\n",
  369. "With standard deviation: 0.111575\n",
  370. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1935.68it/s]\n",
  371. "\n",
  372. "\n",
  373. " #--- calculating kernel matrix when cycle_bound = 650.0 ---#\n",
  374. "\n",
  375. " Loading dataset from file...\n",
  376. "\n",
  377. " Calculating kernel matrix, this could take a while...\n",
  378. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 216.51it/s]\n",
  379. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 9669.54it/s]\n",
  380. "\n",
  381. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3229689598083496 seconds ---\n",
  382. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  383. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  384. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  385. " ...\n",
  386. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  387. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  388. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  389. "\n",
  390. " Starting calculate accuracy/rmse...\n",
  391. "calculate performance: 98%|█████████▊| 983/1000 [00:00<00:00, 1963.08it/s]\n",
  392. " Mean performance on train set: 0.732787\n",
  393. "With standard deviation: 0.082637\n",
  394. "\n",
  395. " Mean performance on test set: 0.900000\n",
  396. "With standard deviation: 0.111575\n",
  397. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1960.32it/s]\n",
  398. "\n",
  399. "\n",
  400. " #--- calculating kernel matrix when cycle_bound = 700.0 ---#\n",
  401. "\n",
  402. " Loading dataset from file...\n",
  403. "\n",
  404. " Calculating kernel matrix, this could take a while...\n",
  405. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.61it/s]\n",
  406. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13485.23it/s]\n",
  407. "\n",
  408. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.33377623558044434 seconds ---\n",
  409. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  410. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  411. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  412. " ...\n",
  413. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  414. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  415. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  416. "\n",
  417. " Starting calculate accuracy/rmse...\n",
  418. "calculate performance: 81%|████████ | 812/1000 [00:00<00:00, 2020.52it/s]\n",
  419. " Mean performance on train set: 0.732787\n",
  420. "With standard deviation: 0.082637\n",
  421. "\n",
  422. " Mean performance on test set: 0.900000\n",
  423. "With standard deviation: 0.111575\n",
  424. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2029.28it/s]\n",
  425. "\n",
  426. "\n",
  427. " #--- calculating kernel matrix when cycle_bound = 750.0 ---#\n",
  428. "\n",
  429. " Loading dataset from file...\n",
  430. "\n",
  431. " Calculating kernel matrix, this could take a while...\n",
  432. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.54it/s]\n",
  433. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13952.29it/s]\n",
  434. "\n",
  435. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31093406677246094 seconds ---\n",
  436. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  437. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  438. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  439. " ...\n",
  440. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  441. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  442. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  443. "\n",
  444. " Starting calculate accuracy/rmse...\n",
  445. "calculate performance: 82%|████████▎ | 825/1000 [00:00<00:00, 2053.32it/s]\n",
  446. " Mean performance on train set: 0.732787\n",
  447. "With standard deviation: 0.082637\n",
  448. "\n",
  449. " Mean performance on test set: 0.900000\n",
  450. "With standard deviation: 0.111575\n",
  451. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2055.77it/s]\n",
  452. "\n",
  453. "\n",
  454. " #--- calculating kernel matrix when cycle_bound = 800.0 ---#\n",
  455. "\n",
  456. " Loading dataset from file...\n",
  457. "\n",
  458. " Calculating kernel matrix, this could take a while...\n",
  459. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.35it/s]\n",
  460. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13220.82it/s]\n",
  461. "\n",
  462. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31124091148376465 seconds ---\n",
  463. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  464. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  465. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  466. " ...\n",
  467. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  468. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  469. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  470. "\n",
  471. " Starting calculate accuracy/rmse...\n",
  472. "calculate performance: 96%|█████████▌| 959/1000 [00:00<00:00, 1925.40it/s]\n",
  473. " Mean performance on train set: 0.732787\n",
  474. "With standard deviation: 0.082637\n",
  475. "\n",
  476. " Mean performance on test set: 0.900000\n",
  477. "With standard deviation: 0.111575\n",
  478. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1912.78it/s]\n",
  479. "\n",
  480. "\n",
  481. " #--- calculating kernel matrix when cycle_bound = 850.0 ---#\n",
  482. "\n",
  483. " Loading dataset from file...\n",
  484. "\n",
  485. " Calculating kernel matrix, this could take a while...\n",
  486. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 202.00it/s]\n",
  487. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12487.42it/s]\n",
  488. "\n",
  489. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.34392237663269043 seconds ---\n",
  490. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  491. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  492. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  493. " ...\n",
  494. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  495. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  496. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  497. "\n",
  498. " Starting calculate accuracy/rmse...\n",
  499. "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1869.41it/s]\n",
  500. " Mean performance on train set: 0.732787\n",
  501. "With standard deviation: 0.082637\n",
  502. "\n",
  503. " Mean performance on test set: 0.900000\n",
  504. "With standard deviation: 0.111575\n"
  505. ]
  506. },
  507. {
  508. "name": "stdout",
  509. "output_type": "stream",
  510. "text": [
  511. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1883.23it/s]\n",
  512. "\n",
  513. "\n",
  514. " #--- calculating kernel matrix when cycle_bound = 900.0 ---#\n",
  515. "\n",
  516. " Loading dataset from file...\n",
  517. "\n",
  518. " Calculating kernel matrix, this could take a while...\n",
  519. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 217.23it/s]\n",
  520. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13956.38it/s]\n",
  521. "\n",
  522. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.32010626792907715 seconds ---\n",
  523. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  524. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  525. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  526. " ...\n",
  527. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  528. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  529. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  530. "\n",
  531. " Starting calculate accuracy/rmse...\n",
  532. "calculate performance: 99%|█████████▉| 989/1000 [00:00<00:00, 1978.29it/s]\n",
  533. " Mean performance on train set: 0.732787\n",
  534. "With standard deviation: 0.082637\n",
  535. "\n",
  536. " Mean performance on test set: 0.900000\n",
  537. "With standard deviation: 0.111575\n",
  538. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1968.44it/s]\n",
  539. "\n",
  540. "\n",
  541. " #--- calculating kernel matrix when cycle_bound = 950.0 ---#\n",
  542. "\n",
  543. " Loading dataset from file...\n",
  544. "\n",
  545. " Calculating kernel matrix, this could take a while...\n",
  546. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 228.56it/s]\n",
  547. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14794.72it/s]\n",
  548. "\n",
  549. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.30414795875549316 seconds ---\n",
  550. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  551. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  552. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  553. " ...\n",
  554. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  555. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  556. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  557. "\n",
  558. " Starting calculate accuracy/rmse...\n",
  559. "calculate performance: 83%|████████▎ | 829/1000 [00:00<00:00, 2063.72it/s]\n",
  560. " Mean performance on train set: 0.732787\n",
  561. "With standard deviation: 0.082637\n",
  562. "\n",
  563. " Mean performance on test set: 0.900000\n",
  564. "With standard deviation: 0.111575\n",
  565. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2068.06it/s]\n",
  566. "\n",
  567. "\n",
  568. " #--- calculating kernel matrix when cycle_bound = 1000.0 ---#\n",
  569. "\n",
  570. " Loading dataset from file...\n",
  571. "\n",
  572. " Calculating kernel matrix, this could take a while...\n",
  573. "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.02it/s]\n",
  574. "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13702.27it/s]\n",
  575. "\n",
  576. " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3120880126953125 seconds ---\n",
  577. "[[ 8. 8. 8. ... 8. 8. 8.]\n",
  578. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  579. " [ 8. 8. 8. ... 8. 8. 8.]\n",
  580. " ...\n",
  581. " [ 8. 8. 8. ... 10. 9. 9.]\n",
  582. " [ 8. 8. 8. ... 9. 10. 10.]\n",
  583. " [ 8. 8. 8. ... 9. 10. 10.]]\n",
  584. "\n",
  585. " Starting calculate accuracy/rmse...\n",
  586. "calculate performance: 82%|████████▎ | 825/1000 [00:00<00:00, 2054.81it/s]\n",
  587. " Mean performance on train set: 0.732787\n",
  588. "With standard deviation: 0.082637\n",
  589. "\n",
  590. " Mean performance on test set: 0.900000\n",
  591. "With standard deviation: 0.111575\n",
  592. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2022.62it/s]\n",
  593. "\n",
  594. "\n",
  595. " cycle_bound accur_test std_test accur_train std_train k_time\n",
  596. "------------- ------------ ---------- ------------- ----------- --------\n",
  597. " 0 0.642857 0.146385 0.54918 0.0167983 0.187052\n",
  598. " 50 0.871429 0.1 0.698361 0.116889 0.300629\n",
  599. " 100 0.9 0.111575 0.732787 0.0826366 0.309837\n",
  600. " 150 0.9 0.111575 0.732787 0.0826366 0.31808\n",
  601. " 200 0.9 0.111575 0.732787 0.0826366 0.317575\n",
  602. " 250 0.9 0.111575 0.732787 0.0826366 0.31923\n",
  603. " 300 0.9 0.111575 0.732787 0.0826366 0.336144\n",
  604. " 350 0.9 0.111575 0.732787 0.0826366 0.36545\n",
  605. " 400 0.9 0.111575 0.732787 0.0826366 0.314285\n",
  606. " 450 0.9 0.111575 0.732787 0.0826366 0.328732\n",
  607. " 500 0.9 0.111575 0.732787 0.0826366 0.334732\n",
  608. " 550 0.9 0.111575 0.732787 0.0826366 0.33418\n",
  609. " 600 0.9 0.111575 0.732787 0.0826366 0.325691\n",
  610. " 650 0.9 0.111575 0.732787 0.0826366 0.322969\n",
  611. " 700 0.9 0.111575 0.732787 0.0826366 0.333776\n",
  612. " 750 0.9 0.111575 0.732787 0.0826366 0.310934\n",
  613. " 800 0.9 0.111575 0.732787 0.0826366 0.311241\n",
  614. " 850 0.9 0.111575 0.732787 0.0826366 0.343922\n",
  615. " 900 0.9 0.111575 0.732787 0.0826366 0.320106\n",
  616. " 950 0.9 0.111575 0.732787 0.0826366 0.304148\n",
  617. " 1000 0.9 0.111575 0.732787 0.0826366 0.312088\n"
  618. ]
  619. }
  620. ],
  621. "source": [
  622. "# MAO dataset (node labeled, edge labeled, undirected, cyclic + linear, classification)\n",
  623. "%load_ext line_profiler\n",
  624. "\n",
  625. "import sys\n",
  626. "sys.path.insert(0, \"../\")\n",
  627. "from pygraph.utils.utils import kernel_train_test\n",
  628. "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n",
  629. "\n",
  630. "import numpy as np\n",
  631. "\n",
  632. "datafile = '../../../../datasets/MAO/dataset.ds'\n",
  633. "kernel_file_path = 'kernelmatrices_cyclicpattern_mao/'\n",
  634. "\n",
  635. "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n",
  636. "\n",
  637. "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n",
  638. " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 500, 21), normalize = False,\n",
  639. " model_type = 'classification')"
  640. ]
  641. },
  642. {
  643. "cell_type": "code",
  644. "execution_count": 2,
  645. "metadata": {},
  646. "outputs": [
  647. {
  648. "name": "stdout",
  649. "output_type": "stream",
  650. "text": [
  651. "The line_profiler extension is already loaded. To reload it, use:\n",
  652. " %reload_ext line_profiler\n",
  653. "\n",
  654. " --- This is a classification problem ---\n",
  655. "\n",
  656. "\n",
  657. " #--- calculating kernel matrix when cycle_bound = 0.0 ---#\n",
  658. "\n",
  659. " Loading dataset from file...\n",
  660. "\n",
  661. " Calculating kernel matrix, this could take a while...\n",
  662. "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 176.07it/s]\n",
  663. "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 18331.07it/s]\n",
  664. "\n",
  665. " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5411422252655029 seconds ---\n",
  666. "[[0. 0. 0. ... 0. 0. 0.]\n",
  667. " [0. 0. 0. ... 0. 0. 0.]\n",
  668. " [0. 0. 0. ... 0. 0. 0.]\n",
  669. " ...\n",
  670. " [0. 0. 0. ... 0. 0. 0.]\n",
  671. " [0. 0. 0. ... 0. 0. 0.]\n",
  672. " [0. 0. 0. ... 0. 0. 0.]]\n",
  673. "\n",
  674. " Starting calculate accuracy/rmse...\n",
  675. "calculate performance: 95%|█████████▌| 951/1000 [00:00<00:00, 1898.18it/s]\n",
  676. " Mean performance on train set: 0.629762\n",
  677. "With standard deviation: 0.013521\n",
  678. "\n",
  679. " Mean performance on test set: 0.610000\n",
  680. "With standard deviation: 0.113578\n",
  681. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1889.49it/s]\n",
  682. "\n",
  683. "\n",
  684. " #--- calculating kernel matrix when cycle_bound = 10.0 ---#\n",
  685. "\n",
  686. " Loading dataset from file...\n",
  687. "\n",
  688. " Calculating kernel matrix, this could take a while...\n",
  689. "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 165.16it/s]\n",
  690. "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 16217.54it/s]\n",
  691. "\n",
  692. " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5770719051361084 seconds ---\n",
  693. "[[0. 0. 0. ... 0. 0. 0.]\n",
  694. " [0. 0. 0. ... 0. 0. 0.]\n",
  695. " [0. 0. 0. ... 0. 0. 0.]\n",
  696. " ...\n",
  697. " [0. 0. 0. ... 0. 0. 0.]\n",
  698. " [0. 0. 0. ... 0. 0. 0.]\n",
  699. " [0. 0. 0. ... 0. 0. 0.]]\n",
  700. "\n",
  701. " Starting calculate accuracy/rmse...\n",
  702. "calculate performance: 94%|█████████▍| 940/1000 [00:00<00:00, 1876.61it/s]\n",
  703. " Mean performance on train set: 0.629762\n",
  704. "With standard deviation: 0.013521\n",
  705. "\n",
  706. " Mean performance on test set: 0.610000\n",
  707. "With standard deviation: 0.113578\n",
  708. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1866.80it/s]\n",
  709. "\n",
  710. "\n",
  711. " #--- calculating kernel matrix when cycle_bound = 20.0 ---#\n",
  712. "\n",
  713. " Loading dataset from file...\n",
  714. "\n",
  715. " Calculating kernel matrix, this could take a while...\n",
  716. "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 165.21it/s]\n",
  717. "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 16888.61it/s]\n",
  718. "\n",
  719. " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5768516063690186 seconds ---\n",
  720. "[[3. 0. 0. ... 0. 0. 0.]\n",
  721. " [0. 0. 0. ... 0. 0. 0.]\n",
  722. " [0. 0. 0. ... 0. 0. 0.]\n",
  723. " ...\n",
  724. " [0. 0. 0. ... 0. 0. 0.]\n",
  725. " [0. 0. 0. ... 0. 0. 0.]\n",
  726. " [0. 0. 0. ... 0. 0. 0.]]\n",
  727. "\n",
  728. " Starting calculate accuracy/rmse...\n",
  729. "calculate performance: 93%|█████████▎| 926/1000 [00:00<00:00, 1837.36it/s]\n",
  730. " Mean performance on train set: 0.629762\n",
  731. "With standard deviation: 0.013521\n",
  732. "\n",
  733. " Mean performance on test set: 0.610000\n",
  734. "With standard deviation: 0.113578\n",
  735. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1841.13it/s]\n",
  736. "\n",
  737. "\n",
  738. " #--- calculating kernel matrix when cycle_bound = 30.0 ---#\n",
  739. "\n",
  740. " Loading dataset from file...\n",
  741. "\n",
  742. " Calculating kernel matrix, this could take a while...\n",
  743. "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 171.51it/s]\n",
  744. "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 17701.46it/s]\n",
  745. "\n",
  746. " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5560076236724854 seconds ---\n",
  747. "[[3. 3. 3. ... 0. 0. 0.]\n",
  748. " [3. 4. 4. ... 0. 0. 0.]\n",
  749. " [3. 4. 4. ... 0. 0. 0.]\n",
  750. " ...\n",
  751. " [0. 0. 0. ... 0. 0. 0.]\n",
  752. " [0. 0. 0. ... 0. 0. 0.]\n",
  753. " [0. 0. 0. ... 0. 0. 0.]]\n",
  754. "\n",
  755. " Starting calculate accuracy/rmse...\n",
  756. "calculate performance: 92%|█████████▏| 923/1000 [00:00<00:00, 1845.18it/s]\n",
  757. " Mean performance on train set: 0.633333\n",
  758. "With standard deviation: 0.015793\n",
  759. "\n",
  760. " Mean performance on test set: 0.640000\n",
  761. "With standard deviation: 0.111355\n",
  762. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1836.56it/s]\n",
  763. "\n",
  764. "\n",
  765. " #--- calculating kernel matrix when cycle_bound = 40.0 ---#\n",
  766. "\n",
  767. " Loading dataset from file...\n",
  768. "\n",
  769. " Calculating kernel matrix, this could take a while...\n",
  770. "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 159.66it/s]\n",
  771. "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 17703.84it/s]\n",
  772. "\n",
  773. " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5963354110717773 seconds ---\n",
  774. "[[3. 3. 3. ... 0. 0. 0.]\n",
  775. " [3. 4. 4. ... 0. 0. 0.]\n",
  776. " [3. 4. 4. ... 0. 0. 0.]\n",
  777. " ...\n",
  778. " [0. 0. 0. ... 0. 0. 0.]\n",
  779. " [0. 0. 0. ... 0. 0. 0.]\n",
  780. " [0. 0. 0. ... 0. 0. 0.]]\n",
  781. "\n",
  782. " Starting calculate accuracy/rmse...\n",
  783. "calculate performance: 84%|████████▍ | 845/1000 [00:00<00:00, 1694.10it/s]\n",
  784. " Mean performance on train set: 0.633333\n",
  785. "With standard deviation: 0.015793\n",
  786. "\n",
  787. " Mean performance on test set: 0.640000\n",
  788. "With standard deviation: 0.111355\n",
  789. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1694.34it/s]\n",
  790. "\n",
  791. "\n",
  792. " #--- calculating kernel matrix when cycle_bound = 50.0 ---#\n",
  793. "\n",
  794. " Loading dataset from file...\n",
  795. "\n",
  796. " Calculating kernel matrix, this could take a while...\n",
  797. "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 126.36it/s]\n",
  798. "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14863.89it/s]\n",
  799. "\n",
  800. " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.7526798248291016 seconds ---\n",
  801. "[[3. 3. 3. ... 0. 0. 0.]\n",
  802. " [3. 4. 4. ... 0. 0. 0.]\n",
  803. " [3. 4. 4. ... 0. 0. 0.]\n",
  804. " ...\n",
  805. " [0. 0. 0. ... 0. 0. 0.]\n",
  806. " [0. 0. 0. ... 0. 0. 0.]\n",
  807. " [0. 0. 0. ... 0. 0. 0.]]\n",
  808. "\n",
  809. " Starting calculate accuracy/rmse...\n",
  810. "calculate performance: 84%|████████▍ | 842/1000 [00:00<00:00, 1670.86it/s]\n",
  811. " Mean performance on train set: 0.658333\n",
  812. "With standard deviation: 0.034524\n",
  813. "\n",
  814. " Mean performance on test set: 0.670000\n",
  815. "With standard deviation: 0.090000\n",
  816. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1665.11it/s]\n",
  817. "\n",
  818. "\n",
  819. " #--- calculating kernel matrix when cycle_bound = 60.0 ---#\n",
  820. "\n",
  821. " Loading dataset from file...\n",
  822. "\n",
  823. " Calculating kernel matrix, this could take a while...\n",
  824. "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 107.33it/s]\n",
  825. "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 13937.03it/s]\n",
  826. "\n",
  827. " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.8846912384033203 seconds ---\n",
  828. "[[3. 3. 3. ... 0. 0. 0.]\n",
  829. " [3. 4. 4. ... 0. 0. 0.]\n",
  830. " [3. 4. 4. ... 0. 0. 0.]\n",
  831. " ...\n",
  832. " [0. 0. 0. ... 0. 0. 0.]\n",
  833. " [0. 0. 0. ... 0. 0. 0.]\n",
  834. " [0. 0. 0. ... 0. 0. 0.]]\n",
  835. "\n",
  836. " Starting calculate accuracy/rmse...\n",
  837. "calculate performance: 83%|████████▎ | 829/1000 [00:00<00:00, 1653.86it/s]\n",
  838. " Mean performance on train set: 0.671429\n",
  839. "With standard deviation: 0.036577\n",
  840. "\n",
  841. " Mean performance on test set: 0.680000\n",
  842. "With standard deviation: 0.107703\n",
  843. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1673.57it/s]\n",
  844. "\n",
  845. "\n",
  846. " #--- calculating kernel matrix when cycle_bound = 70.0 ---#\n",
  847. "\n",
  848. " Loading dataset from file...\n",
  849. "\n",
  850. " Calculating kernel matrix, this could take a while...\n",
  851. "retrieve patterns: 100%|██████████| 94/94 [00:01<00:00, 81.45it/s] \n",
  852. "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14275.64it/s]\n",
  853. "\n",
  854. " --- kernel matrix of cyclic pattern kernel of size 94 built in 1.1631414890289307 seconds ---\n",
  855. "[[3. 3. 3. ... 3. 3. 3.]\n",
  856. " [3. 4. 4. ... 4. 4. 4.]\n",
  857. " [3. 4. 4. ... 4. 4. 4.]\n",
  858. " ...\n",
  859. " [3. 4. 4. ... 7. 7. 7.]\n",
  860. " [3. 4. 4. ... 7. 7. 7.]\n",
  861. " [3. 4. 4. ... 7. 7. 7.]]\n",
  862. "\n",
  863. " Starting calculate accuracy/rmse...\n",
  864. "calculate performance: 88%|████████▊ | 876/1000 [00:00<00:00, 1761.00it/s]\n",
  865. " Mean performance on train set: 0.666667\n",
  866. "With standard deviation: 0.038021\n",
  867. "\n",
  868. " Mean performance on test set: 0.670000\n",
  869. "With standard deviation: 0.100499\n",
  870. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1754.20it/s]\n",
  871. "\n",
  872. "\n",
  873. " #--- calculating kernel matrix when cycle_bound = 80.0 ---#\n",
  874. "\n",
  875. " Loading dataset from file...\n",
  876. "\n",
  877. " Calculating kernel matrix, this could take a while...\n",
  878. "retrieve patterns: 100%|██████████| 94/94 [00:01<00:00, 79.93it/s] \n",
  879. "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14789.73it/s]\n",
  880. "\n",
  881. " --- kernel matrix of cyclic pattern kernel of size 94 built in 1.1846554279327393 seconds ---\n",
  882. "[[3. 3. 3. ... 3. 3. 3.]\n",
  883. " [3. 4. 4. ... 4. 4. 4.]\n",
  884. " [3. 4. 4. ... 4. 4. 4.]\n",
  885. " ...\n",
  886. " [3. 4. 4. ... 7. 7. 7.]\n",
  887. " [3. 4. 4. ... 7. 7. 7.]\n",
  888. " [3. 4. 4. ... 7. 7. 7.]]\n",
  889. "\n",
  890. " Starting calculate accuracy/rmse...\n",
  891. "calculate performance: 93%|█████████▎| 926/1000 [00:00<00:00, 1854.59it/s]\n",
  892. " Mean performance on train set: 0.709524\n",
  893. "With standard deviation: 0.058853\n",
  894. "\n",
  895. " Mean performance on test set: 0.780000\n",
  896. "With standard deviation: 0.107703\n",
  897. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1844.77it/s]\n",
  898. "\n",
  899. "\n",
  900. " #--- calculating kernel matrix when cycle_bound = 90.0 ---#\n",
  901. "\n",
  902. " Loading dataset from file...\n",
  903. "\n",
  904. " Calculating kernel matrix, this could take a while...\n"
  905. ]
  906. },
  907. {
  908. "name": "stdout",
  909. "output_type": "stream",
  910. "text": [
  911. "retrieve patterns: 100%|██████████| 94/94 [00:01<00:00, 83.75it/s] \n",
  912. "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14169.95it/s]\n",
  913. "\n",
  914. " --- kernel matrix of cyclic pattern kernel of size 94 built in 1.1314406394958496 seconds ---\n",
  915. "[[3. 3. 3. ... 3. 3. 3.]\n",
  916. " [3. 4. 4. ... 4. 4. 4.]\n",
  917. " [3. 4. 4. ... 4. 4. 4.]\n",
  918. " ...\n",
  919. " [3. 4. 4. ... 7. 7. 7.]\n",
  920. " [3. 4. 4. ... 7. 7. 7.]\n",
  921. " [3. 4. 4. ... 7. 7. 7.]]\n",
  922. "\n",
  923. " Starting calculate accuracy/rmse...\n",
  924. "calculate performance: 94%|█████████▍| 943/1000 [00:00<00:00, 1878.69it/s]\n",
  925. " Mean performance on train set: 0.709524\n",
  926. "With standard deviation: 0.058853\n",
  927. "\n",
  928. " Mean performance on test set: 0.780000\n",
  929. "With standard deviation: 0.107703\n",
  930. "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1872.55it/s]\n",
  931. "\n",
  932. "\n",
  933. " cycle_bound accur_test std_test accur_train std_train k_time\n",
  934. "------------- ------------ ---------- ------------- ----------- --------\n",
  935. " 0 0.61 0.113578 0.629762 0.0135212 0.541142\n",
  936. " 10 0.61 0.113578 0.629762 0.0135212 0.577072\n",
  937. " 20 0.61 0.113578 0.629762 0.0135212 0.576852\n",
  938. " 30 0.64 0.111355 0.633333 0.0157935 0.556008\n",
  939. " 40 0.64 0.111355 0.633333 0.0157935 0.596335\n",
  940. " 50 0.67 0.09 0.658333 0.0345238 0.75268\n",
  941. " 60 0.68 0.107703 0.671429 0.0365769 0.884691\n",
  942. " 70 0.67 0.100499 0.666667 0.0380208 1.16314\n",
  943. " 80 0.78 0.107703 0.709524 0.0588534 1.18466\n",
  944. " 90 0.78 0.107703 0.709524 0.0588534 1.13144\n"
  945. ]
  946. }
  947. ],
  948. "source": [
  949. "# PAH dataset (node and edge unlabeled, undirected, cyclic, classification)\n",
  950. "%load_ext line_profiler\n",
  951. "\n",
  952. "import sys\n",
  953. "sys.path.insert(0, \"../\")\n",
  954. "from pygraph.utils.utils import kernel_train_test\n",
  955. "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n",
  956. "\n",
  957. "import numpy as np\n",
  958. "\n",
  959. "datafile = '../../../../datasets/PAH/dataset.ds'\n",
  960. "kernel_file_path = 'kernelmatrices_cyclicpattern_pah/'\n",
  961. "\n",
  962. "\n",
  963. "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = False)\n",
  964. "\n",
  965. "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n",
  966. " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 90, 10), normalize = False, \\\n",
  967. " model_type = 'classification')"
  968. ]
  969. },
  970. {
  971. "cell_type": "code",
  972. "execution_count": null,
  973. "metadata": {},
  974. "outputs": [],
  975. "source": [
  976. "# results\n",
  977. "\n",
  978. "# MAO dataset\n",
  979. "cycle_bound accur_test std_test accur_train std_train k_time\n",
  980. "------------- ------------ ---------- ------------- ----------- --------\n",
  981. " 0 0.642857 0.146385 0.54918 0.0167983 0.187052\n",
  982. " 50 0.871429 0.1 0.698361 0.116889 0.300629\n",
  983. " 100 0.9 0.111575 0.732787 0.0826366 0.309837\n",
  984. " 150 0.9 0.111575 0.732787 0.0826366 0.31808\n",
  985. " 200 0.9 0.111575 0.732787 0.0826366 0.317575\n",
  986. " \n",
  987. "# PAH dataset\n",
  988. " cycle_bound accur_test std_test accur_train std_train k_time\n",
  989. "------------- ------------ ---------- ------------- ----------- --------\n",
  990. " 0 0.61 0.113578 0.629762 0.0135212 0.521801\n",
  991. " 10 0.61 0.113578 0.629762 0.0135212 0.52589\n",
  992. " 20 0.61 0.113578 0.629762 0.0135212 0.548528\n",
  993. " 30 0.64 0.111355 0.633333 0.0157935 0.535311\n",
  994. " 40 0.64 0.111355 0.633333 0.0157935 0.61764\n",
  995. " 50 0.67 0.09 0.658333 0.0345238 0.733868\n",
  996. " 60 0.68 0.107703 0.671429 0.0365769 0.871147\n",
  997. " 70 0.67 0.100499 0.666667 0.0380208 1.12625\n",
  998. " 80 0.78 0.107703 0.709524 0.0588534 1.19828\n",
  999. " 90 0.78 0.107703 0.709524 0.0588534 1.21182"
  1000. ]
  1001. },
  1002. {
  1003. "cell_type": "code",
  1004. "execution_count": null,
  1005. "metadata": {},
  1006. "outputs": [
  1007. {
  1008. "name": "stdout",
  1009. "output_type": "stream",
  1010. "text": [
  1011. "\n",
  1012. " --- This is a classification problem ---\n",
  1013. "\n",
  1014. "\n",
  1015. " #--- calculating kernel matrix when cycle_bound = 1000.0 ---#\n",
  1016. "\n",
  1017. " Loading dataset from file...\n",
  1018. "load SDF: 100%|██████████| 4457424/4457424 [00:10<00:00, 408299.51it/s]\n",
  1019. "ajust data: 100%|██████████| 42687/42687 [00:10<00:00, 4092.17it/s] \n",
  1020. "\n",
  1021. " Calculating kernel matrix, this could take a while...\n",
  1022. "retrieve patterns: 100%|██████████| 42682/42682 [19:36<00:00, 36.27it/s]\n",
  1023. "calculate kernels: 100%|██████████| 42682/42682 [37:05<00:00, 19.18it/s] \n",
  1024. "\n",
  1025. " --- kernel matrix of cyclic pattern kernel of size 42682 built in 3402.171978712082 seconds ---\n",
  1026. "[[ 9. 9. 3. ... 4. 3. 4.]\n",
  1027. " [ 9. 11. 5. ... 6. 5. 6.]\n",
  1028. " [ 3. 5. 16. ... 6. 6. 6.]\n",
  1029. " ...\n",
  1030. " [ 4. 6. 6. ... 30. 29. 6.]\n",
  1031. " [ 3. 5. 6. ... 29. 29. 6.]\n",
  1032. " [ 4. 6. 6. ... 6. 6. 11.]]\n",
  1033. "\n",
  1034. " Starting calculate accuracy/rmse...\n",
  1035. "calculate performance: 7%|▋ | 70/1000 [1:34:57<227:25:45, 880.37s/it]"
  1036. ]
  1037. }
  1038. ],
  1039. "source": [
  1040. "# NCI-HIV dataset (labeled?, directed?, cyclic, classification)\n",
  1041. "%load_ext line_profiler\n",
  1042. "\n",
  1043. "import sys\n",
  1044. "sys.path.insert(0, \"../\")\n",
  1045. "from pygraph.utils.utils import kernel_train_test\n",
  1046. "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n",
  1047. "\n",
  1048. "import numpy as np\n",
  1049. "\n",
  1050. "datafile = '../../../../datasets/NCI-HIV/AIDO99SD.sdf'\n",
  1051. "datafile_y = '../../../../datasets/NCI-HIV/aids_conc_may04.txt'\n",
  1052. "kernel_file_path = 'kernelmatrices_path_acyclic/'\n",
  1053. "\n",
  1054. "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n",
  1055. "\n",
  1056. "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n",
  1057. " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 1000, 21), normalize = False, \\\n",
  1058. " datafile_y = datafile_y, model_type = 'classification')\n",
  1059. "\n",
  1060. "# kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = 200)\n",
  1061. "\n",
  1062. "# kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para,\n",
  1063. "# normalize = False, datafile_y = datafile_y, model_type = 'classification')\n",
  1064. "\n",
  1065. "# kernel_para['k_func'] = 'minmax'\n",
  1066. "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n",
  1067. "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = True)\n",
  1068. "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n",
  1069. "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = False)\n",
  1070. "# # kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)\n",
  1071. "\n",
  1072. "# kernel_para['depth'] = 10\n",
  1073. "# %lprun -f untildpathkernel \\\n",
  1074. "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)"
  1075. ]
  1076. },
  1077. {
  1078. "cell_type": "code",
  1079. "execution_count": 19,
  1080. "metadata": {},
  1081. "outputs": [
  1082. {
  1083. "name": "stdout",
  1084. "output_type": "stream",
  1085. "text": [
  1086. "The line_profiler extension is already loaded. To reload it, use:\n",
  1087. " %reload_ext line_profiler\n"
  1088. ]
  1089. },
  1090. {
  1091. "data": {
  1092. "image/png": "\n",
  1093. "text/plain": [
  1094. "<matplotlib.figure.Figure at 0x7f71ccb78400>"
  1095. ]
  1096. },
  1097. "metadata": {},
  1098. "output_type": "display_data"
  1099. },
  1100. {
  1101. "data": {
  1102. "image/png": "\n",
  1103. "text/plain": [
  1104. "<matplotlib.figure.Figure at 0x7f71ccb339b0>"
  1105. ]
  1106. },
  1107. "metadata": {},
  1108. "output_type": "display_data"
  1109. },
  1110. {
  1111. "name": "stdout",
  1112. "output_type": "stream",
  1113. "text": [
  1114. "\n",
  1115. " --- kernel matrix of cyclic pattern kernel of size 999 built in 18.78946042060852 seconds ---\n",
  1116. "(array([[11., 5., 5., ..., 6., 7., 3.],\n",
  1117. " [ 5., 16., 6., ..., 5., 5., 3.],\n",
  1118. " [ 5., 6., 8., ..., 4., 5., 3.],\n",
  1119. " ...,\n",
  1120. " [ 6., 5., 4., ..., 17., 7., 4.],\n",
  1121. " [ 7., 5., 5., ..., 7., 15., 4.],\n",
  1122. " [ 3., 3., 3., ..., 4., 4., 11.]]), 18.78946042060852)\n"
  1123. ]
  1124. }
  1125. ],
  1126. "source": [
  1127. "%load_ext line_profiler\n",
  1128. "\n",
  1129. "import networkx as nx\n",
  1130. "import matplotlib.pyplot as plt\n",
  1131. "import sys\n",
  1132. "sys.path.insert(0, \"../\")\n",
  1133. "from pygraph.utils.graphfiles import loadDataset\n",
  1134. "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n",
  1135. "\n",
  1136. "# datafile = '../../../../datasets/NCI-HIV/AIDO99SD.sdf'\n",
  1137. "# datafile_y = '../../../../datasets/NCI-HIV/aids_conc_may04.txt'\n",
  1138. "# dataset, y = loadDataset(datafile, datafile_y)\n",
  1139. "G1 = dataset[1]\n",
  1140. "G2 = dataset[2]\n",
  1141. "G3 = dataset[3]\n",
  1142. "G4 = dataset[4]\n",
  1143. "G5 = dataset[5]\n",
  1144. "data = [G1, G2, G3, G4, G5]\n",
  1145. "nx.draw_networkx(G1)\n",
  1146. "plt.show()\n",
  1147. "nx.draw_networkx(G2)\n",
  1148. "plt.show()\n",
  1149. "\n",
  1150. "kernel = cyclicpatternkernel(dataset[1:1000], cycle_bound = 1000)\n",
  1151. "print(kernel)"
  1152. ]
  1153. },
  1154. {
  1155. "cell_type": "code",
  1156. "execution_count": 1,
  1157. "metadata": {},
  1158. "outputs": [
  1159. {
  1160. "name": "stdout",
  1161. "output_type": "stream",
  1162. "text": [
  1163. "\n",
  1164. " --- This is a classification problem ---\n",
  1165. "\n",
  1166. "\n",
  1167. " Loading dataset from file...\n",
  1168. "\n",
  1169. " Calculating kernel matrix, this could take a while...\n",
  1170. "retrieve patterns: 100%|██████████| 185/185 [00:00<00:00, 2064.69it/s]\n",
  1171. "calculate kernels: 100%|██████████| 185/185 [00:00<00:00, 11170.00it/s]\n",
  1172. "\n",
  1173. " --- kernel matrix of cyclic pattern kernel of size 185 built in 0.10836505889892578 seconds ---\n",
  1174. "[[0. 0. 0. ... 0. 0. 0.]\n",
  1175. " [0. 0. 0. ... 0. 0. 0.]\n",
  1176. " [0. 0. 0. ... 0. 0. 0.]\n",
  1177. " ...\n",
  1178. " [0. 0. 0. ... 0. 0. 0.]\n",
  1179. " [0. 0. 0. ... 0. 0. 0.]\n",
  1180. " [0. 0. 0. ... 0. 0. 0.]]\n",
  1181. "\n",
  1182. " Starting calculate accuracy/rmse...\n",
  1183. "calculate performance: 100%|██████████| 1000/1000 [00:24<00:00, 36.41it/s]\n",
  1184. " Mean performance on train set: 0.018072\n",
  1185. "With standard deviation: 0.000000\n",
  1186. "\n",
  1187. " Mean performance on test set: 0.000000\n",
  1188. "With standard deviation: 0.000000\n",
  1189. "\n",
  1190. "\n",
  1191. " accur_test std_test accur_train std_train k_time\n",
  1192. "------------ ---------- ------------- ----------- --------\n",
  1193. " 0 0 0.0180723 0 0.108365\n"
  1194. ]
  1195. }
  1196. ],
  1197. "source": [
  1198. "# acyclic dataset (node labeled, edge labeled, undirected, linear + non-linear, regression; run below with model_type = 'classification')\n",
  1199. "%load_ext line_profiler\n",
  1200. "\n",
  1201. "import sys\n",
  1202. "sys.path.insert(0, \"../\")\n",
  1203. "from pygraph.utils.utils import kernel_train_test\n",
  1204. "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n",
  1205. "\n",
  1206. "import numpy as np\n",
  1207. "\n",
  1208. "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n",
  1209. "kernel_file_path = 'kernelmatrices_path_acyclic/'\n",
  1210. "\n",
  1211. "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = 200)\n",
  1212. "\n",
  1213. "# kernel_train_test(datafile, kernel_file_path, treeletkernel, kernel_para, normalize = False)\n",
  1214. "\n",
  1215. "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n",
  1216. " normalize = False , model_type = 'classification')\n",
  1217. "\n",
  1218. "# kernel_para['k_func'] = 'minmax'\n",
  1219. "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n",
  1220. "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = True)\n",
  1221. "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, \\\n",
  1222. "# hyper_name = 'depth', hyper_range = np.linspace(0, 10, 11), normalize = False)\n",
  1223. "# # kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)\n",
  1224. "\n",
  1225. "# kernel_para['depth'] = 10\n",
  1226. "# %lprun -f untildpathkernel \\\n",
  1227. "# kernel_train_test(datafile, kernel_file_path, untildpathkernel, kernel_para, normalize = False)"
  1228. ]
  1229. }
  1230. ],
  1231. "metadata": {
  1232. "kernelspec": {
  1233. "display_name": "Python 3",
  1234. "language": "python",
  1235. "name": "python3"
  1236. },
  1237. "language_info": {
  1238. "codemirror_mode": {
  1239. "name": "ipython",
  1240. "version": 3
  1241. },
  1242. "file_extension": ".py",
  1243. "mimetype": "text/x-python",
  1244. "name": "python",
  1245. "nbconvert_exporter": "python",
  1246. "pygments_lexer": "ipython3",
  1247. "version": "3.5.2"
  1248. }
  1249. },
  1250. "nbformat": 4,
  1251. "nbformat_minor": 2
  1252. }

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.