{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " --- This is a classification problem ---\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 0.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 373.39it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 26367.08it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.18705153465270996 seconds ---\n", "[[0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 83%|████████▎ | 834/1000 [00:00<00:00, 2077.02it/s]\n", " Mean performance on train set: 0.549180\n", "With standard deviation: 0.016798\n", "\n", " Mean performance on test set: 0.642857\n", "With standard deviation: 0.146385\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2083.52it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 50.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 231.33it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 15078.65it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3006291389465332 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 
10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 81%|████████ | 808/1000 [00:00<00:00, 2005.12it/s]\n", " Mean performance on train set: 0.698361\n", "With standard deviation: 0.116889\n", "\n", " Mean performance on test set: 0.871429\n", "With standard deviation: 0.100000\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2024.59it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 100.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 224.68it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13144.65it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.30983662605285645 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 82%|████████▏ | 821/1000 [00:00<00:00, 2050.17it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2050.63it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 150.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 219.10it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12644.09it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31808018684387207 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 
9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 99%|█████████▉| 993/1000 [00:00<00:00, 1993.90it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1977.95it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 200.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 219.08it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14177.69it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31757450103759766 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 980/1000 [00:00<00:00, 1969.03it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1951.39it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 250.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 218.22it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12697.56it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3192298412322998 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 
8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1878.10it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1875.67it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 300.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 206.81it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12364.00it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.33614420890808105 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 97%|█████████▋| 970/1000 [00:00<00:00, 1947.13it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1934.26it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 350.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 189.65it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13989.93it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3654501438140869 seconds ---\n", "[[ 8. 8. 8. ... 
8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1875.81it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1881.94it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 400.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 220.95it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14281.34it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3142852783203125 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 
10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▌| 952/1000 [00:00<00:00, 1900.77it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1900.46it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 450.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 212.09it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 11357.62it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3287320137023926 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 981/1000 [00:00<00:00, 1956.30it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1952.54it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 500.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.14it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12536.27it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3347315788269043 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 
8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 979/1000 [00:00<00:00, 1970.30it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1950.19it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 550.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.06it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13816.44it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3341798782348633 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 97%|█████████▋| 974/1000 [00:00<00:00, 1930.44it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1937.89it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 600.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 213.56it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13048.43it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.32569050788879395 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 
8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 97%|█████████▋| 972/1000 [00:00<00:00, 1924.82it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1935.68it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 650.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 216.51it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 9669.54it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3229689598083496 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 
10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 98%|█████████▊| 983/1000 [00:00<00:00, 1963.08it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1960.32it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 700.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 208.61it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13485.23it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.33377623558044434 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 81%|████████ | 812/1000 [00:00<00:00, 2020.52it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2029.28it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 750.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.54it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13952.29it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31093406677246094 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 
9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 82%|████████▎ | 825/1000 [00:00<00:00, 2053.32it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2055.77it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 800.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.35it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13220.82it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.31124091148376465 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 96%|█████████▌| 959/1000 [00:00<00:00, 1925.40it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1912.78it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 850.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 202.00it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 12487.42it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.34392237663269043 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 
8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▍| 946/1000 [00:00<00:00, 1869.41it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1883.23it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 900.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 217.23it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13956.38it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.32010626792907715 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 
10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 99%|█████████▉| 989/1000 [00:00<00:00, 1978.29it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1968.44it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 950.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 228.56it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 14794.72it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.30414795875549316 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 83%|████████▎ | 829/1000 [00:00<00:00, 2063.72it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2068.06it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 1000.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 68/68 [00:00<00:00, 223.02it/s]\n", "calculate kernels: 100%|██████████| 68/68 [00:00<00:00, 13702.27it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 68 built in 0.3120880126953125 seconds ---\n", "[[ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " [ 8. 8. 8. ... 8. 8. 8.]\n", " ...\n", " [ 8. 8. 8. ... 10. 9. 
9.]\n", " [ 8. 8. 8. ... 9. 10. 10.]\n", " [ 8. 8. 8. ... 9. 10. 10.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 82%|████████▎ | 825/1000 [00:00<00:00, 2054.81it/s]\n", " Mean performance on train set: 0.732787\n", "With standard deviation: 0.082637\n", "\n", " Mean performance on test set: 0.900000\n", "With standard deviation: 0.111575\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 2022.62it/s]\n", "\n", "\n", " cycle_bound accur_test std_test accur_train std_train k_time\n", "------------- ------------ ---------- ------------- ----------- --------\n", " 0 0.642857 0.146385 0.54918 0.0167983 0.187052\n", " 50 0.871429 0.1 0.698361 0.116889 0.300629\n", " 100 0.9 0.111575 0.732787 0.0826366 0.309837\n", " 150 0.9 0.111575 0.732787 0.0826366 0.31808\n", " 200 0.9 0.111575 0.732787 0.0826366 0.317575\n", " 250 0.9 0.111575 0.732787 0.0826366 0.31923\n", " 300 0.9 0.111575 0.732787 0.0826366 0.336144\n", " 350 0.9 0.111575 0.732787 0.0826366 0.36545\n", " 400 0.9 0.111575 0.732787 0.0826366 0.314285\n", " 450 0.9 0.111575 0.732787 0.0826366 0.328732\n", " 500 0.9 0.111575 0.732787 0.0826366 0.334732\n", " 550 0.9 0.111575 0.732787 0.0826366 0.33418\n", " 600 0.9 0.111575 0.732787 0.0826366 0.325691\n", " 650 0.9 0.111575 0.732787 0.0826366 0.322969\n", " 700 0.9 0.111575 0.732787 0.0826366 0.333776\n", " 750 0.9 0.111575 0.732787 0.0826366 0.310934\n", " 800 0.9 0.111575 0.732787 0.0826366 0.311241\n", " 850 0.9 0.111575 0.732787 0.0826366 0.343922\n", " 900 0.9 0.111575 0.732787 0.0826366 0.320106\n", " 950 0.9 0.111575 0.732787 0.0826366 0.304148\n", " 1000 0.9 0.111575 0.732787 0.0826366 0.312088\n" ] } ], "source": [ "# MAO dataset (node labeled, edge labeled, undirected, cyclic + linear, classification)\n", "%load_ext line_profiler\n", "\n", "import sys\n", "sys.path.insert(0, \"../\")\n", "from pygraph.utils.utils import kernel_train_test\n", "from pygraph.kernels.cyclicPatternKernel import 
cyclicpatternkernel\n", "\n", "import numpy as np\n", "\n", "datafile = '../../../../datasets/MAO/dataset.ds'\n", "kernel_file_path = 'kernelmatrices_cyclicpattern_mao/'\n", "\n", "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n", "\n", "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n", " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 500, 21), normalize = False,\n", " model_type = 'classification')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The line_profiler extension is already loaded. To reload it, use:\n", " %reload_ext line_profiler\n", "\n", " --- This is a classification problem ---\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 0.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 176.07it/s]\n", "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 18331.07it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5411422252655029 seconds ---\n", "[[0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 
0.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 95%|█████████▌| 951/1000 [00:00<00:00, 1898.18it/s]\n", " Mean performance on train set: 0.629762\n", "With standard deviation: 0.013521\n", "\n", " Mean performance on test set: 0.610000\n", "With standard deviation: 0.113578\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1889.49it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 10.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 165.16it/s]\n", "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 16217.54it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5770719051361084 seconds ---\n", "[[0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 94%|█████████▍| 940/1000 [00:00<00:00, 1876.61it/s]\n", " Mean performance on train set: 0.629762\n", "With standard deviation: 0.013521\n", "\n", " Mean performance on test set: 0.610000\n", "With standard deviation: 0.113578\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1866.80it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 20.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 165.21it/s]\n", "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 16888.61it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5768516063690186 seconds ---\n", "[[3. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 
0.]\n", " [0. 0. 0. ... 0. 0. 0.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 93%|█████████▎| 926/1000 [00:00<00:00, 1837.36it/s]\n", " Mean performance on train set: 0.629762\n", "With standard deviation: 0.013521\n", "\n", " Mean performance on test set: 0.610000\n", "With standard deviation: 0.113578\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1841.13it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 30.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 171.51it/s]\n", "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 17701.46it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5560076236724854 seconds ---\n", "[[3. 3. 3. ... 0. 0. 0.]\n", " [3. 4. 4. ... 0. 0. 0.]\n", " [3. 4. 4. ... 0. 0. 0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 92%|█████████▏| 923/1000 [00:00<00:00, 1845.18it/s]\n", " Mean performance on train set: 0.633333\n", "With standard deviation: 0.015793\n", "\n", " Mean performance on test set: 0.640000\n", "With standard deviation: 0.111355\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1836.56it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 40.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 159.66it/s]\n", "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 17703.84it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.5963354110717773 seconds ---\n", "[[3. 3. 3. ... 0. 0. 0.]\n", " [3. 4. 4. ... 0. 0. 0.]\n", " [3. 4. 4. ... 0. 0. 0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 
0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 84%|████████▍ | 845/1000 [00:00<00:00, 1694.10it/s]\n", " Mean performance on train set: 0.633333\n", "With standard deviation: 0.015793\n", "\n", " Mean performance on test set: 0.640000\n", "With standard deviation: 0.111355\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1694.34it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 50.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 126.36it/s]\n", "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14863.89it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.7526798248291016 seconds ---\n", "[[3. 3. 3. ... 0. 0. 0.]\n", " [3. 4. 4. ... 0. 0. 0.]\n", " [3. 4. 4. ... 0. 0. 0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 84%|████████▍ | 842/1000 [00:00<00:00, 1670.86it/s]\n", " Mean performance on train set: 0.658333\n", "With standard deviation: 0.034524\n", "\n", " Mean performance on test set: 0.670000\n", "With standard deviation: 0.090000\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1665.11it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 60.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 94/94 [00:00<00:00, 107.33it/s]\n", "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 13937.03it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 94 built in 0.8846912384033203 seconds ---\n", "[[3. 3. 3. ... 0. 0. 0.]\n", " [3. 4. 4. ... 0. 0. 0.]\n", " [3. 4. 4. ... 0. 0. 
0.]\n", " ...\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]\n", " [0. 0. 0. ... 0. 0. 0.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 83%|████████▎ | 829/1000 [00:00<00:00, 1653.86it/s]\n", " Mean performance on train set: 0.671429\n", "With standard deviation: 0.036577\n", "\n", " Mean performance on test set: 0.680000\n", "With standard deviation: 0.107703\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1673.57it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 70.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 94/94 [00:01<00:00, 81.45it/s] \n", "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14275.64it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 94 built in 1.1631414890289307 seconds ---\n", "[[3. 3. 3. ... 3. 3. 3.]\n", " [3. 4. 4. ... 4. 4. 4.]\n", " [3. 4. 4. ... 4. 4. 4.]\n", " ...\n", " [3. 4. 4. ... 7. 7. 7.]\n", " [3. 4. 4. ... 7. 7. 7.]\n", " [3. 4. 4. ... 7. 7. 7.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 88%|████████▊ | 876/1000 [00:00<00:00, 1761.00it/s]\n", " Mean performance on train set: 0.666667\n", "With standard deviation: 0.038021\n", "\n", " Mean performance on test set: 0.670000\n", "With standard deviation: 0.100499\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1754.20it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 80.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n", "retrieve patterns: 100%|██████████| 94/94 [00:01<00:00, 79.93it/s] \n", "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14789.73it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 94 built in 1.1846554279327393 seconds ---\n", "[[3. 3. 3. ... 3. 3. 3.]\n", " [3. 4. 4. ... 4. 4. 
4.]\n", " [3. 4. 4. ... 4. 4. 4.]\n", " ...\n", " [3. 4. 4. ... 7. 7. 7.]\n", " [3. 4. 4. ... 7. 7. 7.]\n", " [3. 4. 4. ... 7. 7. 7.]]\n", "\n", " Starting calculate accuracy/rmse...\n", "calculate performance: 93%|█████████▎| 926/1000 [00:00<00:00, 1854.59it/s]\n", " Mean performance on train set: 0.709524\n", "With standard deviation: 0.058853\n", "\n", " Mean performance on test set: 0.780000\n", "With standard deviation: 0.107703\n", "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1844.77it/s]\n", "\n", "\n", " #--- calculating kernel matrix when cycle_bound = 90.0 ---#\n", "\n", " Loading dataset from file...\n", "\n", " Calculating kernel matrix, this could take a while...\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "retrieve patterns: 100%|██████████| 94/94 [00:01<00:00, 83.75it/s] \n", "calculate kernels: 100%|██████████| 94/94 [00:00<00:00, 14169.95it/s]\n", "\n", " --- kernel matrix of cyclic pattern kernel of size 94 built in 1.1314406394958496 seconds ---\n", "[[3. 3. 3. ... 3. 3. 3.]\n", " [3. 4. 4. ... 4. 4. 4.]\n", " [3. 4. 4. ... 4. 4. 4.]\n", " ...\n", " [3. 4. 4. ... 7. 7. 7.]\n", " [3. 4. 4. ... 7. 7. 7.]\n", " [3. 4. 4. ... 7. 7. 
7.]]\n",
      "\n",
      " Starting calculate accuracy/rmse...\n",
      "calculate performance: 94%|█████████▍| 943/1000 [00:00<00:00, 1878.69it/s]\n",
      " Mean performance on train set: 0.709524\n",
      "With standard deviation: 0.058853\n",
      "\n",
      " Mean performance on test set: 0.780000\n",
      "With standard deviation: 0.107703\n",
      "calculate performance: 100%|██████████| 1000/1000 [00:00<00:00, 1872.55it/s]\n",
      "\n",
      "\n",
      " cycle_bound accur_test std_test accur_train std_train k_time\n",
      "------------- ------------ ---------- ------------- ----------- --------\n",
      " 0 0.61 0.113578 0.629762 0.0135212 0.541142\n",
      " 10 0.61 0.113578 0.629762 0.0135212 0.577072\n",
      " 20 0.61 0.113578 0.629762 0.0135212 0.576852\n",
      " 30 0.64 0.111355 0.633333 0.0157935 0.556008\n",
      " 40 0.64 0.111355 0.633333 0.0157935 0.596335\n",
      " 50 0.67 0.09 0.658333 0.0345238 0.75268\n",
      " 60 0.68 0.107703 0.671429 0.0365769 0.884691\n",
      " 70 0.67 0.100499 0.666667 0.0380208 1.16314\n",
      " 80 0.78 0.107703 0.709524 0.0588534 1.18466\n",
      " 90 0.78 0.107703 0.709524 0.0588534 1.13144\n"
     ]
    }
   ],
   "source": [
    "# PAH dataset (node and edge unlabeled, undirected, cyclic, classification)\n",
    "%load_ext line_profiler\n",
    "\n",
    "import sys\n",
    "sys.path.insert(0, \"../\")\n",
    "from pygraph.utils.utils import kernel_train_test\n",
    "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "datafile = '../../../../datasets/PAH/dataset.ds'\n",
    "kernel_file_path = 'kernelmatrices_cyclicpattern_pah/'\n",
    "\n",
    "\n",
    "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = False)\n",
    "\n",
    "# Sweep the hyper-parameter cycle_bound over 0, 10, ..., 90 (np.linspace(0, 90, 10)).\n",
    "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n",
    " hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 90, 10), normalize = False, \\\n",
    " model_type = 'classification')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results\n",
    "\n",
    "Summary tables recorded from earlier runs of the MAO and PAH cells. `accur_*` / `std_*` are the mean performance and its standard deviation as printed by `kernel_train_test`; `k_time` is the time (in seconds) needed to build the kernel matrix.\n",
    "\n",
    "### MAO dataset\n",
    "\n",
    "| cycle_bound | accur_test | std_test | accur_train | std_train | k_time |\n",
    "|---:|---:|---:|---:|---:|---:|\n",
    "| 0 | 0.642857 | 0.146385 | 0.54918 | 0.0167983 | 0.187052 |\n",
    "| 50 | 0.871429 | 0.1 | 0.698361 | 0.116889 | 0.300629 |\n",
    "| 100 | 0.9 | 0.111575 | 0.732787 | 0.0826366 | 0.309837 |\n",
    "| 150 | 0.9 | 0.111575 | 0.732787 | 0.0826366 | 0.31808 |\n",
    "| 200 | 0.9 | 0.111575 | 0.732787 | 0.0826366 | 0.317575 |\n",
    "\n",
    "### PAH dataset\n",
    "\n",
    "| cycle_bound | accur_test | std_test | accur_train | std_train | k_time |\n",
    "|---:|---:|---:|---:|---:|---:|\n",
    "| 0 | 0.61 | 0.113578 | 0.629762 | 0.0135212 | 0.521801 |\n",
    "| 10 | 0.61 | 0.113578 | 0.629762 | 0.0135212 | 0.52589 |\n",
    "| 20 | 0.61 | 0.113578 | 0.629762 | 0.0135212 | 0.548528 |\n",
    "| 30 | 0.64 | 0.111355 | 0.633333 | 0.0157935 | 0.535311 |\n",
    "| 40 | 0.64 | 0.111355 | 0.633333 | 0.0157935 | 0.61764 |\n",
    "| 50 | 0.67 | 0.09 | 0.658333 | 0.0345238 | 0.733868 |\n",
    "| 60 | 0.68 | 0.107703 | 0.671429 | 0.0365769 | 0.871147 |\n",
    "| 70 | 0.67 | 0.100499 | 0.666667 | 0.0380208 | 1.12625 |\n",
    "| 80 | 0.78 | 0.107703 | 0.709524 | 0.0588534 | 1.19828 |\n",
    "| 90 | 0.78 | 0.107703 | 0.709524 | 0.0588534 | 1.21182 |"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      " --- This is a classification problem ---\n",
      "\n",
      "\n",
      " #--- calculating kernel matrix when cycle_bound = 1000.0 ---#\n",
      "\n",
      " Loading dataset from file...\n",
      "load SDF: 100%|██████████| 4457424/4457424 [00:10<00:00, 408299.51it/s]\n",
      "ajust data: 100%|██████████| 42687/42687 [00:10<00:00, 4092.17it/s] \n",
      "\n",
      " Calculating kernel matrix, this could take a while...\n",
      "retrieve patterns: 100%|██████████| 42682/42682 [19:36<00:00, 36.27it/s]\n",
      "calculate kernels: 100%|██████████| 42682/42682 [37:05<00:00, 19.18it/s] \n",
      "\n",
      " --- kernel matrix of cyclic pattern kernel of size 42682 built in 3402.171978712082 seconds ---\n",
      "[[ 9. 9. 3. ... 4. 3. 4.]\n",
      " [ 9. 11. 5. ... 6. 5. 6.]\n",
      " [ 3. 5. 16. ... 6. 6. 6.]\n",
      " ...\n",
      " [ 4. 6. 6. ... 30. 29. 6.]\n",
      " [ 3. 5. 6. ... 29. 29. 
6.]\n",
      " [ 4. 6. 6. ... 6. 6. 11.]]\n",
      "\n",
      " Starting calculate accuracy/rmse...\n",
      "calculate performance: 7%|▋ | 70/1000 [1:34:57<227:25:45, 880.37s/it]"
     ]
    }
   ],
   "source": [
    "# NCI-HIV dataset (labeled?, directed?, cyclic, classification)\n",
    "# NOTE: this is expensive. In the saved run the kernel matrix for the 42682 graphs took about\n",
    "# 3400 s to build, and the performance evaluation had only reached 7% after 1.5 h.\n",
    "%load_ext line_profiler\n",
    "\n",
    "import sys\n",
    "sys.path.insert(0, \"../\")\n",
    "from pygraph.utils.utils import kernel_train_test\n",
    "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "datafile = '../../../../datasets/NCI-HIV/AIDO99SD.sdf'\n",
    "datafile_y = '../../../../datasets/NCI-HIV/aids_conc_may04.txt'\n",
    "# Directory named after this kernel and data set, following the PAH cell's\n",
    "# 'kernelmatrices_cyclicpattern_pah/' (was the copy-pasted 'kernelmatrices_path_acyclic/').\n",
    "kernel_file_path = 'kernelmatrices_cyclicpattern_nci_hiv/'\n",
    "\n",
    "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True)\n",
    "\n",
    "# Sweep cycle_bound over 0, 50, ..., 1000 (np.linspace(0, 1000, 21)).\n",
    "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n",
    "                  hyper_name = 'cycle_bound', hyper_range = np.linspace(0, 1000, 21), normalize = False, \\\n",
    "                  datafile_y = datafile_y, model_type = 'classification')\n",
    "\n",
    "# Alternative: one run with a fixed cycle bound instead of the sweep.\n",
    "# kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = 200)\n",
    "# kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para,\n",
    "#                   normalize = False, datafile_y = datafile_y, model_type = 'classification')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The line_profiler extension is already loaded. To reload it, use:\n",
      " %reload_ext line_profiler\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       ""
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       ""
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      " --- kernel matrix of cyclic pattern kernel of size 999 built in 18.78946042060852 seconds ---\n",
      "(array([[11., 5., 5., ..., 6., 7., 3.],\n",
      " [ 5., 16., 6., ..., 5., 5., 3.],\n",
      " [ 5., 6., 8., ..., 4., 5., 3.],\n",
      " ...,\n",
      " [ 6., 5., 4., ..., 17., 7., 4.],\n",
      " [ 7., 5., 5., ..., 7., 15., 4.],\n",
      " [ 3., 3., 3., ..., 4., 4., 11.]]), 18.78946042060852)\n"
     ]
    }
   ],
   "source": [
    "# Quick check on an NCI-HIV subset: draw two sample graphs, then build the\n",
    "# cyclic pattern kernel matrix for graphs 1..999 only.\n",
    "%load_ext line_profiler\n",
    "\n",
    "import networkx as nx\n",
    "import matplotlib.pyplot as plt\n",
    "import sys\n",
    "sys.path.insert(0, \"../\")\n",
    "from pygraph.utils.graphfiles import loadDataset\n",
    "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n",
    "\n",
    "# Load the data in this cell so it also runs on a fresh kernel; it used to rely on a\n",
    "# `dataset` variable that no cell of the notebook defines.\n",
    "datafile = '../../../../datasets/NCI-HIV/AIDO99SD.sdf'\n",
    "datafile_y = '../../../../datasets/NCI-HIV/aids_conc_may04.txt'\n",
    "dataset, y = loadDataset(datafile, datafile_y)\n",
    "\n",
    "G1 = dataset[1]\n",
    "G2 = dataset[2]\n",
    "for graph in (G1, G2):\n",
    "    nx.draw_networkx(graph)\n",
    "    plt.show()\n",
    "\n",
    "# The call returns a (kernel matrix, run time) tuple, as the printed output shows.\n",
    "# NOTE(review): the slice starts at index 1, so graph 0 is left out -- confirm this is intended.\n",
    "kernel = cyclicpatternkernel(dataset[1:1000], cycle_bound = 1000)\n",
    "print(kernel)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      " --- This is a classification problem ---\n",
      "\n",
      "\n",
      " Loading dataset from file...\n",
      "\n",
      " Calculating kernel matrix, this could take a while...\n",
      "retrieve patterns: 
100%|██████████| 185/185 [00:00<00:00, 2064.69it/s]\n",
      "calculate kernels: 100%|██████████| 185/185 [00:00<00:00, 11170.00it/s]\n",
      "\n",
      " --- kernel matrix of cyclic pattern kernel of size 185 built in 0.10836505889892578 seconds ---\n",
      "[[0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " ...\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]]\n",
      "\n",
      " Starting calculate accuracy/rmse...\n",
      "calculate performance: 100%|██████████| 1000/1000 [00:24<00:00, 36.41it/s]\n",
      " Mean performance on train set: 0.018072\n",
      "With standard deviation: 0.000000\n",
      "\n",
      " Mean performance on test set: 0.000000\n",
      "With standard deviation: 0.000000\n",
      "\n",
      "\n",
      " accur_test std_test accur_train std_train k_time\n",
      "------------ ---------- ------------- ----------- --------\n",
      " 0 0 0.0180723 0 0.108365\n"
     ]
    }
   ],
   "source": [
    "# acyclic dataset (node labeled, edge labeled, undirected, linear + non-linear, regression)\n",
    "%load_ext line_profiler\n",
    "\n",
    "import sys\n",
    "sys.path.insert(0, \"../\")\n",
    "from pygraph.utils.utils import kernel_train_test\n",
    "from pygraph.kernels.cyclicPatternKernel import cyclicpatternkernel\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'\n",
    "# Directory named after this kernel, following the PAH cell's 'kernelmatrices_cyclicpattern_pah/'\n",
    "# (was 'kernelmatrices_path_acyclic/', which names a different kernel).\n",
    "kernel_file_path = 'kernelmatrices_cyclicpattern_acyclic/'\n",
    "\n",
    "kernel_para = dict(node_label = 'atom', edge_label = 'bond_type', labeled = True, cycle_bound = 200)\n",
    "\n",
    "# The header describes this data set as a regression task, so it is evaluated as one. The\n",
    "# saved run used model_type = 'classification' and reported a test accuracy of 0.\n",
    "# NOTE(review): 'regression' is assumed to be the value kernel_train_test expects -- confirm,\n",
    "# then re-run; the stored output of this cell still comes from the classification run.\n",
    "# NOTE(review): the stored kernel matrix is all zeros for this data set -- check whether the\n",
    "# cyclic pattern kernel is meaningful on acyclic graphs before relying on the result.\n",
    "kernel_train_test(datafile, kernel_file_path, cyclicpatternkernel, kernel_para, \\\n",
    "                  normalize = False, model_type = 'regression')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}