@@ -3,16 +3,24 @@ A python package for graph kernels. | |||
## Requirements | |||
numpy==1.15.1 | |||
python==3.6.5 | |||
numpy==1.15.2 | |||
scipy==1.1.0 | |||
matplotlib==2.2.2 | |||
networkx==2.1 | |||
scikit-learn==0.19.1 | |||
matplotlib==3.0.0 | |||
networkx==2.2 | |||
scikit-learn==0.20.0 | |||
tabulate==0.8.2 | |||
tqdm==4.23.4 | |||
control==0.7.0 (for generalized random walk kernels only) | |||
tqdm==4.26.0 | |||
suffix-tree==0.0.6 (for the path kernel up to length h only) | |||
control==0.8.0 (for generalized random walk kernels only) | |||
slycot===0.3.2.dev-5263ada (for generalized random walk kernels only, requires fortran compiler, gfortran for example) | |||
Modules better be upgraded. | |||
ipython==7.0.1 | |||
ipykernel==5.1.0
ipython-genutils==0.2.0
ipywidgets==7.4.2 | |||
## Results with minimal test RMSE for each kernel on dataset Acyclic
All kernels except for the cyclic pattern kernel are tested on the dataset Acyclic, which consists of 185 molecules (graphs). (The cyclic pattern kernel is tested on the datasets MAO and PAH.)
@@ -0,0 +1,40 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Test gram matrices. | |||
Created on Wed Sep 19 15:32:29 2018 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import matplotlib.pyplot as plt | |||
from numpy.linalg import eig | |||
# Read the precomputed gram matrices from file.
results_dir = 'results/marginalizedkernel/myria'
ds_name = 'MUTAG'
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')
# print('gm time: ', gmfile['gmtime'])
# The 'gms' entry holds the gram matrices that are checked one by one below.
gram_matrices = gmfile['gms']
# param_list_pre_revised = gmfile['params']  # list to store param grids precomputed ignoring the useless ones
# y = gmfile['y'].tolist()

# x = gram_matrices[0]
for x in gram_matrices:
    # Start from an empty figure: without this every matrix (and its
    # colorbar) is drawn on top of the previous ones before being saved.
    plt.clf()
    plt.imshow(x)
    plt.colorbar()
    # NOTE: the file name does not depend on the matrix, so the file ends up
    # holding the picture of the last matrix in ``gram_matrices``.
    plt.savefig('check_gm/' + ds_name + '.gm.eps', format='eps', dpi=300)

    # print(np.transpose(x))
    print('if symmetric: ', np.array_equal(x, np.transpose(x)))

    diag = np.diag(x)
    print('diag: ', diag)
    print('sum diag < 0.1: ', np.sum(diag < 0.1))
    print('min, max diag: ', min(diag), max(diag))
    print('mean x: ', np.mean(np.mean(x)))

    # Only the eigenvalues are needed; the eigenvectors are discarded.
    lamnda, _ = eig(x)
    min_lambda = min(lamnda)
    print('min, max lambda: ', min_lambda, max(lamnda))
    # A gram matrix should be positive semi-definite; tolerate round-off
    # noise down to -1e-10 and fail on anything clearly negative.
    if -1e-10 > min_lambda:
        raise Exception('wrong eigen values.')
@@ -0,0 +1,15 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.627451206897581 | |||
min, max lambda: (-3.1105220613135224e-15+0j) (118.43667381920945+0j) |
@@ -0,0 +1,14 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9275876964656552 | |||
min, max lambda: (-6.000888652084434e-15+0j) (140.45172231003968+0j) |
@@ -0,0 +1,65 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.47604228590012404 | |||
min, max lambda: 0.0001002381061317695 299.17895175532897 | |||
jstsp: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.8766424893590421 | |||
min, max lambda: (-4.04460628433013e-14+0j) (529.3691973508182+0j) |
@@ -0,0 +1,18 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. ... 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.40955939575371497 | |||
min, max lambda: 2.196833029054622e-10 943.9175660197347 | |||
jstsp: | |||
if symmetric: False | |||
diag: [1. 1. 1. ... 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: nan | |||
LinAlgError: Array must not contain infs or NaNs |
@@ -0,0 +1,22 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9449639567258331 | |||
min, max lambda: (-1.1563146193980238e-15+0j) (64.31844814063015+0j) | |||
jstsp: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9632407703418899 | |||
min, max lambda: (-7.502900269338164e-16-3.0606423294452126e-17j) (65.53092059526354+0j) |
@@ -0,0 +1,15 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.8881281122679946 | |||
min, max lambda: (-1.136106712787655e-14+0j) (177.00000000000003+0j) |
@@ -0,0 +1,13 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. ... 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.633201506393603 | |||
min, max lambda: (-3.568957793283026e-14-1.2061062712627808e-14j) (2848.6624188252435+0j) | |||
jstsp: | |||
@@ -0,0 +1,11 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9586479796584891 | |||
min, max lambda: (-1.3363208262303617e-15+0j) (90.2113226507757+0j) |
@@ -0,0 +1,32 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.7656098059302923 | |||
min, max lambda (-3.425001366427846e-15-2.236521917797024e-16j) (142.86649135778595+0j) | |||
jstsp: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9412011742906522 | |||
min, max lambda (-9.433792343294819e-15-2.5978926329723254e-15j) (172.4203026547106+0j) |
@@ -0,0 +1,32 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9372426749560228 | |||
min, max lambda (-5.513191435356332e-15+0j) (140.08307372708344+0j) | |||
jstsp: | |||
if symmetric: False | |||
diag: [nan 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: nan nan | |||
mean x: nan | |||
LinAlgError: Array must not contain infs or NaNs |
@@ -0,0 +1,65 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.47604228590012404 | |||
min, max lambda: 0.0001002381061317695 299.17895175532897 | |||
jstsp: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.8766424893590421 | |||
min, max lambda: (-4.04460628433013e-14+0j) (529.3691973508182+0j) |
@@ -0,0 +1,18 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. ... 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.40955939575371497 | |||
min, max lambda: 2.196833029054622e-10 943.9175660197347 | |||
jstsp: | |||
if symmetric: False | |||
diag: [1. 1. 1. ... 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: nan | |||
LinAlgError: Array must not contain infs or NaNs |
@@ -0,0 +1,22 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9449639567258331 | |||
min, max lambda: (-1.1563146193980238e-15+0j) (64.31844814063015+0j) | |||
jstsp: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9632407703418899 | |||
min, max lambda: (-7.502900269338164e-16-3.0606423294452126e-17j) (65.53092059526354+0j) |
@@ -0,0 +1,32 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.8501340720551299 | |||
min, max lambda: (-5.811474553224136e-15+0j) (160.72585558445357+0j) | |||
jstsp: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9591293919401286 | |||
min, max lambda: (-8.73626400337456e-15+0j) (180.374192331094+0j) |
@@ -0,0 +1,13 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. ... 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.633201506393603 | |||
min, max lambda: (-3.568957793283026e-14-1.2061062712627808e-14j) (2848.6624188252435+0j) | |||
jstsp: | |||
@@ -0,0 +1,24 @@ | |||
mine: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9825763767199961 | |||
min, max lambda: (-1.3999833987183273e-15+0j) (92.38382991977493+0j) | |||
jstsp: | |||
if symmetric: True | |||
diag: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. | |||
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] | |||
sum diag < 0.1: 0 | |||
min, max diag: 1.0 1.0 | |||
mean x: 0.9825763767199961 | |||
min, max lambda: (-1.3999833987183273e-15+0j) (92.38382991977493+0j) |
@@ -0,0 +1,134 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
compute gm with load_data.py and test them. | |||
Created on Wed Sep 19 16:12:13 2018 | |||
@author: ljia | |||
""" | |||
"""Shortest-Path graph kernel. | |||
Python implementation based on: "Shortest-path kernels on graphs", by | |||
Borgwardt, K.M.; Kriegel, H.-P., in Data Mining, Fifth IEEE | |||
International Conference on , vol., no., pp.8 pp.-, 27-30 Nov. 2005 | |||
doi: 10.1109/ICDM.2005.132 | |||
Author : Sandro Vega-Pons, Emanuele Olivetti | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
import numpy as np | |||
import networkx as nx | |||
from pygraph.utils.graphfiles import loadDataset | |||
import matplotlib.pyplot as plt | |||
from numpy.linalg import eig | |||
class GK_SP:
    """Shortest-path graph kernel.

    Two graphs are compared through the histograms of their pairwise
    shortest-path lengths, obtained with ``networkx.floyd_warshall_numpy``.
    """

    @staticmethod
    def _path_length_counts(graph):
        """Return the histogram of shortest-path lengths of ``graph``.

        Entry ``d`` of the returned array is the number of node pairs in the
        strict upper triangle of the Floyd-Warshall matrix whose path length,
        truncated to an integer, equals ``d``. Bin 0 also collects the
        zeroed-out entries and is ignored by :meth:`compare`.
        """
        fwm = np.array(nx.floyd_warshall_numpy(graph))
        # Unreachable pairs (inf) and undefined entries carry no path.
        # NaN has to be detected with isnan: ``fwm == np.nan`` is never true.
        fwm = np.where((fwm == np.inf) | np.isnan(fwm), 0, fwm)
        # Keep each unordered pair once and drop the diagonal.
        fwm = np.triu(fwm, k=1)
        # minlength=1 keeps bin 0 present even for a graph without nodes.
        return np.bincount(fwm.reshape(-1).astype(int), minlength=1)

    def compare(self, g_1, g_2, verbose=False):
        """Compute the kernel value (similarity) between two graphs.

        Parameters
        ----------
        g_1 : networkx.Graph
            First graph.
        g_2 : networkx.Graph
            Second graph.
        verbose : bool
            Unused; kept for interface compatibility.

        Returns
        -------
        k : The similarity value between g_1 and g_2, i.e. the dot product
            of their shortest-path length histograms (length 0 excluded).
        """
        bc1 = self._path_length_counts(g_1)
        bc2 = self._path_length_counts(g_2)
        # Path lengths that occur in only one graph contribute nothing to the
        # dot product, so only the common range of bins has to be multiplied.
        shared = min(len(bc1), len(bc2))
        return np.sum(bc1[1:shared].astype(float) * bc2[1:shared])

    def compare_normalized(self, g_1, g_2, verbose=False):
        """Compute the normalized kernel value between two graphs.

        A normalized version of the kernel is given by the equation:
        k_norm(g1, g2) = k(g1, g2) / sqrt(k(g1,g1) * k(g2,g2))

        Parameters
        ----------
        g_1 : networkx.Graph
            First graph.
        g_2 : networkx.Graph
            Second graph.
        verbose : bool
            Unused; kept for interface compatibility.

        Returns
        -------
        k : The normalized similarity value between g_1 and g_2. It is 0.0
            when either graph has zero self-similarity (the quotient would
            otherwise be NaN).
        """
        norm = np.sqrt(self.compare(g_1, g_1) * self.compare(g_2, g_2))
        if norm == 0:
            return 0.0
        return self.compare(g_1, g_2) / norm

    def compare_list(self, graph_list, verbose=False):
        """Compute the all-pairs normalized kernel values for a list of graphs.

        Every raw pairwise value is computed once and the self-similarities
        on the diagonal are reused for the normalization, which avoids the
        repeated self-comparisons of :meth:`compare_normalized`.

        Parameters
        ----------
        graph_list : list
            A list of graphs (list of networkx graphs).
        verbose : bool
            Unused; kept for interface compatibility.

        Return
        ------
        K : numpy.array, shape = (len(graph_list), len(graph_list))
            The normalized similarity matrix of all graphs in graph_list.
            Rows and columns of graphs with zero self-similarity are 0
            instead of NaN, so the matrix stays usable for eigen-analysis.
        """
        n = len(graph_list)
        k = np.zeros((n, n))
        for i in range(n):
            for j in range(i, n):
                # The kernel is symmetric: fill both triangles at once.
                k[i, j] = k[j, i] = self.compare(graph_list[i], graph_list[j])

        diag = np.diag(k)
        norms = np.sqrt(np.outer(diag, diag))
        k_norm = np.zeros(k.shape)
        # Divide only where the normalizer is non-zero; the other entries
        # keep their initial value 0.
        np.divide(k, norms, out=k_norm, where=norms > 0)
        return k_norm
# Build the normalized shortest-path gram matrix of one dataset with GK_SP,
# store it, draw it and print a few sanity statistics.
ds_name = 'PAH'
datafile = '../datasets/PAH/dataset.ds'
dataset, y = loadDataset(datafile, filename_y=None, extra_params=None)

gk_sp = GK_SP()
x = gk_sp.compare_list(dataset)
np.savez('check_gm/' + ds_name + '.gm.jstsp', gms=x)

# Picture of the matrix, saved next to the stored values.
plt.imshow(x)
plt.colorbar()
plt.savefig('check_gm/' + ds_name + '.gm.jstsp.eps', format='eps', dpi=300)

# Exact symmetry check against the transpose.
x_transposed = np.transpose(x)
print('if symmetric: ', np.array_equal(x, x_transposed))

# Statistics of the diagonal (self-similarities) and of the whole matrix.
x_diag = np.diag(x)
print('diag: ', x_diag)
print('sum diag < 0.1: ', np.sum(x_diag < 0.1))
print('min, max diag: ', min(x_diag), max(x_diag))
print('mean x: ', np.mean(np.mean(x)))

# Extreme eigenvalues of the gram matrix.
lamnda, v = eig(x)
print('min, max lambda: ', min(lamnda), max(lamnda))
@@ -0,0 +1,77 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Draw data size v.s. chunksize. | |||
Created on Mon Oct 8 12:00:45 2018 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import matplotlib.pyplot as plt | |||
def loadmin(file):
    """Load an array with ``np.load`` and locate its minimum.

    Returns a tuple ``(index, value, array)``: the index of the smallest
    entry (in the flattened array), the smallest entry itself, and the
    loaded array.
    """
    loaded = np.load(file)
    min_index = np.argmin(loaded)
    min_value = np.min(loaded)
    return min_index, min_value, loaded
def idx2chunksize(idx):
    """Translate a result index into the chunksize it stands for.

    The indices walk through 10, 20, ..., 90, then 100, 200, ..., 900, then
    1000, 2000, ..., 9000 and finally multiples of 10000.
    """
    # (exclusive upper index, index offset, step) of the first three ranges.
    ranges = ((9, -1, 10), (18, 8, 100), (27, 17, 1000))
    for upper, offset, step in ranges:
        if idx < upper:
            return (idx - offset) * step
    return (idx - 26) * 10000
def idx2chunksize2(idx):
    """Translate a result index into the chunksize it stands for.

    Five indices are used per order of magnitude: 10, 30, ..., 90, then
    100, 300, ..., 900, then 1000, 3000, ..., 9000, and from index 15 on
    10000, 30000, ...
    """
    if idx < 5:
        return idx * 20 + 10
    elif idx < 10:
        return (idx - 5) * 200 + 100
    elif idx < 15:
        return (idx - 10) * 2000 + 1000
    else:
        # Same "step * k + base" form as the ranges above. The former
        # ``* 20000 * 10000`` gave 0 for index 15 and 2e8 for index 16.
        return (idx - 15) * 20000 + 10000
# Find the minimum of the stored ENZYMES results and convert its index into
# a chunksize.
idx, mrlt, rlt = loadmin('test_parallel/myria/ENZYMES.npy')
csize = idx2chunksize2(idx)

# Dataset sizes n, turned into n * (n + 1) / 2 (presumably the number of
# graph pairs a gram matrix requires -- TODO confirm).
# A seventh dataset (size 600, chunksize 300 for both sp series) is left out.
dsize = np.array([183, 150, 68, 94, 188, 2250])
dsize = dsize * (dsize + 1) / 2

# Chunksizes recorded per dataset, in the same order as ``dsize``.
cs_sp_laptop = [900, 400, 70, 900, 2000, 8000]
cs_sp_myria = [900, 500, 500, 300, 400, 4000]
cd_ssp_laptop = [500, 700, 500, 70, 3000, 3000]
cd_ssp_myria = [100, 90, 700, 30, 3000, 5000]

# Reorder every series by increasing data size, then sort the sizes too.
order = np.argsort(dsize)
cs_sp_laptop = [cs_sp_laptop[i] for i in order]
cs_sp_myria = [cs_sp_myria[i] for i in order]
cd_ssp_laptop = [cd_ssp_laptop[i] for i in order]
cd_ssp_myria = [cd_ssp_myria[i] for i in order]
dsize = dsize[order]

# Element-wise mean over the four series.
cd_mean = np.mean(
    np.array([cs_sp_laptop, cs_sp_myria, cd_ssp_laptop, cd_ssp_myria]),
    axis=0)

# Plot the five smallest datasets only; the two laptop series are not drawn.
fig, ax = plt.subplots()
shown = slice(0, 5)
p2 = ax.plot(dsize[shown], cs_sp_myria[shown], 'o-', label='sp CRIANN')
p4 = ax.plot(dsize[shown], cd_ssp_myria[shown], 'o-', label='ssp CRIANN')
p5 = ax.plot(dsize[shown], cd_mean[shown], 'o-', label='mean')
# ax.set_xscale('log', nonposx='clip')
# ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('data sizes')
ax.set_ylabel('chunksizes for the fastest computation')
ax.legend(loc='upper left')
plt.show()
@@ -0,0 +1,64 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Compare gram matrices | |||
Created on Mon Sep 24 10:52:25 2018 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import matplotlib.pyplot as plt | |||
# Number of datasets (bar groups); must match the tick labels set below.
N = 7

# Smallest eigenvalue of the first set of gram matrices, one per dataset.
egmin = [-3.425001366427846e-15,
         -5.513191435356332e-15,
         -1.1563146193980238e-15,
         -1.3999833987183273e-15,
         -5.811474553224136e-15,
         2.196833029054622e-10,
         0.0001002381061317695]
egmin = np.abs(egmin)

# Smallest eigenvalue of the second set of gram matrices; NaN marks the
# datasets for which no value is available.
# ``np.nan`` is used because the ``np.NaN`` alias no longer exists in NumPy 2.
egmin2 = [-9.433792343294819e-15,
          np.nan,
          -7.502900269338164e-16,
          -1.3999833987183273e-15,
          -8.73626400337456e-15,
          np.nan,
          -4.04460628433013e-14]
egmin2 = np.abs(egmin2)

# Largest eigenvalue of the first set of gram matrices.
egmax = [142.86649135778595,
         140.08307372708344,
         64.31844814063015,
         92.38382991977493,
         160.72585558445357,
         943.9175660197347,
         299.17895175532897]

# Largest eigenvalue of the second set of gram matrices.
egmax2 = [172.4203026547106,
          np.nan,
          65.53092059526354,
          92.38382991977493,
          180.374192331094,
          np.nan,
          529.3691973508182]

fig, ax = plt.subplots()
ind = np.arange(N)  # the x locations for the groups
width = 0.20  # the width of the bars: can also be len(x) sequence
# For each set, the "max" bar is stacked on top of the "min" bar.
p1 = ax.bar(ind, egmin, width)
p2 = ax.bar(ind, egmax, width, bottom=egmin)
p3 = ax.bar(ind + width, egmin2, width)
p4 = ax.bar(ind + width, egmax2, width, bottom=egmin2)
# NOTE: ``nonposy`` matches the matplotlib 3.0.0 pinned in the README; later
# matplotlib releases renamed this keyword to ``nonpositive``.
ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('datasets')
ax.set_ylabel('absolute eigen values')
ax.set_title('Absolute eigen values of gram matrices on all datasets')
plt.xticks(ind + width / 2, ('Acyclic', 'Alkane', 'MAO', 'PAH', 'MUTAG', 'Letter-med', 'ENZYMES'))
# ax.set_yticks(np.logspace(-16, -3, num=20, base=10))
ax.set_ylim(bottom=1e-15)
ax.legend((p1[0], p2[0], p3[0], p4[0]), ('min1', 'max1', 'min2', 'max2'), loc='upper right')
plt.savefig('check_gm/compare_eigen_values.eps', format='eps', dpi=300)
plt.show()
@@ -0,0 +1,69 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Draw running time. | |||
Created on Mon Sep 24 17:37:26 2018 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import matplotlib.pyplot as plt | |||
# Number of datasets (bar groups) in the figure.
N = 6

# Timings in seconds, one value per dataset in the order of the x tick labels.
# The legend labels the *1 lists "CRIANN" and the *2 lists "laptop"; "gm" and
# "ms" are plotted as $t_{gm}$ and $t_{ms}$. The commented-out fourth value in
# each list has no matching tick label -- presumably the PAH dataset (confirm).
tgm1 = [
    3.68,
    2.24,
    3.34,
    # 0,
    20.00,
    2020.46,
    3198.84,
]
tgm2 = [
    4.29,
    3.35,
    5.78,
    # 11.21,
    40.58,
    3136.26,
    17222.21,
]
tms1 = [
    51.19,
    73.09,
    5.01,
    # 0,
    22.87,
    2211.97,
    3211.58,
]
tms2 = [
    65.16,
    53.02,
    10.32,
    # 1162.41,
    49.86,
    3931.68,
    17270.55,
]

fig, ax = plt.subplots()

ind = np.arange(N)  # the x locations for the groups
width = 0.30  # the width of the bars: can also be len(x) sequence

# Left bar of each group: CRIANN, with t_ms stacked on top of t_gm.
p1 = ax.bar(ind, tgm1, width, label='$t_{gm}$ CRIANN')
p2 = ax.bar(ind, tms1, width, bottom=tgm1, label='$t_{ms}$ CRIANN')
# Right bar of each group: laptop, shifted by one bar width.
p3 = ax.bar(ind + width, tgm2, width, label='$t_{gm}$ laptop')
p4 = ax.bar(ind + width, tms2, width, bottom=tgm2, label='$t_{ms}$ laptop')

ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('datasets')
ax.set_ylabel('runtime($s$)')
ax.set_title('Runtime of the shortest path kernel on all datasets')
# Centre each tick between the two bars of its group.
plt.xticks(
    ind + width / 2,
    ('Acyclic', 'Alkane', 'MAO', 'MUTAG', 'Letter-med', 'ENZYMES'))
#ax.set_yticks(np.logspace(-16, -3, num=20, base=10))
#ax.set_ylim(bottom=1e-15)
ax.legend(loc='upper left')

# Second y axis sharing the same x axis: element-wise ratio of the laptop
# t_gm over the CRIANN t_gm, drawn as a red line with circle markers.
ax2 = ax.twinx()
p1 = ax2.plot(
    ind + width / 2,
    np.array(tgm2) / np.array(tgm1),
    'ro-',
    label='$t_{gm}$ laptop / $t_{gm}$ CRIANN')
ax2.set_ylabel('ratios')
ax2.legend(loc='upper center')

plt.savefig('check_gm/compare_running_time.eps', format='eps', dpi=300)
plt.show()
@@ -2341,8 +2341,8 @@ | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"display_name": "Python 3 (Spyder)", | |||
"language": "python3", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
@@ -0,0 +1,85 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Fri Sep 28 17:01:13 2018 | |||
@author: ljia | |||
""" | |||
import functools | |||
from libs import * | |||
import multiprocessing | |||
from sklearn.metrics.pairwise import rbf_kernel | |||
from pygraph.kernels.commonWalkKernel import commonwalkkernel | |||
from pygraph.utils.kernels import deltakernel, kernelproduct | |||
# Datasets to run the kernel on. Every entry has a 'name' and the path of the
# 'dataset' file; 'task', 'dataset_y' and 'extra_params' are optional and are
# looked up by the run loop below. Commented-out entries are disabled.
dslist = [
    {
        'name': 'Acyclic',
        'dataset': '../datasets/acyclic/dataset_bps.ds',
        'task': 'regression',
    },  # node symb
    {
        'name': 'Alkane',
        'dataset': '../datasets/Alkane/dataset.ds',
        'task': 'regression',
        'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',
    },  # contains single node graph, node symb
    {
        'name': 'MAO',
        'dataset': '../datasets/MAO/dataset.ds',
    },  # node/edge symb
    {
        'name': 'PAH',
        'dataset': '../datasets/PAH/dataset.ds',
    },  # unlabeled
    {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG.mat',
        'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]},
    },  # node/edge symb
    {
        'name': 'Letter-med',
        'dataset': '../datasets/Letter-med/Letter-med_A.txt',
    },  # node symb/nsymb
    {
        'name': 'ENZYMES',
        'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt',
    },  # node/edge symb

    # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
    #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},  # node symb
    # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'},  # edge symb, node nsymb
    # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},  # node symb/nsymb
    # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},  # node symb/nsymb
    # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
    #
    # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},  # node symb
    # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},  # node symb
    # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},  # node symb/nsymb ,edge nsymb
    # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},  # node symb/nsymb, edge symb
    # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
    #  'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},  # node/edge symb

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
# Kernel function handed to the model selection.
estimator = commonwalkkernel
# Product of the delta and RBF kernels (not referenced again in this part of
# the script).
mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)

# Kernel parameter grids: one grid per compute method.
param_grid_precomputed = [
    {
        'compute_method': ['geo'],
        'weight': np.logspace(0, -10, num=11, base=10),
    },
    {
        'compute_method': ['exp'],
        'weight': range(0, 10),
    },
]

# Estimator grids: index 0 ('C') is used for classification tasks,
# index 1 ('alpha') for regression tasks.
param_grid = [
    {'C': np.logspace(-10, 10, num=41, base=10)},
    {'alpha': np.logspace(-10, 10, num=41, base=10)},
]

for ds in dslist:
    # Datasets without an explicit 'task' are treated as classification.
    task = ds.get('task', 'classification')
    estimator_grid = param_grid[1] if task == 'regression' else param_grid[0]

    print()
    print(ds['name'])
    model_selection_for_precomputed_kernel(
        ds['dataset'],
        estimator,
        param_grid_precomputed,
        estimator_grid,
        task,
        NUM_TRIALS=30,
        datafile_y=ds.get('dataset_y'),  # None when the dataset has no separate target file
        extra_params=ds.get('extra_params'),  # None when no extra loader parameters are given
        ds_name=ds['name'],
        n_jobs=multiprocessing.cpu_count(),
        read_gm_from_file=False)
    print()
@@ -1637,8 +1637,8 @@ | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"display_name": "Python 3 (Spyder)", | |||
"language": "python3", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
@@ -0,0 +1,81 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Fri Sep 28 18:58:47 2018 | |||
@author: ljia | |||
""" | |||
from libs import * | |||
import multiprocessing | |||
from pygraph.kernels.marginalizedKernel import marginalizedkernel | |||
# Datasets to run the kernel on. Every entry has a 'name' and the path of the
# 'dataset' file; 'task', 'dataset_y' and 'extra_params' are optional and are
# looked up by the run loop below. Commented-out entries are disabled.
dslist = [
    {
        'name': 'Acyclic',
        'dataset': '../datasets/acyclic/dataset_bps.ds',
        'task': 'regression',
    },  # node symb
    {
        'name': 'Alkane',
        'dataset': '../datasets/Alkane/dataset.ds',
        'task': 'regression',
        'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',
    },  # contains single node graph, node symb
    {
        'name': 'MAO',
        'dataset': '../datasets/MAO/dataset.ds',
    },  # node/edge symb
    {
        'name': 'PAH',
        'dataset': '../datasets/PAH/dataset.ds',
    },  # unlabeled
    {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG.mat',
        'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]},
    },  # node/edge symb
    {
        'name': 'Letter-med',
        'dataset': '../datasets/Letter-med/Letter-med_A.txt',
    },  # node symb/nsymb
    {
        'name': 'ENZYMES',
        'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt',
    },  # node/edge symb

    # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
    #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},  # node symb
    # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'},  # edge symb, node nsymb
    # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},  # node symb/nsymb
    # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},  # node symb/nsymb
    # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
    #
    # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},  # node symb
    # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},  # node symb
    # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},  # node symb/nsymb ,edge nsymb
    # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},  # node symb/nsymb, edge symb
    # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
    #  'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},  # node/edge symb

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
# Kernel function handed to the model selection.
estimator = marginalizedkernel

# Kernel parameter grid.
# 'p_quit' runs over 0.1, 0.2, ..., 0.9.
# 'n_iteration' runs over the even numbers 2..20, produced as floats by
# np.linspace -- NOTE(review): confirm marginalizedkernel accepts a float
# iteration count.
param_grid_precomputed = {
    'p_quit': np.linspace(0.1, 0.9, 9),
    'n_iteration': np.linspace(2, 20, 10),
    'remove_totters': [False],
}

# Estimator grids: index 0 ('C') is used for classification tasks,
# index 1 ('alpha') for regression tasks.
param_grid = [
    {'C': np.logspace(-10, 10, num=41, base=10)},
    {'alpha': np.logspace(-10, 10, num=41, base=10)},
]

for ds in dslist:
    # Datasets without an explicit 'task' are treated as classification.
    task = ds.get('task', 'classification')
    estimator_grid = param_grid[1] if task == 'regression' else param_grid[0]

    print()
    print(ds['name'])
    model_selection_for_precomputed_kernel(
        ds['dataset'],
        estimator,
        param_grid_precomputed,
        estimator_grid,
        task,
        NUM_TRIALS=30,
        datafile_y=ds.get('dataset_y'),  # None when the dataset has no separate target file
        extra_params=ds.get('extra_params'),  # None when no extra loader parameters are given
        ds_name=ds['name'],
        n_jobs=multiprocessing.cpu_count(),
        read_gm_from_file=False)
    print()
@@ -1039,8 +1039,8 @@ | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"display_name": "Python 3 (Spyder)", | |||
"language": "python3", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
@@ -1,27 +1,25 @@ | |||
# %load_ext line_profiler | |||
# %matplotlib inline | |||
import functools | |||
from libs import * | |||
import multiprocessing | |||
from sklearn.metrics.pairwise import rbf_kernel | |||
from pygraph.kernels.spKernel import spkernel, spkernel_do | |||
from pygraph.kernels.spKernel import spkernel | |||
from pygraph.utils.kernels import deltakernel, kernelproduct | |||
from pygraph.utils.model_selection_precomputed import trial_do | |||
#from pygraph.utils.model_selection_precomputed import trial_do | |||
dslist = [ | |||
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
# 'task': 'regression'}, # node symb | |||
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||
'task': 'regression'}, # node symb | |||
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # node symb/nsymb | |||
# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# # node/edge symb | |||
# node symb/nsymb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# node/edge symb | |||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | |||
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | |||
@@ -57,7 +55,7 @@ estimator = spkernel | |||
mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel) | |||
param_grid_precomputed = {'node_kernels': [ | |||
{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]} | |||
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, | |||
param_grid = [{'C': np.logspace(-10, 3, num=27, base=10)}, | |||
{'alpha': np.logspace(-10, 10, num=41, base=10)}] | |||
for ds in dslist: | |||
@@ -76,88 +74,4 @@ for ds in dslist: | |||
ds_name=ds['name'], | |||
n_jobs=multiprocessing.cpu_count(), | |||
read_gm_from_file=False) | |||
# %lprun -f trial_do -f spkernel -f spkernel_do -f model_selection_for_precomputed_kernel \ | |||
# model_selection_for_precomputed_kernel( \ | |||
# ds['dataset'], \ | |||
# estimator, \ | |||
# param_grid_precomputed, \ | |||
# (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), \ | |||
# (ds['task'] if 'task' in ds else 'classification'), \ | |||
# NUM_TRIALS=30, \ | |||
# datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), \ | |||
# extra_params=(ds['extra_params'] if 'extra_params' in ds else None), \ | |||
# ds_name=ds['name'], \ | |||
# n_jobs=multiprocessing.cpu_count()) | |||
print() | |||
# import functools | |||
# from libs import * | |||
# from pygraph.kernels.spKernel import spkernel | |||
# from pygraph.utils.kernels import deltakernel, kernelsum | |||
# from sklearn.metrics.pairwise import rbf_kernel | |||
# dslist = [ | |||
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', 'task': 'regression'}, # node symb | |||
# # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb | |||
# # {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds',}, # unlabeled | |||
# # {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds',}, # node/edge symb | |||
# # {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||
# # 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||
# # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||
# # 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt',}, # contains single node graph, node symb | |||
# # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, # node/edge symb | |||
# # {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'}, | |||
# # {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
# # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb | |||
# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb | |||
# # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb | |||
# # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'}, # node symb/nsymb | |||
# # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | |||
# # 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | |||
# # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb | |||
# # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat', | |||
# # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb | |||
# # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat', | |||
# # 'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}}, # node symb | |||
# # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf', | |||
# # 'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',}, # node/edge symb | |||
# # # not working below | |||
# # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',}, | |||
# # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',}, | |||
# # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',}, | |||
# # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',}, | |||
# ] | |||
# estimator = spkernel | |||
# mixkernel = functools.partial(kernelsum, deltakernel, rbf_kernel) | |||
# param_grid_precomputed = {'node_kernels': [{'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}]} | |||
# param_grid = [{'C': np.logspace(-10, 10, num = 41, base = 10)}, | |||
# {'alpha': np.logspace(-10, 10, num = 41, base = 10)}] | |||
# for ds in dslist: | |||
# print() | |||
# print(ds['name']) | |||
# model_selection_for_precomputed_kernel( | |||
# ds['dataset'], estimator, param_grid_precomputed, | |||
# (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), | |||
# (ds['task'] if 'task' in ds else 'classification'), NUM_TRIALS=30, | |||
# datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), | |||
# extra_params=(ds['extra_params'] if 'extra_params' in ds else None), | |||
# ds_name=ds['name']) | |||
# # %lprun -f spkernel \ | |||
# # model_selection_for_precomputed_kernel( \ | |||
# # ds['dataset'], estimator, param_grid_precomputed, \ | |||
# # (param_grid[1] if ('task' in ds and ds['task'] == 'regression') else param_grid[0]), \ | |||
# # (ds['task'] if 'task' in ds else 'classification'), NUM_TRIALS=30, \ | |||
# # datafile_y=(ds['dataset_y'] if 'dataset_y' in ds else None), \ | |||
# # extra_params=(ds['extra_params'] if 'extra_params' in ds else None)) | |||
# print() |
@@ -0,0 +1,86 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Fri Sep 28 16:37:29 2018 | |||
@author: ljia | |||
""" | |||
import functools | |||
from libs import * | |||
import multiprocessing | |||
from sklearn.metrics.pairwise import rbf_kernel | |||
from pygraph.kernels.structuralspKernel import structuralspkernel | |||
from pygraph.utils.kernels import deltakernel, kernelproduct | |||
# Datasets to run the kernel on. Every entry has a 'name' and the path of the
# 'dataset' file; 'task', 'dataset_y' and 'extra_params' are optional and are
# looked up by the run loop below. Commented-out entries are disabled.
dslist = [
    # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
    #  'task': 'regression'},  # node symb
    # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
    #  'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },  # contains single node graph, node symb
    {
        'name': 'MAO',
        'dataset': '../datasets/MAO/dataset.ds',
    },  # node/edge symb
    {
        'name': 'PAH',
        'dataset': '../datasets/PAH/dataset.ds',
    },  # unlabeled
    {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG.mat',
        'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]},
    },  # node/edge symb
    {
        'name': 'Letter-med',
        'dataset': '../datasets/Letter-med/Letter-med_A.txt',
    },  # node symb/nsymb
    {
        'name': 'ENZYMES',
        'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt',
    },  # node/edge symb
    {
        'name': 'Mutagenicity',
        'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt',
    },
    {
        'name': 'D&D',
        'dataset': '../datasets/D&D/DD.mat',
        'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]},
    },  # node symb

    # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'},  # edge symb, node nsymb
    # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},  # node symb/nsymb
    # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},  # node symb/nsymb
    # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
    #
    # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},  # node symb
    # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},  # node symb
    # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},  # node symb/nsymb ,edge nsymb
    # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},  # node symb/nsymb, edge symb
    # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
    #  'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},  # node/edge symb

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
# Kernel function handed to the model selection.
estimator = structuralspkernel
# Product of the delta and RBF kernels, used as the 'mix' sub-kernel below.
mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)

# Kernel parameter grid: a single candidate set of sub-kernels for nodes and
# one for edges, each keyed by 'symb', 'nsymb' and 'mix'.
param_grid_precomputed = {
    'node_kernels': [
        {'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel},
    ],
    'edge_kernels': [
        {'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel},
    ],
}

# Estimator grids: index 0 ('C') is used for classification tasks,
# index 1 ('alpha') for regression tasks.
param_grid = [
    {'C': np.logspace(-10, 10, num=41, base=10)},
    {'alpha': np.logspace(-10, 10, num=41, base=10)},
]

for ds in dslist:
    # Datasets without an explicit 'task' are treated as classification.
    task = ds.get('task', 'classification')
    estimator_grid = param_grid[1] if task == 'regression' else param_grid[0]

    print()
    print(ds['name'])
    model_selection_for_precomputed_kernel(
        ds['dataset'],
        estimator,
        param_grid_precomputed,
        estimator_grid,
        task,
        NUM_TRIALS=30,
        datafile_y=ds.get('dataset_y'),  # None when the dataset has no separate target file
        extra_params=ds.get('extra_params'),  # None when no extra loader parameters are given
        ds_name=ds['name'],
        n_jobs=multiprocessing.cpu_count(),
        read_gm_from_file=False)
    print()
@@ -5613,8 +5613,8 @@ | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"display_name": "Python 3 (Spyder)", | |||
"language": "python3", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
@@ -0,0 +1,84 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Fri Oct 5 19:19:33 2018 | |||
@author: ljia | |||
""" | |||
import functools | |||
from libs import * | |||
import multiprocessing | |||
from sklearn.metrics.pairwise import rbf_kernel | |||
from pygraph.kernels.untilHPathKernel import untilhpathkernel | |||
from pygraph.utils.kernels import deltakernel, kernelproduct | |||
# Datasets to run the kernel on. Every entry has a 'name' and the path of the
# 'dataset' file; 'task', 'dataset_y' and 'extra_params' are optional and are
# looked up by the run loop below. Commented-out entries are disabled; only
# ENZYMES is active here.
dslist = [
    # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
    #  'task': 'regression'},  # node symb
    # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
    #  'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },  # contains single node graph, node symb
    # {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', },  # node/edge symb
    # {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', },  # unlabeled
    # {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
    #  'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},  # node/edge symb
    # {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
    # # node symb/nsymb
    {
        'name': 'ENZYMES',
        'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt',
    },  # node/edge symb

    # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
    #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},  # node symb
    # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'},  # edge symb, node nsymb
    # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'},  # node symb/nsymb
    # # # {'name': 'COX2', 'dataset': '../datasets/COX2_txt/COX2_A_sparse.txt'},  # node symb/nsymb
    # {'name': 'Fingerprint', 'dataset': '../datasets/Fingerprint/Fingerprint_A.txt'},
    #
    # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'},  # node symb
    # # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'},  # node symb
    # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'},  # node symb/nsymb ,edge nsymb
    # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'PROTEINS_full', 'dataset': '../datasets/PROTEINS_full_txt/PROTEINS_full_A_sparse.txt'},  # node symb/nsymb
    # # {'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'},  # node symb/nsymb, edge symb
    # {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109.mat',
    #  'extra_params': {'am_sp_al_nl_el': [1, 1, 2, 0, -1]}},  # node symb
    # {'name': 'NCI-HIV', 'dataset': '../datasets/NCI-HIV/AIDO99SD.sdf',
    #  'dataset_y': '../datasets/NCI-HIV/aids_conc_may04.txt',},  # node/edge symb

    # # not working below
    # {'name': 'PTC_FM', 'dataset': '../datasets/PTC/Train/FM.ds',},
    # {'name': 'PTC_FR', 'dataset': '../datasets/PTC/Train/FR.ds',},
    # {'name': 'PTC_MM', 'dataset': '../datasets/PTC/Train/MM.ds',},
    # {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
]
# Kernel function handed to the model selection.
estimator = untilhpathkernel
# Product of the delta and RBF kernels (not referenced again in this part of
# the script).
mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)

# Kernel parameter grid.
# NOTE(review): np.linspace(7, 10, 10) yields ten evenly spaced values with a
# step of 1/3 (7.0, 7.33..., 7.66..., 8.0, ...), so most 'depth' candidates
# are non-integer. Confirm that untilhpathkernel accepts or truncates them;
# if integer depths were intended this grid needs revisiting.
param_grid_precomputed = {
    'depth': np.linspace(7, 10, 10),
    'k_func': ['tanimoto', 'MinMax'],
}

# Estimator grids: index 0 ('C') is used for classification tasks,
# index 1 ('alpha') for regression tasks.
param_grid = [
    {'C': np.logspace(-10, 10, num=41, base=10)},
    {'alpha': np.logspace(-10, 10, num=41, base=10)},
]

for ds in dslist:
    # Datasets without an explicit 'task' are treated as classification.
    task = ds.get('task', 'classification')
    estimator_grid = param_grid[1] if task == 'regression' else param_grid[0]

    print()
    print(ds['name'])
    model_selection_for_precomputed_kernel(
        ds['dataset'],
        estimator,
        param_grid_precomputed,
        estimator_grid,
        task,
        NUM_TRIALS=30,
        datafile_y=ds.get('dataset_y'),  # None when the dataset has no separate target file
        extra_params=ds.get('extra_params'),  # None when no extra loader parameters are given
        ds_name=ds['name'],
        n_jobs=multiprocessing.cpu_count(),
        read_gm_from_file=False)
    print()
@@ -2,6 +2,31 @@ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 7, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"[(1, {}), (2, {}), (3, {})]\n", | |||
"[(1, 2, {}), (2, 1, {}), (3, 1, {})]\n", | |||
"{2: {}}\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"import networkx as nx\n", | |||
"dg = nx.DiGraph()\n", | |||
"dg.add_nodes_from([1, 2, 3])\n", | |||
"dg.add_edges_from([(1, 2), (2, 1), (3, 1)])\n", | |||
"print(dg.nodes(data=True))\n", | |||
"print(dg.edges(data=True))\n", | |||
"print(dg[1])" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 2, | |||
"metadata": {}, | |||
"outputs": [], | |||
@@ -489,7 +514,7 @@ | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.5.2" | |||
"version": "3.6.5" | |||
} | |||
}, | |||
"nbformat": 4, | |||
@@ -0,0 +1,690 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Test of parallel, find the best parallel chunksize and iteration separation scheme. | |||
Created on Wed Sep 26 12:09:34 2018 | |||
@author: ljia | |||
""" | |||
import sys | |||
import time | |||
from itertools import combinations_with_replacement, product, combinations | |||
from functools import partial | |||
from multiprocessing import Pool | |||
from tqdm import tqdm | |||
import networkx as nx | |||
import numpy as np | |||
import functools | |||
from libs import * | |||
import multiprocessing | |||
from sklearn.metrics.pairwise import rbf_kernel | |||
from matplotlib import pyplot as plt | |||
from sklearn.model_selection import ParameterGrid | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.utils import getSPGraph, direct_product | |||
from pygraph.utils.graphdataset import get_dataset_attributes | |||
from pygraph.utils.graphfiles import loadDataset | |||
from pygraph.utils.kernels import deltakernel, kernelproduct | |||
def spkernel(*args,
             node_label='atom',
             edge_weight=None,
             node_kernels=None,
             n_jobs=None,
             chunksize=1):
    """Calculate shortest-path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        Node attribute name holding the symbolic node label.
    edge_weight : string
        Edge attribute name corresponding to the edge weight. It is only
        used when the first edge carrying it in the first graph holds a
        float or an int; otherwise the graphs are treated as unweighted.
    node_kernels: dict
        A dictionary of kernel functions for nodes, including 3 items:
        'symb' for symbolic node labels, 'nsymb' for non-symbolic node
        labels, 'mix' for both labels. The first 2 functions take two node
        labels as parameters, and the 'mix' function takes 4 parameters, a
        symbolic and a non-symbolic label for each of the two nodes. Each
        label is in form of 2-D dimension array (n_samples, n_features).
        Each function returns a number as the kernel value. Ignored when
        nodes are unlabeled.
    n_jobs : int
        Number of worker processes, passed to ``multiprocessing.Pool``.
    chunksize : int
        Chunk size passed to ``Pool.imap_unordered``.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the sp kernel between 2
        graphs. Graphs without edges are left out of it.
    run_time : float
        Wall-clock seconds spent building the matrix.
    idx : list of int
        Positions, in the input list, of the graphs that were kept.
    """
    # pre-process
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    weight = None
    if edge_weight is not None:
        try:
            some_weight = list(
                nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
        except IndexError:
            # No graph at all, or no edge of the first graph carries the
            # attribute: fall back to unweighted shortest paths.
            pass
        else:
            if isinstance(some_weight, (float, int)):
                weight = edge_weight
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'node_attr_dim', 'is_directed'],
        node_label=node_label)

    # remove graphs with no edges, as no sp can be found in their
    # structures, so the kernel between such a graph and itself will be zero.
    len_gn = len(Gn)
    kept = [(pos, G) for pos, G in enumerate(Gn)
            if nx.number_of_edges(G) != 0]
    idx = [pos for pos, _ in kept]
    # A fresh list: the caller's list is not touched when the graphs are
    # replaced by their shortest-path graphs below.
    Gn = [G for _, G in kept]
    if len(Gn) != len_gn:
        print('\n %d graphs are removed as they don\'t contain edges.\n' %
              (len_gn - len(Gn)))

    start_time = time.time()
    Kmatrix = np.zeros((len(Gn), len(Gn)))

    # The context manager terminates the workers if anything below raises,
    # so a failing run no longer leaves worker processes behind.
    with Pool(n_jobs) as pool:
        # get shortest path graphs of Gn
        getsp_partial = partial(wrap_getSPGraph, Gn, weight)
        for i, g in tqdm(
                pool.imap_unordered(getsp_partial, range(0, len(Gn)),
                                    chunksize),
                desc='getting sp graphs',
                file=sys.stdout):
            Gn[i] = g

        # ---- use pool.imap_unordered to parallel and track progress. ----
        do_partial = partial(spkernel_do, Gn, ds_attrs, node_label,
                             node_kernels)
        itr = combinations_with_replacement(range(0, len(Gn)), 2)
        for i, j, kernel in tqdm(
                pool.imap_unordered(do_partial, itr, chunksize),
                desc='calculating kernels',
                file=sys.stdout):
            # only the upper triangle is computed; mirror it.
            Kmatrix[i][j] = kernel
            Kmatrix[j][i] = kernel
        pool.close()
        pool.join()
        run_time = time.time() - start_time

    print(
        "\n --- shortest path kernel matrix of size %d built in %s seconds ---"
        % (len(Gn), run_time))

    return Kmatrix, run_time, idx
def spkernel_do(Gn, ds_attrs, node_label, node_kernels, ij):
    """Compute the shortest-path kernel between graphs ``Gn[ij[0]]`` and
    ``Gn[ij[1]]``.

    Every pair of edges (one from each graph) whose 'cost' attributes are
    equal contributes to the kernel: 1 when nodes are unlabeled, otherwise
    the product of the node kernels of the matched end nodes (both
    matchings are added for undirected graphs).

    Returns the tuple ``(ij[0], ij[1], kernel)``. A ``KeyError`` (missing
    label or attribute) stops the computation and whatever was accumulated
    so far is returned.
    """
    i, j = ij[0], ij[1]
    g1, g2 = Gn[i], Gn[j]
    Kmatrix = 0

    try:
        symbolic = ds_attrs['node_labeled']
        attributed = ds_attrs['node_attr_dim'] > 0

        # node unlabeled: simply count the edge pairs of equal cost.
        if not symbolic and not attributed:
            for e1, e2 in product(
                    g1.edges(data=True), g2.edges(data=True)):
                if e1[2]['cost'] == e2[2]['cost']:
                    Kmatrix += 1
            return i, j, Kmatrix

        # Node kernels of all node pairs are computed once up front
        # (method borrowed from FCSP).
        node_pairs = product(g1.nodes(data=True), g2.nodes(data=True))
        if symbolic and attributed:
            # node symb and non-symb labeled
            kn = node_kernels['mix']
            vk_dict = {
                (u, v): kn(du[node_label], dv[node_label],
                           [du['attributes']], [dv['attributes']])
                for (u, du), (v, dv) in node_pairs
            }
        elif symbolic:
            # node symb labeled
            kn = node_kernels['symb']
            vk_dict = {
                (u, v): kn(du[node_label], dv[node_label])
                for (u, du), (v, dv) in node_pairs
            }
        else:
            # node non-symb labeled
            kn = node_kernels['nsymb']
            vk_dict = {
                (u, v): kn([du['attributes']], [dv['attributes']])
                for (u, du), (v, dv) in node_pairs
            }

        # compute graph kernels
        directed = ds_attrs['is_directed']
        for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
            if e1[2]['cost'] != e2[2]['cost']:
                continue
            straight = vk_dict[(e1[0], e2[0])] * vk_dict[(e1[1], e2[1])]
            if directed:
                Kmatrix += straight
            else:
                # an undirected edge can be matched in both orientations.
                crossed = vk_dict[(e1[0], e2[1])] * vk_dict[(e1[1], e2[0])]
                Kmatrix += straight + crossed
    except KeyError:  # missing labels or attributes
        pass

    return i, j, Kmatrix
def wrap_getSPGraph(Gn, weight, i):
    """Worker wrapper around ``getSPGraph`` for graph ``Gn[i]``.

    The index is returned together with the result so that results consumed
    out of order (``imap_unordered``) can be put back at the right position.
    """
    sp_graph = getSPGraph(Gn[i], edge_weight=weight)
    return i, sp_graph
def commonwalkkernel(*args,
                     node_label='atom',
                     edge_label='bond_type',
                     n=None,
                     weight=1,
                     compute_method=None,
                     n_jobs=None,
                     chunksize=1):
    """Calculate common walk graph kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        Node attribute name used as node label.
    edge_label : string
        Edge attribute name used as edge label.
    n : int
        Not used by this implementation.
    weight : number
        Weight handed to the pairwise kernel (``beta`` of the exponential
        series, ``gamma`` of the geometric series).
    compute_method : string
        'exp' (exponential series) or 'geo' (geometric series), case
        insensitive.
    n_jobs : int
        Number of worker processes, passed to ``multiprocessing.Pool``.
    chunksize : int
        Chunk size passed to ``Pool.imap_unordered``.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the kernel between 2 graphs.
    run_time : float
        Wall-clock seconds spent building the matrix.

    Raises
    ------
    ValueError
        If ``compute_method`` is neither 'exp' nor 'geo'.
    """
    pair_kernels = {
        'exp': _commonwalkkernel_exp,  # direct product graph - exponential
        'geo': _commonwalkkernel_geo,  # direct product graph - geometric
    }
    # Validate before any worker is started, instead of failing later on an
    # unbound name (or on None.lower()).
    if (not isinstance(compute_method, str)
            or compute_method.lower() not in pair_kernels):
        raise ValueError(
            "compute_method must be 'exp' or 'geo', got %r"
            % (compute_method,))
    compute_method = compute_method.lower()

    # arrange all graphs in a list
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    Kmatrix = np.zeros((len(Gn), len(Gn)))
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
        node_label=node_label,
        edge_label=edge_label)
    # Unlabeled graphs get the dummy label '0' under the label names that
    # are passed on to the pairwise kernels.
    if not ds_attrs['node_labeled']:
        for G in Gn:
            nx.set_node_attributes(G, '0', node_label)
    if not ds_attrs['edge_labeled']:
        for G in Gn:
            nx.set_edge_attributes(G, '0', edge_label)
    if not ds_attrs['is_directed']:  # convert
        Gn = [G.to_directed() for G in Gn]

    start_time = time.time()

    # ---- use pool.imap_unordered to parallel and track progress. ----
    do_partial = partial(pair_kernels[compute_method], Gn, node_label,
                         edge_label, weight)
    itr = combinations_with_replacement(range(0, len(Gn)), 2)
    # The context manager terminates the workers if anything below raises.
    with Pool(n_jobs) as pool:
        for i, j, kernel in tqdm(
                pool.imap_unordered(do_partial, itr, chunksize),
                desc='calculating kernels',
                file=sys.stdout):
            # only the upper triangle is computed; mirror it.
            Kmatrix[i][j] = kernel
            Kmatrix[j][i] = kernel
        pool.close()
        pool.join()
        run_time = time.time() - start_time

    print(
        "\n --- kernel matrix of common walk kernel of size %d built in %s seconds ---"
        % (len(Gn), run_time))

    return Kmatrix, run_time
def _commonwalkkernel_exp(Gn, node_label, edge_label, beta, ij):
    """Calculate the common walk kernel between 2 graphs using the
    exponential series.

    Parameters
    ----------
    Gn : List of NetworkX graph
        Graphs to pick the pair from.
    node_label, edge_label : string
        Label names handed to ``direct_product``.
    beta : number
        Factor applied to the eigenvalues before exponentiation.
    ij : pair of int
        Positions in ``Gn`` of the two graphs.

    Return
    ------
    i, j, kernel : the two positions from ``ij`` and the kernel value.
    """
    i = ij[0]
    j = ij[1]
    g1 = Gn[i]
    g2 = Gn[j]

    # get tensor product / direct product
    gp = direct_product(g1, g2, node_label, edge_label)
    A = nx.adjacency_matrix(gp).todense()

    ew, ev = np.linalg.eig(A)
    D = np.zeros((len(ew), len(ew)))
    # A dedicated index is used here: reusing ``i`` would overwrite the
    # graph position that is returned to the caller.
    for k in range(len(ew)):
        D[k][k] = np.exp(beta * ew[k])
    # NOTE(review): ``*`` is meant as a matrix product, which presumably
    # relies on ``todense()`` returning ``np.matrix`` - confirm if the
    # networkx/scipy versions change.
    exp_D = ev * D * ev.T

    return i, j, exp_D.sum()
def _commonwalkkernel_geo(Gn, node_label, edge_label, gamma, ij):
    """Calculate the common walk kernel between 2 graphs using the geometric
    series.

    The kernel is the sum of the entries of ``(I - gamma * A)^-1`` where
    ``A`` is the adjacency matrix of the direct product of ``Gn[ij[0]]`` and
    ``Gn[ij[1]]``. ``nan`` is returned as kernel value when the inversion
    raises ``LinAlgError``.

    Returns the tuple ``(ij[0], ij[1], kernel)``.
    """
    i, j = ij[0], ij[1]

    # get tensor product / direct product
    gp = direct_product(Gn[i], Gn[j], node_label, edge_label)
    A = nx.adjacency_matrix(gp).todense()
    mat = np.identity(len(A)) - gamma * A
    try:
        total = mat.I.sum()
    except np.linalg.LinAlgError:
        total = np.nan
    return i, j, total
def compute_gram_matrices(datafile,
                          estimator,
                          param_grid_precomputed,
                          datafile_y=None,
                          extra_params=None,
                          ds_name='ds-unknown',
                          n_jobs=1,
                          chunksize=1):
    """Compute the gram matrices of a dataset for a grid of kernel
    parameters and report the average computation time.

    Parameters
    ----------
    datafile : string
        Path of dataset file.
    estimator : function
        kernel function used to estimate. It is called as
        ``estimator(dataset, **params)`` and needs to return a tuple whose
        first item is a gram matrix and whose second item is its run time.
    param_grid_precomputed : dictionary
        Dictionary with names (string) of parameters used to calculate gram
        matrices as keys and lists of parameter settings to try as values.
        This enables searching over any sequence of parameter settings.
    datafile_y : string
        Path of file storing y data. This parameter is optional depending
        on the given dataset file.
    extra_params : dict
        Extra parameters forwarded to ``loadDataset``.
    ds_name : string
        Name of the dataset. Not used by this function.
    n_jobs : int
        Added to every parameter setting as 'n_jobs'.
    chunksize : int
        Added to every parameter setting as 'chunksize'.

    Return
    ------
    average_gram_matrix_time : float
        Mean of the run times reported by ``estimator`` over the grid.
    """
    tqdm.monitor_interval = 0

    # Load the dataset. The targets are not needed to time the kernels.
    dataset, _ = loadDataset(
        datafile, filename_y=datafile_y, extra_params=extra_params)

    # Grid of parameters with a discrete number of values for each.
    param_list_precomputed = list(ParameterGrid(param_grid_precomputed))

    gram_matrix_time = []  # time to calculate each gram matrix

    # calculate all gram matrices
    for params_out in param_list_precomputed:
        params_out['n_jobs'] = n_jobs
        params_out['chunksize'] = chunksize
        # Some kernels return a third item (indices of the graphs they
        # kept); it is only useful to trim the targets and is ignored here.
        rtn_data = estimator(dataset, **params_out)
        Kmatrix = rtn_data[0]
        current_run_time = rtn_data[1]

        Kmatrix_diag = Kmatrix.diagonal().copy()
        # remove graphs whose kernels with themselves are zeros, and drop
        # the matching diagonal entries so that the normalization below
        # stays aligned with the remaining rows and columns.
        keep = Kmatrix_diag != 0
        if not keep.all():
            Kmatrix = Kmatrix[np.ix_(keep, keep)]
            Kmatrix_diag = Kmatrix_diag[keep]
        # normalization: K[i][j] / sqrt(K[i][i] * K[j][j]).
        # The normalized matrix is not used further in this benchmark.
        Kmatrix = Kmatrix / np.sqrt(np.outer(Kmatrix_diag, Kmatrix_diag))

        gram_matrix_time.append(current_run_time)

    average_gram_matrix_time = np.mean(gram_matrix_time)

    return average_gram_matrix_time
def structuralspkernel(*args,
                       node_label='atom',
                       edge_weight=None,
                       edge_label='bond_type',
                       node_kernels=None,
                       edge_kernels=None,
                       n_jobs=None,
                       chunksize=1):
    """Calculate mean average structural shortest path kernels between
    graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    /
    G1, G2 : NetworkX graphs
        2 graphs between which the kernel is calculated.
    node_label : string
        Node attribute name used as symbolic node label.
    edge_weight : string
        Edge attribute name corresponding to the edge weight. It is only
        used when the first edge carrying it in the first graph holds a
        float or an int; otherwise all weights are taken as 1.
    edge_label : string
        Edge attribute name used as symbolic edge label.
    node_kernels, edge_kernels : dict
        Kernel functions for nodes / edges under the keys 'symb', 'nsymb'
        and 'mix'.
    n_jobs : int
        Number of worker processes, passed to ``multiprocessing.Pool``.
    chunksize : int
        Chunk size passed to ``Pool.imap_unordered``.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the kernel between 2 graphs.
    run_time : float
        Wall-clock seconds spent building the matrix.
    """
    # pre-process
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    weight = None
    if edge_weight is None:
        print('\n None edge weight specified. Set all weight to 1.\n')
    else:
        try:
            some_weight = list(
                nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
        except IndexError:
            # No graph at all, or no edge of the first graph carries the
            # attribute.
            print(
                '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n'
                % edge_weight)
        else:
            if isinstance(some_weight, (float, int)):
                weight = edge_weight
            else:
                print(
                    '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n'
                    % edge_weight)
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
                    'edge_attr_dim', 'is_directed'],
        node_label=node_label, edge_label=edge_label)

    start_time = time.time()

    splist = [[] for _ in range(len(Gn))]
    Kmatrix = np.zeros((len(Gn), len(Gn)))

    # The context manager terminates the workers if anything below raises,
    # so a failing run no longer leaves worker processes behind.
    with Pool(n_jobs) as pool:
        # get shortest paths of each graph in Gn
        getsp_partial = partial(wrap_getSP, Gn, weight,
                                ds_attrs['is_directed'])
        for i, sp in tqdm(
                pool.imap_unordered(getsp_partial, range(0, len(Gn)),
                                    chunksize),
                desc='getting shortest paths',
                file=sys.stdout):
            splist[i] = sp

        # ---- use pool.imap_unordered to parallel and track progress. ----
        do_partial = partial(structuralspkernel_do, Gn, splist, ds_attrs,
                             node_label, edge_label, node_kernels,
                             edge_kernels)
        itr = combinations_with_replacement(range(0, len(Gn)), 2)
        for i, j, kernel in tqdm(
                pool.imap_unordered(do_partial, itr, chunksize),
                desc='calculating kernels',
                file=sys.stdout):
            # only the upper triangle is computed; mirror it.
            Kmatrix[i][j] = kernel
            Kmatrix[j][i] = kernel
        pool.close()
        pool.join()
        run_time = time.time() - start_time

    print(
        "\n --- shortest path kernel matrix of size %d built in %s seconds ---"
        % (len(Gn), run_time))

    return Kmatrix, run_time
def _structuralsp_node_kernels(g1, g2, ds_attrs, node_label, node_kernels):
    """Return ``{(n1, n2): node kernel}`` for all node pairs of g1 x g2;
    empty when nodes carry neither symbolic labels nor attributes."""
    symbolic = ds_attrs['node_labeled']
    attributed = ds_attrs['node_attr_dim'] > 0
    pairs = product(g1.nodes(data=True), g2.nodes(data=True))
    if symbolic and attributed:
        # node symb and non-symb labeled
        kn = node_kernels['mix']
        return {(n1, n2): kn(d1[node_label], d2[node_label],
                             [d1['attributes']], [d2['attributes']])
                for (n1, d1), (n2, d2) in pairs}
    if symbolic:
        # node symb labeled
        kn = node_kernels['symb']
        return {(n1, n2): kn(d1[node_label], d2[node_label])
                for (n1, d1), (n2, d2) in pairs}
    if attributed:
        # node non-symb labeled
        kn = node_kernels['nsymb']
        return {(n1, n2): kn([d1['attributes']], [d2['attributes']])
                for (n1, d1), (n2, d2) in pairs}
    # node unlabeled
    return {}


def _structuralsp_edge_kernels(g1, g2, ds_attrs, edge_label, edge_kernels):
    """Return ``{((u1, v1), (u2, v2)): edge kernel}`` for all edge pairs of
    g1 x g2; empty when edges carry neither symbolic labels nor attributes."""
    symbolic = ds_attrs['edge_labeled']
    attributed = ds_attrs['edge_attr_dim'] > 0
    if symbolic and attributed:
        # edge symb and non-symb labeled
        ke = edge_kernels['mix']

        def value(d1, d2):
            return ke(d1[edge_label], d2[edge_label],
                      [d1['attributes']], [d2['attributes']])
    elif symbolic:
        # edge symb labeled
        ke = edge_kernels['symb']

        def value(d1, d2):
            return ke(d1[edge_label], d2[edge_label])
    elif attributed:
        # edge non-symb labeled: the *edge* kernel must be used here.
        ke = edge_kernels['nsymb']

        def value(d1, d2):
            return ke([d1['attributes']], [d2['attributes']])
    else:
        # edge unlabeled
        return {}

    # Shortest paths of undirected graphs are walked in both directions,
    # while each undirected edge is listed in one orientation only, so the
    # reversed orientations are registered as well. A missing 'is_directed'
    # entry keeps the one-orientation behavior.
    undirected = not ds_attrs.get('is_directed', True)
    ek_dict = {}
    for (u1, v1, d1), (u2, v2, d2) in product(
            g1.edges(data=True), g2.edges(data=True)):
        ek = value(d1, d2)
        ek_dict[((u1, v1), (u2, v2))] = ek
        if undirected:
            ek_dict[((v1, u1), (u2, v2))] = ek
            ek_dict[((u1, v1), (v2, u2))] = ek
            ek_dict[((v1, u1), (v2, u2))] = ek
    return ek_dict


def _structuralsp_path_kernel(p1, p2, vk_dict, ek_dict):
    """Kernel between two paths of equal length: product of the kernels of
    the nodes and edges met along them (an empty dict means "unlabeled",
    i.e. a factor of 1)."""
    kpath = vk_dict[(p1[0], p2[0])] if vk_dict else 1
    for idx in range(1, len(p1)):
        if not kpath:
            break  # the product can only stay zero
        step = vk_dict[(p1[idx], p2[idx])] if vk_dict else 1
        if ek_dict:
            step = step * ek_dict[((p1[idx - 1], p1[idx]),
                                   (p2[idx - 1], p2[idx]))]
        kpath *= step
    return kpath


def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label,
                          node_kernels, edge_kernels, ij):
    """Compute the mean average structural shortest path kernel between
    graphs ``Gn[ij[0]]`` and ``Gn[ij[1]]``.

    Parameters
    ----------
    Gn : List of NetworkX graph
        Graphs to pick the pair from.
    splist : list
        ``splist[k]`` is the list of paths (lists of nodes) of ``Gn[k]``.
    ds_attrs : dict
        Needs 'node_labeled', 'node_attr_dim', 'edge_labeled' and
        'edge_attr_dim'; 'is_directed' tells whether edge kernels must be
        available in both orientations.
    node_label, edge_label : string
        Attribute names of the symbolic node / edge labels.
    node_kernels, edge_kernels : dict
        Kernel functions under the keys 'symb', 'nsymb' and 'mix'.
    ij : pair of int
        Positions in ``Gn`` of the two graphs.

    Return
    ------
    iglobal, jglobal, kernel : the two positions from ``ij`` and the sum of
    the kernels of all pairs of equal-length paths divided by the number of
    path pairs. The kernel is 0 when a label or attribute is missing.
    """
    iglobal = ij[0]
    jglobal = ij[1]
    g1 = Gn[iglobal]
    g2 = Gn[jglobal]
    spl1 = splist[iglobal]
    spl2 = splist[jglobal]
    kernel = 0

    try:
        # First, compute kernels between all pairs of nodes, method
        # borrowed from FCSP.
        vk_dict = _structuralsp_node_kernels(g1, g2, ds_attrs, node_label,
                                             node_kernels)
        # Then, compute kernels between all pairs of edges, which idea is
        # an extension of FCSP. It suits sparse graphs; for dense graphs,
        # it would be slow.
        ek_dict = _structuralsp_edge_kernels(g1, g2, ds_attrs, edge_label,
                                             edge_kernels)

        # compute graph kernels: add up kernels of all paths. The running
        # sum is kept apart so that a KeyError cannot leave a partial,
        # un-normalized value in ``kernel``.
        total = 0
        for p1, p2 in product(spl1, spl2):
            if len(p1) == len(p2):
                total += _structuralsp_path_kernel(p1, p2, vk_dict, ek_dict)

        kernel = total / (len(spl1) * len(spl2))  # calculate mean average
    except KeyError:  # missing labels or attributes
        pass

    return iglobal, jglobal, kernel
def get_shortest_paths(G, weight, directed):
    """Collect one shortest path per connected pair of nodes of a graph.

    Parameters
    ----------
    G : NetworkX graph
        The graph.
    weight : string/None
        Edge attribute used as weight, forwarded to ``nx.shortest_path``.
    directed : boolean
        When false, each path is also stored reversed.

    Return
    ------
    List of paths (lists of nodes), followed by every single node as a
    one-node path.
    """
    paths = []
    for source, target in combinations(G.nodes(), 2):
        try:
            path = nx.shortest_path(G, source, target, weight=weight)
        except nx.NetworkXNoPath:  # nodes not connected
            continue
        paths.append(path)
        # each edge walk is counted twice, starting from both its extreme
        # nodes.
        if not directed:
            paths.append(path[::-1])

    # add single nodes as length 0 paths.
    paths.extend([node] for node in G.nodes())
    return paths
def wrap_getSP(Gn, weight, directed, i):
    """Worker wrapper around ``get_shortest_paths`` for graph ``Gn[i]``.

    The index is returned together with the paths so that results consumed
    out of order (``imap_unordered``) can be stored at the right position.
    """
    paths = get_shortest_paths(Gn[i], weight, directed)
    return i, paths
# Datasets to benchmark. 'dataset_y' and 'extra_params' are optional and
# forwarded to compute_gram_matrices when present.
dslist = [
    {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
     'task': 'regression'},  # node symb
    {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },  # contains single node graph, node symb
    {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', },  # node/edge symb
    {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', },  # unlabeled
    {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}},  # node/edge symb
    {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
    # node symb/nsymb
    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
    # node/edge symb
    {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
    {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
     'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}},  # node symb
]


def main():
    """Time the chosen kernel on every dataset for a range of parallel
    chunk sizes, saving the timings and a runtime plot per dataset."""
    import os  # local import: only needed to prepare the output directory

    out_dir = 'test_parallel/'
    # Make sure the results can be written before the long computations.
    os.makedirs(out_dir, exist_ok=True)

    _, ax = plt.subplots()
    # NOTE(review): 'nonposx'/'nonposy' are the keyword names of the
    # matplotlib version this project pins; newer releases renamed them.
    ax.set_xscale('log', nonposx='clip')
    ax.set_yscale('log', nonposy='clip')
    ax.set_xlabel('parallel chunksize')
    ax.set_ylabel('runtime($s$)')
    ax.set_title('Runtime of the sp kernel on all datasets V.S. parallel chunksize')

    estimator = structuralspkernel

    # Parameter grid of each kernel that can be benchmarked.
    mixkernel = functools.partial(kernelproduct, deltakernel, rbf_kernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': rbf_kernel, 'mix': mixkernel}
    param_grids = {
        'spkernel': {'node_kernels': [dict(sub_kernels)]},
        'commonwalkkernel': {'compute_method': ['geo'],
                             'weight': [1]},
        'structuralspkernel': {'node_kernels': [dict(sub_kernels)],
                               'edge_kernels': [dict(sub_kernels)]},
    }
    param_grid_precomputed = param_grids[estimator.__name__]

    chunklist = list(range(10, 100, 20)) + list(range(100, 1000, 200)) + \
        list(range(1000, 10000, 2000)) + list(range(10000, 100000, 20000))
    # one row of timings per dataset, one column per chunk size
    gmtmat = np.zeros((len(dslist), len(chunklist)))

    for idx1, ds in enumerate(dslist):
        print()
        print(ds['name'])

        for idx2, cs in enumerate(chunklist):
            print(ds['name'], idx2, cs)
            gmtmat[idx1][idx2] = compute_gram_matrices(
                ds['dataset'],
                estimator,
                param_grid_precomputed,
                datafile_y=ds.get('dataset_y'),
                extra_params=ds.get('extra_params'),
                ds_name=ds['name'],
                n_jobs=multiprocessing.cpu_count(),
                chunksize=cs)

        print()
        print(gmtmat[idx1, :])
        np.save(out_dir + estimator.__name__ + '.' + ds['name'],
                gmtmat[idx1, :])

        # The figure is cumulative: each saved file also shows the curves
        # of the datasets processed before.
        ax.plot(chunklist, gmtmat[idx1, :], '.-', label=ds['name'])
        ax.legend(loc='upper center')
        plt.savefig(out_dir + estimator.__name__ + str(idx1) + '.eps',
                    format='eps', dpi=300)
    # plt.show()


# Worker processes may import this module; the guard keeps them from
# starting the whole benchmark again.
if __name__ == '__main__':
    main()
@@ -0,0 +1,15 @@ | |||
0 100 12315.039321660995 | |||
1 200 12115.199783325195 | |||
2 300 10830.247281551361 | |||
3 500 10983.445399045944 | |||
4 700 10847.954899311066 | |||
5 900 10847.967393398285 | |||
6 1000 10858.62141251564 | |||
7 3000 11041.101693153381 | |||
8 5000 11311.387048959732 | |||
9 7000 11937.84876036644 | |||
10 9000 11969.92341041565 | |||
11 10000 12337.565557003021 | |||
12 30000 13055.397030115128 | |||
13 50000 14964.71178483963 | |||
14 70000 20204.410992860794 |
@@ -0,0 +1,14 @@ | |||
0 10 1045.7502884864807 | |||
1 30 1042.6204540729523 | |||
2 50 1058.7516617774963 | |||
3 70 10983.445399045944 | |||
4 90 | |||
5 100 1045.951178073883 | |||
6 300 1046.520814895629 | |||
7 500 1080.4295434951782 | |||
8 700 1062.4622604846954 | |||
9 900 1105.4361708164215 | |||
10 1000 1090.1234941482544 | |||
11 3000 1175.5646018981934 | |||
12 5000 993.7158119678497 | |||
13 7000 |
@@ -0,0 +1,14 @@ | |||
0 100 1044301 5:12 | |||
1 30 | |||
2 50 4889.178356409073 | |||
3 70 5086.932644605637 | |||
4 90 5096.774455308914 | |||
5 100 5189.769321680069 | |||
6 300 5199.769321680069 | |||
7 500 | |||
8 700 5206.7741804122925 | |||
9 900 5203.689619779587 | |||
10 1000 | |||
11 3000 | |||
12 5000 | |||
@@ -0,0 +1,14 @@ | |||
0 10 5143.154480934143 | |||
1 30 | |||
2 50 4889.178356409073 | |||
3 70 5086.932644605637 | |||
4 90 5096.774455308914 | |||
5 100 5189.769321680069 | |||
6 300 5199.769321680069 | |||
7 500 | |||
8 700 5206.7741804122925 | |||
9 900 5203.689619779587 | |||
10 1000 | |||
11 3000 | |||
12 5000 | |||