|
|
@@ -0,0 +1,134 @@ |
|
|
|
#!/usr/bin/env python3 |
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
""" |
|
|
|
compute gm with load_data.py and test them. |
|
|
|
Created on Wed Sep 19 16:12:13 2018 |
|
|
|
|
|
|
|
@author: ljia |
|
|
|
""" |
|
|
|
|
|
|
|
"""Shortest-Path graph kernel. |
|
|
|
Python implementation based on: "Shortest-path kernels on graphs", by |
|
|
|
Borgwardt, K.M.; Kriegel, H.-P., in Data Mining, Fifth IEEE |
|
|
|
International Conference on , vol., no., pp.8 pp.-, 27-30 Nov. 2005 |
|
|
|
doi: 10.1109/ICDM.2005.132 |
|
|
|
Author : Sandro Vega-Pons, Emanuele Olivetti |
|
|
|
""" |
|
|
|
|
|
|
|
import sys |
|
|
|
sys.path.insert(0, "../../") |
|
|
|
import numpy as np |
|
|
|
import networkx as nx |
|
|
|
from gklearn.utils.graphfiles import loadDataset |
|
|
|
import matplotlib.pyplot as plt |
|
|
|
from numpy.linalg import eig |
|
|
|
|
|
|
|
|
|
|
|
class GK_SP: |
|
|
|
""" |
|
|
|
Shorthest path graph kernel. |
|
|
|
""" |
|
|
|
|
|
|
|
def compare(self, g_1, g_2, verbose=False): |
|
|
|
"""Compute the kernel value (similarity) between two graphs. |
|
|
|
Parameters |
|
|
|
---------- |
|
|
|
g1 : networkx.Graph |
|
|
|
First graph. |
|
|
|
g2 : networkx.Graph |
|
|
|
Second graph. |
|
|
|
Returns |
|
|
|
------- |
|
|
|
k : The similarity value between g1 and g2. |
|
|
|
""" |
|
|
|
# Diagonal superior matrix of the floyd warshall shortest |
|
|
|
# paths: |
|
|
|
fwm1 = np.array(nx.floyd_warshall_numpy(g_1)) |
|
|
|
fwm1 = np.where(fwm1 == np.inf, 0, fwm1) |
|
|
|
fwm1 = np.where(fwm1 == np.nan, 0, fwm1) |
|
|
|
fwm1 = np.triu(fwm1, k=1) |
|
|
|
bc1 = np.bincount(fwm1.reshape(-1).astype(int)) |
|
|
|
|
|
|
|
fwm2 = np.array(nx.floyd_warshall_numpy(g_2)) |
|
|
|
fwm2 = np.where(fwm2 == np.inf, 0, fwm2) |
|
|
|
fwm2 = np.where(fwm2 == np.nan, 0, fwm2) |
|
|
|
fwm2 = np.triu(fwm2, k=1) |
|
|
|
bc2 = np.bincount(fwm2.reshape(-1).astype(int)) |
|
|
|
|
|
|
|
# Copy into arrays with the same length the non-zero shortests |
|
|
|
# paths: |
|
|
|
v1 = np.zeros(max(len(bc1), len(bc2)) - 1) |
|
|
|
v1[range(0, len(bc1)-1)] = bc1[1:] |
|
|
|
|
|
|
|
v2 = np.zeros(max(len(bc1), len(bc2)) - 1) |
|
|
|
v2[range(0, len(bc2)-1)] = bc2[1:] |
|
|
|
|
|
|
|
return np.sum(v1 * v2) |
|
|
|
|
|
|
|
def compare_normalized(self, g_1, g_2, verbose=False): |
|
|
|
"""Compute the normalized kernel value between two graphs. |
|
|
|
A normalized version of the kernel is given by the equation: |
|
|
|
k_norm(g1, g2) = k(g1, g2) / sqrt(k(g1,g1) * k(g2,g2)) |
|
|
|
Parameters |
|
|
|
---------- |
|
|
|
g1 : networkx.Graph |
|
|
|
First graph. |
|
|
|
g2 : networkx.Graph |
|
|
|
Second graph. |
|
|
|
Returns |
|
|
|
------- |
|
|
|
k : The similarity value between g1 and g2. |
|
|
|
""" |
|
|
|
return self.compare(g_1, g_2) / (np.sqrt(self.compare(g_1, g_1) * |
|
|
|
self.compare(g_2, g_2))) |
|
|
|
|
|
|
|
def compare_list(self, graph_list, verbose=False): |
|
|
|
"""Compute the all-pairs kernel values for a list of graphs. |
|
|
|
This function can be used to directly compute the kernel |
|
|
|
matrix for a list of graphs. The direct computation of the |
|
|
|
kernel matrix is faster than the computation of all individual |
|
|
|
pairwise kernel values. |
|
|
|
Parameters |
|
|
|
---------- |
|
|
|
graph_list: list |
|
|
|
A list of graphs (list of networkx graphs) |
|
|
|
Return |
|
|
|
------ |
|
|
|
K: numpy.array, shape = (len(graph_list), len(graph_list)) |
|
|
|
The similarity matrix of all graphs in graph_list. |
|
|
|
""" |
|
|
|
n = len(graph_list) |
|
|
|
k = np.zeros((n, n)) |
|
|
|
for i in range(n): |
|
|
|
for j in range(i, n): |
|
|
|
k[i, j] = self.compare(graph_list[i], graph_list[j]) |
|
|
|
k[j, i] = k[i, j] |
|
|
|
|
|
|
|
k_norm = np.zeros(k.shape) |
|
|
|
for i in range(k.shape[0]): |
|
|
|
for j in range(k.shape[1]): |
|
|
|
k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) |
|
|
|
|
|
|
|
return k_norm |
|
|
|
|
|
|
|
|
|
|
|
ds_name = 'PAH' |
|
|
|
datafile = '../../datasets/PAH/dataset.ds' |
|
|
|
dataset, y = loadDataset(datafile, filename_y=None, extra_params=None) |
|
|
|
gk_sp = GK_SP() |
|
|
|
x = gk_sp.compare_list(dataset) |
|
|
|
np.savez('../check_gm/' + ds_name + '.gm.jstsp', gms=x) |
|
|
|
|
|
|
|
plt.imshow(x) |
|
|
|
plt.colorbar() |
|
|
|
plt.savefig('../check_gm/' + ds_name + '.gm.jstsp.eps', format='eps', dpi=300) |
|
|
|
# print(np.transpose(x)) |
|
|
|
print('if symmetric: ', np.array_equal(x, np.transpose(x))) |
|
|
|
|
|
|
|
print('diag: ', np.diag(x)) |
|
|
|
print('sum diag < 0.1: ', np.sum(np.diag(x) < 0.1)) |
|
|
|
print('min, max diag: ', min(np.diag(x)), max(np.diag(x))) |
|
|
|
print('mean x: ', np.mean(np.mean(x))) |
|
|
|
|
|
|
|
[lamnda, v] = eig(x) |
|
|
|
print('min, max lambda: ', min(lamnda), max(lamnda)) |