|
@@ -0,0 +1,73 @@ |
|
|
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
|
"""compute_distance_in_kernel_space.ipynb |
|
|
|
|
|
|
|
|
|
|
|
Automatically generated by Colaboratory. |
|
|
|
|
|
|
|
|
|
|
|
Original file is located at |
|
|
|
|
|
https://colab.research.google.com/drive/17tZP6IrineQmzo9sRtfZOnHpHx6HnlMA |
|
|
|
|
|
|
|
|
|
|
|
**This script demonstrates how to compute distance in kernel space between the image of a graph and the mean of images of a group of graphs.** |
|
|
|
|
|
--- |
|
|
|
|
|
|
|
|
|
|
|
**0. Install `graphkit-learn`.** |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
"""**1. Get dataset.**""" |
|
|
|
|
|
|
|
|
|
|
|
from gklearn.utils import Dataset |
|
|
|
|
|
|
|
|
|
|
|
# Predefined dataset name, use dataset "MUTAG". |
|
|
|
|
|
ds_name = 'MUTAG' |
|
|
|
|
|
|
|
|
|
|
|
# Initialize a Dataset. |
|
|
|
|
|
dataset = Dataset() |
|
|
|
|
|
# Load predefined dataset "MUTAG". |
|
|
|
|
|
dataset.load_predefined_dataset(ds_name) |
|
|
|
|
|
len(dataset.graphs) |
|
|
|
|
|
|
|
|
|
|
|
"""**2. Compute graph kernel.**""" |
|
|
|
|
|
|
|
|
|
|
|
from gklearn.kernels import PathUpToH |
|
|
|
|
|
import multiprocessing |
|
|
|
|
|
|
|
|
|
|
|
# Initailize parameters for graph kernel computation. |
|
|
|
|
|
kernel_options = {'depth': 3, |
|
|
|
|
|
'k_func': 'MinMax', |
|
|
|
|
|
'compute_method': 'trie' |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
# Initialize graph kernel. |
|
|
|
|
|
graph_kernel = PathUpToH(node_labels=dataset.node_labels, # list of node label names. |
|
|
|
|
|
edge_labels=dataset.edge_labels, # list of edge label names. |
|
|
|
|
|
ds_infos=dataset.get_dataset_infos(keys=['directed']), # dataset information required for computation. |
|
|
|
|
|
**kernel_options, # options for computation. |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
# Compute Gram matrix. |
|
|
|
|
|
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, |
|
|
|
|
|
parallel='imap_unordered', # or None. |
|
|
|
|
|
n_jobs=multiprocessing.cpu_count(), # number of parallel jobs. |
|
|
|
|
|
normalize=True, # whether to return normalized Gram matrix. |
|
|
|
|
|
verbose=2 # whether to print out results. |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
"""**3. Compute distance in kernel space.** |
|
|
|
|
|
|
|
|
|
|
|
Given a dataset $\mathcal{G}_N$, compute the distance in kernel space between the image of $G_1 \in \mathcal{G}_N$ and the mean of images of $\mathcal{G}_k \subset \mathcal{G}_N$. |
|
|
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
from gklearn.preimage.utils import compute_k_dis |
|
|
|
|
|
|
|
|
|
|
|
# Index of $G_1$. |
|
|
|
|
|
idx_1 = 10 |
|
|
|
|
|
# Indices of graphs in $\mathcal{G}_k$. |
|
|
|
|
|
idx_graphs = range(0, 10) |
|
|
|
|
|
|
|
|
|
|
|
# Compute the distance in kernel space. |
|
|
|
|
|
dis_k = compute_k_dis(idx_1, |
|
|
|
|
|
idx_graphs, |
|
|
|
|
|
[1 / len(idx_graphs)] * len(idx_graphs), # weights for images of graphs in $\mathcal{G}_k$; all equal when computing the mean. |
|
|
|
|
|
gram_matrix, # gram matrix of al graphs. |
|
|
|
|
|
withterm3=False |
|
|
|
|
|
) |
|
|
|
|
|
print(dis_k) |