# -*- coding: utf-8 -*-
r"""compute_distance_in_kernel_space.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/17tZP6IrineQmzo9sRtfZOnHpHx6HnlMA

**This script demonstrates how to compute distance in kernel space between the image of a graph and the mean of images of a group of graphs.**
---

**0. Install `graphkit-learn`.**

The notebook's install cell is not part of this export, so the package has to
be installed beforehand (presumably ``pip install graphkit-learn`` -- confirm
against the project's installation instructions).

**1. Get dataset.**  See ``load_dataset``.

**2. Compute graph kernel.**  See ``compute_gram_matrix``.

**3. Compute distance in kernel space.**  See ``compute_distance_to_mean``.

The work is wrapped in functions and started from an
``if __name__ == '__main__':`` guard because the Gram matrix is computed with
a multiprocessing pool: on platforms that start workers with "spawn", an
unguarded script would be re-executed by every worker.
"""

import multiprocessing


def load_dataset(ds_name='MUTAG'):
    """Load one of the datasets predefined in `graphkit-learn`.

    Args:
        ds_name: Predefined dataset name; the demo uses "MUTAG".

    Returns:
        The ``gklearn.utils.Dataset`` instance after
        ``load_predefined_dataset(ds_name)`` has been called on it.
    """
    # Imported next to its only use, mirroring the original notebook cell.
    from gklearn.utils import Dataset

    # Initialize a Dataset, then load the predefined dataset into it.
    dataset = Dataset()
    dataset.load_predefined_dataset(ds_name)
    return dataset


def compute_gram_matrix(dataset, n_jobs=None):
    """Compute the normalized Gram matrix of ``dataset.graphs``.

    Uses the ``PathUpToH`` kernel with depth 3, the 'MinMax' kernel function
    and the 'trie' compute method.

    Args:
        dataset: A loaded dataset exposing ``graphs``, ``node_labels``,
            ``edge_labels`` and ``get_dataset_infos``.
        n_jobs: Number of parallel jobs; defaults to
            ``multiprocessing.cpu_count()``.

    Returns:
        Whatever ``PathUpToH.compute`` returns; the demo unpacks it as
        ``(gram_matrix, run_time)``.
    """
    from gklearn.kernels import PathUpToH

    # Initialize parameters for graph kernel computation.
    kernel_options = {
        'depth': 3,
        'k_func': 'MinMax',
        'compute_method': 'trie',
    }

    # Initialize graph kernel.
    graph_kernel = PathUpToH(
        node_labels=dataset.node_labels,  # list of node label names.
        edge_labels=dataset.edge_labels,  # list of edge label names.
        # Dataset information required for computation.
        ds_infos=dataset.get_dataset_infos(keys=['directed']),
        **kernel_options,  # options for computation.
    )

    if n_jobs is None:
        n_jobs = multiprocessing.cpu_count()

    # Compute Gram matrix.
    return graph_kernel.compute(
        dataset.graphs,
        parallel='imap_unordered',  # or None.
        n_jobs=n_jobs,  # number of parallel jobs.
        normalize=True,  # whether to return normalized Gram matrix.
        verbose=2,  # whether to print out results.
    )


def uniform_weights(n):
    """Return ``n`` equal weights ``1 / n``, i.e. the weights of a plain mean.

    Args:
        n: Number of elements being averaged; must be positive.

    Returns:
        A list of ``n`` floats, each equal to ``1 / n``.

    Raises:
        ValueError: If ``n`` is not positive (the mean of an empty group is
            undefined; this replaces an opaque ``ZeroDivisionError``).
    """
    if n <= 0:
        raise ValueError('cannot compute the mean of an empty group of graphs')
    return [1 / n] * n


def compute_distance_to_mean(gram_matrix, idx_1, idx_graphs):
    r"""Compute the kernel-space distance from one graph to a group mean.

    Given a dataset $\mathcal{G}_N$, compute the distance in kernel space
    between the image of $G_1 \in \mathcal{G}_N$ and the mean of images of
    $\mathcal{G}_k \subset \mathcal{G}_N$.

    Args:
        gram_matrix: Gram matrix of all graphs.
        idx_1: Index of $G_1$.
        idx_graphs: Indices of the graphs in $\mathcal{G}_k$; must be a sized,
            indexable sequence (the demo passes a ``range``).

    Returns:
        The value returned by ``gklearn.preimage.utils.compute_k_dis``.

    Raises:
        ValueError: If ``idx_graphs`` is empty.
    """
    from gklearn.preimage.utils import compute_k_dis

    return compute_k_dis(
        idx_1,
        idx_graphs,
        # Weights for images of graphs in $\mathcal{G}_k$; all equal when
        # computing the mean.
        uniform_weights(len(idx_graphs)),
        gram_matrix,  # Gram matrix of all graphs.
        withterm3=False,
    )


def main():
    """Run the demo: load MUTAG, build the Gram matrix, print the distance."""
    # 1. Get dataset: predefined dataset name, use dataset "MUTAG".
    ds_name = 'MUTAG'
    dataset = load_dataset(ds_name)
    # The notebook displayed this value as cell output; a script has to print
    # it explicitly for the expression to have any effect.
    print(len(dataset.graphs))

    # 2. Compute graph kernel (the run time is returned but not needed here).
    gram_matrix, _run_time = compute_gram_matrix(dataset)

    # 3. Compute distance in kernel space.
    # Index of $G_1$.
    idx_1 = 10
    # Indices of graphs in $\mathcal{G}_k$.
    idx_graphs = range(0, 10)

    dis_k = compute_distance_to_mean(gram_matrix, idx_1, idx_graphs)
    print(dis_k)


if __name__ == '__main__':
    main()