From a26491997d2241927500195c5f03c23337a53948 Mon Sep 17 00:00:00 2001
From: jajupmochi <jajupmochi@gmail.com>
Date: Mon, 28 Sep 2020 19:35:15 +0200
Subject: [PATCH] Update examples.

---
 gklearn/examples/ged/__init__.py                   |   0
 .../examples/ged/compute_graph_edit_distance.py    |  58 +++++++++++
 gklearn/examples/kernels/__init__.py               |   0
 .../kernels/compute_distance_in_kernel_space.py    |  73 +++++++++++++
 gklearn/examples/kernels/compute_graph_kernel.py   |  87 ++++++++++++++++
 .../examples/kernels/compute_graph_kernel_old.py   |  31 ++++++
 gklearn/examples/kernels/model_selection_old.py    |  38 +++++++
 gklearn/examples/preimage/__init__.py              |   0
 .../examples/preimage/median_preimege_generator.py | 115 +++++++++++++++++++++
 .../preimage/median_preimege_generator_cml.py      | 113 ++++++++++++++++++++
 .../preimage/median_preimege_generator_py.py       | 114 ++++++++++++++++++++
 11 files changed, 629 insertions(+)
 create mode 100644 gklearn/examples/ged/__init__.py
 create mode 100644 gklearn/examples/ged/compute_graph_edit_distance.py
 create mode 100644 gklearn/examples/kernels/__init__.py
 create mode 100644 gklearn/examples/kernels/compute_distance_in_kernel_space.py
 create mode 100644 gklearn/examples/kernels/compute_graph_kernel.py
 create mode 100644 gklearn/examples/kernels/compute_graph_kernel_old.py
 create mode 100644 gklearn/examples/kernels/model_selection_old.py
 create mode 100644 gklearn/examples/preimage/__init__.py
 create mode 100644 gklearn/examples/preimage/median_preimege_generator.py
 create mode 100644 gklearn/examples/preimage/median_preimege_generator_cml.py
 create mode 100644 gklearn/examples/preimage/median_preimege_generator_py.py

diff --git a/gklearn/examples/ged/__init__.py b/gklearn/examples/ged/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gklearn/examples/ged/compute_graph_edit_distance.py b/gklearn/examples/ged/compute_graph_edit_distance.py
new file mode 100644
index 0000000..027d1e4
--- /dev/null
+++ b/gklearn/examples/ged/compute_graph_edit_distance.py
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+"""compute_graph_edit_distance.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1Wfgn7WVuyOQQgwOvdUQBz0BzEVdp0YM3
+
+**This script demonstrates how to compute a graph edit distance.**
+---
+
+**0.   Install `graphkit-learn`.**
+"""
+
+"""**1.   Get dataset.**"""
+
+from gklearn.utils import Dataset
+
+# Predefined dataset name, use dataset "MUTAG".
+ds_name = 'MUTAG'
+
+# Initialize a Dataset.
+dataset = Dataset()
+# Load predefined dataset "MUTAG".
+dataset.load_predefined_dataset(ds_name)
+graph1 = dataset.graphs[0]
+graph2 = dataset.graphs[1]
+print(graph1, graph2)
+
+"""**2.  Compute graph edit distance.**"""
+
+from gklearn.ged.env import GEDEnv
+
+
+ged_env = GEDEnv() # initialize GED environment.
+ged_env.set_edit_cost('CONSTANT', # GED cost type.
+                      edit_cost_constants=[3, 3, 1, 3, 3, 1] # edit costs.
+					  )  
+ged_env.add_nx_graph(graph1, '') # add graph1
+ged_env.add_nx_graph(graph2, '') # add graph2
+listID = ged_env.get_all_graph_ids() # get list IDs of graphs
+ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES') # initialize GED environment.
+options = {'initialization_method': 'RANDOM', # or 'NODE', etc.
+           'threads': 1 # parallel threads.
+		   }
+ged_env.set_method('BIPARTITE', # GED method.
+                   options # options for GED method.
+				   )
+ged_env.init_method() # initialize GED method.
+
+ged_env.run_method(listID[0], listID[1]) # run.
+
+pi_forward = ged_env.get_forward_map(listID[0], listID[1]) # forward map.
+pi_backward = ged_env.get_backward_map(listID[0], listID[1]) # backward map.
+dis = ged_env.get_upper_bound(listID[0], listID[1])	# GED between two graphs.
+print(pi_forward)
+print(pi_backward)
+print(dis)
\ No newline at end of file
diff --git a/gklearn/examples/kernels/__init__.py b/gklearn/examples/kernels/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gklearn/examples/kernels/compute_distance_in_kernel_space.py b/gklearn/examples/kernels/compute_distance_in_kernel_space.py
new file mode 100644
index 0000000..76c7494
--- /dev/null
+++ b/gklearn/examples/kernels/compute_distance_in_kernel_space.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+"""compute_distance_in_kernel_space.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/17tZP6IrineQmzo9sRtfZOnHpHx6HnlMA
+
+**This script demonstrates how to compute distance in kernel space between the image of a graph and the mean of images of a group of graphs.**
+---
+
+**0.   Install `graphkit-learn`.**
+"""
+
+"""**1.   Get dataset.**"""
+
+from gklearn.utils import Dataset
+
+# Predefined dataset name, use dataset "MUTAG".
+ds_name = 'MUTAG'
+
+# Initialize a Dataset.
+dataset = Dataset()
+# Load predefined dataset "MUTAG".
+dataset.load_predefined_dataset(ds_name)
+len(dataset.graphs)
+
+"""**2.  Compute graph kernel.**"""
+
+from gklearn.kernels import PathUpToH
+import multiprocessing
+
+# Initialize parameters for graph kernel computation.
+kernel_options = {'depth': 3,
+				  'k_func': 'MinMax',
+				  'compute_method': 'trie'
+				  }
+
+# Initialize graph kernel.
+graph_kernel = PathUpToH(node_labels=dataset.node_labels, # list of node label names.
+						 edge_labels=dataset.edge_labels, # list of edge label names.
+						 ds_infos=dataset.get_dataset_infos(keys=['directed']), # dataset information required for computation.
+						 **kernel_options, # options for computation.
+						 )
+
+# Compute Gram matrix.
+gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
+											 parallel='imap_unordered', # or None.
+											 n_jobs=multiprocessing.cpu_count(), # number of parallel jobs.
+											 normalize=True, # whether to return normalized Gram matrix.
+											 verbose=2 # whether to print out results.
+                                            )
+
+"""**3.   Compute distance in kernel space.**
+
+Given a dataset $\mathcal{G}_N$, compute the distance in kernel space between the image of $G_1 \in \mathcal{G}_N$ and the mean of images of $\mathcal{G}_k \subset \mathcal{G}_N$.
+"""
+
+from gklearn.preimage.utils import compute_k_dis
+
+# Index of $G_1$.
+idx_1 = 10
+# Indices of graphs in $\mathcal{G}_k$.
+idx_graphs = range(0, 10)
+
+# Compute the distance in kernel space.
+dis_k = compute_k_dis(idx_1,
+                      idx_graphs,
+					  [1 / len(idx_graphs)] * len(idx_graphs), # weights for images of graphs in $\mathcal{G}_k$; all equal when computing the mean.
+					  gram_matrix, # gram matrix of all graphs.
+					  withterm3=False
+					  )
+print(dis_k)
\ No newline at end of file
diff --git a/gklearn/examples/kernels/compute_graph_kernel.py b/gklearn/examples/kernels/compute_graph_kernel.py
new file mode 100644
index 0000000..2fe8d52
--- /dev/null
+++ b/gklearn/examples/kernels/compute_graph_kernel.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+"""compute_graph_kernel.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/17Q2QCl9CAtDweGF8LiWnWoN2laeJqT0u
+
+**This script demonstrates how to compute a graph kernel.**
+---
+
+**0.   Install `graphkit-learn`.**
+"""
+
+"""**1.   Get dataset.**"""
+
+from gklearn.utils import Dataset
+
+# Predefined dataset name, use dataset "MUTAG".
+ds_name = 'MUTAG'
+
+# Initialize a Dataset.
+dataset = Dataset()
+# Load predefined dataset "MUTAG".
+dataset.load_predefined_dataset(ds_name)
+len(dataset.graphs)
+
+"""**2.  Compute graph kernel.**"""
+
+from gklearn.kernels import PathUpToH
+
+# Initialize parameters for graph kernel computation.
+kernel_options = {'depth': 3,
+			      		  'k_func': 'MinMax',
+					        'compute_method': 'trie'
+								 }
+
+# Initialize graph kernel.
+graph_kernel = PathUpToH(node_labels=dataset.node_labels, # list of node label names.
+						 edge_labels=dataset.edge_labels, # list of edge label names.
+						 ds_infos=dataset.get_dataset_infos(keys=['directed']), # dataset information required for computation.
+						 **kernel_options, # options for computation.
+						 )
+
+print('done.')
+
+import multiprocessing
+import matplotlib.pyplot as plt
+
+# Compute Gram matrix.
+gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
+											 parallel='imap_unordered', # or None.
+											 n_jobs=multiprocessing.cpu_count(), # number of parallel jobs.
+											 normalize=True, # whether to return normalized Gram matrix.
+											 verbose=2 # whether to print out results.
+											 )
+# Print results.
+print()
+print(gram_matrix)
+print(run_time)
+plt.imshow(gram_matrix)
+
+import multiprocessing
+
+# Compute graph kernels between a graph and a list of graphs.
+kernel_list, run_time = graph_kernel.compute(dataset.graphs, # a list of graphs. 
+                                             dataset.graphs[0], # a single graph.
+											 parallel='imap_unordered', # or None.
+											 n_jobs=multiprocessing.cpu_count(), # number of parallel jobs.
+											 verbose=2 # whether to print out results.
+                                            )
+# Print results.
+print()
+print(kernel_list)
+print(run_time)
+
+import multiprocessing
+
+# Compute a graph kernel between two graphs.
+kernel, run_time = graph_kernel.compute(dataset.graphs[0], # a single graph. 
+                                        dataset.graphs[1], # another single graph.
+										verbose=2 # whether to print out results.
+                                       )
+# Print results.
+print()
+print(kernel)
+print(run_time)
\ No newline at end of file
diff --git a/gklearn/examples/kernels/compute_graph_kernel_old.py b/gklearn/examples/kernels/compute_graph_kernel_old.py
new file mode 100644
index 0000000..7149c68
--- /dev/null
+++ b/gklearn/examples/kernels/compute_graph_kernel_old.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+"""compute_graph_kernel_v0.1.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/10jUz7-ahPiE_T1qvFrh2NvCVs1e47noj
+
+**This script demonstrates how to compute a graph kernel.**
+---
+
+**0.   Install `graphkit-learn`.**
+"""
+
+"""**1.   Get dataset.**"""
+
+from gklearn.utils.graphfiles import loadDataset
+
+graphs, targets = loadDataset('../../../datasets/MUTAG/MUTAG_A.txt')
+
+"""**2.  Compute graph kernel.**"""
+
+from gklearn.kernels import untilhpathkernel
+
+gram_matrix, run_time = untilhpathkernel(
+	graphs, # The list of input graphs.
+	depth=5, # The longest length of paths.
+	k_func='MinMax', # Or 'tanimoto'.
+	compute_method='trie', # Or 'naive'.
+	n_jobs=1, # The number of jobs to run in parallel.
+	verbose=True)
\ No newline at end of file
diff --git a/gklearn/examples/kernels/model_selection_old.py b/gklearn/examples/kernels/model_selection_old.py
new file mode 100644
index 0000000..ca66be6
--- /dev/null
+++ b/gklearn/examples/kernels/model_selection_old.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+"""model_selection_old.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1uVkl7scNgEPrimX8ks6iEC5ijuhB8L_D
+
+**This script demonstrates how to perform model selection and classification with a graph kernel.**
+---
+
+**0.   Install `graphkit-learn`.**
+"""
+
+"""**1. Perform model selection and classification.**"""
+
+from gklearn.utils import model_selection_for_precomputed_kernel
+from gklearn.kernels import untilhpathkernel
+import numpy as np
+
+# Set parameters.
+datafile = '../../../datasets/MUTAG/MUTAG_A.txt'
+param_grid_precomputed = {'depth': np.linspace(1, 10, 10),
+                          'k_func': ['MinMax', 'tanimoto'],
+                          'compute_method': ['trie']}
+param_grid = {'C': np.logspace(-10, 10, num=41, base=10)}
+
+# Perform model selection and classification.
+model_selection_for_precomputed_kernel(
+	datafile, # The path of dataset file.
+	untilhpathkernel, # The graph kernel used for estimation.
+	param_grid_precomputed, # The parameters used to compute gram matrices.
+	param_grid, # The penalty parameters used for penalty items.
+	'classification', # Or 'regression'.
+	NUM_TRIALS=30, # The number of the random trials of the outer CV loop.
+	ds_name='MUTAG', # The name of the dataset.
+	n_jobs=1,
+	verbose=True)
\ No newline at end of file
diff --git a/gklearn/examples/preimage/__init__.py b/gklearn/examples/preimage/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gklearn/examples/preimage/median_preimege_generator.py b/gklearn/examples/preimage/median_preimege_generator.py
new file mode 100644
index 0000000..9afc7bd
--- /dev/null
+++ b/gklearn/examples/preimage/median_preimege_generator.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+"""example_median_preimege_generator.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1PIDvHOcmiLEQ5Np3bgBDdu0kLOquOMQK
+
+**This script demonstrates how to generate a graph preimage using Boria's method.**
+---
+"""
+
+"""**1.   Get dataset.**"""
+
+from gklearn.utils import Dataset, split_dataset_by_target
+
+# Predefined dataset name, use dataset "MAO".
+ds_name = 'MAO'
+# The node/edge labels that will not be used in the computation.
+irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
+
+# Initialize a Dataset.
+dataset_all = Dataset()
+# Load predefined dataset "MAO".
+dataset_all.load_predefined_dataset(ds_name)
+# Remove irrelevant labels.
+dataset_all.remove_labels(**irrelevant_labels)
+# Split the whole dataset according to the classification targets.
+datasets = split_dataset_by_target(dataset_all)
+# Get the first class of graphs, whose median preimage will be computed.
+dataset = datasets[0]
+len(dataset.graphs)
+
+"""**2.  Set parameters.**"""
+
+import multiprocessing
+
+# Parameters for MedianPreimageGenerator (our method).
+mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting.
+			   'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs.
+			   'ds_name': ds_name, # name of the dataset.
+			   'parallel': True, # whether the parallel scheme is to be used.
+			   'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit.
+			   'max_itrs': 100, # maximum iteration limit to optimize edit costs. If set to 0 then no limit.
+			   'max_itrs_without_update': 3, # If the number of times that the edit costs are not updated exceeds this number, then the optimization stops.
+			   'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
+			   'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
+			   'verbose': 2 # whether to print out results.
+               }
+# Parameters for graph kernel computation.
+kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
+				  'depth': 9,
+				  'k_func': 'MinMax',
+				  'compute_method': 'trie',
+				  'parallel': 'imap_unordered', # or None
+				  'n_jobs': multiprocessing.cpu_count(),
+				  'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
+				  'verbose': 2 # whether to print out results.
+                  }
+# Parameters for GED computation.
+ged_options = {'method': 'IPFP', # use IPFP heuristic.
+			   'initialization_method': 'RANDOM', # or 'NODE', etc.
+			   'initial_solutions': 10, # when bigger than 1, then the method is considered mIPFP.
+			   'edit_cost': 'CONSTANT', # use CONSTANT cost.
+			   'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed by euclidean distance.
+			   'ratio_runs_from_initial_solutions': 1,
+			   'threads': multiprocessing.cpu_count(), # parallel threads. Do not work if mpg_options['parallel'] = False.
+			   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
+               }
+# Parameters for MedianGraphEstimator (Boria's method).
+mge_options = {'init_type': 'MEDOID', # how to initialize the median (compute set-median). "MEDOID" is to use the graph with smallest SOD.
+			   'random_inits': 10, # number of random initialization when 'init_type' = 'RANDOM'.
+			   'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
+			   'verbose': 2, # whether to print out results.
+			   'refine': False # whether to refine the final SODs or not.
+               }
+print('done.')
+
+"""**3.   Run median preimage generator.**"""
+
+from gklearn.preimage import MedianPreimageGenerator
+
+# Create median preimage generator instance.
+mpg = MedianPreimageGenerator()
+# Add dataset.
+mpg.dataset = dataset
+# Set parameters.
+mpg.set_options(**mpg_options.copy())
+mpg.kernel_options = kernel_options.copy()
+mpg.ged_options = ged_options.copy()
+mpg.mge_options = mge_options.copy()
+# Run.
+mpg.run()
+
+"""**4. Get results.**"""
+
+# Get results.
+import pprint
+pp = pprint.PrettyPrinter(indent=4) # pretty print
+results = mpg.get_results()
+pp.pprint(results)
+
+# Draw generated graphs.
+def draw_graph(graph):
+	import matplotlib.pyplot as plt
+	import networkx as nx
+	plt.figure()
+	pos = nx.spring_layout(graph)
+	nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
+	plt.show()
+	plt.clf()
+	plt.close()
+ 
+draw_graph(mpg.set_median)
+draw_graph(mpg.gen_median)
\ No newline at end of file
diff --git a/gklearn/examples/preimage/median_preimege_generator_cml.py b/gklearn/examples/preimage/median_preimege_generator_cml.py
new file mode 100644
index 0000000..314be97
--- /dev/null
+++ b/gklearn/examples/preimage/median_preimege_generator_cml.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jun 16 15:41:26 2020
+
+@author: ljia
+
+**This script demonstrates how to generate a graph preimage using Boria's method with cost matrices learning.**
+"""
+
+"""**1.   Get dataset.**"""
+
+from gklearn.utils import Dataset, split_dataset_by_target
+
+# Predefined dataset name, use dataset "MAO".
+ds_name = 'MAO'
+# The node/edge labels that will not be used in the computation.
+irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
+
+# Initialize a Dataset.
+dataset_all = Dataset()
+# Load predefined dataset "MAO".
+dataset_all.load_predefined_dataset(ds_name)
+# Remove irrelevant labels.
+dataset_all.remove_labels(**irrelevant_labels)
+# Split the whole dataset according to the classification targets.
+datasets = split_dataset_by_target(dataset_all)
+# Get the first class of graphs, whose median preimage will be computed.
+dataset = datasets[0]
+len(dataset.graphs)
+
+"""**2.  Set parameters.**"""
+
+import multiprocessing
+
+# Parameters for MedianPreimageGenerator (our method).
+mpg_options = {'init_method': 'random', # how to initialize node label cost vector. "random" means to initialize randomly.
+			   'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs.
+			   'ds_name': ds_name, # name of the dataset.
+			   'parallel': True, # @todo: whether the parallel scheme is to be used.
+			   'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit.
+			   'max_itrs': 3, # maximum iteration limit to optimize edit costs. If set to 0 then no limit.
+			   'max_itrs_without_update': 3, # If the number of times that the edit costs are not updated exceeds this number, then the optimization stops.
+			   'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
+			   'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
+			   'verbose': 2 # whether to print out results.
+               }
+# Parameters for graph kernel computation.
+kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
+				  'depth': 9,
+				  'k_func': 'MinMax',
+				  'compute_method': 'trie',
+				  'parallel': 'imap_unordered', # or None
+				  'n_jobs': multiprocessing.cpu_count(),
+				  'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
+				  'verbose': 2 # whether to print out results.
+                  }
+# Parameters for GED computation.
+ged_options = {'method': 'BIPARTITE', # use Bipartite heuristic.
+			   'initialization_method': 'RANDOM', # or 'NODE', etc.
+			   'initial_solutions': 10, # when bigger than 1, then the method is considered mIPFP.
+			   'edit_cost': 'CONSTANT', # @todo: not needed. use CONSTANT cost.
+			   'attr_distance': 'euclidean', # @todo: not needed. the distance between non-symbolic node/edge labels is computed by euclidean distance.
+			   'ratio_runs_from_initial_solutions': 1,
+			   'threads': multiprocessing.cpu_count(), # parallel threads. Do not work if mpg_options['parallel'] = False.
+			   'init_option': 'LAZY_WITHOUT_SHUFFLED_COPIES' # 'EAGER_WITHOUT_SHUFFLED_COPIES'
+               }
+# Parameters for MedianGraphEstimator (Boria's method).
+mge_options = {'init_type': 'MEDOID', # how to initialize the median (compute set-median). "MEDOID" is to use the graph with smallest SOD.
+			   'random_inits': 10, # number of random initialization when 'init_type' = 'RANDOM'.
+			   'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
+			   'verbose': 2, # whether to print out results.
+			   'refine': False # whether to refine the final SODs or not.
+               }
+print('done.')
+
+"""**3.   Run median preimage generator.**"""
+
+from gklearn.preimage import MedianPreimageGeneratorCML
+
+# Create median preimage generator instance.
+mpg = MedianPreimageGeneratorCML()
+# Add dataset.
+mpg.dataset = dataset
+# Set parameters.
+mpg.set_options(**mpg_options.copy())
+mpg.kernel_options = kernel_options.copy()
+mpg.ged_options = ged_options.copy()
+mpg.mge_options = mge_options.copy()
+# Run.
+mpg.run()
+
+"""**4. Get results.**"""
+
+# Get results.
+import pprint
+pp = pprint.PrettyPrinter(indent=4) # pretty print
+results = mpg.get_results()
+pp.pprint(results)
+
+# Draw generated graphs.
+def draw_graph(graph):
+	import matplotlib.pyplot as plt
+	import networkx as nx
+	plt.figure()
+	pos = nx.spring_layout(graph)
+	nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
+	plt.show()
+	plt.clf()
+	plt.close()
+ 
+draw_graph(mpg.set_median)
+draw_graph(mpg.gen_median)
\ No newline at end of file
diff --git a/gklearn/examples/preimage/median_preimege_generator_py.py b/gklearn/examples/preimage/median_preimege_generator_py.py
new file mode 100644
index 0000000..5b8152e
--- /dev/null
+++ b/gklearn/examples/preimage/median_preimege_generator_py.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jun 16 15:41:26 2020
+
+@author: ljia
+
+**This script demonstrates how to generate a graph preimage using Boria's method with cost matrices learning.**
+"""
+
+"""**1.   Get dataset.**"""
+
+from gklearn.utils import Dataset, split_dataset_by_target
+
+# Predefined dataset name, use dataset "MAO".
+ds_name = 'MAO'
+# The node/edge labels that will not be used in the computation.
+irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
+
+# Initialize a Dataset.
+dataset_all = Dataset()
+# Load predefined dataset "MAO".
+dataset_all.load_predefined_dataset(ds_name)
+# Remove irrelevant labels.
+dataset_all.remove_labels(**irrelevant_labels)
+# Split the whole dataset according to the classification targets.
+datasets = split_dataset_by_target(dataset_all)
+# Get the first class of graphs, whose median preimage will be computed.
+dataset = datasets[0]
+# dataset.cut_graphs(range(0, 10))
+len(dataset.graphs)
+
+"""**2.  Set parameters.**"""
+
+import multiprocessing
+
+# Parameters for MedianPreimageGenerator (our method).
+mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting.
+			   'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs.
+			   'ds_name': ds_name, # name of the dataset.
+			   'parallel': True, # @todo: whether the parallel scheme is to be used.
+			   'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit.
+			   'max_itrs': 100, # maximum iteration limit to optimize edit costs. If set to 0 then no limit.
+			   'max_itrs_without_update': 3, # If the number of times that the edit costs are not updated exceeds this number, then the optimization stops.
+			   'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
+			   'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
+			   'verbose': 2 # whether to print out results.
+               }
+# Parameters for graph kernel computation.
+kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
+				  'depth': 9,
+				  'k_func': 'MinMax',
+				  'compute_method': 'trie',
+				  'parallel': 'imap_unordered', # or None
+				  'n_jobs': multiprocessing.cpu_count(),
+				  'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
+				  'verbose': 2 # whether to print out results.
+                  }
+# Parameters for GED computation.
+ged_options = {'method': 'BIPARTITE', # use Bipartite heuristic.
+			   'initialization_method': 'RANDOM', # or 'NODE', etc.
+			   'initial_solutions': 10, # when bigger than 1, then the method is considered mIPFP.
+			   'edit_cost': 'CONSTANT', # use CONSTANT cost.
+			   'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed by euclidean distance.
+			   'ratio_runs_from_initial_solutions': 1,
+			   'threads': multiprocessing.cpu_count(), # parallel threads. Do not work if mpg_options['parallel'] = False.
+			   'init_option': 'LAZY_WITHOUT_SHUFFLED_COPIES' # 'EAGER_WITHOUT_SHUFFLED_COPIES'
+               }
+# Parameters for MedianGraphEstimator (Boria's method).
+mge_options = {'init_type': 'MEDOID', # how to initialize the median (compute set-median). "MEDOID" is to use the graph with smallest SOD.
+			   'random_inits': 10, # number of random initialization when 'init_type' = 'RANDOM'.
+			   'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
+			   'verbose': 2, # whether to print out results.
+			   'refine': False # whether to refine the final SODs or not.
+               }
+print('done.')
+
+"""**3.   Run median preimage generator.**"""
+
+from gklearn.preimage import MedianPreimageGeneratorPy
+
+# Create median preimage generator instance.
+mpg = MedianPreimageGeneratorPy()
+# Add dataset.
+mpg.dataset = dataset
+# Set parameters.
+mpg.set_options(**mpg_options.copy())
+mpg.kernel_options = kernel_options.copy()
+mpg.ged_options = ged_options.copy()
+mpg.mge_options = mge_options.copy()
+# Run.
+mpg.run()
+
+"""**4. Get results.**"""
+
+# Get results.
+import pprint
+pp = pprint.PrettyPrinter(indent=4) # pretty print
+results = mpg.get_results()
+pp.pprint(results)
+
+# Draw generated graphs.
+def draw_graph(graph):
+	import matplotlib.pyplot as plt
+	import networkx as nx
+	plt.figure()
+	pos = nx.spring_layout(graph)
+	nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
+	plt.show()
+	plt.clf()
+	plt.close()
+ 
+draw_graph(mpg.set_median)
+draw_graph(mpg.gen_median)
\ No newline at end of file