
update datasets used in the tests.

v0.1
jajupmochi 5 years ago
parent
commit
1d49a75040
45 changed files with 89563 additions and 307 deletions
  1. +2     -0   .coveragerc
  2. +2     -0   .gitignore
  3. +1     -1   .travis.yml
  4. +23922 -0   datasets/Cuneiform/Cuneiform_A.txt
  5. +23922 -0   datasets/Cuneiform/Cuneiform_edge_attributes.txt
  6. +23922 -0   datasets/Cuneiform/Cuneiform_edge_labels.txt
  7. +5680  -0   datasets/Cuneiform/Cuneiform_graph_indicator.txt
  8. +267   -0   datasets/Cuneiform/Cuneiform_graph_labels.txt
  9. +5680  -0   datasets/Cuneiform/Cuneiform_node_attributes.txt
  10. +5680 -0   datasets/Cuneiform/Cuneiform_node_labels.txt
  11. +119  -0   datasets/Cuneiform/README.txt
  12. +0    -1   gklearn/kernels/commonWalkKernel.py
  13. +0    -1   gklearn/kernels/marginalizedKernel.py
  14. +0    -2   gklearn/kernels/randomWalkKernel.py
  15. +0    -1   gklearn/kernels/spKernel.py
  16. +0    -2   gklearn/kernels/structuralspKernel.py
  17. +0    -1   gklearn/kernels/treeletKernel.py
  18. +0    -1   gklearn/kernels/untilHPathKernel.py
  19. +0    -1   gklearn/kernels/weisfeilerLehmanKernel.py
  20. +1    -3   gklearn/preimage/find_best_k.py
  21. +3    -4   gklearn/preimage/fitDistance.py
  22. +2    -4   gklearn/preimage/ged.py
  23. +7    -9   gklearn/preimage/iam.py
  24. +5    -7   gklearn/preimage/knn.py
  25. +1    -1   gklearn/preimage/libs.py
  26. +1    -4   gklearn/preimage/preimage_random.py
  27. +0    -2   gklearn/preimage/test.py
  28. +5    -7   gklearn/preimage/test_fitDistance.py
  29. +22   -33  gklearn/preimage/test_ged.py
  30. +4    -7   gklearn/preimage/test_iam.py
  31. +7    -8   gklearn/preimage/test_k_closest_graphs.py
  32. +3    -5   gklearn/preimage/test_others.py
  33. +3    -6   gklearn/preimage/test_preimage_iam.py
  34. +3    -6   gklearn/preimage/test_preimage_mix.py
  35. +3    -7   gklearn/preimage/test_preimage_random.py
  36. +5    -6   gklearn/preimage/utils.py
  37. +18   -19  gklearn/preimage/visualization.py
  38. +127  -84  gklearn/preimage/xp_fit_method.py
  39. +11   -13  gklearn/preimage/xp_letter_h.py
  40. +9    -9   gklearn/preimage/xp_monoterpenoides.py
  41. +37   -5   gklearn/tests/test_graphkernels.py
  42. +7    -10  gklearn/utils/graphfiles.py
  43. +0    -3   gklearn/utils/model_selection_precomputed.py
  44. +1    -1   notebooks/utils/plot_all_graphs.ipynb
  45. +81   -33  notebooks/utils/plot_all_graphs.py

+2 -0  .coveragerc

@@ -0,0 +1,2 @@
[run]
omit = gklearn/tests/*

+2 -0  .gitignore

@@ -15,6 +15,7 @@ datasets/*
!datasets/AIDS/
!datasets/monoterpenoides/
!datasets/Fingerprint/*.txt
!datasets/Cuneiform/*.txt
notebooks/results/*
notebooks/check_gm/*
notebooks/test_parallel/*
@@ -41,3 +42,4 @@ dist/
build/

.coverage
htmlcov

+1 -1  .travis.yml

@@ -22,7 +22,7 @@ install:

script:
- python setup.py bdist_wheel
- pytest -v --cov-report term --cov=gklearn gklearn/tests/
- pytest -v --cov-config=.coveragerc --cov-report term --cov=gklearn gklearn/tests/

after_success:
- codecov

+23922 -0  datasets/Cuneiform/Cuneiform_A.txt  (file diff suppressed because it is too large)


+23922 -0  datasets/Cuneiform/Cuneiform_edge_attributes.txt  (file diff suppressed because it is too large)


+23922 -0  datasets/Cuneiform/Cuneiform_edge_labels.txt  (file diff suppressed because it is too large)


+5680 -0  datasets/Cuneiform/Cuneiform_graph_indicator.txt  (file diff suppressed because it is too large)


+267 -0  datasets/Cuneiform/Cuneiform_graph_labels.txt

@@ -0,0 +1,267 @@
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

+5680 -0  datasets/Cuneiform/Cuneiform_node_attributes.txt  (file diff suppressed because it is too large)


+5680 -0  datasets/Cuneiform/Cuneiform_node_labels.txt  (file diff suppressed because it is too large)


+119 -0  datasets/Cuneiform/README.txt

@@ -0,0 +1,119 @@
README for dataset Cuneiform


=== Usage ===

This folder contains the following comma-separated text files
(replace DS by the name of the dataset):

n = total number of nodes
m = total number of edges
N = number of graphs

(1) DS_A.txt (m lines)
sparse (block diagonal) adjacency matrix for all graphs,
each line corresponds to a (row, col) pair, i.e., a (node_id, node_id) edge

(2) DS_graph_indicator.txt (n lines)
column vector of graph identifiers for all nodes of all graphs,
the value in the i-th line is the graph_id of the node with node_id i

(3) DS_graph_labels.txt (N lines)
class labels for all graphs in the dataset,
the value in the i-th line is the class label of the graph with graph_id i

(4) DS_node_labels.txt (n lines)
column vector of node labels,
the value in the i-th line corresponds to the node with node_id i

There are OPTIONAL files if the respective information is available:

(5) DS_edge_labels.txt (m lines; same size as DS_A.txt)
labels for the edges in DS_A.txt

(6) DS_edge_attributes.txt (m lines; same size as DS_A.txt)
attributes for the edges in DS_A.txt

(7) DS_node_attributes.txt (n lines)
matrix of node attributes,
the comma-separated values in the i-th line form the attribute vector of the node with node_id i

(8) DS_graph_attributes.txt (N lines)
regression values for all graphs in the dataset,
the value in the i-th line is the attribute of the graph with graph_id i


=== Description ===

The Cuneiform dataset contains graphs representing 30 different Hittite cuneiform signs
(class labels 0-29 below). The data was obtained from nine cuneiform tablets written by
scholars of Hittitology in the course of a study on the individualistic characteristics
of cuneiform handwriting. After automated extraction of the individual wedges, the
assignment of the wedges to cuneiform signs was determined manually. The graph model is
explained in detail in the referenced publication.


=== References ===

Nils M. Kriege, Matthias Fey, Denis Fisseler, Petra Mutzel, Frank Weichert
Recognizing Cuneiform Signs Using Graph Based Methods. 2018. arXiv:1802.05908
https://arxiv.org/abs/1802.05908


=== Description of Labels ===

Node labels were converted to integer values using this map:

Component 0:
0 depthPoint
1 tailVertex
2 leftVertex
3 rightVertex

Component 1:
0 vertical
1 Winkelhaken
2 horizontal



Edge labels were converted to integer values using this map:

Component 0:
0 wedge
1 arrangement



Class labels were converted to integer values using this map:

0 tu
1 ta
2 ti
3 nu
4 na
5 ni
6 bu
7 ba
8 bi
9 zu
10 za
11 zi
12 su
13 sa
14 si
15 hu
16 ha
17 hi
18 du
19 da
20 di
21 ru
22 ra
23 ri
24 ku
25 ka
26 ki
27 lu
28 la
29 li
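
As a minimal sketch of how the files described above fit together (gklearn's own loadDataset, exercised by the test changes further down, is the loader this commit actually uses, and it also reads the label and attribute files), the graphs can be rebuilt from DS_A.txt, DS_graph_indicator.txt and DS_graph_labels.txt alone:

import networkx as nx

def load_tud_dataset(prefix):
    # Sketch of a TU Dortmund format reader; assumes 1-based node ids and
    # comma-separated "row, col" pairs in DS_A.txt, as stated above.
    with open(prefix + '_graph_indicator.txt') as f:
        graph_of_node = [int(line) for line in f]   # node_id i (1-based) -> graph_id
    with open(prefix + '_graph_labels.txt') as f:
        labels = [int(line) for line in f]          # graph_id g (1-based) -> class label
    graphs = [nx.Graph(label=y) for y in labels]
    for node_id, gid in enumerate(graph_of_node, start=1):
        graphs[gid - 1].add_node(node_id)
    with open(prefix + '_A.txt') as f:
        for line in f:
            u, v = (int(x) for x in line.split(','))
            graphs[graph_of_node[u - 1] - 1].add_edge(u, v)
    return graphs

graphs = load_tud_dataset('datasets/Cuneiform/Cuneiform')
print(len(graphs), 'graphs;', graphs[0].number_of_nodes(), 'nodes in the first')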

+0 -1  gklearn/kernels/commonWalkKernel.py

@@ -16,7 +16,6 @@ from functools import partial
import networkx as nx
import numpy as np

sys.path.insert(0, "../")
from gklearn.utils.utils import direct_product
from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm


+0 -1  gklearn/kernels/marginalizedKernel.py

@@ -28,7 +28,6 @@ from gklearn.utils.kernels import deltakernel
from gklearn.utils.utils import untotterTransformation
from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm
sys.path.insert(0, "../")


def marginalizedkernel(*args,


+0 -2  gklearn/kernels/randomWalkKernel.py

@@ -6,8 +6,6 @@
[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
"""

import sys
sys.path.insert(0, "../")
import time
from functools import partial
from tqdm import tqdm


+0 -1  gklearn/kernels/spKernel.py

@@ -20,7 +20,6 @@ import numpy as np
from gklearn.utils.utils import getSPGraph
from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm
sys.path.insert(0, "../")

def spkernel(*args,
node_label='atom',


+0 -2  gklearn/kernels/structuralspKernel.py

@@ -25,8 +25,6 @@ from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm
from gklearn.utils.trie import Trie

sys.path.insert(0, "../")


def structuralspkernel(*args,
node_label='atom',


+0 -1  gklearn/kernels/treeletKernel.py

@@ -8,7 +8,6 @@
"""

import sys
sys.path.insert(0, "../")
import time
from collections import Counter
from itertools import chain


+0 -1  gklearn/kernels/untilHPathKernel.py

@@ -9,7 +9,6 @@
"""

import sys
sys.path.insert(0, "../")
import time
from collections import Counter
from itertools import chain


+0 -1  gklearn/kernels/weisfeilerLehmanKernel.py

@@ -10,7 +10,6 @@

import sys
from collections import Counter
sys.path.insert(0, "../")
from functools import partial
import time
#from multiprocessing import Pool


+1 -3  gklearn/preimage/find_best_k.py

@@ -9,10 +9,8 @@ import numpy as np
import random
import csv

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from preimage.test_k_closest_graphs import median_on_k_closest_graphs
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs

def find_best_k():
ds = {'name': 'monoterpenoides',


+3 -4  gklearn/preimage/fitDistance.py

@@ -13,15 +13,14 @@ from multiprocessing import Pool
from functools import partial
import time
import random
import sys

from scipy import optimize
from scipy.optimize import minimize
import cvxpy as cp

import sys
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic
from preimage.utils import kernel_distance_matrix
from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic
from gklearn.preimage.utils import kernel_distance_matrix

def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max,
params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT',


+2 -4  gklearn/preimage/ged.py

@@ -128,12 +128,10 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method
elif lib == 'gedlib-bash':
import time
import random
import sys
import os
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import saveDataset
tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/'
tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
if not os.path.exists(tmp_dir):
os.makedirs(tmp_dir)
fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9))
@@ -144,7 +142,7 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n'
command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
command += 'export LD_LIBRARY_PATH\n'
command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n'
command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
command += './ged_for_python_bash monoterpenoides ' + fn_collection \
+ ' \'' + algo_options + '\' '
for ec in edit_cost_constant:
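
The same replacement recurs throughout this commit: machine-specific '/media/ljia/...' paths give way to paths anchored at the module's own location. A minimal sketch of the idiom, with a hypothetical helper name:

import os

def module_relative(*parts):
    # Resolve a path against this module's directory, so the code works
    # regardless of the current working directory or the machine it runs on.
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), *parts)

tmp_dir = module_relative('cpp_ext', 'output', 'tmp_ged')
os.makedirs(tmp_dir, exist_ok=True)

Using os.path.join also avoids a pitfall visible in a few hunks below, where '../../datasets/...' is concatenated onto dirname(...) without a path separator.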


+7 -9  gklearn/preimage/iam.py

@@ -11,11 +11,9 @@ import random
import networkx as nx
from tqdm import tqdm

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
from ged import GED, ged_median
from gklearn.preimage.ged import GED, ged_median


def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
@@ -438,7 +436,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,

def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1,
dataset='monoterpenoides',
graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'):
graph_dir=''):
"""Compute the iam by c++ implementation (gedlib) through bash.
"""
import os
@@ -462,18 +460,18 @@ def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1,
fgroup.write("\n</GraphCollection>")
fgroup.close()

tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/'
tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9))
createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection)
# fn_collection = tmp_dir + 'collection_for_debug'
# graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl'
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/gxl'
# if dataset == 'Letter-high' or dataset == 'Fingerprint':
# dataset = 'letter'
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n'
command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
command += 'export LD_LIBRARY_PATH\n'
command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n'
command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \
+ ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' '
if edit_cost_constant is None:
@@ -489,8 +487,8 @@ def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1,
sod_sm = float(output[0].strip())
sod_gm = float(output[1].strip())
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
return sod_sm, sod_gm, fname_sm, fname_gm
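
iam_bash now reads and writes everything under the module-relative cpp_ext tree. A hedged call sketch (the file names, cost and graph directory are illustrative, and the gedlib C++ binaries must already be built under cpp_ext):

from gklearn.preimage.iam import iam_bash

# Gn_names are GXL file names inside graph_dir; passing None as
# edit_cost_constant lets the C++ side fall back to its defaults.
sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(
    ['graph0.gxl', 'graph1.gxl'], None, cost='CONSTANT',
    initial_solutions=40, dataset='monoterpenoides',
    graph_dir='../../datasets/monoterpenoides/')
print('set median SOD:', sod_sm, '-> saved at', fname_sm)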



+5 -7  gklearn/preimage/knn.py

@@ -11,14 +11,12 @@ from tqdm import tqdm
import random
#import csv
from shutil import copyfile
import os


import sys
sys.path.insert(0, "../")
from preimage.iam import iam_bash
from gklearn.preimage.iam import iam_bash
from gklearn.utils.graphfiles import loadDataset, loadGXL
from preimage.ged import GED
from preimage.utils import get_same_item_indices
from gklearn.preimage.ged import GED
from gklearn.preimage.utils import get_same_item_indices

def test_knn():
ds = {'name': 'monoterpenoides',
@@ -30,7 +28,7 @@ def test_knn():
# edge_label = 'bond_type'
# ds_name = 'mono'
dir_output = 'results/knn/'
graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'
k_nn = 1
percent = 0.1


+1 -1  gklearn/preimage/libs.py

@@ -2,5 +2,5 @@ import sys
import pathlib

# insert gedlibpy library.
sys.path.insert(0, "../../")
sys.path.insert(0, "../../../")
from gedlibpy import librariesImport, gedlibpy

+1 -4  gklearn/preimage/preimage_random.py

@@ -14,10 +14,7 @@ from tqdm import tqdm
import networkx as nx
import matplotlib.pyplot as plt


sys.path.insert(0, "../")

from utils import compute_kernel, dis_gstar
from gklearn.preimage.utils import compute_kernel, dis_gstar


def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):


+0 -2  gklearn/preimage/test.py

@@ -52,8 +52,6 @@ def convertGraph(G):


def testNxGrapĥ():
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb


+5 -7  gklearn/preimage/test_fitDistance.py

@@ -9,12 +9,10 @@ from matplotlib import pyplot as plt
import numpy as np
from tqdm import tqdm

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from utils import remove_edges
from fitDistance import fit_GED_to_kernel_distance
from utils import normalize_distance_matrix
from gklearn.preimage.utils import remove_edges
from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
from gklearn.preimage.utils import normalize_distance_matrix


def test_update_costs():
@@ -63,7 +61,7 @@ def median_paper_clcpc_python_best():
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50
collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt'
@@ -160,7 +158,7 @@ def median_paper_clcpc_python_bash_cpp():
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50
collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt'


+22 -33  gklearn/preimage/test_ged.py

@@ -14,13 +14,11 @@ import sys
def test_NON_SYMBOLIC_cost():
"""Test edit cost LETTER2.
"""
import sys
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter
from preimage.test_k_closest_graphs import reform_attributes
from gklearn.preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter
from gklearn.preimage.test_k_closest_graphs import reform_attributes
from gklearn.utils.graphfiles import loadDataset

dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-high/Letter-high_A.txt'
dataset = '../../datasets/Letter-high/Letter-high_A.txt'
Gn, y_all = loadDataset(dataset)

g1 = Gn[200]
@@ -53,14 +51,12 @@ def test_NON_SYMBOLIC_cost():
def test_LETTER2_cost():
"""Test edit cost LETTER2.
"""
import sys
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations_letter
from preimage.test_k_closest_graphs import reform_attributes
from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
from gklearn.preimage.test_k_closest_graphs import reform_attributes
from gklearn.utils.graphfiles import loadDataset

ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])

g1 = Gn[200]
@@ -96,14 +92,12 @@ def test_get_nb_edit_operations_letter():
should be the same as the cost computed by number of operations and edit
cost constants.
"""
import sys
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations_letter
from preimage.test_k_closest_graphs import reform_attributes
from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
from gklearn.preimage.test_k_closest_graphs import reform_attributes
from gklearn.utils.graphfiles import loadDataset

ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])

g1 = Gn[200]
@@ -136,13 +130,12 @@ def test_get_nb_edit_operations():
numbers of edit operations. The distance/cost computed by GED should be the
same as the cost computed by number of operations and edit cost constants.
"""
import sys
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations
from gklearn.preimage.ged import GED, get_nb_edit_operations
from gklearn.utils.graphfiles import loadDataset
import os

ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb
ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])

g1 = Gn[20]
@@ -173,11 +166,10 @@ def test_get_nb_edit_operations():
def test_ged_python_bash_cpp():
"""Test ged computation with python invoking the c++ code by bash command (with updated library).
"""
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from preimage.ged import GED
from gklearn.preimage.ged import GED

data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/'
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
# collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
@@ -233,7 +225,7 @@ def test_ged_best_settings_updated():
"""Test ged computation with best settings the same as in the C++ code (with updated library).
"""

data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/'
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
# collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'

@@ -292,7 +284,7 @@ def test_ged_best_settings():
"""Test ged computation with best settings the same as in the C++ code.
"""

data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/'
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

@@ -350,7 +342,7 @@ def test_ged_default():
"""Test ged computation with default settings.
"""

data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/'
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

@@ -404,11 +396,10 @@ def test_ged_default():
def test_ged_min():
"""Test ged computation with the "min" stabilizer.
"""
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from preimage.ged import GED
from gklearn.preimage.ged import GED

data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/'
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

@@ -487,8 +478,6 @@ def convertGraph(G):


def testNxGrapĥ():
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb


+4 -7  gklearn/preimage/test_iam.py

@@ -13,14 +13,11 @@ import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
#from gklearn.utils.logger2file import *
from iam import iam_upgraded
from utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar
#from ged import ged_median
from gklearn.preimage.iam import iam_upgraded
from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar
#from gklearn.preimage.ged import ged_median


def test_iam_monoterpenoides_with_init40():
@@ -52,7 +49,7 @@ def test_iam_monoterpenoides_with_init40():
'stabilizer': ged_stabilizer}

collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50


+7 -8  gklearn/preimage/test_k_closest_graphs.py

@@ -17,15 +17,12 @@ import multiprocessing
from multiprocessing import Pool
from functools import partial

#import os
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset, loadGXL
#from gklearn.utils.logger2file import *
from iam import iam_upgraded, iam_bash
from utils import compute_kernel, dis_gstar, kernel_distance_matrix
from fitDistance import fit_GED_to_kernel_distance
#from ged import ged_median
from gklearn.preimage.iam import iam_upgraded, iam_bash
from gklearn.preimage.utils import compute_kernel, dis_gstar, kernel_distance_matrix
from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
#from gklearn.preimage.ged import ged_median


def fit_edit_cost_constants(fit_method, edit_cost_name,
@@ -204,6 +201,8 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho
if Kmatrix is not None:
Kmatrix_median = np.copy(Kmatrix[group_min,:])
Kmatrix_median = Kmatrix_median[:,group_min]
else:
Kmatrix_median = None

# 1. fit edit cost constants.
@@ -379,7 +378,7 @@ def test_k_closest_graphs_with_cv():
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50
collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
sod_sm_list = []


+3 -5  gklearn/preimage/test_others.py

@@ -11,12 +11,10 @@ import matplotlib.pyplot as plt
import time
from tqdm import tqdm

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from median import draw_Letter_graph
from ged import GED, ged_median
from utils import get_same_item_indices, compute_kernel, gram2distances, \
from gklearn.preimage.median import draw_Letter_graph
from gklearn.preimage.ged import GED, ged_median
from gklearn.preimage.utils import get_same_item_indices, compute_kernel, gram2distances, \
dis_gstar, remove_edges




+3 -6  gklearn/preimage/test_preimage_iam.py

@@ -13,14 +13,11 @@ import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from utils import remove_edges, compute_kernel, get_same_item_indices
from ged import ged_median
from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices
from gklearn.preimage.ged import ged_median

from preimage_iam import preimage_iam
from gklearn.preimage.preimage_iam import preimage_iam


###############################################################################


+3 -6  gklearn/preimage/test_preimage_mix.py

@@ -13,13 +13,10 @@ import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from ged import ged_median
from utils import compute_kernel, get_same_item_indices, remove_edges
from preimage_iam import preimage_iam_random_mix
from gklearn.preimage.ged import ged_median
from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges
from gklearn.preimage.preimage_iam import preimage_iam_random_mix

###############################################################################
# tests on different values on grid of median-sets and k.


+3 -7  gklearn/preimage/test_preimage_random.py

@@ -13,14 +13,10 @@ import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset

from preimage_random import preimage_random
from ged import ged_median
from utils import compute_kernel, get_same_item_indices, remove_edges
from gklearn.preimage.preimage_random import preimage_random
from gklearn.preimage.ged import ged_median
from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges


###############################################################################


+5 -6  gklearn/preimage/utils.py

@@ -11,8 +11,6 @@ Useful functions.
import multiprocessing
import numpy as np

import sys
sys.path.insert(0, "../")
from gklearn.kernels.marginalizedKernel import marginalizedkernel
from gklearn.kernels.untilHPathKernel import untilhpathkernel
from gklearn.kernels.spKernel import spkernel
@@ -41,7 +39,7 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
return np.sqrt(term1 - term2 + term3)


def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel='imap_unordered'):
if graph_kernel == 'marginalizedkernel':
Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label,
p_quit=0.03, n_iteration=10, remove_totters=False,
@@ -49,6 +47,7 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
elif graph_kernel == 'untilhpathkernel':
Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label,
depth=7, k_func='MinMax', compute_method='trie',
parallel=parallel,
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
elif graph_kernel == 'spkernel':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
@@ -66,18 +65,18 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
Kmatrix, _ = structuralspkernel(Gn, node_label=node_label,
edge_label=edge_label, node_kernels=sub_kernels,
edge_kernels=sub_kernels,
parallel=None, n_jobs=multiprocessing.cpu_count(),
parallel=parallel, n_jobs=multiprocessing.cpu_count(),
verbose=verbose)
elif graph_kernel == 'treeletkernel':
pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
# pkernel = functools.partial(gaussiankernel, gamma=1e-6)
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label,
sub_kernel=pkernel,
sub_kernel=pkernel, parallel=parallel,
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
elif graph_kernel == 'weisfeilerlehmankernel':
Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label,
height=4, base_kernel='subtree',
height=4, base_kernel='subtree', parallel=None,
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
# normalization
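
With the new keyword, callers can choose the parallelization mode per kernel. A minimal usage sketch (the dataset path and label names are illustrative):

from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.utils import compute_kernel

Gn, y_all = loadDataset('../../datasets/monoterpenoides/dataset_10+.ds')
# 'imap_unordered' is the new default; pass parallel=None to force the
# single-process code path instead.
Kmatrix = compute_kernel(Gn[0:10], 'structuralspkernel', 'atom', 'bond_type',
                         verbose=True, parallel='imap_unordered')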


+18 -19  gklearn/preimage/visualization.py

@@ -11,11 +11,8 @@ import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset
from tqdm import tqdm


import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset, loadGXL
from utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices
from gklearn.preimage.utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices


def visualize_graph_dataset(dis_measure, visual_method, draw_figure,
@@ -115,11 +112,11 @@ def visualize_distances_in_kernel():
# Gn = Gn[0:50]
fname_medians = 'expert.treelet'
# add set median.
fname_sm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
fname_sm = 'results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
set_median = loadGXL(fname_sm)
Gn.append(set_median)
# add generalized median (estimated pre-image.)
fname_gm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
fname_gm = 'results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
gen_median = loadGXL(fname_gm)
Gn.append(gen_median)
@@ -166,19 +163,19 @@ def visualize_distances_in_kernel():
def visualize_distances_in_ged():
from fitDistance import compute_geds
from ged import GED
from gklearn.preimage.fitDistance import compute_geds
from gklearn.preimage.ged import GED
ds = {'name': 'monoterpenoides',
'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
# add set median.
fname_medians = 'expert.treelet'
fname_sm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
fname_sm = 'preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
set_median = loadGXL(fname_sm)
Gn.append(set_median)
# add generalized median (estimated pre-image.)
fname_gm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
fname_gm = 'preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
gen_median = loadGXL(fname_gm)
Gn.append(gen_median)
@@ -227,9 +224,10 @@ def visualize_distances_in_ged():
def visualize_distances_in_kernel_monoterpenoides():
import os

ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
Gn_original, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
@@ -301,11 +299,12 @@ def visualize_distances_in_kernel_monoterpenoides():
def visualize_distances_in_ged_monoterpenoides():
from fitDistance import compute_geds
from ged import GED
from gklearn.preimage.fitDistance import compute_geds
from gklearn.preimage.ged import GED
import os
ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
Gn_original, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
@@ -379,8 +378,8 @@ def visualize_distances_in_ged_monoterpenoides():
def visualize_distances_in_kernel_letter_h():
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
# Gn = Gn[0:50]
@@ -455,8 +454,8 @@ def visualize_distances_in_ged_letter_h():
from fitDistance import compute_geds
from preimage.test_k_closest_graphs import reform_attributes
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
# Gn = Gn[0:50]


+127 -84  gklearn/preimage/xp_fit_method.py

@@ -11,35 +11,37 @@ import csv
from shutil import copyfile
import networkx as nx
import matplotlib.pyplot as plt
import os

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel
from preimage.find_best_k import getRelations
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel
from gklearn.preimage.find_best_k import getRelations


def get_dataset(ds_name):
if ds_name == 'Letter-high': # node non-symb
dataset = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml'
graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'
dataset = 'cpp_ext/data/collections/Letter.xml'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
for G in Gn:
reform_attributes(G)
elif ds_name == 'Fingerprint':
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/gedlib/data/collections/Fingerprint.xml'
graph_dir = '/media/ljia/DATA/research-repo/codes/Linlin/gedlib/data/datasets/Fingerprint/data/'
Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
for G in Gn:
reform_attributes(G)
# dataset = 'cpp_ext/data/collections/Fingerprint.xml'
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
# Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
# for G in Gn:
# reform_attributes(G)
dataset = '../../datasets/Fingerprint/Fingerprint_A.txt'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
Gn, y_all = loadDataset(dataset)
elif ds_name == 'SYNTHETIC':
pass
elif ds_name == 'SYNTHETICnew':
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/SYNTHETICnew'
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-high/Letter-high_A.txt'
# graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'
dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/SYNTHETICnew'
# dataset = '../../datasets/Letter-high/Letter-high_A.txt'
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
Gn, y_all = loadDataset(dataset)
elif ds_name == 'Synthie':
pass
@@ -184,6 +186,8 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti
if Kmatrix is not None:
Kmatrix_sub = Kmatrix[values,:]
Kmatrix_sub = Kmatrix_sub[:,values]
else:
Kmatrix_sub = None
for repeat in range(repeats):
print('\nrepeat =', repeat)
@@ -273,11 +277,11 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')
@@ -427,63 +431,101 @@ if __name__ == "__main__":
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
# #### xp 3: Fingerprint, sspkernel, using LETTER2.
# #### xp 3: SYNTHETICnew, sspkernel, using NON_SYMBOLIC.
# gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.structuralspkernel.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# run_time = gmfile['run_time']
# # normalization
# Kmatrix_diag = Kmatrix.diagonal().copy()
# for i in range(len(Kmatrix)):
# for j in range(i, len(Kmatrix)):
# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
# Kmatrix[j][i] = Kmatrix[i][j]
## np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
## Kmatrix=Kmatrix, run_time=run_time)
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'Fingerprint'
# ds_name = 'SYNTHETICnew'
# gkernel = 'structuralspkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without nodes and edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
# and nx.number_of_edges(G) != 0)]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
## Gn = Gn[0:50]
## y_all = y_all[0:50]
## Gn = Gn[0:10]
## y_all = y_all[0:10]
# for G in Gn:
# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
# # compute pair distances.
## dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
## Kmatrix=None, gkernel=gkernel, verbose=True)
# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# # fitting and computing.
# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
# fit_methods = ['k-graphs', 'random', 'random', 'random']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'LETTER2',
# 'edit_cost_name': 'NON_SYMBOLIC',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method}
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# initial_solutions=1,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)
# #### xp 4: SYNTHETICnew, sspkernel, using NON_SYMBOLIC.
# ### xp 4: SYNTHETICnew, spkernel, using NON_SYMBOLIC.
# gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# # normalization
# Kmatrix_diag = Kmatrix.diagonal().copy()
# for i in range(len(Kmatrix)):
# for j in range(i, len(Kmatrix)):
# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
# Kmatrix[j][i] = Kmatrix[i][j]
# run_time = 21821.35
# np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
# Kmatrix=Kmatrix, run_time=run_time)
#
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'SYNTHETICnew'
# gkernel = 'structuralspkernel'
# gkernel = 'spkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without nodes and edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0
# and nx.number_of_edges(G) != 0)]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
# Gn = Gn[0:10]
# y_all = y_all[0:10]
## # remove graphs without nodes and edges.
## Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_node(G) != 0
## and nx.number_of_edges(G) != 0)]
## idx = [G[0] for G in Gn]
## Gn = [G[1] for G in Gn]
## y_all = [y_all[i] for i in idx]
## Gn = Gn[0:5]
## y_all = y_all[0:5]
# for G in Gn:
# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
# # compute pair distances.
#
# # compute/read Gram matrix and pair distances.
## Kmatrix = compute_kernel(Gn, gkernel, None, None, True)
## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
## Kmatrix=Kmatrix)
# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# run_time = gmfile['run_time']
## Kmatrix = Kmatrix[[0,1,2,3,4],:]
## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
# print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=None, gkernel=gkernel, verbose=True)
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## Kmatrix = np.zeros((len(Gn), len(Gn)))
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
#
# # fitting and computing.
# fit_methods = ['k-graphs', 'random', 'random', 'random']
# for fit_method in fit_methods:
@@ -496,68 +538,69 @@ if __name__ == "__main__":
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method}
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
### xp 5: SYNTHETICnew, spkernel, using NON_SYMBOLIC.
gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz')
Kmatrix = gmfile['Kmatrix']
# normalization
Kmatrix_diag = Kmatrix.diagonal().copy()
for i in range(len(Kmatrix)):
for j in range(i, len(Kmatrix)):
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
Kmatrix[j][i] = Kmatrix[i][j]
run_time = 21821.35
np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
Kmatrix=Kmatrix, run_time=run_time)
# initial_solutions=1,
# Gn_data=[Gn, y_all, graph_dir],
# k_dis_data=[dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)
#### xp 5: Fingerprint, sspkernel, using LETTER2.
# load dataset.
print('getting dataset and computing kernel distance matrix first...')
ds_name = 'SYNTHETICnew'
gkernel = 'spkernel'
ds_name = 'Fingerprint'
gkernel = 'structuralspkernel'
Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without nodes and edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0
# remove graphs without nodes and edges.
Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0)]
# and nx.number_of_edges(G) != 0)]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
# Gn = Gn[0:5]
# y_all = y_all[0:5]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
y_all = [y_all[i] for i in idx]
y_idx = get_same_item_indices(y_all)
# remove unused labels.
for G in Gn:
G.graph['edge_attrs'] = []
for edge in G.edges:
del G.edges[edge]['attributes']
del G.edges[edge]['orient']
del G.edges[edge]['angle']
Gn = Gn[805:815]
y_all = y_all[805:815]
for G in Gn:
G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
# compute/read Gram matrix and pair distances.
# Kmatrix = compute_kernel(Gn, gkernel, None, None, True)
# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
# Kmatrix=Kmatrix)
gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
Kmatrix = gmfile['Kmatrix']
run_time = gmfile['run_time']
Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
Kmatrix=Kmatrix)
# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# run_time = gmfile['run_time']
# Kmatrix = Kmatrix[[0,1,2,3,4],:]
# Kmatrix = Kmatrix[:,[0,1,2,3,4]]
print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
# Kmatrix = np.zeros((len(Gn), len(Gn)))
# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# compute pair distances.
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=None, gkernel=gkernel, verbose=True)
# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# fitting and computing.
fit_methods = ['k-graphs', 'random', 'random', 'random']
fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
for fit_method in fit_methods:
print('\n-------------------------------------')
print('fit method:', fit_method)
parameters = {'ds_name': ds_name,
'gkernel': gkernel,
'edit_cost_name': 'NON_SYMBOLIC',
'edit_cost_name': 'LETTER2',
'ged_method': 'mIPFP',
'attr_distance': 'euclidean',
'fit_method': fit_method}
xp_fit_method_for_non_symbolic(parameters, save_results=True,
initial_solutions=1,
Gn_data=[Gn, y_all, graph_dir],
k_dis_data=[dis_mat, dis_max, dis_min, dis_mean],
initial_solutions=40,
Gn_data = [Gn, y_all, graph_dir],
k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
Kmatrix=Kmatrix)
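
The diagonal-based normalization loop above recurs in several variants in this file; a vectorized sketch of the same cosine normalization (equivalent to the nested loops, assuming a square Gram matrix):

import numpy as np

def normalize_gram(K):
    # K[i, j] / sqrt(K[i, i] * K[j, j]) in one shot, matching the
    # element-wise loops used above.
    d = np.sqrt(np.diag(K))
    return K / np.outer(d, d)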

+11 -13  gklearn/preimage/xp_letter_h.py

@@ -12,17 +12,15 @@ from shutil import copyfile
import networkx as nx
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from preimage.utils import get_same_item_indices, kernel_distance_matrix
from preimage.find_best_k import getRelations
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix
from gklearn.preimage.find_best_k import getRelations


def xp_letter_h_LETTER2_cost():
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, Kmatrix=None, gkernel='structuralspkernel')
@@ -177,11 +175,11 @@ def xp_letter_h_LETTER2_cost():
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')
@@ -243,8 +241,8 @@ def xp_letter_h_LETTER2_cost():


def xp_letter_h():
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
for G in Gn:
reform_attributes(G)
@@ -396,11 +394,11 @@ def xp_letter_h():
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')


+9 -9  gklearn/preimage/xp_monoterpenoides.py

@@ -13,16 +13,16 @@ from shutil import copyfile
import networkx as nx
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from preimage.utils import get_same_item_indices
from preimage.find_best_k import getRelations
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from gklearn.preimage.utils import get_same_item_indices
from gklearn.preimage.find_best_k import getRelations

def xp_monoterpenoides():
ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb
import os

ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# ds = {'name': 'Letter-high',
# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
@@ -169,11 +169,11 @@ def xp_monoterpenoides():
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')


+37 -5  gklearn/tests/test_graphkernels.py

@@ -15,10 +15,16 @@ def chooseDataset(ds_name):
ds_file = 'datasets/Alkane/dataset.ds'
ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt'
Gn, y = loadDataset(ds_file, filename_y=ds_y)
for G in Gn:
for node in G.nodes:
del G.nodes[node]['attributes']
# node symbolic labels.
elif ds_name == 'Acyclic':
ds_file = 'datasets/acyclic/dataset_bps.ds'
Gn, y = loadDataset(ds_file)
for G in Gn:
for node in G.nodes:
del G.nodes[node]['attributes']
# node non-symbolic labels.
elif ds_name == 'Letter-med':
ds_file = 'datasets/Letter-med/Letter-med_A.txt'
@@ -27,14 +33,39 @@ def chooseDataset(ds_name):
elif ds_name == 'AIDS':
ds_file = 'datasets/AIDS/AIDS_A.txt'
Gn, y = loadDataset(ds_file)
# edge non-symbolic labels (no node labels).
elif ds_name == 'Fingerprint_edge':
import networkx as nx
ds_file = 'datasets/Fingerprint/Fingerprint_A.txt'
Gn, y = loadDataset(ds_file)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
y = [y[i] for i in idx]
for G in Gn:
G.graph['node_attrs'] = []
for node in G.nodes:
del G.nodes[node]['attributes']
del G.nodes[node]['x']
del G.nodes[node]['y']
# edge non-symbolic labels (and node non-symbolic labels).
elif ds_name == 'Fingerprint':
import networkx as nx
ds_file = 'datasets/Fingerprint/Fingerprint_A.txt'
Gn, y = loadDataset(ds_file)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
y = [y[i] for i in idx]
# edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels).
elif ds_name == 'Cuneiform':
import networkx as nx
ds_file = 'datasets/Cuneiform/Cuneiform_A.txt'
Gn, y = loadDataset(ds_file)
Gn = Gn[0:10]
y = y[0:10]
Gn = Gn[0:3]
y = y[0:3]
return Gn, y
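
The Fingerprint variants drop graphs without edges and keep the targets aligned through an explicit index list. The same filter can be written with zip; a sketch assumed to behave identically to the version above (the helper name is hypothetical):

import networkx as nx

def drop_edgeless(Gn, y):
    # keep only graphs that still have edges, together with their targets
    pairs = [(G, target) for G, target in zip(Gn, y)
             if nx.number_of_edges(G) != 0]
    return [G for G, _ in pairs], [target for _, target in pairs]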

@@ -152,7 +183,7 @@ def test_spkernel(ds_name, parallel):


#@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS'])
@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_structuralspkernel(ds_name, parallel):
"""Test structural shortest path kernel.
@@ -246,4 +277,5 @@ def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel):

if __name__ == "__main__":
test_spkernel()
# test_spkernel('Alkane', 'imap_unordered')
test_structuralspkernel('Fingerprint_edge', 'imap_unordered')
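
The __main__ block above runs a single case directly; the parametrized suite can also be launched from Python. A sketch using pytest.main, where the -k expression is an assumption about how pytest composes the ids of the parametrized cases:

import pytest

if __name__ == '__main__':
    # run only the structural SP kernel tests on the Fingerprint_edge dataset
    pytest.main(['-v', '-k', 'structuralspkernel and Fingerprint_edge',
                 'gklearn/tests/test_graphkernels.py'])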

+ 7
- 10
gklearn/utils/graphfiles.py View File

@@ -753,15 +753,12 @@ if __name__ == '__main__':
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
# Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# saveDataset(Gn, y, group='xml', filename='temp/temp')
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
Gn, y_all = loadDataset(dataset)
filename = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/SYNTHETICnew/SYNTHETICnew'
saveDataset(Gn, y_all, gformat='gxl', group='xml', filename=filename)
# test - new way to add labels and attributes.
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Fingerprint/Fingerprint_A.txt'
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-med/Letter-med_A.txt'
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/AIDS/AIDS_A.txt'
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'
# Gn, y_all = loadDataset(dataset)
# dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
# dataset = '../../datasets/Fingerprint/Fingerprint_A.txt'
# dataset = '../../datasets/Letter-med/Letter-med_A.txt'
# dataset = '../../datasets/AIDS/AIDS_A.txt'
# dataset = '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'
# Gn, y_all = loadDataset(dataset)
pass
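
The removed lines exercised a load/save round trip with machine-specific paths. A sketch of the same round trip with the repo-relative paths used in the commented lines (resolved against gklearn/utils/); the output location is an arbitrary choice for illustration:

from gklearn.utils.graphfiles import loadDataset, saveDataset

Gn, y_all = loadDataset('../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt')
saveDataset(Gn, y_all, gformat='gxl', group='xml',
            filename='temp/SYNTHETICnew/SYNTHETICnew')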

+ 0
- 3
gklearn/utils/model_selection_precomputed.py View File

@@ -11,7 +11,6 @@ from sklearn.model_selection import KFold, train_test_split, ParameterGrid
from multiprocessing import Pool, Array
from functools import partial
import sys
sys.path.insert(0, "../")
import os
import time
import datetime
@@ -74,8 +73,6 @@ def model_selection_for_precomputed_kernel(datafile,
Examples
--------
>>> import numpy as np
>>> import sys
>>> sys.path.insert(0, "../")
>>> from gklearn.utils.model_selection_precomputed import model_selection_for_precomputed_kernel
>>> from gklearn.kernels.untilHPathKernel import untilhpathkernel
>>>


+ 1
- 1
notebooks/utils/plot_all_graphs.ipynb View File

@@ -46466,7 +46466,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.6.9"
}
},
"nbformat": 4,


+ 81
- 33
notebooks/utils/plot_all_graphs.py View File

@@ -7,10 +7,6 @@ Created on Tue Jan 7 15:25:36 2020
"""

# draw all the graphs
import sys
import pathlib
sys.path.insert(0, "../../")

import matplotlib.pyplot as plt

import networkx as nx
@@ -19,6 +15,58 @@ from gklearn.utils.graphfiles import loadDataset, loadGXL


def main():
# MUTAG dataset.
dataset, y = loadDataset("../../datasets/MUTAG/MUTAG_A.txt")
for idx in [65]:#[6]:
G = dataset[idx]
for node in G.nodes:
if G.nodes[node]['atom'] == '0':
G.nodes[node]['atom'] = 'C'
elif G.nodes[node]['atom'] == '1':
G.nodes[node]['atom'] = 'N'
elif G.nodes[node]['atom'] == '2':
G.nodes[node]['atom'] = 'O'
elif G.nodes[node]['atom'] == '3':
G.nodes[node]['atom'] = 'F'
elif G.nodes[node]['atom'] == '4':
G.nodes[node]['atom'] = 'I'
elif G.nodes[node]['atom'] == '5':
G.nodes[node]['atom'] = 'Cl'
elif G.nodes[node]['atom'] == '6':
G.nodes[node]['atom'] = 'Br'
ecolors = []
for edge in G.edges:
if G.edges[edge]['bond_type'] == '0':
ecolors.append('orange')
elif G.edges[edge]['bond_type'] == '1':
ecolors.append('r')
elif G.edges[edge]['bond_type'] == '2':
ecolors.append('purple')
elif G.edges[edge]['bond_type'] == '3':
ecolors.append('orange')

print(idx)
print(nx.get_node_attributes(G, 'atom'))
edge_labels = nx.get_edge_attributes(G, 'bond_type')
print(edge_labels)
pos=nx.spring_layout(G)
nx.draw(G,
pos,
node_size=500,
labels=nx.get_node_attributes(G, 'atom'),
node_color='blue',
font_color='w',
edge_color=ecolors,
width=3,
with_labels=True)
# edge_labels = nx.draw_networkx_edge_labels(G, pos,
# edge_labels=edge_labels,
# font_color='pink')
plt.savefig('mol1_graph.svg', format='svg', dpi=300)
plt.show()
plt.clf()
# # monoterpenoides dataset.
# dataset, y = loadDataset("../../datasets/monoterpenoides/dataset_10+.ds")
# for idx in [12,22,29,74]:
@@ -67,35 +115,35 @@ def main():
# draw_Fingerprint_graph(Gn[idx], file_prefix='')
# SYNTHETIC dataset.
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
Gn, y_all = loadDataset(dataset)
idx_no_node = []
idx_no_edge = []
idx_no_both = []
for idx, G in enumerate(Gn):
if nx.number_of_nodes(G) == 0:
idx_no_node.append(idx)
if nx.number_of_nodes(G) == 0 and nx.number_of_edges(G) == 0:
idx_no_both.append(idx)
if nx.number_of_edges(G) == 0:
idx_no_edge.append(idx)
# file_prefix = '../results/graph_images/SYNTHETIC/' + G.graph['name']
# draw_SYNTHETIC_graph(Gn[idx], file_prefix=file_prefix, save=True)
# draw_SYNTHETIC_graph(Gn[idx])
print('nb_no_node: ', len(idx_no_node))
print('nb_no_edge: ', len(idx_no_edge))
print('nb_no_both: ', len(idx_no_both))
print('idx_no_node: ', idx_no_node)
print('idx_no_edge: ', idx_no_edge)
print('idx_no_both: ', idx_no_both)
for idx in [0, 10, 100]:
print(idx)
print(Gn[idx].nodes(data=True))
print(Gn[idx].edges(data=True))
draw_SYNTHETIC_graph(Gn[idx], save=None)
# # SYNTHETIC dataset.
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
# Gn, y_all = loadDataset(dataset)
#
# idx_no_node = []
# idx_no_edge = []
# idx_no_both = []
# for idx, G in enumerate(Gn):
# if nx.number_of_nodes(G) == 0:
# idx_no_node.append(idx)
# if nx.number_of_nodes(G) == 0 and nx.number_of_edges(G) == 0:
# idx_no_both.append(idx)
# if nx.number_of_edges(G) == 0:
# idx_no_edge.append(idx)
## file_prefix = '../results/graph_images/SYNTHETIC/' + G.graph['name']
## draw_SYNTHETIC_graph(Gn[idx], file_prefix=file_prefix, save=True)
## draw_SYNTHETIC_graph(Gn[idx])
# print('nb_no_node: ', len(idx_no_node))
# print('nb_no_edge: ', len(idx_no_edge))
# print('nb_no_both: ', len(idx_no_both))
# print('idx_no_node: ', idx_no_node)
# print('idx_no_edge: ', idx_no_edge)
# print('idx_no_both: ', idx_no_both)
#
# for idx in [0, 10, 100]:
# print(idx)
# print(Gn[idx].nodes(data=True))
# print(Gn[idx].edges(data=True))
# draw_SYNTHETIC_graph(Gn[idx], save=None)
def plot_a_graph(graph_filename):
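
The atom and bond look-ups in main() above are long if/elif chains. A compact equivalent (a sketch, not part of the commit; the names are hypothetical) keeps the mappings in dictionaries and falls back to the raw label when a key is missing:

ATOM_MAP = {'0': 'C', '1': 'N', '2': 'O', '3': 'F',
            '4': 'I', '5': 'Cl', '6': 'Br'}
BOND_COLOR = {'0': 'orange', '1': 'r', '2': 'purple', '3': 'orange'}

def relabel_mutag(G):
    # rewrite numeric atom labels in place and return one colour per edge
    for node in G.nodes:
        G.nodes[node]['atom'] = ATOM_MAP.get(G.nodes[node]['atom'],
                                             G.nodes[node]['atom'])
    return [BOND_COLOR.get(G.edges[edge]['bond_type'], 'gray')
            for edge in G.edges]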

