
update datasets used in the tests.

v0.1
jajupmochi 5 years ago
parent
commit
1d49a75040
45 changed files with 89563 additions and 307 deletions
  1. +2     -0   .coveragerc
  2. +2     -0   .gitignore
  3. +1     -1   .travis.yml
  4. +23922 -0   datasets/Cuneiform/Cuneiform_A.txt
  5. +23922 -0   datasets/Cuneiform/Cuneiform_edge_attributes.txt
  6. +23922 -0   datasets/Cuneiform/Cuneiform_edge_labels.txt
  7. +5680  -0   datasets/Cuneiform/Cuneiform_graph_indicator.txt
  8. +267   -0   datasets/Cuneiform/Cuneiform_graph_labels.txt
  9. +5680  -0   datasets/Cuneiform/Cuneiform_node_attributes.txt
  10. +5680 -0   datasets/Cuneiform/Cuneiform_node_labels.txt
  11. +119  -0   datasets/Cuneiform/README.txt
  12. +0    -1   gklearn/kernels/commonWalkKernel.py
  13. +0    -1   gklearn/kernels/marginalizedKernel.py
  14. +0    -2   gklearn/kernels/randomWalkKernel.py
  15. +0    -1   gklearn/kernels/spKernel.py
  16. +0    -2   gklearn/kernels/structuralspKernel.py
  17. +0    -1   gklearn/kernels/treeletKernel.py
  18. +0    -1   gklearn/kernels/untilHPathKernel.py
  19. +0    -1   gklearn/kernels/weisfeilerLehmanKernel.py
  20. +1    -3   gklearn/preimage/find_best_k.py
  21. +3    -4   gklearn/preimage/fitDistance.py
  22. +2    -4   gklearn/preimage/ged.py
  23. +7    -9   gklearn/preimage/iam.py
  24. +5    -7   gklearn/preimage/knn.py
  25. +1    -1   gklearn/preimage/libs.py
  26. +1    -4   gklearn/preimage/preimage_random.py
  27. +0    -2   gklearn/preimage/test.py
  28. +5    -7   gklearn/preimage/test_fitDistance.py
  29. +22   -33  gklearn/preimage/test_ged.py
  30. +4    -7   gklearn/preimage/test_iam.py
  31. +7    -8   gklearn/preimage/test_k_closest_graphs.py
  32. +3    -5   gklearn/preimage/test_others.py
  33. +3    -6   gklearn/preimage/test_preimage_iam.py
  34. +3    -6   gklearn/preimage/test_preimage_mix.py
  35. +3    -7   gklearn/preimage/test_preimage_random.py
  36. +5    -6   gklearn/preimage/utils.py
  37. +18   -19  gklearn/preimage/visualization.py
  38. +127  -84  gklearn/preimage/xp_fit_method.py
  39. +11   -13  gklearn/preimage/xp_letter_h.py
  40. +9    -9   gklearn/preimage/xp_monoterpenoides.py
  41. +37   -5   gklearn/tests/test_graphkernels.py
  42. +7    -10  gklearn/utils/graphfiles.py
  43. +0    -3   gklearn/utils/model_selection_precomputed.py
  44. +1    -1   notebooks/utils/plot_all_graphs.ipynb
  45. +81   -33  notebooks/utils/plot_all_graphs.py

+2 -0  .coveragerc

@@ -0,0 +1,2 @@
[run]
omit = gklearn/tests/*

+2 -0  .gitignore

@@ -15,6 +15,7 @@ datasets/*
!datasets/AIDS/
!datasets/monoterpenoides/
!datasets/Fingerprint/*.txt
!datasets/Cuneiform/*.txt
notebooks/results/*
notebooks/check_gm/*
notebooks/test_parallel/*
@@ -41,3 +42,4 @@ dist/
build/

.coverage
htmlcov

+1 -1  .travis.yml

@@ -22,7 +22,7 @@ install:

script:
- python setup.py bdist_wheel
- pytest -v --cov-report term --cov=gklearn gklearn/tests/
- pytest -v --cov-config=.coveragerc --cov-report term --cov=gklearn gklearn/tests/

after_success:
- codecov

+23922 -0  datasets/Cuneiform/Cuneiform_A.txt  (file diff suppressed because it is too large)


+23922 -0  datasets/Cuneiform/Cuneiform_edge_attributes.txt  (file diff suppressed because it is too large)


+23922 -0  datasets/Cuneiform/Cuneiform_edge_labels.txt  (file diff suppressed because it is too large)


+5680 -0  datasets/Cuneiform/Cuneiform_graph_indicator.txt  (file diff suppressed because it is too large)


+267 -0  datasets/Cuneiform/Cuneiform_graph_labels.txt

@@ -0,0 +1,267 @@
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

+5680 -0  datasets/Cuneiform/Cuneiform_node_attributes.txt  (file diff suppressed because it is too large)


+5680 -0  datasets/Cuneiform/Cuneiform_node_labels.txt  (file diff suppressed because it is too large)


+119 -0  datasets/Cuneiform/README.txt

@@ -0,0 +1,119 @@
README for dataset Cuneiform


=== Usage ===

This folder contains the following comma-separated text files
(replace DS by the name of the dataset):

n = total number of nodes
m = total number of edges
N = number of graphs

(1) DS_A.txt (m lines)
sparse (block diagonal) adjacency matrix for all graphs,
each line corresponds to a (row, col) pair, i.e., a (node_id, node_id) edge

(2) DS_graph_indicator.txt (n lines)
column vector of graph identifiers for all nodes of all graphs,
the value in the i-th line is the graph_id of the node with node_id i

(3) DS_graph_labels.txt (N lines)
class labels for all graphs in the dataset,
the value in the i-th line is the class label of the graph with graph_id i

(4) DS_node_labels.txt (n lines)
column vector of node labels,
the value in the i-th line corresponds to the node with node_id i

There are OPTIONAL files if the respective information is available:

(5) DS_edge_labels.txt (m lines; same size as DS_A.txt)
labels for the edges in DS_A.txt

(6) DS_edge_attributes.txt (m lines; same size as DS_A.txt)
attributes for the edges in DS_A.txt

(7) DS_node_attributes.txt (n lines)
matrix of node attributes,
the comma-separated values in the i-th line form the attribute vector of the node with node_id i

(8) DS_graph_attributes.txt (N lines)
regression values for all graphs in the dataset,
the value in the i-th line is the attribute of the graph with graph_id i


=== Description ===

The Cuneiform dataset contains graphs representing 30 different Hittite cuneiform signs
(class labels 0-29 below). The data was obtained from nine cuneiform tablets written by
scholars of Hittitology in the course of a study on the individualistic characteristics
of cuneiform handwriting. After automated extraction of the individual wedges, the
assignment of the wedges to cuneiform signs was determined manually. The graph model is
explained in detail in the referenced publication.


=== References ===

Nils M. Kriege, Matthias Fey, Denis Fisseler, Petra Mutzel, Frank Weichert
Recognizing Cuneiform Signs Using Graph Based Methods. 2018. arXiv:1802.05908
https://arxiv.org/abs/1802.05908


=== Description of Labels ===

Node labels were converted to integer values using this map:

Component 0:
0 depthPoint
1 tailVertex
2 leftVertex
3 rightVertex

Component 1:
0 vertical
1 Winkelhaken
2 horizontal



Edge labels were converted to integer values using this map:

Component 0:
0 wedge
1 arrangement



Class labels were converted to integer values using this map:

0 tu
1 ta
2 ti
3 nu
4 na
5 ni
6 bu
7 ba
8 bi
9 zu
10 za
11 zi
12 su
13 sa
14 si
15 hu
16 ha
17 hi
18 du
19 da
20 di
21 ru
22 ra
23 ri
24 ku
25 ka
26 ki
27 lu
28 la
29 li
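
As a minimal sketch of how the files described above fit together (gklearn's own loadDataset, exercised by the test changes further down, is the loader this commit actually uses, and it also reads the label and attribute files), the graphs can be rebuilt from DS_A.txt, DS_graph_indicator.txt and DS_graph_labels.txt alone:

import networkx as nx

def load_tud_dataset(prefix):
    # Sketch of a TU Dortmund format reader; assumes 1-based node ids and
    # comma-separated "row, col" pairs in DS_A.txt, as stated above.
    with open(prefix + '_graph_indicator.txt') as f:
        graph_of_node = [int(line) for line in f]   # node_id i (1-based) -> graph_id
    with open(prefix + '_graph_labels.txt') as f:
        labels = [int(line) for line in f]          # graph_id g (1-based) -> class label
    graphs = [nx.Graph(label=y) for y in labels]
    for node_id, gid in enumerate(graph_of_node, start=1):
        graphs[gid - 1].add_node(node_id)
    with open(prefix + '_A.txt') as f:
        for line in f:
            u, v = (int(x) for x in line.split(','))
            graphs[graph_of_node[u - 1] - 1].add_edge(u, v)
    return graphs

graphs = load_tud_dataset('datasets/Cuneiform/Cuneiform')
print(len(graphs), 'graphs;', graphs[0].number_of_nodes(), 'nodes in the first')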

+0 -1  gklearn/kernels/commonWalkKernel.py

@@ -16,7 +16,6 @@ from functools import partial
import networkx as nx
import numpy as np

sys.path.insert(0, "../")
from gklearn.utils.utils import direct_product
from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm


+0 -1  gklearn/kernels/marginalizedKernel.py

@@ -28,7 +28,6 @@ from gklearn.utils.kernels import deltakernel
from gklearn.utils.utils import untotterTransformation
from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm
sys.path.insert(0, "../")


def marginalizedkernel(*args,


+0 -2  gklearn/kernels/randomWalkKernel.py

@@ -6,8 +6,6 @@
[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
"""

import sys
sys.path.insert(0, "../")
import time
from functools import partial
from tqdm import tqdm


+0 -1  gklearn/kernels/spKernel.py

@@ -20,7 +20,6 @@ import numpy as np
from gklearn.utils.utils import getSPGraph
from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm
sys.path.insert(0, "../")

def spkernel(*args,
node_label='atom',


+0 -2  gklearn/kernels/structuralspKernel.py

@@ -25,8 +25,6 @@ from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm
from gklearn.utils.trie import Trie

sys.path.insert(0, "../")


def structuralspkernel(*args,
node_label='atom',


+0 -1  gklearn/kernels/treeletKernel.py

@@ -8,7 +8,6 @@
"""

import sys
sys.path.insert(0, "../")
import time
from collections import Counter
from itertools import chain


+0 -1  gklearn/kernels/untilHPathKernel.py

@@ -9,7 +9,6 @@
"""

import sys
sys.path.insert(0, "../")
import time
from collections import Counter
from itertools import chain


+0 -1  gklearn/kernels/weisfeilerLehmanKernel.py

@@ -10,7 +10,6 @@

import sys
from collections import Counter
sys.path.insert(0, "../")
from functools import partial
import time
#from multiprocessing import Pool


+1 -3  gklearn/preimage/find_best_k.py

@@ -9,10 +9,8 @@ import numpy as np
import random
import csv

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from preimage.test_k_closest_graphs import median_on_k_closest_graphs
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs

def find_best_k():
ds = {'name': 'monoterpenoides',


+3 -4  gklearn/preimage/fitDistance.py

@@ -13,15 +13,14 @@ from multiprocessing import Pool
from functools import partial
import time
import random
import sys

from scipy import optimize
from scipy.optimize import minimize
import cvxpy as cp

import sys
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic
from preimage.utils import kernel_distance_matrix
from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic
from gklearn.preimage.utils import kernel_distance_matrix

def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max,
params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT',


+2 -4  gklearn/preimage/ged.py

@@ -128,12 +128,10 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method
elif lib == 'gedlib-bash':
import time
import random
import sys
import os
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import saveDataset
tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/'
tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
if not os.path.exists(tmp_dir):
os.makedirs(tmp_dir)
fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9))
@@ -144,7 +142,7 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n'
command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
command += 'export LD_LIBRARY_PATH\n'
command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n'
command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
command += './ged_for_python_bash monoterpenoides ' + fn_collection \
+ ' \'' + algo_options + '\' '
for ec in edit_cost_constant:
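
The same replacement recurs throughout this commit: machine-specific '/media/ljia/...' paths give way to paths anchored at the module's own location. A minimal sketch of the idiom, with a hypothetical helper name:

import os

def module_relative(*parts):
    # Resolve a path against this module's directory, so the code works
    # regardless of the current working directory or the machine it runs on.
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), *parts)

tmp_dir = module_relative('cpp_ext', 'output', 'tmp_ged')
os.makedirs(tmp_dir, exist_ok=True)

Using os.path.join also avoids a pitfall visible in a few hunks below, where '../../datasets/...' is concatenated onto dirname(...) without a path separator.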


+7 -9  gklearn/preimage/iam.py

@@ -11,11 +11,9 @@ import random
import networkx as nx
from tqdm import tqdm

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
from ged import GED, ged_median
from gklearn.preimage.ged import GED, ged_median


def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
@@ -438,7 +436,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,

def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1,
dataset='monoterpenoides',
graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'):
graph_dir=''):
"""Compute the iam by c++ implementation (gedlib) through bash.
"""
import os
@@ -462,18 +460,18 @@ def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1,
fgroup.write("\n</GraphCollection>")
fgroup.close()

tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/'
tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9))
createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection)
# fn_collection = tmp_dir + 'collection_for_debug'
# graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl'
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/gxl'
# if dataset == 'Letter-high' or dataset == 'Fingerprint':
# dataset = 'letter'
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n'
command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
command += 'export LD_LIBRARY_PATH\n'
command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n'
command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \
+ ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' '
if edit_cost_constant is None:
@@ -489,8 +487,8 @@ def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1,
sod_sm = float(output[0].strip())
sod_gm = float(output[1].strip())
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
return sod_sm, sod_gm, fname_sm, fname_gm
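
iam_bash now reads and writes everything under the module-relative cpp_ext tree. A hedged call sketch (the file names, cost and graph directory are illustrative, and the gedlib C++ binaries must already be built under cpp_ext):

from gklearn.preimage.iam import iam_bash

# Gn_names are GXL file names inside graph_dir; passing None as
# edit_cost_constant lets the C++ side fall back to its defaults.
sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(
    ['graph0.gxl', 'graph1.gxl'], None, cost='CONSTANT',
    initial_solutions=40, dataset='monoterpenoides',
    graph_dir='../../datasets/monoterpenoides/')
print('set median SOD:', sod_sm, '-> saved at', fname_sm)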



+5 -7  gklearn/preimage/knn.py

@@ -11,14 +11,12 @@ from tqdm import tqdm
import random
#import csv
from shutil import copyfile
import os


import sys
sys.path.insert(0, "../")
from preimage.iam import iam_bash
from gklearn.preimage.iam import iam_bash
from gklearn.utils.graphfiles import loadDataset, loadGXL
from preimage.ged import GED
from preimage.utils import get_same_item_indices
from gklearn.preimage.ged import GED
from gklearn.preimage.utils import get_same_item_indices

def test_knn():
ds = {'name': 'monoterpenoides',
@@ -30,7 +28,7 @@ def test_knn():
# edge_label = 'bond_type'
# ds_name = 'mono'
dir_output = 'results/knn/'
graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'
k_nn = 1
percent = 0.1


+1 -1  gklearn/preimage/libs.py

@@ -2,5 +2,5 @@ import sys
import pathlib

# insert gedlibpy library.
sys.path.insert(0, "../../")
sys.path.insert(0, "../../../")
from gedlibpy import librariesImport, gedlibpy

+1 -4  gklearn/preimage/preimage_random.py

@@ -14,10 +14,7 @@ from tqdm import tqdm
import networkx as nx
import matplotlib.pyplot as plt


sys.path.insert(0, "../")

from utils import compute_kernel, dis_gstar
from gklearn.preimage.utils import compute_kernel, dis_gstar


def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):


+0 -2  gklearn/preimage/test.py

@@ -52,8 +52,6 @@ def convertGraph(G):


def testNxGrapĥ():
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb


+5 -7  gklearn/preimage/test_fitDistance.py

@@ -9,12 +9,10 @@ from matplotlib import pyplot as plt
import numpy as np
from tqdm import tqdm

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from utils import remove_edges
from fitDistance import fit_GED_to_kernel_distance
from utils import normalize_distance_matrix
from gklearn.preimage.utils import remove_edges
from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
from gklearn.preimage.utils import normalize_distance_matrix


def test_update_costs():
@@ -63,7 +61,7 @@ def median_paper_clcpc_python_best():
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50
collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt'
@@ -160,7 +158,7 @@ def median_paper_clcpc_python_bash_cpp():
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50
collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt'


+22 -33  gklearn/preimage/test_ged.py

@@ -14,13 +14,11 @@ import sys
def test_NON_SYMBOLIC_cost():
"""Test edit cost LETTER2.
"""
import sys
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter
from preimage.test_k_closest_graphs import reform_attributes
from gklearn.preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter
from gklearn.preimage.test_k_closest_graphs import reform_attributes
from gklearn.utils.graphfiles import loadDataset

dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-high/Letter-high_A.txt'
dataset = '../../datasets/Letter-high/Letter-high_A.txt'
Gn, y_all = loadDataset(dataset)

g1 = Gn[200]
@@ -53,14 +51,12 @@ def test_NON_SYMBOLIC_cost():
def test_LETTER2_cost():
"""Test edit cost LETTER2.
"""
import sys
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations_letter
from preimage.test_k_closest_graphs import reform_attributes
from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
from gklearn.preimage.test_k_closest_graphs import reform_attributes
from gklearn.utils.graphfiles import loadDataset

ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])

g1 = Gn[200]
@@ -96,14 +92,12 @@ def test_get_nb_edit_operations_letter():
should be the same as the cost computed by number of operations and edit
cost constants.
"""
import sys
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations_letter
from preimage.test_k_closest_graphs import reform_attributes
from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
from gklearn.preimage.test_k_closest_graphs import reform_attributes
from gklearn.utils.graphfiles import loadDataset

ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])

g1 = Gn[200]
@@ -136,13 +130,12 @@ def test_get_nb_edit_operations():
numbers of edit operations. The distance/cost computed by GED should be the
same as the cost computed by number of operations and edit cost constants.
"""
import sys
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations
from gklearn.preimage.ged import GED, get_nb_edit_operations
from gklearn.utils.graphfiles import loadDataset
import os

ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb
ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])

g1 = Gn[20]
@@ -173,11 +166,10 @@ def test_get_nb_edit_operations():
def test_ged_python_bash_cpp():
"""Test ged computation with python invoking the c++ code by bash command (with updated library).
"""
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from preimage.ged import GED
from gklearn.preimage.ged import GED

data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/'
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
# collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
@@ -233,7 +225,7 @@ def test_ged_best_settings_updated():
"""Test ged computation with best settings the same as in the C++ code (with updated library).
"""

data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/'
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
# collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'

@@ -292,7 +284,7 @@ def test_ged_best_settings():
"""Test ged computation with best settings the same as in the C++ code.
"""

data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/'
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

@@ -350,7 +342,7 @@ def test_ged_default():
"""Test ged computation with default settings.
"""

data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/'
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

@@ -404,11 +396,10 @@ def test_ged_default():
def test_ged_min():
"""Test ged computation with the "min" stabilizer.
"""
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from preimage.ged import GED
from gklearn.preimage.ged import GED

data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/'
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'

@@ -487,8 +478,6 @@ def convertGraph(G):


def testNxGrapĥ():
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb


+4 -7  gklearn/preimage/test_iam.py

@@ -13,14 +13,11 @@ import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
#from gklearn.utils.logger2file import *
from iam import iam_upgraded
from utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar
#from ged import ged_median
from gklearn.preimage.iam import iam_upgraded
from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar
#from gklearn.preimage.ged import ged_median


def test_iam_monoterpenoides_with_init40():
@@ -52,7 +49,7 @@ def test_iam_monoterpenoides_with_init40():
'stabilizer': ged_stabilizer}

collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50


+7 -8  gklearn/preimage/test_k_closest_graphs.py

@@ -17,15 +17,12 @@ import multiprocessing
from multiprocessing import Pool
from functools import partial

#import os
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset, loadGXL
#from gklearn.utils.logger2file import *
from iam import iam_upgraded, iam_bash
from utils import compute_kernel, dis_gstar, kernel_distance_matrix
from fitDistance import fit_GED_to_kernel_distance
#from ged import ged_median
from gklearn.preimage.iam import iam_upgraded, iam_bash
from gklearn.preimage.utils import compute_kernel, dis_gstar, kernel_distance_matrix
from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
#from gklearn.preimage.ged import ged_median


def fit_edit_cost_constants(fit_method, edit_cost_name,
@@ -204,6 +201,8 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho
if Kmatrix is not None:
Kmatrix_median = np.copy(Kmatrix[group_min,:])
Kmatrix_median = Kmatrix_median[:,group_min]
else:
Kmatrix_median = None

# 1. fit edit cost constants.
@@ -379,7 +378,7 @@ def test_k_closest_graphs_with_cv():
y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
repeats = 50
collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
graph_dir = collection_path + 'gxl/'
sod_sm_list = []


+3 -5  gklearn/preimage/test_others.py

@@ -11,12 +11,10 @@ import matplotlib.pyplot as plt
import time
from tqdm import tqdm

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from median import draw_Letter_graph
from ged import GED, ged_median
from utils import get_same_item_indices, compute_kernel, gram2distances, \
from gklearn.preimage.median import draw_Letter_graph
from gklearn.preimage.ged import GED, ged_median
from gklearn.preimage.utils import get_same_item_indices, compute_kernel, gram2distances, \
dis_gstar, remove_edges




+3 -6  gklearn/preimage/test_preimage_iam.py

@@ -13,14 +13,11 @@ import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from utils import remove_edges, compute_kernel, get_same_item_indices
from ged import ged_median
from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices
from gklearn.preimage.ged import ged_median

from preimage_iam import preimage_iam
from gklearn.preimage.preimage_iam import preimage_iam


###############################################################################


+3 -6  gklearn/preimage/test_preimage_mix.py

@@ -13,13 +13,10 @@ import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset
from ged import ged_median
from utils import compute_kernel, get_same_item_indices, remove_edges
from preimage_iam import preimage_iam_random_mix
from gklearn.preimage.ged import ged_median
from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges
from gklearn.preimage.preimage_iam import preimage_iam_random_mix

###############################################################################
# tests on different values on grid of median-sets and k.


+3 -7  gklearn/preimage/test_preimage_random.py

@@ -13,14 +13,10 @@ import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset

from preimage_random import preimage_random
from ged import ged_median
from utils import compute_kernel, get_same_item_indices, remove_edges
from gklearn.preimage.preimage_random import preimage_random
from gklearn.preimage.ged import ged_median
from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges


###############################################################################


+5 -6  gklearn/preimage/utils.py

@@ -11,8 +11,6 @@ Useful functions.
import multiprocessing
import numpy as np

import sys
sys.path.insert(0, "../")
from gklearn.kernels.marginalizedKernel import marginalizedkernel
from gklearn.kernels.untilHPathKernel import untilhpathkernel
from gklearn.kernels.spKernel import spkernel
@@ -41,7 +39,7 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
return np.sqrt(term1 - term2 + term3)


def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel='imap_unordered'):
if graph_kernel == 'marginalizedkernel':
Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label,
p_quit=0.03, n_iteration=10, remove_totters=False,
@@ -49,6 +47,7 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
elif graph_kernel == 'untilhpathkernel':
Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label,
depth=7, k_func='MinMax', compute_method='trie',
parallel=parallel,
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
elif graph_kernel == 'spkernel':
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
@@ -66,18 +65,18 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
Kmatrix, _ = structuralspkernel(Gn, node_label=node_label,
edge_label=edge_label, node_kernels=sub_kernels,
edge_kernels=sub_kernels,
parallel=None, n_jobs=multiprocessing.cpu_count(),
parallel=parallel, n_jobs=multiprocessing.cpu_count(),
verbose=verbose)
elif graph_kernel == 'treeletkernel':
pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
# pkernel = functools.partial(gaussiankernel, gamma=1e-6)
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label,
sub_kernel=pkernel,
sub_kernel=pkernel, parallel=parallel,
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
elif graph_kernel == 'weisfeilerlehmankernel':
Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label,
height=4, base_kernel='subtree',
height=4, base_kernel='subtree', parallel=None,
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
# normalization
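
With the new keyword, callers can choose the parallelization mode per kernel. A minimal usage sketch (the dataset path and label names are illustrative):

from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.utils import compute_kernel

Gn, y_all = loadDataset('../../datasets/monoterpenoides/dataset_10+.ds')
# 'imap_unordered' is the new default; pass parallel=None to force the
# single-process code path instead.
Kmatrix = compute_kernel(Gn[0:10], 'structuralspkernel', 'atom', 'bond_type',
                         verbose=True, parallel='imap_unordered')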


+18 -19  gklearn/preimage/visualization.py

@@ -11,11 +11,8 @@ import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset
from tqdm import tqdm


import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset, loadGXL
from utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices
from gklearn.preimage.utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices


def visualize_graph_dataset(dis_measure, visual_method, draw_figure,
@@ -115,11 +112,11 @@ def visualize_distances_in_kernel():
# Gn = Gn[0:50]
fname_medians = 'expert.treelet'
# add set median.
fname_sm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
fname_sm = 'results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
set_median = loadGXL(fname_sm)
Gn.append(set_median)
# add generalized median (estimated pre-image.)
fname_gm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
fname_gm = 'results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
gen_median = loadGXL(fname_gm)
Gn.append(gen_median)
@@ -166,19 +163,19 @@ def visualize_distances_in_kernel():
def visualize_distances_in_ged():
from fitDistance import compute_geds
from ged import GED
from gklearn.preimage.fitDistance import compute_geds
from gklearn.preimage.ged import GED
ds = {'name': 'monoterpenoides',
'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
# add set median.
fname_medians = 'expert.treelet'
fname_sm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
fname_sm = 'preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
set_median = loadGXL(fname_sm)
Gn.append(set_median)
# add generalized median (estimated pre-image.)
fname_gm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
fname_gm = 'preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
gen_median = loadGXL(fname_gm)
Gn.append(gen_median)
@@ -227,9 +224,10 @@ def visualize_distances_in_ged():
def visualize_distances_in_kernel_monoterpenoides():
import os

ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
Gn_original, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
@@ -301,11 +299,12 @@ def visualize_distances_in_kernel_monoterpenoides():
def visualize_distances_in_ged_monoterpenoides():
from fitDistance import compute_geds
from ged import GED
from gklearn.preimage.fitDistance import compute_geds
from gklearn.preimage.ged import GED
import os
ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
Gn_original, y_all = loadDataset(ds['dataset'])
# Gn = Gn[0:50]
@@ -379,8 +378,8 @@ def visualize_distances_in_ged_monoterpenoides():
def visualize_distances_in_kernel_letter_h():
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
# Gn = Gn[0:50]
@@ -455,8 +454,8 @@ def visualize_distances_in_ged_letter_h():
from fitDistance import compute_geds
from preimage.test_k_closest_graphs import reform_attributes
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
# Gn = Gn[0:50]


+127 -84  gklearn/preimage/xp_fit_method.py

@@ -11,35 +11,37 @@ import csv
from shutil import copyfile
import networkx as nx
import matplotlib.pyplot as plt
import os

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel
from preimage.find_best_k import getRelations
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel
from gklearn.preimage.find_best_k import getRelations


def get_dataset(ds_name):
if ds_name == 'Letter-high': # node non-symb
dataset = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml'
graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'
dataset = 'cpp_ext/data/collections/Letter.xml'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
for G in Gn:
reform_attributes(G)
elif ds_name == 'Fingerprint':
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/gedlib/data/collections/Fingerprint.xml'
graph_dir = '/media/ljia/DATA/research-repo/codes/Linlin/gedlib/data/datasets/Fingerprint/data/'
Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
for G in Gn:
reform_attributes(G)
# dataset = 'cpp_ext/data/collections/Fingerprint.xml'
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
# Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
# for G in Gn:
# reform_attributes(G)
dataset = '../../datasets/Fingerprint/Fingerprint_A.txt'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
Gn, y_all = loadDataset(dataset)
elif ds_name == 'SYNTHETIC':
pass
elif ds_name == 'SYNTHETICnew':
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/SYNTHETICnew'
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-high/Letter-high_A.txt'
# graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'
dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/SYNTHETICnew'
# dataset = '../../datasets/Letter-high/Letter-high_A.txt'
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
Gn, y_all = loadDataset(dataset)
elif ds_name == 'Synthie':
pass
@@ -184,6 +186,8 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti
if Kmatrix is not None:
Kmatrix_sub = Kmatrix[values,:]
Kmatrix_sub = Kmatrix_sub[:,values]
else:
Kmatrix_sub = None
for repeat in range(repeats):
print('\nrepeat =', repeat)
@@ -273,11 +277,11 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')
@@ -427,63 +431,101 @@ if __name__ == "__main__":
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
# #### xp 3: Fingerprint, sspkernel, using LETTER2.
# #### xp 3: SYNTHETICnew, sspkernel, using NON_SYMBOLIC.
# gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.structuralspkernel.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# run_time = gmfile['run_time']
# # normalization
# Kmatrix_diag = Kmatrix.diagonal().copy()
# for i in range(len(Kmatrix)):
# for j in range(i, len(Kmatrix)):
# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
# Kmatrix[j][i] = Kmatrix[i][j]
## np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
## Kmatrix=Kmatrix, run_time=run_time)
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'Fingerprint'
# ds_name = 'SYNTHETICnew'
# gkernel = 'structuralspkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without nodes and edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
# and nx.number_of_edges(G) != 0)]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
## Gn = Gn[0:50]
## y_all = y_all[0:50]
## Gn = Gn[0:10]
## y_all = y_all[0:10]
# for G in Gn:
# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
# # compute pair distances.
## dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
## Kmatrix=None, gkernel=gkernel, verbose=True)
# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# # fitting and computing.
# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
# fit_methods = ['k-graphs', 'random', 'random', 'random']
# for fit_method in fit_methods:
# print('\n-------------------------------------')
# print('fit method:', fit_method)
# parameters = {'ds_name': ds_name,
# 'gkernel': gkernel,
# 'edit_cost_name': 'LETTER2',
# 'edit_cost_name': 'NON_SYMBOLIC',
# 'ged_method': 'mIPFP',
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method}
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# initial_solutions=1,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)
# #### xp 4: SYNTHETICnew, sspkernel, using NON_SYMBOLIC.
# ### xp 4: SYNTHETICnew, spkernel, using NON_SYMBOLIC.
# gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# # normalization
# Kmatrix_diag = Kmatrix.diagonal().copy()
# for i in range(len(Kmatrix)):
# for j in range(i, len(Kmatrix)):
# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
# Kmatrix[j][i] = Kmatrix[i][j]
# run_time = 21821.35
# np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
# Kmatrix=Kmatrix, run_time=run_time)
#
# # load dataset.
# print('getting dataset and computing kernel distance matrix first...')
# ds_name = 'SYNTHETICnew'
# gkernel = 'structuralspkernel'
# gkernel = 'spkernel'
# Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without nodes and edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0
# and nx.number_of_edges(G) != 0)]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
# Gn = Gn[0:10]
# y_all = y_all[0:10]
## # remove graphs without nodes and edges.
## Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_node(G) != 0
## and nx.number_of_edges(G) != 0)]
## idx = [G[0] for G in Gn]
## Gn = [G[1] for G in Gn]
## y_all = [y_all[i] for i in idx]
## Gn = Gn[0:5]
## y_all = y_all[0:5]
# for G in Gn:
# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
# # compute pair distances.
#
# # compute/read Gram matrix and pair distances.
## Kmatrix = compute_kernel(Gn, gkernel, None, None, True)
## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
## Kmatrix=Kmatrix)
# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# run_time = gmfile['run_time']
## Kmatrix = Kmatrix[[0,1,2,3,4],:]
## Kmatrix = Kmatrix[:,[0,1,2,3,4]]
# print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=None, gkernel=gkernel, verbose=True)
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
## Kmatrix = np.zeros((len(Gn), len(Gn)))
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
#
# # fitting and computing.
# fit_methods = ['k-graphs', 'random', 'random', 'random']
# for fit_method in fit_methods:
@@ -496,68 +538,69 @@ if __name__ == "__main__":
# 'attr_distance': 'euclidean',
# 'fit_method': fit_method}
# xp_fit_method_for_non_symbolic(parameters, save_results=True,
# initial_solutions=40,
# Gn_data = [Gn, y_all, graph_dir],
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
### xp 5: SYNTHETICnew, spkernel, using NON_SYMBOLIC.
gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz')
Kmatrix = gmfile['Kmatrix']
# normalization
Kmatrix_diag = Kmatrix.diagonal().copy()
for i in range(len(Kmatrix)):
for j in range(i, len(Kmatrix)):
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
Kmatrix[j][i] = Kmatrix[i][j]
run_time = 21821.35
np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
Kmatrix=Kmatrix, run_time=run_time)
# initial_solutions=1,
# Gn_data=[Gn, y_all, graph_dir],
# k_dis_data=[dis_mat, dis_max, dis_min, dis_mean],
# Kmatrix=Kmatrix)
#### xp 5: Fingerprint, sspkernel, using LETTER2.
# load dataset.
print('getting dataset and computing kernel distance matrix first...')
ds_name = 'SYNTHETICnew'
gkernel = 'spkernel'
ds_name = 'Fingerprint'
gkernel = 'structuralspkernel'
Gn, y_all, graph_dir = get_dataset(ds_name)
# # remove graphs without nodes and edges.
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0
# remove graphs without nodes and edges.
Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0)]
# and nx.number_of_edges(G) != 0)]
# idx = [G[0] for G in Gn]
# Gn = [G[1] for G in Gn]
# y_all = [y_all[i] for i in idx]
# Gn = Gn[0:5]
# y_all = y_all[0:5]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
y_all = [y_all[i] for i in idx]
y_idx = get_same_item_indices(y_all)
# remove unused labels.
for G in Gn:
G.graph['edge_attrs'] = []
for edge in G.edges:
del G.edges[edge]['attributes']
del G.edges[edge]['orient']
del G.edges[edge]['angle']
Gn = Gn[805:815]
y_all = y_all[805:815]
for G in Gn:
G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
# compute/read Gram matrix and pair distances.
# Kmatrix = compute_kernel(Gn, gkernel, None, None, True)
# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
# Kmatrix=Kmatrix)
gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
Kmatrix = gmfile['Kmatrix']
run_time = gmfile['run_time']
Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
Kmatrix=Kmatrix)
# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
# Kmatrix = gmfile['Kmatrix']
# run_time = gmfile['run_time']
# Kmatrix = Kmatrix[[0,1,2,3,4],:]
# Kmatrix = Kmatrix[:,[0,1,2,3,4]]
print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
# print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
# Kmatrix = np.zeros((len(Gn), len(Gn)))
# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# compute pair distances.
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
# Kmatrix=None, gkernel=gkernel, verbose=True)
# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
# fitting and computing.
fit_methods = ['k-graphs', 'random', 'random', 'random']
fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
for fit_method in fit_methods:
print('\n-------------------------------------')
print('fit method:', fit_method)
parameters = {'ds_name': ds_name,
'gkernel': gkernel,
'edit_cost_name': 'NON_SYMBOLIC',
'edit_cost_name': 'LETTER2',
'ged_method': 'mIPFP',
'attr_distance': 'euclidean',
'fit_method': fit_method}
xp_fit_method_for_non_symbolic(parameters, save_results=True,
initial_solutions=1,
Gn_data=[Gn, y_all, graph_dir],
k_dis_data=[dis_mat, dis_max, dis_min, dis_mean],
initial_solutions=40,
Gn_data = [Gn, y_all, graph_dir],
k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
Kmatrix=Kmatrix)
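
The diagonal-based normalization loop above recurs in several variants in this file; a vectorized sketch of the same cosine normalization (equivalent to the nested loops, assuming a square Gram matrix):

import numpy as np

def normalize_gram(K):
    # K[i, j] / sqrt(K[i, i] * K[j, j]) in one shot, matching the
    # element-wise loops used above.
    d = np.sqrt(np.diag(K))
    return K / np.outer(d, d)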

+11 -13  gklearn/preimage/xp_letter_h.py

@@ -12,17 +12,15 @@ from shutil import copyfile
import networkx as nx
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from preimage.utils import get_same_item_indices, kernel_distance_matrix
from preimage.find_best_k import getRelations
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix
from gklearn.preimage.find_best_k import getRelations


def xp_letter_h_LETTER2_cost():
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, Kmatrix=None, gkernel='structuralspkernel')
@@ -177,11 +175,11 @@ def xp_letter_h_LETTER2_cost():
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')
@@ -243,8 +241,8 @@ def xp_letter_h_LETTER2_cost():


def xp_letter_h():
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
for G in Gn:
reform_attributes(G)
@@ -396,11 +394,11 @@ def xp_letter_h():
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')


+9 -9  gklearn/preimage/xp_monoterpenoides.py

@@ -13,16 +13,16 @@ from shutil import copyfile
import networkx as nx
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, "../")
from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from preimage.utils import get_same_item_indices
from preimage.find_best_k import getRelations
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
from gklearn.preimage.utils import get_same_item_indices
from gklearn.preimage.find_best_k import getRelations

def xp_monoterpenoides():
ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb
import os

ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'])
# ds = {'name': 'Letter-high',
# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
@@ -169,11 +169,11 @@ def xp_monoterpenoides():
nb_dis_k_gi2gm[2] += 1
# save median graphs.
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')


+37 -5  gklearn/tests/test_graphkernels.py

@@ -15,10 +15,16 @@ def chooseDataset(ds_name):
ds_file = 'datasets/Alkane/dataset.ds'
ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt'
Gn, y = loadDataset(ds_file, filename_y=ds_y)
for G in Gn:
for node in G.nodes:
del G.nodes[node]['attributes']
# node symbolic labels.
elif ds_name == 'Acyclic':
ds_file = 'datasets/acyclic/dataset_bps.ds'
Gn, y = loadDataset(ds_file)
for G in Gn:
for node in G.nodes:
del G.nodes[node]['attributes']
# node non-symbolic labels.
elif ds_name == 'Letter-med':
ds_file = 'datasets/Letter-med/Letter-med_A.txt'
@@ -27,14 +33,39 @@ def chooseDataset(ds_name):
elif ds_name == 'AIDS':
ds_file = 'datasets/AIDS/AIDS_A.txt'
Gn, y = loadDataset(ds_file)
# edge non-symbolic labels (no node labels).
elif ds_name == 'Fingerprint_edge':
import networkx as nx
ds_file = 'datasets/Fingerprint/Fingerprint_A.txt'
Gn, y = loadDataset(ds_file)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
y = [y[i] for i in idx]
for G in Gn:
G.graph['node_attrs'] = []
for node in G.nodes:
del G.nodes[node]['attributes']
del G.nodes[node]['x']
del G.nodes[node]['y']
# edge non-symbolic labels (and node non-symbolic labels).
elif ds_name == 'Fingerprint':
import networkx as nx
ds_file = 'datasets/Fingerprint/Fingerprint_A.txt'
Gn, y = loadDataset(ds_file)
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
idx = [G[0] for G in Gn]
Gn = [G[1] for G in Gn]
y = [y[i] for i in idx]
# edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels).
elif ds_name == 'Cuneiform':
import networkx as nx
ds_file = 'datasets/Cuneiform/Cuneiform_A.txt'
Gn, y = loadDataset(ds_file)
Gn = Gn[0:10]
y = y[0:10]
Gn = Gn[0:3]
y = y[0:3]
return Gn, y
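
The Fingerprint variants drop graphs without edges and keep the targets aligned through an explicit index list. The same filter can be written with zip; a sketch assumed to behave identically to the version above (the helper name is hypothetical):

import networkx as nx

def drop_edgeless(Gn, y):
    # keep only graphs that still have edges, together with their targets
    pairs = [(G, target) for G, target in zip(Gn, y)
             if nx.number_of_edges(G) != 0]
    return [G for G, _ in pairs], [target for _, target in pairs]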

@@ -152,7 +183,7 @@ def test_spkernel(ds_name, parallel):


#@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS'])
@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_structuralspkernel(ds_name, parallel):
"""Test structural shortest path kernel.
@@ -246,4 +277,5 @@ def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel):

if __name__ == "__main__":
test_spkernel()
# test_spkernel('Alkane', 'imap_unordered')
test_structuralspkernel('Fingerprint_edge', 'imap_unordered')
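
The __main__ block above runs a single case directly; the parametrized suite can also be launched from Python. A sketch using pytest.main, where the -k expression is an assumption about how pytest composes the ids of the parametrized cases:

import pytest

if __name__ == '__main__':
    # run only the structural SP kernel tests on the Fingerprint_edge dataset
    pytest.main(['-v', '-k', 'structuralspkernel and Fingerprint_edge',
                 'gklearn/tests/test_graphkernels.py'])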

+ 7
- 10
gklearn/utils/graphfiles.py View File

@@ -753,15 +753,12 @@ if __name__ == '__main__':
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
# Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# saveDataset(Gn, y, group='xml', filename='temp/temp')
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
Gn, y_all = loadDataset(dataset)
filename = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/SYNTHETICnew/SYNTHETICnew'
saveDataset(Gn, y_all, gformat='gxl', group='xml', filename=filename)
# test - new way to add labels and attributes.
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Fingerprint/Fingerprint_A.txt'
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-med/Letter-med_A.txt'
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/AIDS/AIDS_A.txt'
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'
# Gn, y_all = loadDataset(dataset)
# dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
# dataset = '../../datasets/Fingerprint/Fingerprint_A.txt'
# dataset = '../../datasets/Letter-med/Letter-med_A.txt'
# dataset = '../../datasets/AIDS/AIDS_A.txt'
# dataset = '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'
# Gn, y_all = loadDataset(dataset)
pass
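
The removed lines exercised a load/save round trip with machine-specific paths. A sketch of the same round trip with the repo-relative paths used in the commented lines (resolved against gklearn/utils/); the output location is an arbitrary choice for illustration:

from gklearn.utils.graphfiles import loadDataset, saveDataset

Gn, y_all = loadDataset('../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt')
saveDataset(Gn, y_all, gformat='gxl', group='xml',
            filename='temp/SYNTHETICnew/SYNTHETICnew')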

+ 0
- 3
gklearn/utils/model_selection_precomputed.py View File

@@ -11,7 +11,6 @@ from sklearn.model_selection import KFold, train_test_split, ParameterGrid
from multiprocessing import Pool, Array
from functools import partial
import sys
sys.path.insert(0, "../")
import os
import time
import datetime
@@ -74,8 +73,6 @@ def model_selection_for_precomputed_kernel(datafile,
Examples
--------
>>> import numpy as np
>>> import sys
>>> sys.path.insert(0, "../")
>>> from gklearn.utils.model_selection_precomputed import model_selection_for_precomputed_kernel
>>> from gklearn.kernels.untilHPathKernel import untilhpathkernel
>>>


+ 1
- 1
notebooks/utils/plot_all_graphs.ipynb View File

@@ -46466,7 +46466,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.6.9"
}
},
"nbformat": 4,


+ 81
- 33
notebooks/utils/plot_all_graphs.py View File

@@ -7,10 +7,6 @@ Created on Tue Jan 7 15:25:36 2020
"""

# draw all the graphs
import sys
import pathlib
sys.path.insert(0, "../../")

import matplotlib.pyplot as plt

import networkx as nx
@@ -19,6 +15,58 @@ from gklearn.utils.graphfiles import loadDataset, loadGXL


def main():
# MUTAG dataset.
dataset, y = loadDataset("../../datasets/MUTAG/MUTAG_A.txt")
for idx in [65]:#[6]:
G = dataset[idx]
for node in G.nodes:
if G.nodes[node]['atom'] == '0':
G.nodes[node]['atom'] = 'C'
elif G.nodes[node]['atom'] == '1':
G.nodes[node]['atom'] = 'N'
elif G.nodes[node]['atom'] == '2':
G.nodes[node]['atom'] = 'O'
elif G.nodes[node]['atom'] == '3':
G.nodes[node]['atom'] = 'F'
elif G.nodes[node]['atom'] == '4':
G.nodes[node]['atom'] = 'I'
elif G.nodes[node]['atom'] == '5':
G.nodes[node]['atom'] = 'Cl'
elif G.nodes[node]['atom'] == '6':
G.nodes[node]['atom'] = 'Br'
ecolors = []
for edge in G.edges:
if G.edges[edge]['bond_type'] == '0':
ecolors.append('orange')
elif G.edges[edge]['bond_type'] == '1':
ecolors.append('r')
elif G.edges[edge]['bond_type'] == '2':
ecolors.append('purple')
elif G.edges[edge]['bond_type'] == '3':
ecolors.append('orange')

print(idx)
print(nx.get_node_attributes(G, 'atom'))
edge_labels = nx.get_edge_attributes(G, 'bond_type')
print(edge_labels)
pos=nx.spring_layout(G)
nx.draw(G,
pos,
node_size=500,
labels=nx.get_node_attributes(G, 'atom'),
node_color='blue',
font_color='w',
edge_color=ecolors,
width=3,
with_labels=True)
# edge_labels = nx.draw_networkx_edge_labels(G, pos,
# edge_labels=edge_labels,
# font_color='pink')
plt.savefig('mol1_graph.svg', format='svg', dpi=300)
plt.show()
plt.clf()
# # monoterpenoides dataset.
# dataset, y = loadDataset("../../datasets/monoterpenoides/dataset_10+.ds")
# for idx in [12,22,29,74]:
@@ -67,35 +115,35 @@ def main():
# draw_Fingerprint_graph(Gn[idx], file_prefix='')
# SYNTHETIC dataset.
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
Gn, y_all = loadDataset(dataset)
idx_no_node = []
idx_no_edge = []
idx_no_both = []
for idx, G in enumerate(Gn):
if nx.number_of_nodes(G) == 0:
idx_no_node.append(idx)
if nx.number_of_nodes(G) == 0 and nx.number_of_edges(G) == 0:
idx_no_both.append(idx)
if nx.number_of_edges(G) == 0:
idx_no_edge.append(idx)
# file_prefix = '../results/graph_images/SYNTHETIC/' + G.graph['name']
# draw_SYNTHETIC_graph(Gn[idx], file_prefix=file_prefix, save=True)
# draw_SYNTHETIC_graph(Gn[idx])
print('nb_no_node: ', len(idx_no_node))
print('nb_no_edge: ', len(idx_no_edge))
print('nb_no_both: ', len(idx_no_both))
print('idx_no_node: ', idx_no_node)
print('idx_no_edge: ', idx_no_edge)
print('idx_no_both: ', idx_no_both)
for idx in [0, 10, 100]:
print(idx)
print(Gn[idx].nodes(data=True))
print(Gn[idx].edges(data=True))
draw_SYNTHETIC_graph(Gn[idx], save=None)
# # SYNTHETIC dataset.
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
# Gn, y_all = loadDataset(dataset)
#
# idx_no_node = []
# idx_no_edge = []
# idx_no_both = []
# for idx, G in enumerate(Gn):
# if nx.number_of_nodes(G) == 0:
# idx_no_node.append(idx)
# if nx.number_of_nodes(G) == 0 and nx.number_of_edges(G) == 0:
# idx_no_both.append(idx)
# if nx.number_of_edges(G) == 0:
# idx_no_edge.append(idx)
## file_prefix = '../results/graph_images/SYNTHETIC/' + G.graph['name']
## draw_SYNTHETIC_graph(Gn[idx], file_prefix=file_prefix, save=True)
## draw_SYNTHETIC_graph(Gn[idx])
# print('nb_no_node: ', len(idx_no_node))
# print('nb_no_edge: ', len(idx_no_edge))
# print('nb_no_both: ', len(idx_no_both))
# print('idx_no_node: ', idx_no_node)
# print('idx_no_edge: ', idx_no_edge)
# print('idx_no_both: ', idx_no_both)
#
# for idx in [0, 10, 100]:
# print(idx)
# print(Gn[idx].nodes(data=True))
# print(Gn[idx].edges(data=True))
# draw_SYNTHETIC_graph(Gn[idx], save=None)
def plot_a_graph(graph_filename):
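
The atom and bond look-ups in main() above are long if/elif chains. A compact equivalent (a sketch, not part of the commit; the names are hypothetical) keeps the mappings in dictionaries and falls back to the raw label when a key is missing:

ATOM_MAP = {'0': 'C', '1': 'N', '2': 'O', '3': 'F',
            '4': 'I', '5': 'Cl', '6': 'Br'}
BOND_COLOR = {'0': 'orange', '1': 'r', '2': 'purple', '3': 'orange'}

def relabel_mutag(G):
    # rewrite numeric atom labels in place and return one colour per edge
    for node in G.nodes:
        G.nodes[node]['atom'] = ATOM_MAP.get(G.nodes[node]['atom'],
                                             G.nodes[node]['atom'])
    return [BOND_COLOR.get(G.edges[edge]['bond_type'], 'gray')
            for edge in G.edges]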

