@@ -0,0 +1,2 @@ | |||
[run] | |||
omit = gklearn/tests/* |
@@ -15,6 +15,7 @@ datasets/* | |||
!datasets/AIDS/ | |||
!datasets/monoterpenoides/ | |||
!datasets/Fingerprint/*.txt | |||
!datasets/Cuneiform/*.txt | |||
notebooks/results/* | |||
notebooks/check_gm/* | |||
notebooks/test_parallel/* | |||
@@ -41,3 +42,4 @@ dist/ | |||
build/ | |||
.coverage | |||
htmlcov |
@@ -22,7 +22,7 @@ install: | |||
script: | |||
- python setup.py bdist_wheel | |||
- pytest -v --cov-report term --cov=gklearn gklearn/tests/ | |||
- pytest -v --cov-config=.coveragerc --cov-report term --cov=gklearn gklearn/tests/ | |||
after_success: | |||
- codecov |
@@ -0,0 +1,267 @@ | |||
0 | |||
1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
23 | |||
24 | |||
25 | |||
26 | |||
0 | |||
1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
23 | |||
24 | |||
25 | |||
26 | |||
27 | |||
28 | |||
29 | |||
0 | |||
1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
23 | |||
24 | |||
25 | |||
26 | |||
27 | |||
28 | |||
29 | |||
0 | |||
1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
23 | |||
24 | |||
25 | |||
26 | |||
27 | |||
28 | |||
29 | |||
0 | |||
1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
23 | |||
24 | |||
25 | |||
26 | |||
27 | |||
28 | |||
29 | |||
0 | |||
1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
23 | |||
24 | |||
25 | |||
26 | |||
27 | |||
28 | |||
29 | |||
0 | |||
1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
23 | |||
24 | |||
25 | |||
26 | |||
27 | |||
28 | |||
29 | |||
0 | |||
1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
23 | |||
24 | |||
25 | |||
26 | |||
27 | |||
28 | |||
29 | |||
0 | |||
1 | |||
2 | |||
3 | |||
4 | |||
5 | |||
6 | |||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
23 | |||
24 | |||
25 | |||
26 | |||
27 | |||
28 | |||
29 |
@@ -0,0 +1,119 @@ | |||
README for dataset Cuneiform | |||
=== Usage === | |||
This folder contains the following comma separated text files | |||
(replace DS by the name of the dataset): | |||
n = total number of nodes | |||
m = total number of edges | |||
N = number of graphs | |||
(1) DS_A.txt (m lines) | |||
sparse (block diagonal) adjacency matrix for all graphs, | |||
each line corresponds to (row, col), i.e. (node_id, node_id) | |||
(2) DS_graph_indicator.txt (n lines) | |||
column vector of graph identifiers for all nodes of all graphs, | |||
the value in the i-th line is the graph_id of the node with node_id i | |||
(3) DS_graph_labels.txt (N lines) | |||
class labels for all graphs in the dataset, | |||
the value in the i-th line is the class label of the graph with graph_id i | |||
(4) DS_node_labels.txt (n lines) | |||
column vector of node labels, | |||
the value in the i-th line corresponds to the node with node_id i | |||
There are OPTIONAL files if the respective information is available: | |||
(5) DS_edge_labels.txt (m lines; same size as DS_A.txt) | |||
labels for the edges in DS_A.txt | |||
(6) DS_edge_attributes.txt (m lines; same size as DS_A.txt) | |||
attributes for the edges in DS_A.txt | |||
(7) DS_node_attributes.txt (n lines) | |||
matrix of node attributes, | |||
the comma separated values in the i-th line form the attribute vector of the node with node_id i | |||
(8) DS_graph_attributes.txt (N lines) | |||
regression values for all graphs in the dataset, | |||
the value in the i-th line is the attribute of the graph with graph_id i | |||
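For concreteness, the files listed above can be assembled into one graph per graph_id with a few lines of Python. The sketch below is illustrative only: the folder path and the helper name load_tud_dataset are assumptions, not part of the dataset; it uses networkx, which this repository already depends on.

import os
import networkx as nx

def load_tud_dataset(folder, ds='Cuneiform'):
    """Minimal parser for the DS_* layout described above (node/graph ids are 1-based)."""
    def read(suffix):
        with open(os.path.join(folder, ds + suffix)) as f:
            return [line.strip() for line in f if line.strip()]
    graph_of_node = [int(x) for x in read('_graph_indicator.txt')]        # n lines
    graph_labels = [int(x) for x in read('_graph_labels.txt')]            # N lines
    node_labels = [line.split(',') for line in read('_node_labels.txt')]  # n lines
    # one empty graph per graph_id, carrying its class label.
    graphs = {gid: nx.Graph(label=y) for gid, y in enumerate(graph_labels, start=1)}
    for node_id, gid in enumerate(graph_of_node, start=1):
        graphs[gid].add_node(node_id, label=node_labels[node_id - 1])
    for line in read('_A.txt'):                                           # m lines of "row, col"
        u, v = (int(x) for x in line.split(','))
        graphs[graph_of_node[u - 1]].add_edge(u, v)   # block diagonal: u and v share a graph
    return list(graphs.values())

# e.g. graphs = load_tud_dataset('datasets/Cuneiform')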
=== Description === | |||
The Cuneiform dataset contains graphs representing 29 different Hittite cuneiform signs. | |||
The data was obtained from nine cuneiform tablets written by scholars of Hittitology in | |||
the course of a study of individual characteristics of cuneiform handwriting. | |||
After automated extraction of the individual wedges, the affiliation of the wedges to the | |||
cuneiform signs was determined manually. The graph model is explained in detail in the | |||
referenced publication. | |||
=== References === | |||
Nils M. Kriege, Matthias Fey, Denis Fisseler, Petra Mutzel, Frank Weichert | |||
Recognizing Cuneiform Signs Using Graph Based Methods. 2018. arXiv:1802.05908 | |||
https://arxiv.org/abs/1802.05908 | |||
=== Description of Labels === | |||
Node labels were converted to integer values using this map: | |||
Component 0: | |||
0 depthPoint | |||
1 tailVertex | |||
2 leftVertex | |||
3 rightVertex | |||
Component 1: | |||
0 vertical | |||
1 Winkelhaken | |||
2 horizontal | |||
Edge labels were converted to integer values using this map: | |||
Component 0: | |||
0 wedge | |||
1 arrangement | |||
Class labels were converted to integer values using this map: | |||
0 tu | |||
1 ta | |||
2 ti | |||
3 nu | |||
4 na | |||
5 ni | |||
6 bu | |||
7 ba | |||
8 bi | |||
9 zu | |||
10 za | |||
11 zi | |||
12 su | |||
13 sa | |||
14 si | |||
15 hu | |||
16 ha | |||
17 hi | |||
18 du | |||
19 da | |||
20 di | |||
21 ru | |||
22 ra | |||
23 ri | |||
24 ku | |||
25 ka | |||
26 ki | |||
27 lu | |||
28 la | |||
29 li |
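For convenience, the same maps can be transcribed directly into Python; the names below are illustrative, and the values simply restate the tables above.

# Transcription of the label maps above (constant names are illustrative).
NODE_LABELS_0 = {0: 'depthPoint', 1: 'tailVertex', 2: 'leftVertex', 3: 'rightVertex'}
NODE_LABELS_1 = {0: 'vertical', 1: 'Winkelhaken', 2: 'horizontal'}
EDGE_LABELS_0 = {0: 'wedge', 1: 'arrangement'}
CLASS_LABELS = ['tu', 'ta', 'ti', 'nu', 'na', 'ni', 'bu', 'ba', 'bi',
                'zu', 'za', 'zi', 'su', 'sa', 'si', 'hu', 'ha', 'hi',
                'du', 'da', 'di', 'ru', 'ra', 'ri', 'ku', 'ka', 'ki',
                'lu', 'la', 'li']

# e.g. CLASS_LABELS[18] == 'du' recovers the sign reading from an integer class label.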
@@ -16,7 +16,6 @@ from functools import partial | |||
import networkx as nx | |||
import numpy as np | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.utils import direct_product | |||
from gklearn.utils.graphdataset import get_dataset_attributes | |||
from gklearn.utils.parallel import parallel_gm | |||
@@ -28,7 +28,6 @@ from gklearn.utils.kernels import deltakernel | |||
from gklearn.utils.utils import untotterTransformation | |||
from gklearn.utils.graphdataset import get_dataset_attributes | |||
from gklearn.utils.parallel import parallel_gm | |||
sys.path.insert(0, "../") | |||
def marginalizedkernel(*args, | |||
@@ -6,8 +6,6 @@ | |||
[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
import time | |||
from functools import partial | |||
from tqdm import tqdm | |||
@@ -20,7 +20,6 @@ import numpy as np | |||
from gklearn.utils.utils import getSPGraph | |||
from gklearn.utils.graphdataset import get_dataset_attributes | |||
from gklearn.utils.parallel import parallel_gm | |||
sys.path.insert(0, "../") | |||
def spkernel(*args, | |||
node_label='atom', | |||
@@ -25,8 +25,6 @@ from gklearn.utils.graphdataset import get_dataset_attributes | |||
from gklearn.utils.parallel import parallel_gm | |||
from gklearn.utils.trie import Trie | |||
sys.path.insert(0, "../") | |||
def structuralspkernel(*args, | |||
node_label='atom', | |||
@@ -8,7 +8,6 @@ | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
import time | |||
from collections import Counter | |||
from itertools import chain | |||
@@ -9,7 +9,6 @@ | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
import time | |||
from collections import Counter | |||
from itertools import chain | |||
@@ -10,7 +10,6 @@ | |||
import sys | |||
from collections import Counter | |||
sys.path.insert(0, "../") | |||
from functools import partial | |||
import time | |||
#from multiprocessing import Pool | |||
@@ -9,10 +9,8 @@ import numpy as np | |||
import random | |||
import csv | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
from preimage.test_k_closest_graphs import median_on_k_closest_graphs | |||
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs | |||
def find_best_k(): | |||
ds = {'name': 'monoterpenoides', | |||
@@ -13,15 +13,14 @@ from multiprocessing import Pool | |||
from functools import partial | |||
import time | |||
import random | |||
import sys | |||
from scipy import optimize | |||
from scipy.optimize import minimize | |||
import cvxpy as cp | |||
import sys | |||
sys.path.insert(0, "../") | |||
from preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic | |||
from preimage.utils import kernel_distance_matrix | |||
from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic | |||
from gklearn.preimage.utils import kernel_distance_matrix | |||
def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, | |||
params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT', | |||
@@ -128,12 +128,10 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method | |||
elif lib == 'gedlib-bash': | |||
import time | |||
import random | |||
import sys | |||
import os | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import saveDataset | |||
tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/' | |||
tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/' | |||
if not os.path.exists(tmp_dir): | |||
os.makedirs(tmp_dir) | |||
fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9)) | |||
@@ -144,7 +142,7 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method | |||
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n' | |||
command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' | |||
command += 'export LD_LIBRARY_PATH\n' | |||
command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n' | |||
command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n' | |||
command += './ged_for_python_bash monoterpenoides ' + fn_collection \ | |||
+ ' \'' + algo_options + '\' ' | |||
for ec in edit_cost_constant: | |||
@@ -11,11 +11,9 @@ import random | |||
import networkx as nx | |||
from tqdm import tqdm | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphdataset import get_dataset_attributes | |||
from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels | |||
from ged import GED, ged_median | |||
from gklearn.preimage.ged import GED, ged_median | |||
def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
@@ -438,7 +436,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, | |||
dataset='monoterpenoides', | |||
graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'): | |||
graph_dir=''): | |||
"""Compute the iam by c++ implementation (gedlib) through bash. | |||
""" | |||
import os | |||
@@ -462,18 +460,18 @@ def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, | |||
fgroup.write("\n</GraphCollection>") | |||
fgroup.close() | |||
tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/' | |||
tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/' | |||
fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9)) | |||
createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection) | |||
# fn_collection = tmp_dir + 'collection_for_debug' | |||
# graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl' | |||
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/gxl' | |||
# if dataset == 'Letter-high' or dataset == 'Fingerprint': | |||
# dataset = 'letter' | |||
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n' | |||
command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' | |||
command += 'export LD_LIBRARY_PATH\n' | |||
command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n' | |||
command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n' | |||
command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \ | |||
+ ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' ' | |||
if edit_cost_constant is None: | |||
@@ -489,8 +487,8 @@ def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, | |||
sod_sm = float(output[0].strip()) | |||
sod_gm = float(output[1].strip()) | |||
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||
return sod_sm, sod_gm, fname_sm, fname_gm | |||
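As a pointer for readers of this hunk, a hedged usage sketch of iam_bash after these path changes: the graph file names and graph_dir below are placeholders (the corresponding .gxl files must already exist under graph_dir), and edit_cost_constant=None takes the `edit_cost_constant is None` branch shown above.

# Illustrative only; file names and graph_dir are placeholders.
from gklearn.preimage.iam import iam_bash

graph_names = ['graph0.gxl', 'graph1.gxl', 'graph2.gxl']   # .gxl files located in graph_dir
sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(
    graph_names, None,                  # edit_cost_constant=None -> default constants
    cost='CONSTANT', initial_solutions=1,
    dataset='monoterpenoides',
    graph_dir='../../datasets/monoterpenoides/')
# fname_sm / fname_gm are the set-median / generalized-median .gxl files written
# by the gedlib binary under .../cpp_ext/output/tmp_ged/.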
@@ -11,14 +11,12 @@ from tqdm import tqdm | |||
import random | |||
#import csv | |||
from shutil import copyfile | |||
import os | |||
import sys | |||
sys.path.insert(0, "../") | |||
from preimage.iam import iam_bash | |||
from gklearn.preimage.iam import iam_bash | |||
from gklearn.utils.graphfiles import loadDataset, loadGXL | |||
from preimage.ged import GED | |||
from preimage.utils import get_same_item_indices | |||
from gklearn.preimage.ged import GED | |||
from gklearn.preimage.utils import get_same_item_indices | |||
def test_knn(): | |||
ds = {'name': 'monoterpenoides', | |||
@@ -30,7 +28,7 @@ def test_knn(): | |||
# edge_label = 'bond_type' | |||
# ds_name = 'mono' | |||
dir_output = 'results/knn/' | |||
graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/' | |||
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/' | |||
k_nn = 1 | |||
percent = 0.1 | |||
@@ -2,5 +2,5 @@ import sys | |||
import pathlib | |||
# insert gedlibpy library. | |||
sys.path.insert(0, "../../") | |||
sys.path.insert(0, "../../../") | |||
from gedlibpy import librariesImport, gedlibpy |
@@ -14,10 +14,7 @@ from tqdm import tqdm | |||
import networkx as nx | |||
import matplotlib.pyplot as plt | |||
sys.path.insert(0, "../") | |||
from utils import compute_kernel, dis_gstar | |||
from gklearn.preimage.utils import compute_kernel, dis_gstar | |||
def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): | |||
@@ -52,8 +52,6 @@ def convertGraph(G): | |||
def testNxGrapĥ(): | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
@@ -9,12 +9,10 @@ from matplotlib import pyplot as plt | |||
import numpy as np | |||
from tqdm import tqdm | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
from utils import remove_edges | |||
from fitDistance import fit_GED_to_kernel_distance | |||
from utils import normalize_distance_matrix | |||
from gklearn.preimage.utils import remove_edges | |||
from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance | |||
from gklearn.preimage.utils import normalize_distance_matrix | |||
def test_update_costs(): | |||
@@ -63,7 +61,7 @@ def median_paper_clcpc_python_best(): | |||
y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | |||
repeats = 50 | |||
collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||
graph_dir = collection_path + 'gxl/' | |||
fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt' | |||
@@ -160,7 +158,7 @@ def median_paper_clcpc_python_bash_cpp(): | |||
y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | |||
repeats = 50 | |||
collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||
graph_dir = collection_path + 'gxl/' | |||
fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt' | |||
@@ -14,13 +14,11 @@ import sys | |||
def test_NON_SYMBOLIC_cost(): | |||
"""Test edit cost LETTER2. | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
from preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter | |||
from preimage.test_k_closest_graphs import reform_attributes | |||
from gklearn.preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter | |||
from gklearn.preimage.test_k_closest_graphs import reform_attributes | |||
from gklearn.utils.graphfiles import loadDataset | |||
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-high/Letter-high_A.txt' | |||
dataset = '../../datasets/Letter-high/Letter-high_A.txt' | |||
Gn, y_all = loadDataset(dataset) | |||
g1 = Gn[200] | |||
@@ -53,14 +51,12 @@ def test_NON_SYMBOLIC_cost(): | |||
def test_LETTER2_cost(): | |||
"""Test edit cost LETTER2. | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
from preimage.ged import GED, get_nb_edit_operations_letter | |||
from preimage.test_k_closest_graphs import reform_attributes | |||
from gklearn.preimage.ged import GED, get_nb_edit_operations_letter | |||
from gklearn.preimage.test_k_closest_graphs import reform_attributes | |||
from gklearn.utils.graphfiles import loadDataset | |||
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
g1 = Gn[200] | |||
@@ -96,14 +92,12 @@ def test_get_nb_edit_operations_letter(): | |||
should be the same as the cost computed by number of operations and edit | |||
cost constants. | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
from preimage.ged import GED, get_nb_edit_operations_letter | |||
from preimage.test_k_closest_graphs import reform_attributes | |||
from gklearn.preimage.ged import GED, get_nb_edit_operations_letter | |||
from gklearn.preimage.test_k_closest_graphs import reform_attributes | |||
from gklearn.utils.graphfiles import loadDataset | |||
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
g1 = Gn[200] | |||
@@ -136,13 +130,12 @@ def test_get_nb_edit_operations(): | |||
numbers of edit operations. The distance/cost computed by GED should be the | |||
same as the cost computed by number of operations and edit cost constants. | |||
""" | |||
import sys | |||
sys.path.insert(0, "../") | |||
from preimage.ged import GED, get_nb_edit_operations | |||
from gklearn.preimage.ged import GED, get_nb_edit_operations | |||
from gklearn.utils.graphfiles import loadDataset | |||
import os | |||
ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||
ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds', | |||
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset']) | |||
g1 = Gn[20] | |||
@@ -173,11 +166,10 @@ def test_get_nb_edit_operations(): | |||
def test_ged_python_bash_cpp(): | |||
"""Test ged computation with python invoking the c++ code by bash command (with updated library). | |||
""" | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
from preimage.ged import GED | |||
from gklearn.preimage.ged import GED | |||
data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||
# collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | |||
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml' | |||
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | |||
@@ -233,7 +225,7 @@ def test_ged_best_settings_updated(): | |||
"""Test ged computation with best settings the same as in the C++ code (with updated library). | |||
""" | |||
data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | |||
# collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml' | |||
@@ -292,7 +284,7 @@ def test_ged_best_settings(): | |||
"""Test ged computation with best settings the same as in the C++ code. | |||
""" | |||
data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | |||
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | |||
@@ -350,7 +342,7 @@ def test_ged_default(): | |||
"""Test ged computation with default settings. | |||
""" | |||
data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | |||
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | |||
@@ -404,11 +396,10 @@ def test_ged_default(): | |||
def test_ged_min(): | |||
"""Test ged computation with the "min" stabilizer. | |||
""" | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
from preimage.ged import GED | |||
from gklearn.preimage.ged import GED | |||
data_dir_prefix = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/' | |||
data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/' | |||
collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml' | |||
graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/' | |||
@@ -487,8 +478,6 @@ def convertGraph(G): | |||
def testNxGrapĥ(): | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
@@ -13,14 +13,11 @@ import time | |||
import random | |||
#from tqdm import tqdm | |||
#import os | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
#from gklearn.utils.logger2file import * | |||
from iam import iam_upgraded | |||
from utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar | |||
#from ged import ged_median | |||
from gklearn.preimage.iam import iam_upgraded | |||
from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar | |||
#from gklearn.preimage.ged import ged_median | |||
def test_iam_monoterpenoides_with_init40(): | |||
@@ -52,7 +49,7 @@ def test_iam_monoterpenoides_with_init40(): | |||
'stabilizer': ged_stabilizer} | |||
collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||
graph_dir = collection_path + 'gxl/' | |||
y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | |||
repeats = 50 | |||
@@ -17,15 +17,12 @@ import multiprocessing | |||
from multiprocessing import Pool | |||
from functools import partial | |||
#import os | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset, loadGXL | |||
#from gklearn.utils.logger2file import * | |||
from iam import iam_upgraded, iam_bash | |||
from utils import compute_kernel, dis_gstar, kernel_distance_matrix | |||
from fitDistance import fit_GED_to_kernel_distance | |||
#from ged import ged_median | |||
from gklearn.preimage.iam import iam_upgraded, iam_bash | |||
from gklearn.preimage.utils import compute_kernel, dis_gstar, kernel_distance_matrix | |||
from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance | |||
#from gklearn.preimage.ged import ged_median | |||
def fit_edit_cost_constants(fit_method, edit_cost_name, | |||
@@ -204,6 +201,8 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho | |||
if Kmatrix is not None: | |||
Kmatrix_median = np.copy(Kmatrix[group_min,:]) | |||
Kmatrix_median = Kmatrix_median[:,group_min] | |||
else: | |||
Kmatrix_median = None | |||
# 1. fit edit cost constants. | |||
@@ -379,7 +378,7 @@ def test_k_closest_graphs_with_cv(): | |||
y_all = ['3', '1', '4', '6', '7', '8', '9', '2'] | |||
repeats = 50 | |||
collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/' | |||
collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/' | |||
graph_dir = collection_path + 'gxl/' | |||
sod_sm_list = [] | |||
@@ -11,12 +11,10 @@ import matplotlib.pyplot as plt | |||
import time | |||
from tqdm import tqdm | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
from median import draw_Letter_graph | |||
from ged import GED, ged_median | |||
from utils import get_same_item_indices, compute_kernel, gram2distances, \ | |||
from gklearn.preimage.median import draw_Letter_graph | |||
from gklearn.preimage.ged import GED, ged_median | |||
from gklearn.preimage.utils import get_same_item_indices, compute_kernel, gram2distances, \ | |||
dis_gstar, remove_edges | |||
@@ -13,14 +13,11 @@ import time | |||
import random | |||
#from tqdm import tqdm | |||
#import os | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
from utils import remove_edges, compute_kernel, get_same_item_indices | |||
from ged import ged_median | |||
from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices | |||
from gklearn.preimage.ged import ged_median | |||
from preimage_iam import preimage_iam | |||
from gklearn.preimage.preimage_iam import preimage_iam | |||
############################################################################### | |||
@@ -13,13 +13,10 @@ import time | |||
import random | |||
#from tqdm import tqdm | |||
#import os | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
from ged import ged_median | |||
from utils import compute_kernel, get_same_item_indices, remove_edges | |||
from preimage_iam import preimage_iam_random_mix | |||
from gklearn.preimage.ged import ged_median | |||
from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges | |||
from gklearn.preimage.preimage_iam import preimage_iam_random_mix | |||
############################################################################### | |||
# tests on different values on grid of median-sets and k. | |||
@@ -13,14 +13,10 @@ import time | |||
import random | |||
#from tqdm import tqdm | |||
#import os | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset | |||
from preimage_random import preimage_random | |||
from ged import ged_median | |||
from utils import compute_kernel, get_same_item_indices, remove_edges | |||
from gklearn.preimage.preimage_random import preimage_random | |||
from gklearn.preimage.ged import ged_median | |||
from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges | |||
############################################################################### | |||
@@ -11,8 +11,6 @@ Useful functions. | |||
import multiprocessing | |||
import numpy as np | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.kernels.marginalizedKernel import marginalizedkernel | |||
from gklearn.kernels.untilHPathKernel import untilhpathkernel | |||
from gklearn.kernels.spKernel import spkernel | |||
@@ -41,7 +39,7 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True): | |||
return np.sqrt(term1 - term2 + term3) | |||
def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||
def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel='imap_unordered'): | |||
if graph_kernel == 'marginalizedkernel': | |||
Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label, | |||
p_quit=0.03, n_iteration=10, remove_totters=False, | |||
@@ -49,6 +47,7 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||
elif graph_kernel == 'untilhpathkernel': | |||
Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label, | |||
depth=7, k_func='MinMax', compute_method='trie', | |||
parallel=parallel, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'spkernel': | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
@@ -66,18 +65,18 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||
Kmatrix, _ = structuralspkernel(Gn, node_label=node_label, | |||
edge_label=edge_label, node_kernels=sub_kernels, | |||
edge_kernels=sub_kernels, | |||
parallel=None, n_jobs=multiprocessing.cpu_count(), | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), | |||
verbose=verbose) | |||
elif graph_kernel == 'treeletkernel': | |||
pkernel = functools.partial(polynomialkernel, d=2, c=1e5) | |||
# pkernel = functools.partial(gaussiankernel, gamma=1e-6) | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label, | |||
sub_kernel=pkernel, | |||
sub_kernel=pkernel, parallel=parallel, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'weisfeilerlehmankernel': | |||
Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | |||
height=4, base_kernel='subtree', | |||
height=4, base_kernel='subtree', parallel=None, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
# normalization | |||
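As a usage note for the widened compute_kernel signature above, a minimal sketch: the dataset path and the 'atom'/'bond_type' labels are taken from the MUTAG examples elsewhere in this diff, and the ten-graph slice is purely illustrative.

# Illustrative only: compute a Gram matrix with the new `parallel` argument.
from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.utils import compute_kernel

Gn, y_all = loadDataset('../datasets/MUTAG/MUTAG_A.txt')
# parallel='imap_unordered' is forwarded to the underlying kernel (multiprocessing);
# parallel=None keeps the single-process path, as used for the Weisfeiler-Lehman kernel above.
Kmatrix = compute_kernel(Gn[0:10], 'untilhpathkernel', 'atom', 'bond_type',
                         True, parallel='imap_unordered')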
@@ -11,11 +11,8 @@ import matplotlib.pyplot as plt | |||
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset | |||
from tqdm import tqdm | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset, loadGXL | |||
from utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices | |||
from gklearn.preimage.utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices | |||
def visualize_graph_dataset(dis_measure, visual_method, draw_figure, | |||
@@ -115,11 +112,11 @@ def visualize_distances_in_kernel(): | |||
# Gn = Gn[0:50] | |||
fname_medians = 'expert.treelet' | |||
# add set median. | |||
fname_sm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||
fname_sm = 'results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||
set_median = loadGXL(fname_sm) | |||
Gn.append(set_median) | |||
# add generalized median (estimated pre-image.) | |||
fname_gm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||
fname_gm = 'results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||
gen_median = loadGXL(fname_gm) | |||
Gn.append(gen_median) | |||
@@ -166,19 +163,19 @@ def visualize_distances_in_kernel(): | |||
def visualize_distances_in_ged(): | |||
from fitDistance import compute_geds | |||
from ged import GED | |||
from gklearn.preimage.fitDistance import compute_geds | |||
from gklearn.preimage.ged import GED | |||
ds = {'name': 'monoterpenoides', | |||
'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset']) | |||
# Gn = Gn[0:50] | |||
# add set median. | |||
fname_medians = 'expert.treelet' | |||
fname_sm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||
fname_sm = 'preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl' | |||
set_median = loadGXL(fname_sm) | |||
Gn.append(set_median) | |||
# add generalized median (estimated pre-image.) | |||
fname_gm = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||
fname_gm = 'preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl' | |||
gen_median = loadGXL(fname_gm) | |||
Gn.append(gen_median) | |||
@@ -227,9 +224,10 @@ def visualize_distances_in_ged(): | |||
def visualize_distances_in_kernel_monoterpenoides(): | |||
import os | |||
ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb | |||
Gn_original, y_all = loadDataset(ds['dataset']) | |||
# Gn = Gn[0:50] | |||
@@ -301,11 +299,12 @@ def visualize_distances_in_kernel_monoterpenoides(): | |||
def visualize_distances_in_ged_monoterpenoides(): | |||
from fitDistance import compute_geds | |||
from ged import GED | |||
from gklearn.preimage.fitDistance import compute_geds | |||
from gklearn.preimage.ged import GED | |||
import os | |||
ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'} # node/edge symb | |||
Gn_original, y_all = loadDataset(ds['dataset']) | |||
# Gn = Gn[0:50] | |||
@@ -379,8 +378,8 @@ def visualize_distances_in_ged_monoterpenoides(): | |||
def visualize_distances_in_kernel_letter_h(): | |||
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
# Gn = Gn[0:50] | |||
@@ -455,8 +454,8 @@ def visualize_distances_in_ged_letter_h(): | |||
from fitDistance import compute_geds | |||
from preimage.test_k_closest_graphs import reform_attributes | |||
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
# Gn = Gn[0:50] | |||
@@ -11,35 +11,37 @@ import csv | |||
from shutil import copyfile | |||
import networkx as nx | |||
import matplotlib.pyplot as plt | |||
import os | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | |||
from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
from preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel | |||
from preimage.find_best_k import getRelations | |||
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel | |||
from gklearn.preimage.find_best_k import getRelations | |||
def get_dataset(ds_name): | |||
if ds_name == 'Letter-high': # node non-symb | |||
dataset = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml' | |||
graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/' | |||
dataset = 'cpp_ext/data/collections/Letter.xml' | |||
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/' | |||
Gn, y_all = loadDataset(dataset, extra_params=graph_dir) | |||
for G in Gn: | |||
reform_attributes(G) | |||
elif ds_name == 'Fingerprint': | |||
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/gedlib/data/collections/Fingerprint.xml' | |||
graph_dir = '/media/ljia/DATA/research-repo/codes/Linlin/gedlib/data/datasets/Fingerprint/data/' | |||
Gn, y_all = loadDataset(dataset, extra_params=graph_dir) | |||
for G in Gn: | |||
reform_attributes(G) | |||
# dataset = 'cpp_ext/data/collections/Fingerprint.xml' | |||
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/' | |||
# Gn, y_all = loadDataset(dataset, extra_params=graph_dir) | |||
# for G in Gn: | |||
# reform_attributes(G) | |||
dataset = '../../datasets/Fingerprint/Fingerprint_A.txt' | |||
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/' | |||
Gn, y_all = loadDataset(dataset) | |||
elif ds_name == 'SYNTHETIC': | |||
pass | |||
elif ds_name == 'SYNTHETICnew': | |||
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/SYNTHETICnew' | |||
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-high/Letter-high_A.txt' | |||
# graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/' | |||
dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/SYNTHETICnew' | |||
# dataset = '../../datasets/Letter-high/Letter-high_A.txt' | |||
# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/' | |||
Gn, y_all = loadDataset(dataset) | |||
elif ds_name == 'Synthie': | |||
pass | |||
@@ -184,6 +186,8 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti | |||
if Kmatrix is not None: | |||
Kmatrix_sub = Kmatrix[values,:] | |||
Kmatrix_sub = Kmatrix_sub[:,values] | |||
else: | |||
Kmatrix_sub = None | |||
for repeat in range(repeats): | |||
print('\nrepeat =', repeat) | |||
@@ -273,11 +277,11 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti | |||
nb_dis_k_gi2gm[2] += 1 | |||
# save median graphs. | |||
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat) | |||
copyfile(fname_sm, fn_pre_sm_new + '.gxl') | |||
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat) | |||
copyfile(fname_gm, fn_pre_gm_new + '.gxl') | |||
@@ -427,63 +431,101 @@ if __name__ == "__main__": | |||
# initial_solutions=40, | |||
# Gn_data = [Gn, y_all, graph_dir], | |||
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]) | |||
# #### xp 3: Fingerprint, sspkernel, using LETTER2. | |||
# #### xp 3: SYNTHETICnew, sspkernel, using NON_SYMBOLIC. | |||
# gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.structuralspkernel.gm.npz') | |||
# Kmatrix = gmfile['Kmatrix'] | |||
# run_time = gmfile['run_time'] | |||
# # normalization | |||
# Kmatrix_diag = Kmatrix.diagonal().copy() | |||
# for i in range(len(Kmatrix)): | |||
# for j in range(i, len(Kmatrix)): | |||
# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||
# Kmatrix[j][i] = Kmatrix[i][j] | |||
## np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm', | |||
## Kmatrix=Kmatrix, run_time=run_time) | |||
# # load dataset. | |||
# print('getting dataset and computing kernel distance matrix first...') | |||
# ds_name = 'Fingerprint' | |||
# ds_name = 'SYNTHETICnew' | |||
# gkernel = 'structuralspkernel' | |||
# Gn, y_all, graph_dir = get_dataset(ds_name) | |||
# # remove graphs without nodes and edges. | |||
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0 | |||
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0 | |||
# and nx.number_of_edges(G) != 0)] | |||
# idx = [G[0] for G in Gn] | |||
# Gn = [G[1] for G in Gn] | |||
# y_all = [y_all[i] for i in idx] | |||
## Gn = Gn[0:50] | |||
## y_all = y_all[0:50] | |||
## Gn = Gn[0:10] | |||
## y_all = y_all[0:10] | |||
# for G in Gn: | |||
# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | |||
# # compute pair distances. | |||
## dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||
## Kmatrix=None, gkernel=gkernel, verbose=True) | |||
# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) | |||
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||
# # fitting and computing. | |||
# fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random'] | |||
# fit_methods = ['k-graphs', 'random', 'random', 'random'] | |||
# for fit_method in fit_methods: | |||
# print('\n-------------------------------------') | |||
# print('fit method:', fit_method) | |||
# parameters = {'ds_name': ds_name, | |||
# 'gkernel': gkernel, | |||
# 'edit_cost_name': 'LETTER2', | |||
# 'edit_cost_name': 'NON_SYMBOLIC', | |||
# 'ged_method': 'mIPFP', | |||
# 'attr_distance': 'euclidean', | |||
# 'fit_method': fit_method} | |||
# xp_fit_method_for_non_symbolic(parameters, save_results=True, | |||
# initial_solutions=40, | |||
# initial_solutions=1, | |||
# Gn_data = [Gn, y_all, graph_dir], | |||
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]) | |||
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean], | |||
# Kmatrix=Kmatrix) | |||
# #### xp 4: SYNTHETICnew, sspkernel, using NON_SYMBOLIC. | |||
# ### xp 4: SYNTHETICnew, spkernel, using NON_SYMBOLIC. | |||
# gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz') | |||
# Kmatrix = gmfile['Kmatrix'] | |||
# # normalization | |||
# Kmatrix_diag = Kmatrix.diagonal().copy() | |||
# for i in range(len(Kmatrix)): | |||
# for j in range(i, len(Kmatrix)): | |||
# Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||
# Kmatrix[j][i] = Kmatrix[i][j] | |||
# run_time = 21821.35 | |||
# np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm', | |||
# Kmatrix=Kmatrix, run_time=run_time) | |||
# | |||
# # load dataset. | |||
# print('getting dataset and computing kernel distance matrix first...') | |||
# ds_name = 'SYNTHETICnew' | |||
# gkernel = 'structuralspkernel' | |||
# gkernel = 'spkernel' | |||
# Gn, y_all, graph_dir = get_dataset(ds_name) | |||
# # remove graphs without nodes and edges. | |||
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0 | |||
# and nx.number_of_edges(G) != 0)] | |||
# idx = [G[0] for G in Gn] | |||
# Gn = [G[1] for G in Gn] | |||
# y_all = [y_all[i] for i in idx] | |||
# Gn = Gn[0:10] | |||
# y_all = y_all[0:10] | |||
## # remove graphs without nodes and edges. | |||
## Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_node(G) != 0 | |||
## and nx.number_of_edges(G) != 0)] | |||
## idx = [G[0] for G in Gn] | |||
## Gn = [G[1] for G in Gn] | |||
## y_all = [y_all[i] for i in idx] | |||
## Gn = Gn[0:5] | |||
## y_all = y_all[0:5] | |||
# for G in Gn: | |||
# G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | |||
# # compute pair distances. | |||
# | |||
# # compute/read Gram matrix and pair distances. | |||
## Kmatrix = compute_kernel(Gn, gkernel, None, None, True) | |||
## np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||
## Kmatrix=Kmatrix) | |||
# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') | |||
# Kmatrix = gmfile['Kmatrix'] | |||
# run_time = gmfile['run_time'] | |||
## Kmatrix = Kmatrix[[0,1,2,3,4],:] | |||
## Kmatrix = Kmatrix[:,[0,1,2,3,4]] | |||
# print('\nTime to compute Gram matrix for the whole dataset: ', run_time) | |||
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||
# Kmatrix=None, gkernel=gkernel, verbose=True) | |||
# Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) | |||
## Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
## dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||
# | |||
# # fitting and computing. | |||
# fit_methods = ['k-graphs', 'random', 'random', 'random'] | |||
# for fit_method in fit_methods: | |||
@@ -496,68 +538,69 @@ if __name__ == "__main__": | |||
# 'attr_distance': 'euclidean', | |||
# 'fit_method': fit_method} | |||
# xp_fit_method_for_non_symbolic(parameters, save_results=True, | |||
# initial_solutions=40, | |||
# Gn_data = [Gn, y_all, graph_dir], | |||
# k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]) | |||
### xp 5: SYNTHETICnew, spkernel, using NON_SYMBOLIC. | |||
gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz') | |||
Kmatrix = gmfile['Kmatrix'] | |||
# normalization | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
for i in range(len(Kmatrix)): | |||
for j in range(i, len(Kmatrix)): | |||
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
run_time = 21821.35 | |||
np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm', | |||
Kmatrix=Kmatrix, run_time=run_time) | |||
# initial_solutions=1, | |||
# Gn_data=[Gn, y_all, graph_dir], | |||
# k_dis_data=[dis_mat, dis_max, dis_min, dis_mean], | |||
# Kmatrix=Kmatrix) | |||
#### xp 5: Fingerprint, sspkernel, using LETTER2. | |||
# load dataset. | |||
print('getting dataset and computing kernel distance matrix first...') | |||
ds_name = 'SYNTHETICnew' | |||
gkernel = 'spkernel' | |||
ds_name = 'Fingerprint' | |||
gkernel = 'structuralspkernel' | |||
Gn, y_all, graph_dir = get_dataset(ds_name) | |||
# # remove graphs without nodes and edges. | |||
# Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_edges(G) != 0 | |||
# remove graphs without nodes and edges. | |||
Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0)] | |||
# and nx.number_of_edges(G) != 0)] | |||
# idx = [G[0] for G in Gn] | |||
# Gn = [G[1] for G in Gn] | |||
# y_all = [y_all[i] for i in idx] | |||
# Gn = Gn[0:5] | |||
# y_all = y_all[0:5] | |||
idx = [G[0] for G in Gn] | |||
Gn = [G[1] for G in Gn] | |||
y_all = [y_all[i] for i in idx] | |||
y_idx = get_same_item_indices(y_all) | |||
# remove unused labels. | |||
for G in Gn: | |||
G.graph['edge_attrs'] = [] | |||
for edge in G.edges: | |||
del G.edges[edge]['attributes'] | |||
del G.edges[edge]['orient'] | |||
del G.edges[edge]['angle'] | |||
Gn = Gn[805:815] | |||
y_all = y_all[805:815] | |||
for G in Gn: | |||
G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl' | |||
# compute/read Gram matrix and pair distances. | |||
# Kmatrix = compute_kernel(Gn, gkernel, None, None, True) | |||
# np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||
# Kmatrix=Kmatrix) | |||
gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') | |||
Kmatrix = gmfile['Kmatrix'] | |||
run_time = gmfile['run_time'] | |||
Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered') | |||
np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', | |||
Kmatrix=Kmatrix) | |||
# gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz') | |||
# Kmatrix = gmfile['Kmatrix'] | |||
# run_time = gmfile['run_time'] | |||
# Kmatrix = Kmatrix[[0,1,2,3,4],:] | |||
# Kmatrix = Kmatrix[:,[0,1,2,3,4]] | |||
print('\nTime to compute Gram matrix for the whole dataset: ', run_time) | |||
# print('\nTime to compute Gram matrix for the whole dataset: ', run_time) | |||
dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||
Kmatrix=Kmatrix, gkernel=gkernel, verbose=True) | |||
# Kmatrix = np.zeros((len(Gn), len(Gn))) | |||
# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||
# compute pair distances. | |||
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, | |||
# Kmatrix=None, gkernel=gkernel, verbose=True) | |||
# dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0 | |||
# fitting and computing. | |||
fit_methods = ['k-graphs', 'random', 'random', 'random'] | |||
fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random'] | |||
for fit_method in fit_methods: | |||
print('\n-------------------------------------') | |||
print('fit method:', fit_method) | |||
parameters = {'ds_name': ds_name, | |||
'gkernel': gkernel, | |||
'edit_cost_name': 'NON_SYMBOLIC', | |||
'edit_cost_name': 'LETTER2', | |||
'ged_method': 'mIPFP', | |||
'attr_distance': 'euclidean', | |||
'fit_method': fit_method} | |||
xp_fit_method_for_non_symbolic(parameters, save_results=True, | |||
initial_solutions=1, | |||
Gn_data=[Gn, y_all, graph_dir], | |||
k_dis_data=[dis_mat, dis_max, dis_min, dis_mean], | |||
initial_solutions=40, | |||
Gn_data = [Gn, y_all, graph_dir], | |||
k_dis_data = [dis_mat, dis_max, dis_min, dis_mean], | |||
Kmatrix=Kmatrix) |
@@ -12,17 +12,15 @@ from shutil import copyfile | |||
import networkx as nx | |||
import matplotlib.pyplot as plt | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | |||
from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
from preimage.utils import get_same_item_indices, kernel_distance_matrix | |||
from preimage.find_best_k import getRelations | |||
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix | |||
from gklearn.preimage.find_best_k import getRelations | |||
def xp_letter_h_LETTER2_cost(): | |||
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, Kmatrix=None, gkernel='structuralspkernel') | |||
@@ -177,11 +175,11 @@ def xp_letter_h_LETTER2_cost(): | |||
nb_dis_k_gi2gm[2] += 1 | |||
# save median graphs. | |||
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
copyfile(fname_sm, fn_pre_sm_new + '.gxl') | |||
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
copyfile(fname_gm, fn_pre_gm_new + '.gxl') | |||
@@ -243,8 +241,8 @@ def xp_letter_h_LETTER2_cost(): | |||
def xp_letter_h(): | |||
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
ds = {'dataset': 'cpp_ext/data/collections/Letter.xml', | |||
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
for G in Gn: | |||
reform_attributes(G) | |||
@@ -396,11 +394,11 @@ def xp_letter_h(): | |||
nb_dis_k_gi2gm[2] += 1 | |||
# save median graphs. | |||
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
copyfile(fname_sm, fn_pre_sm_new + '.gxl') | |||
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
copyfile(fname_gm, fn_pre_gm_new + '.gxl') | |||
@@ -13,16 +13,16 @@ from shutil import copyfile | |||
import networkx as nx | |||
import matplotlib.pyplot as plt | |||
import sys | |||
sys.path.insert(0, "../") | |||
from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL | |||
from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
from preimage.utils import get_same_item_indices | |||
from preimage.find_best_k import getRelations | |||
from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes | |||
from gklearn.preimage.utils import get_same_item_indices | |||
from gklearn.preimage.find_best_k import getRelations | |||
def xp_monoterpenoides(): | |||
ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/monoterpenoides/'} # node/edge symb | |||
import os | |||
ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds', | |||
'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset']) | |||
# ds = {'name': 'Letter-high', | |||
# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb | |||
@@ -169,11 +169,11 @@ def xp_monoterpenoides(): | |||
nb_dis_k_gi2gm[2] += 1 | |||
# save median graphs. | |||
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' | |||
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat) | |||
copyfile(fname_sm, fn_pre_sm_new + '.gxl') | |||
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' | |||
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat) | |||
copyfile(fname_gm, fn_pre_gm_new + '.gxl') | |||
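# --- Hedged sketch, not part of the diff -----------------------------------
# The hunks above replace machine-specific absolute paths with paths resolved
# relative to the current source file. A minimal version of that pattern,
# using os.path.join so no separator can be dropped; the helper name
# resource_path is illustrative only.
import os

def resource_path(*parts):
    """Return a path built relative to the directory containing this file."""
    here = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(here, *parts)

# e.g. resource_path('..', '..', 'datasets', 'monoterpenoides')
# ----------------------------------------------------------------------------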
@@ -15,10 +15,16 @@ def chooseDataset(ds_name): | |||
ds_file = 'datasets/Alkane/dataset.ds' | |||
ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt' | |||
Gn, y = loadDataset(ds_file, filename_y=ds_y) | |||
for G in Gn: | |||
for node in G.nodes: | |||
del G.nodes[node]['attributes'] | |||
# node symbolic labels. | |||
elif ds_name == 'Acyclic': | |||
ds_file = 'datasets/acyclic/dataset_bps.ds' | |||
Gn, y = loadDataset(ds_file) | |||
for G in Gn: | |||
for node in G.nodes: | |||
del G.nodes[node]['attributes'] | |||
# node non-symbolic labels. | |||
elif ds_name == 'Letter-med': | |||
ds_file = 'datasets/Letter-med/Letter-med_A.txt' | |||
@@ -27,14 +33,39 @@ def chooseDataset(ds_name): | |||
elif ds_name == 'AIDS': | |||
ds_file = 'datasets/AIDS/AIDS_A.txt' | |||
Gn, y = loadDataset(ds_file) | |||
# edge non-symbolic labels (no node labels). | |||
elif ds_name == 'Fingerprint_edge': | |||
import networkx as nx | |||
ds_file = 'datasets/Fingerprint/Fingerprint_A.txt' | |||
Gn, y = loadDataset(ds_file) | |||
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0] | |||
idx = [G[0] for G in Gn] | |||
Gn = [G[1] for G in Gn] | |||
y = [y[i] for i in idx] | |||
for G in Gn: | |||
G.graph['node_attrs'] = [] | |||
for node in G.nodes: | |||
del G.nodes[node]['attributes'] | |||
del G.nodes[node]['x'] | |||
del G.nodes[node]['y'] | |||
# edge non-symbolic labels (and node non-symbolic labels). | |||
elif ds_name == 'Fingerprint': | |||
import networkx as nx | |||
ds_file = 'datasets/Fingerprint/Fingerprint_A.txt' | |||
Gn, y = loadDataset(ds_file) | |||
Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0] | |||
idx = [G[0] for G in Gn] | |||
Gn = [G[1] for G in Gn] | |||
y = [y[i] for i in idx] | |||
# edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels). | |||
elif ds_name == 'Cuneiform': | |||
import networkx as nx | |||
ds_file = 'datasets/Cuneiform/Cuneiform_A.txt' | |||
Gn, y = loadDataset(ds_file) | |||
Gn = Gn[0:10] | |||
y = y[0:10] | |||
Gn = Gn[0:3] | |||
y = y[0:3] | |||
return Gn, y | |||
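# --- Hedged sketch, not part of the diff -----------------------------------
# The Fingerprint branches above drop graphs that have no edges while keeping
# the target list y aligned with the remaining graphs. The same filtering
# step written as a self-contained helper (name is illustrative only).
import networkx as nx

def drop_edgeless(Gn, y):
    """Keep only graphs with at least one edge, together with their targets."""
    kept = [(G, t) for G, t in zip(Gn, y) if nx.number_of_edges(G) != 0]
    Gn_kept = [G for G, _ in kept]
    y_kept = [t for _, t in kept]
    return Gn_kept, y_kept
# ----------------------------------------------------------------------------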
@@ -152,7 +183,7 @@ def test_spkernel(ds_name, parallel): | |||
#@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint']) | |||
@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS']) | |||
@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform']) | |||
@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
def test_structuralspkernel(ds_name, parallel): | |||
"""Test structural shortest path kernel. | |||
@@ -246,4 +277,5 @@ def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel): | |||
if __name__ == "__main__": | |||
test_spkernel() | |||
# test_spkernel('Alkane', 'imap_unordered') | |||
test_structuralspkernel('Fingerprint_edge', 'imap_unordered') |
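# --- Hedged usage sketch, not part of the diff ------------------------------
# The parametrized tests above can also be run selectively through pytest's
# -k expression instead of calling them directly; the test module path below
# is an assumption about where this file lives.
import pytest

pytest.main(['-v', '-k', 'structuralspkernel and Cuneiform',
             'gklearn/tests/test_graph_kernels.py'])
# ----------------------------------------------------------------------------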
@@ -753,15 +753,12 @@ if __name__ == '__main__': | |||
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb | |||
# Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
# saveDataset(Gn, y, group='xml', filename='temp/temp') | |||
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
Gn, y_all = loadDataset(dataset) | |||
filename = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/SYNTHETICnew/SYNTHETICnew' | |||
saveDataset(Gn, y_all, gformat='gxl', group='xml', filename=filename) | |||
# test - new way to add labels and attributes. | |||
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Fingerprint/Fingerprint_A.txt' | |||
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/Letter-med/Letter-med_A.txt' | |||
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/AIDS/AIDS_A.txt' | |||
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/ENZYMES_txt/ENZYMES_A_sparse.txt' | |||
# Gn, y_all = loadDataset(dataset) | |||
# dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
# dataset = '../../datasets/Fingerprint/Fingerprint_A.txt' | |||
# dataset = '../../datasets/Letter-med/Letter-med_A.txt' | |||
# dataset = '../../datasets/AIDS/AIDS_A.txt' | |||
# dataset = '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt' | |||
# Gn, y_all = loadDataset(dataset) | |||
pass |
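# --- Hedged sketch, not part of the diff -----------------------------------
# The __main__ block above converted a TXT-format dataset to GXL via
# loadDataset/saveDataset. A minimal, relative-path version of that round
# trip; it assumes saveDataset lives alongside loadDataset in
# gklearn.utils.graphfiles, and the output prefix 'temp/SYNTHETICnew' is
# illustrative only.
from gklearn.utils.graphfiles import loadDataset, saveDataset

Gn, y_all = loadDataset('../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt')
saveDataset(Gn, y_all, gformat='gxl', group='xml', filename='temp/SYNTHETICnew')
# ----------------------------------------------------------------------------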
@@ -11,7 +11,6 @@ from sklearn.model_selection import KFold, train_test_split, ParameterGrid | |||
from multiprocessing import Pool, Array | |||
from functools import partial | |||
import sys | |||
sys.path.insert(0, "../") | |||
import os | |||
import time | |||
import datetime | |||
@@ -74,8 +73,6 @@ def model_selection_for_precomputed_kernel(datafile, | |||
Examples | |||
-------- | |||
>>> import numpy as np | |||
>>> import sys | |||
>>> sys.path.insert(0, "../") | |||
>>> from gklearn.utils.model_selection_precomputed import model_selection_for_precomputed_kernel | |||
>>> from gklearn.kernels.untilHPathKernel import untilhpathkernel | |||
>>> | |||
@@ -46466,7 +46466,7 @@ | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.6.8" | |||
"version": "3.6.9" | |||
} | |||
}, | |||
"nbformat": 4, | |||
@@ -7,10 +7,6 @@ Created on Tue Jan 7 15:25:36 2020 | |||
""" | |||
# draw all the graphs
import sys | |||
import pathlib | |||
sys.path.insert(0, "../../") | |||
import matplotlib.pyplot as plt | |||
import networkx as nx | |||
@@ -19,6 +15,58 @@ from gklearn.utils.graphfiles import loadDataset, loadGXL | |||
def main(): | |||
# MUTAG dataset. | |||
dataset, y = loadDataset("../../datasets/MUTAG/MUTAG_A.txt") | |||
for idx in [65]:#[6]: | |||
G = dataset[idx] | |||
for node in G.nodes: | |||
if G.nodes[node]['atom'] == '0': | |||
G.nodes[node]['atom'] = 'C' | |||
elif G.nodes[node]['atom'] == '1': | |||
G.nodes[node]['atom'] = 'N' | |||
elif G.nodes[node]['atom'] == '2': | |||
G.nodes[node]['atom'] = 'O' | |||
elif G.nodes[node]['atom'] == '3': | |||
G.nodes[node]['atom'] = 'F' | |||
elif G.nodes[node]['atom'] == '4': | |||
G.nodes[node]['atom'] = 'I' | |||
elif G.nodes[node]['atom'] == '5': | |||
G.nodes[node]['atom'] = 'Cl' | |||
elif G.nodes[node]['atom'] == '6': | |||
G.nodes[node]['atom'] = 'Br' | |||
ecolors = [] | |||
for edge in G.edges: | |||
if G.edges[edge]['bond_type'] == '0': | |||
ecolors.append('orange') | |||
elif G.edges[edge]['bond_type'] == '1': | |||
ecolors.append('r') | |||
elif G.edges[edge]['bond_type'] == '2': | |||
ecolors.append('purple') | |||
elif G.edges[edge]['bond_type'] == '3': | |||
ecolors.append('orange') | |||
print(idx) | |||
print(nx.get_node_attributes(G, 'atom')) | |||
edge_labels = nx.get_edge_attributes(G, 'bond_type') | |||
print(edge_labels) | |||
pos=nx.spring_layout(G) | |||
nx.draw(G, | |||
pos, | |||
node_size=500, | |||
labels=nx.get_node_attributes(G, 'atom'), | |||
node_color='blue', | |||
font_color='w', | |||
edge_color=ecolors, | |||
width=3, | |||
with_labels=True) | |||
# edge_labels = nx.draw_networkx_edge_labels(G, pos, | |||
# edge_labels=edge_labels, | |||
# font_color='pink') | |||
plt.savefig('mol1_graph.svg', format='svg', dpi=300) | |||
plt.show() | |||
plt.clf() | |||
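# --- Hedged sketch, not part of the diff -----------------------------------
# The if/elif chains above decode MUTAG's numeric node and edge labels. The
# same mapping written with lookup dicts; the 'gray' fallback colour is
# illustrative only.
ATOM_SYMBOLS = {'0': 'C', '1': 'N', '2': 'O', '3': 'F', '4': 'I', '5': 'Cl', '6': 'Br'}
BOND_COLORS = {'0': 'orange', '1': 'r', '2': 'purple', '3': 'orange'}

def decode_mutag_labels(G):
    """Map numeric atom codes to symbols in place; return edge colours."""
    for node in G.nodes:
        G.nodes[node]['atom'] = ATOM_SYMBOLS.get(G.nodes[node]['atom'],
                                                 G.nodes[node]['atom'])
    return [BOND_COLORS.get(G.edges[edge]['bond_type'], 'gray') for edge in G.edges]
# ----------------------------------------------------------------------------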
# # monoterpenoides dataset. | |||
# dataset, y = loadDataset("../../datasets/monoterpenoides/dataset_10+.ds") | |||
# for idx in [12,22,29,74]: | |||
@@ -67,35 +115,35 @@ def main(): | |||
# draw_Fingerprint_graph(Gn[idx], file_prefix='') | |||
# SYNTHETIC dataset. | |||
dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
Gn, y_all = loadDataset(dataset) | |||
idx_no_node = [] | |||
idx_no_edge = [] | |||
idx_no_both = [] | |||
for idx, G in enumerate(Gn): | |||
if nx.number_of_nodes(G) == 0: | |||
idx_no_node.append(idx) | |||
if nx.number_of_edges(G) == 0: | |||
idx_no_both.append(idx) | |||
if nx.number_of_edges(G) == 0: | |||
idx_no_edge.append(idx) | |||
# file_prefix = '../results/graph_images/SYNTHETIC/' + G.graph['name'] | |||
# draw_SYNTHETIC_graph(Gn[idx], file_prefix=file_prefix, save=True) | |||
# draw_SYNTHETIC_graph(Gn[idx]) | |||
print('nb_no_node: ', len(idx_no_node)) | |||
print('nb_no_edge: ', len(idx_no_edge)) | |||
print('nb_no_both: ', len(idx_no_both)) | |||
print('idx_no_node: ', idx_no_node) | |||
print('idx_no_edge: ', idx_no_edge) | |||
print('idx_no_both: ', idx_no_both) | |||
for idx in [0, 10, 100]: | |||
print(idx) | |||
print(Gn[idx].nodes(data=True)) | |||
print(Gn[idx].edges(data=True)) | |||
draw_SYNTHETIC_graph(Gn[idx], save=None) | |||
# # SYNTHETIC dataset. | |||
# dataset = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
# Gn, y_all = loadDataset(dataset) | |||
# | |||
# idx_no_node = [] | |||
# idx_no_edge = [] | |||
# idx_no_both = [] | |||
# for idx, G in enumerate(Gn): | |||
# if nx.number_of_nodes(G) == 0: | |||
# idx_no_node.append(idx) | |||
# if nx.number_of_edges(G) == 0: | |||
# idx_no_both.append(idx) | |||
# if nx.number_of_edges(G) == 0: | |||
# idx_no_edge.append(idx) | |||
## file_prefix = '../results/graph_images/SYNTHETIC/' + G.graph['name'] | |||
## draw_SYNTHETIC_graph(Gn[idx], file_prefix=file_prefix, save=True) | |||
## draw_SYNTHETIC_graph(Gn[idx]) | |||
# print('nb_no_node: ', len(idx_no_node)) | |||
# print('nb_no_edge: ', len(idx_no_edge)) | |||
# print('nb_no_both: ', len(idx_no_both)) | |||
# print('idx_no_node: ', idx_no_node) | |||
# print('idx_no_edge: ', idx_no_edge) | |||
# print('idx_no_both: ', idx_no_both) | |||
# | |||
# for idx in [0, 10, 100]: | |||
# print(idx) | |||
# print(Gn[idx].nodes(data=True)) | |||
# print(Gn[idx].edges(data=True)) | |||
# draw_SYNTHETIC_graph(Gn[idx], save=None) | |||
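# --- Hedged sketch, not part of the diff -----------------------------------
# The (now commented-out) SYNTHETIC scan above flags graphs with no nodes
# and/or no edges. The same scan with the nesting made explicit, so that
# idx_no_both only records graphs empty in both senses; the nesting is
# inferred, since indentation is not preserved in this listing, and the
# helper name is an assumption.
import networkx as nx

def find_empty_graphs(Gn):
    idx_no_node, idx_no_edge, idx_no_both = [], [], []
    for idx, G in enumerate(Gn):
        if nx.number_of_nodes(G) == 0:
            idx_no_node.append(idx)
            if nx.number_of_edges(G) == 0:
                idx_no_both.append(idx)
        if nx.number_of_edges(G) == 0:
            idx_no_edge.append(idx)
    return idx_no_node, idx_no_edge, idx_no_both
# ----------------------------------------------------------------------------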
def plot_a_graph(graph_filename): | |||