Browse Source

Merge pull request #17 from jajupmochi/v0.2

V0.2
tags/v0.2.0
linlin GitHub 5 years ago
parent
commit
eadc91b9df
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 4897 additions and 2157 deletions
  1. +28
    -0
      .appveyor.yml
  2. +1
    -0
      README.md
  3. +2
    -1
      gklearn/ged/env/__init__.py
  4. +80
    -0
      gklearn/ged/env/node_map.py
  5. +642
    -143
      gklearn/ged/median/median_graph_estimator.py
  6. +87
    -9
      gklearn/ged/median/test_median_graph_estimator.py
  7. +2
    -0
      gklearn/ged/median/utils.py
  8. +2855
    -1747
      gklearn/gedlib/gedlibpy.cpp
  9. BIN
      gklearn/gedlib/gedlibpy.cpython-36m-x86_64-linux-gnu.so
  10. +44
    -19
      gklearn/gedlib/gedlibpy.pyx
  11. +3
    -2
      gklearn/gedlib/src/GedLibBind.hpp
  12. +89
    -54
      gklearn/gedlib/src/GedLibBind.ipp
  13. +1
    -1
      gklearn/kernels/__init__.py
  14. +9
    -8
      gklearn/kernels/path_up_to_h.py
  15. +9
    -8
      gklearn/kernels/treelet.py
  16. +25
    -7
      gklearn/kernels/weisfeiler_lehman.py
  17. +801
    -56
      gklearn/preimage/experiments/xp_median_preimage.py
  18. +21
    -47
      gklearn/preimage/median_preimage_generator.py
  19. +30
    -7
      gklearn/preimage/utils.py
  20. +6
    -6
      gklearn/tests/test_graph_kernels.py
  21. +1
    -0
      gklearn/utils/__init__.py
  22. +102
    -30
      gklearn/utils/dataset.py
  23. +6
    -6
      gklearn/utils/graph_files.py
  24. +53
    -6
      gklearn/utils/utils.py

+ 28
- 0
.appveyor.yml View File

@@ -0,0 +1,28 @@
environment:
  matrix:
    - PYTHON: "C:\\Python35"
    - PYTHON: "C:\\Python35-x64"
    - PYTHON: "C:\\Python36"
    - PYTHON: "C:\\Python36-x64"
    - PYTHON: "C:\\Python37"
    - PYTHON: "C:\\Python37-x64"
    - PYTHON: "C:\\Python38"
    - PYTHON: "C:\\Python38-x64"

# skip_commits:
#   files:
#     - "*.yml"
#     - "*.rst"
#     - "LICENSE"

install:
  - "%PYTHON%\\python.exe -m pip install -U pip"
  - "%PYTHON%\\python.exe -m pip install -U pytest"
  - "%PYTHON%\\python.exe -m pip install -r requirements.txt"
  - "%PYTHON%\\python.exe -m pip install wheel"

build: off

test_script:
  - "%PYTHON%\\python.exe setup.py bdist_wheel"
  - "%PYTHON%\\python.exe -m pytest -v gklearn/tests/"

+ 1
- 0
README.md View File

@@ -1,5 +1,6 @@
# graphkit-learn # graphkit-learn
[![Build Status](https://travis-ci.org/jajupmochi/graphkit-learn.svg?branch=master)](https://travis-ci.org/jajupmochi/graphkit-learn) [![Build Status](https://travis-ci.org/jajupmochi/graphkit-learn.svg?branch=master)](https://travis-ci.org/jajupmochi/graphkit-learn)
[![Build status](https://ci.appveyor.com/api/projects/status/bdxsolk0t1uji9rd?svg=true)](https://ci.appveyor.com/project/jajupmochi/graphkit-learn)
[![codecov](https://codecov.io/gh/jajupmochi/graphkit-learn/branch/master/graph/badge.svg)](https://codecov.io/gh/jajupmochi/graphkit-learn) [![codecov](https://codecov.io/gh/jajupmochi/graphkit-learn/branch/master/graph/badge.svg)](https://codecov.io/gh/jajupmochi/graphkit-learn)
[![Documentation Status](https://readthedocs.org/projects/graphkit-learn/badge/?version=master)](https://graphkit-learn.readthedocs.io/en/master/?badge=master) [![Documentation Status](https://readthedocs.org/projects/graphkit-learn/badge/?version=master)](https://graphkit-learn.readthedocs.io/en/master/?badge=master)
[![PyPI version](https://badge.fury.io/py/graphkit-learn.svg)](https://badge.fury.io/py/graphkit-learn) [![PyPI version](https://badge.fury.io/py/graphkit-learn.svg)](https://badge.fury.io/py/graphkit-learn)


+ 2
- 1
gklearn/ged/env/__init__.py View File

@@ -1 +1,2 @@
from gklearn.ged.env.common_types import AlgorithmState
from gklearn.ged.env.common_types import AlgorithmState
from gklearn.ged.env.node_map import NodeMap

+ 80
- 0
gklearn/ged/env/node_map.py View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 22 11:31:26 2020

@author: ljia
"""
import numpy as np

class NodeMap(object):
    """Node map between a source graph g and a target graph h.

    The map is stored twice: ``forward_map[i]`` is the target node assigned
    to source node ``i`` and ``backward_map[k]`` is the source node assigned
    to target node ``k``.  Every entry starts out as ``np.inf``, which is
    also the value callers pass to ``add_assignment`` for "no node on this
    side".
    """

    def __init__(self, num_nodes_g, num_nodes_h):
        """Create an empty map for ``num_nodes_g`` source and ``num_nodes_h`` target nodes."""
        self.__forward_map = [np.inf] * num_nodes_g
        self.__backward_map = [np.inf] * num_nodes_h
        self.__induced_cost = np.inf

    def num_source_nodes(self):
        """Return the number of source-node slots in the map."""
        return len(self.__forward_map)

    def num_target_nodes(self):
        """Return the number of target-node slots in the map."""
        return len(self.__backward_map)

    def image(self, node):
        """Return the target node assigned to source node ``node``.

        Raises
        ------
        Exception
            If ``node`` is not smaller than the number of source nodes.
        """
        if node < len(self.__forward_map):
            return self.__forward_map[node]
        # Build ONE message string; passing the pieces as separate arguments
        # would make the exception carry a tuple instead of readable text.
        raise Exception('The node with ID ' + str(node) + ' is not contained in the source nodes of the node map.')

    def pre_image(self, node):
        """Return the source node assigned to target node ``node``.

        Raises
        ------
        Exception
            If ``node`` is not smaller than the number of target nodes.
        """
        if node < len(self.__backward_map):
            return self.__backward_map[node]
        raise Exception('The node with ID ' + str(node) + ' is not contained in the target nodes of the node map.')

    def get_forward_map(self):
        """Return the internal forward list (not a copy)."""
        return self.__forward_map

    def get_backward_map(self):
        """Return the internal backward list (not a copy)."""
        return self.__backward_map

    def as_relation(self, relation):
        """Fill ``relation`` in place with the map's ``(source, target)`` pairs.

        ``relation`` is cleared first.  It then receives one ``(i, k)`` pair
        for every source node ``i`` whose forward entry ``k`` is not
        ``np.inf``, followed by one ``(np.inf, k)`` pair for every target
        node ``k`` whose backward entry is ``np.inf``.
        """
        relation.clear()
        # NOTE(review): a source node mapped to np.inf is indistinguishable
        # from an unassigned one here, so no (i, np.inf) pair is ever
        # emitted -- confirm this is intended.
        for i, k in enumerate(self.__forward_map):
            if k != np.inf:
                relation.append((i, k))
        for k, i in enumerate(self.__backward_map):
            if i == np.inf:
                relation.append((i, k))

    def add_assignment(self, i, k):
        """Record that source node ``i`` is assigned to target node ``k``.

        Either side may be ``np.inf``; that side's list is then left
        untouched and only the other list is updated.

        Raises
        ------
        Exception
            If a non-``np.inf`` ``i`` or ``k`` is not smaller than the size
            of the corresponding list.
        """
        if i != np.inf:
            if i < len(self.__forward_map):
                self.__forward_map[i] = k
            else:
                raise Exception('The node with ID ' + str(i) + ' is not contained in the source nodes of the node map.')
        if k != np.inf:
            if k < len(self.__backward_map):
                self.__backward_map[k] = i
            else:
                raise Exception('The node with ID ' + str(k) + ' is not contained in the target nodes of the node map.')

    def set_induced_cost(self, induced_cost):
        """Store the cost associated with this node map."""
        self.__induced_cost = induced_cost

    def induced_cost(self):
        """Return the stored cost (``np.inf`` until one has been set)."""
        return self.__induced_cost

+ 642
- 143
gklearn/ged/median/median_graph_estimator.py
File diff suppressed because it is too large
View File


+ 87
- 9
gklearn/ged/median/test_median_graph_estimator.py View File

@@ -7,11 +7,10 @@ Created on Mon Mar 16 17:26:40 2020
""" """
def test_median_graph_estimator(): def test_median_graph_estimator():
from gklearn.utils.graphfiles import loadDataset
from gklearn.utils import load_dataset
from gklearn.ged.median import MedianGraphEstimator, constant_node_costs from gklearn.ged.median import MedianGraphEstimator, constant_node_costs
from gklearn.gedlib import librariesImport, gedlibpy from gklearn.gedlib import librariesImport, gedlibpy
from gklearn.preimage.utils import get_same_item_indices from gklearn.preimage.utils import get_same_item_indices
from gklearn.preimage.ged import convertGraph
import multiprocessing import multiprocessing


# estimator parameters. # estimator parameters.
@@ -22,17 +21,17 @@ def test_median_graph_estimator():
# algorithm parameters. # algorithm parameters.
algo = 'IPFP' algo = 'IPFP'
initial_solutions = 40
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1'
initial_solutions = 1
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE '


edit_cost_name = 'LETTER2' edit_cost_name = 'LETTER2'
edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001] edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001]
ds_name = 'COIL-DEL'
ds_name = 'Letter_high'
# Load dataset. # Load dataset.
# dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt' # dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt'
dataset = '../../../datasets/Letter-high/Letter-high_A.txt' dataset = '../../../datasets/Letter-high/Letter-high_A.txt'
Gn, y_all = loadDataset(dataset)
Gn, y_all, label_names = load_dataset(dataset)
y_idx = get_same_item_indices(y_all) y_idx = get_same_item_indices(y_all)
for i, (y, values) in enumerate(y_idx.items()): for i, (y, values) in enumerate(y_idx.items()):
Gn_i = [Gn[val] for val in values] Gn_i = [Gn[val] for val in values]
@@ -43,7 +42,7 @@ def test_median_graph_estimator():
# gedlibpy.restart_env() # gedlibpy.restart_env()
ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants)
for G in Gn_i: for G in Gn_i:
ged_env.add_nx_graph(convertGraph(G, edit_cost_name), '')
ged_env.add_nx_graph(G, '')
graph_ids = ged_env.get_all_graph_ids() graph_ids = ged_env.get_all_graph_ids()
set_median_id = ged_env.add_graph('set_median') set_median_id = ged_env.add_graph('set_median')
gen_median_id = ged_env.add_graph('gen_median') gen_median_id = ged_env.add_graph('gen_median')
@@ -54,11 +53,89 @@ def test_median_graph_estimator():
mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --refine FALSE'# @todo: std::to_string(rng())
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO '# @todo: std::to_string(rng())
# Select the GED algorithm. # Select the GED algorithm.
algo_options = '--threads ' + str(threads) + algo_options_suffix algo_options = '--threads ' + str(threads) + algo_options_suffix
mge.set_options(mge_options) mge.set_options(mge_options)
mge.set_label_names(node_labels=label_names['node_labels'],
edge_labels=label_names['edge_labels'],
node_attrs=label_names['node_attrs'],
edge_attrs=label_names['edge_attrs'])
mge.set_init_method(algo, algo_options)
mge.set_descent_method(algo, algo_options)
# Run the estimator.
mge.run(graph_ids, set_median_id, gen_median_id)
# Get SODs.
sod_sm = mge.get_sum_of_distances('initialized')
sod_gm = mge.get_sum_of_distances('converged')
print('sod_sm, sod_gm: ', sod_sm, sod_gm)
# Get median graphs.
set_median = ged_env.get_nx_graph(set_median_id)
gen_median = ged_env.get_nx_graph(gen_median_id)
return set_median, gen_median


def test_median_graph_estimator_symb():
from gklearn.utils import load_dataset
from gklearn.ged.median import MedianGraphEstimator, constant_node_costs
from gklearn.gedlib import librariesImport, gedlibpy
from gklearn.preimage.utils import get_same_item_indices
import multiprocessing

# estimator parameters.
init_type = 'MEDOID'
num_inits = 1
threads = multiprocessing.cpu_count()
time_limit = 60000
# algorithm parameters.
algo = 'IPFP'
initial_solutions = 1
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE '

edit_cost_name = 'CONSTANT'
edit_cost_constants = [4, 4, 2, 1, 1, 1]
ds_name = 'MUTAG'
# Load dataset.
dataset = '../../../datasets/MUTAG/MUTAG_A.txt'
Gn, y_all, label_names = load_dataset(dataset)
y_idx = get_same_item_indices(y_all)
for i, (y, values) in enumerate(y_idx.items()):
Gn_i = [Gn[val] for val in values]
break
Gn_i = Gn_i[0:10]
# Set up the environment.
ged_env = gedlibpy.GEDEnv()
# gedlibpy.restart_env()
ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants)
for G in Gn_i:
ged_env.add_nx_graph(G, '')
graph_ids = ged_env.get_all_graph_ids()
set_median_id = ged_env.add_graph('set_median')
gen_median_id = ged_env.add_graph('gen_median')
ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
# Set up the estimator.
mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name))
mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE'# @todo: std::to_string(rng())
# Select the GED algorithm.
algo_options = '--threads ' + str(threads) + algo_options_suffix
mge.set_options(mge_options)
mge.set_label_names(node_labels=label_names['node_labels'],
edge_labels=label_names['edge_labels'],
node_attrs=label_names['node_attrs'],
edge_attrs=label_names['edge_attrs'])
mge.set_init_method(algo, algo_options) mge.set_init_method(algo, algo_options)
mge.set_descent_method(algo, algo_options) mge.set_descent_method(algo, algo_options)
@@ -78,4 +155,5 @@ def test_median_graph_estimator():




if __name__ == '__main__': if __name__ == '__main__':
set_median, gen_median = test_median_graph_estimator()
set_median, gen_median = test_median_graph_estimator()
# set_median, gen_median = test_median_graph_estimator_symb()

+ 2
- 0
gklearn/ged/median/utils.py View File

@@ -30,6 +30,8 @@ def mge_options_to_string(options):
opt_str += '--randomness ' + str(val) + ' ' opt_str += '--randomness ' + str(val) + ' '
elif key == 'verbose': elif key == 'verbose':
opt_str += '--stdout ' + str(val) + ' ' opt_str += '--stdout ' + str(val) + ' '
elif key == 'update_order':
opt_str += '--update-order ' + ('TRUE' if val else 'FALSE') + ' '
elif key == 'refine': elif key == 'refine':
opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' ' opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' '
elif key == 'time_limit': elif key == 'time_limit':


+ 2855
- 1747
gklearn/gedlib/gedlibpy.cpp
File diff suppressed because it is too large
View File


BIN
gklearn/gedlib/gedlibpy.cpython-36m-x86_64-linux-gnu.so View File


+ 44
- 19
gklearn/gedlib/gedlibpy.pyx View File

@@ -35,8 +35,8 @@ from libcpp.pair cimport pair
from libcpp.list cimport list from libcpp.list cimport list


#Long unsigned int equivalent #Long unsigned int equivalent
cimport numpy as np
ctypedef np.npy_uint32 UINT32_t
cimport numpy as cnp
ctypedef cnp.npy_uint32 UINT32_t
from cpython cimport array from cpython cimport array


@@ -76,14 +76,14 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged":
void runMethod(size_t g, size_t h) except + void runMethod(size_t g, size_t h) except +
double getUpperBound(size_t g, size_t h) except + double getUpperBound(size_t g, size_t h) except +
double getLowerBound(size_t g, size_t h) except + double getLowerBound(size_t g, size_t h) except +
vector[np.npy_uint64] getForwardMap(size_t g, size_t h) except +
vector[np.npy_uint64] getBackwardMap(size_t g, size_t h) except +
vector[cnp.npy_uint64] getForwardMap(size_t g, size_t h) except +
vector[cnp.npy_uint64] getBackwardMap(size_t g, size_t h) except +
size_t getNodeImage(size_t g, size_t h, size_t nodeId) except + size_t getNodeImage(size_t g, size_t h, size_t nodeId) except +
size_t getNodePreImage(size_t g, size_t h, size_t nodeId) except + size_t getNodePreImage(size_t g, size_t h, size_t nodeId) except +
double getInducedCost(size_t g, size_t h) except + double getInducedCost(size_t g, size_t h) except +
vector[pair[size_t,size_t]] getNodeMap(size_t g, size_t h) except + vector[pair[size_t,size_t]] getNodeMap(size_t g, size_t h) except +
vector[vector[int]] getAssignmentMatrix(size_t g, size_t h) except + vector[vector[int]] getAssignmentMatrix(size_t g, size_t h) except +
vector[vector[np.npy_uint64]] getAllMap(size_t g, size_t h) except +
vector[vector[cnp.npy_uint64]] getAllMap(size_t g, size_t h) except +
double getRuntime(size_t g, size_t h) except + double getRuntime(size_t g, size_t h) except +
bool quasimetricCosts() except + bool quasimetricCosts() except +
vector[vector[size_t]] hungarianLSAP(vector[vector[size_t]] matrixCost) except + vector[vector[size_t]] hungarianLSAP(vector[vector[size_t]] matrixCost) except +
@@ -105,14 +105,16 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged":
map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except + map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except +
string getInitType() except + string getInitType() except +
# double getNodeCost(size_t label1, size_t label2) except + # double getNodeCost(size_t label1, size_t label2) except +
void computeInducedCost(size_t g_id, size_t h_id) except +
double computeInducedCost(size_t g_id, size_t h_id, vector[pair[size_t,size_t]]) except +
############################# #############################
##CYTHON WRAPPER INTERFACES## ##CYTHON WRAPPER INTERFACES##
############################# #############################


import numpy as np
import networkx as nx import networkx as nx
from gklearn.ged.env import NodeMap


# import librariesImport # import librariesImport
from ctypes import * from ctypes import *
@@ -726,13 +728,30 @@ cdef class GEDEnv:
:type g: size_t :type g: size_t
:type h: size_t :type h: size_t
:return: The Node Map between the two selected graph. :return: The Node Map between the two selected graph.
:rtype: list[tuple(size_t, size_t)]
:rtype: gklearn.ged.env.NodeMap.
.. seealso:: run_method(), get_forward_map(), get_backward_map(), get_node_image(), get_node_pre_image(), get_assignment_matrix() .. seealso:: run_method(), get_forward_map(), get_backward_map(), get_node_image(), get_node_pre_image(), get_assignment_matrix()
.. warning:: run_method() between the same two graph must be called before this function. .. warning:: run_method() between the same two graph must be called before this function.
.. note:: This function creates datas so use it if necessary, however you can understand how assignement works with this example. .. note:: This function creates datas so use it if necessary, however you can understand how assignement works with this example.
""" """
return self.c_env.getNodeMap(g, h)
map_as_relation = self.c_env.getNodeMap(g, h)
induced_cost = self.c_env.getInducedCost(g, h) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary.
source_map = [item.first if item.first < len(map_as_relation) else np.inf for item in map_as_relation] # item.first < len(map_as_relation) is not exactly correct.
# print(source_map)
target_map = [item.second if item.second < len(map_as_relation) else np.inf for item in map_as_relation]
# print(target_map)
num_node_source = len([item for item in source_map if item != np.inf])
# print(num_node_source)
num_node_target = len([item for item in target_map if item != np.inf])
# print(num_node_target)
node_map = NodeMap(num_node_source, num_node_target)
# print(node_map.get_forward_map(), node_map.get_backward_map())
for i in range(len(source_map)):
node_map.add_assignment(source_map[i], target_map[i])
node_map.set_induced_cost(induced_cost)
return node_map
def get_assignment_matrix(self, g, h) : def get_assignment_matrix(self, g, h) :
@@ -1320,7 +1339,7 @@ cdef class GEDEnv:
return graph_id return graph_id
def compute_induced_cost(self, g_id, h_id):
def compute_induced_cost(self, g_id, h_id, node_map):
""" """
Computes the edit cost between two graphs induced by a node map. Computes the edit cost between two graphs induced by a node map.


@@ -1330,19 +1349,25 @@ cdef class GEDEnv:
ID of input graph. ID of input graph.
h_id : int h_id : int
ID of input graph. ID of input graph.
node_map: gklearn.ged.env.NodeMap.
The NodeMap instance whose reduced cost will be computed and re-assigned.


Returns Returns
------- -------
None.
Notes
-----
The induced edit cost of the node map between `g_id` and `h_id` is implictly computed and stored in `GEDEnv::node_maps_`.

"""
cost = 0.0
self.c_env.computeInducedCost(g_id, h_id)
None.
"""
relation = []
node_map.as_relation(relation)
# print(relation)
dummy_node = get_dummy_node()
# print(dummy_node)
for i, val in enumerate(relation):
val1 = dummy_node if val[0] == np.inf else val[0]
val2 = dummy_node if val[1] == np.inf else val[1]
relation[i] = tuple((val1, val2))
# print(relation)
induced_cost = self.c_env.computeInducedCost(g_id, h_id, relation)
node_map.set_induced_cost(induced_cost)


##################################################################### #####################################################################


+ 3
- 2
gklearn/gedlib/src/GedLibBind.hpp View File

@@ -475,8 +475,9 @@ public:
* @brief Computes the edit cost between two graphs induced by a node map. * @brief Computes the edit cost between two graphs induced by a node map.
* @param[in] g_id ID of input graph. * @param[in] g_id ID of input graph.
* @param[in] h_id ID of input graph. * @param[in] h_id ID of input graph.
* @return Computed induced cost.
*/ */
void computeInducedCost(std::size_t g_id, std::size_t h_id) const;
double computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const;


// /*! // /*!
// * @brief Returns node relabeling, insertion, or deletion cost. // * @brief Returns node relabeling, insertion, or deletion cost.
@@ -492,7 +493,7 @@ public:


private: private:


ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> env; // environment variable
ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> * env_; // environment variable


bool initialized; // initialization boolean (because env has one but not accessible) bool initialized; // initialization boolean (because env has one but not accessible)




+ 89
- 54
gklearn/gedlib/src/GedLibBind.ipp View File

@@ -277,11 +277,16 @@ std::string toStringVectorInt(std::vector<unsigned long int> vector) {




PyGEDEnv::PyGEDEnv () { PyGEDEnv::PyGEDEnv () {
this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>();
env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>();
this->initialized = false; this->initialized = false;
} }


PyGEDEnv::~PyGEDEnv () {}
PyGEDEnv::~PyGEDEnv () {
if (env_ != NULL) {
delete env_;
env_ = NULL;
}
}


// bool initialized = false; //Initialization boolean (because Env has one but not accessible). // bool initialized = false; //Initialization boolean (because Env has one but not accessible).


@@ -290,64 +295,68 @@ bool PyGEDEnv::isInitialized() {
} }


void PyGEDEnv::restartEnv() { void PyGEDEnv::restartEnv() {
this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>();
if (env_ != NULL) {
delete env_;
env_ = NULL;
}
env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>();
initialized = false; initialized = false;
} }


void PyGEDEnv::loadGXLGraph(const std::string & pathFolder, const std::string & pathXML, bool node_type, bool edge_type) { void PyGEDEnv::loadGXLGraph(const std::string & pathFolder, const std::string & pathXML, bool node_type, bool edge_type) {
std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(this->env.load_gxl_graph(pathFolder, pathXML,
std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(env_->load_gxl_graph(pathFolder, pathXML,
(node_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), (node_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED),
(edge_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), (edge_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED),
std::unordered_set<std::string>(), std::unordered_set<std::string>())); std::unordered_set<std::string>(), std::unordered_set<std::string>()));
} }


std::pair<std::size_t,std::size_t> PyGEDEnv::getGraphIds() const { std::pair<std::size_t,std::size_t> PyGEDEnv::getGraphIds() const {
return this->env.graph_ids();
return env_->graph_ids();
} }


std::vector<std::size_t> PyGEDEnv::getAllGraphIds() { std::vector<std::size_t> PyGEDEnv::getAllGraphIds() {
std::vector<std::size_t> listID; std::vector<std::size_t> listID;
for (std::size_t i = this->env.graph_ids().first; i != this->env.graph_ids().second; i++) {
for (std::size_t i = env_->graph_ids().first; i != env_->graph_ids().second; i++) {
listID.push_back(i); listID.push_back(i);
} }
return listID; return listID;
} }


const std::string PyGEDEnv::getGraphClass(std::size_t id) const { const std::string PyGEDEnv::getGraphClass(std::size_t id) const {
return this->env.get_graph_class(id);
return env_->get_graph_class(id);
} }


const std::string PyGEDEnv::getGraphName(std::size_t id) const { const std::string PyGEDEnv::getGraphName(std::size_t id) const {
return this->env.get_graph_name(id);
return env_->get_graph_name(id);
} }


std::size_t PyGEDEnv::addGraph(const std::string & graph_name, const std::string & graph_class) { std::size_t PyGEDEnv::addGraph(const std::string & graph_name, const std::string & graph_class) {
ged::GEDGraph::GraphID newId = this->env.add_graph(graph_name, graph_class);
ged::GEDGraph::GraphID newId = env_->add_graph(graph_name, graph_class);
initialized = false; initialized = false;
return std::stoi(std::to_string(newId)); return std::stoi(std::to_string(newId));
} }


void PyGEDEnv::addNode(std::size_t graphId, const std::string & nodeId, const std::map<std::string, std::string> & nodeLabel) { void PyGEDEnv::addNode(std::size_t graphId, const std::string & nodeId, const std::map<std::string, std::string> & nodeLabel) {
this->env.add_node(graphId, nodeId, nodeLabel);
env_->add_node(graphId, nodeId, nodeLabel);
initialized = false; initialized = false;
} }


/*void addEdge(std::size_t graphId, ged::GXLNodeID tail, ged::GXLNodeID head, ged::GXLLabel edgeLabel) { /*void addEdge(std::size_t graphId, ged::GXLNodeID tail, ged::GXLNodeID head, ged::GXLLabel edgeLabel) {
this->env.add_edge(graphId, tail, head, edgeLabel);
env_->add_edge(graphId, tail, head, edgeLabel);
}*/ }*/


void PyGEDEnv::addEdge(std::size_t graphId, const std::string & tail, const std::string & head, const std::map<std::string, std::string> & edgeLabel, bool ignoreDuplicates) { void PyGEDEnv::addEdge(std::size_t graphId, const std::string & tail, const std::string & head, const std::map<std::string, std::string> & edgeLabel, bool ignoreDuplicates) {
this->env.add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates);
env_->add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates);
initialized = false; initialized = false;
} }


void PyGEDEnv::clearGraph(std::size_t graphId) { void PyGEDEnv::clearGraph(std::size_t graphId) {
this->env.clear_graph(graphId);
env_->clear_graph(graphId);
initialized = false; initialized = false;
} }


ged::ExchangeGraph<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> PyGEDEnv::getGraph(std::size_t graphId) const { ged::ExchangeGraph<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> PyGEDEnv::getGraph(std::size_t graphId) const {
return this->env.get_graph(graphId);
return env_->get_graph(graphId);
} }


std::size_t PyGEDEnv::getGraphInternalId(std::size_t graphId) { std::size_t PyGEDEnv::getGraphInternalId(std::size_t graphId) {
@@ -379,71 +388,71 @@ std::vector<std::vector<std::size_t>> PyGEDEnv::getGraphAdjacenceMatrix(std::siz
} }


void PyGEDEnv::setEditCost(std::string editCost, std::vector<double> editCostConstants) { void PyGEDEnv::setEditCost(std::string editCost, std::vector<double> editCostConstants) {
this->env.set_edit_costs(translateEditCost(editCost), editCostConstants);
env_->set_edit_costs(translateEditCost(editCost), editCostConstants);
} }


void PyGEDEnv::setPersonalEditCost(std::vector<double> editCostConstants) { void PyGEDEnv::setPersonalEditCost(std::vector<double> editCostConstants) {
//this->env.set_edit_costs(Your EditCost Class(editCostConstants));
//env_->set_edit_costs(Your EditCost Class(editCostConstants));
} }


// void PyGEDEnv::initEnv() { // void PyGEDEnv::initEnv() {
// this->env.init();
// env_->init();
// initialized = true; // initialized = true;
// } // }


void PyGEDEnv::initEnv(std::string initOption, bool print_to_stdout) { void PyGEDEnv::initEnv(std::string initOption, bool print_to_stdout) {
this->env.init(translateInitOptions(initOption), print_to_stdout);
env_->init(translateInitOptions(initOption), print_to_stdout);
initialized = true; initialized = true;
} }


void PyGEDEnv::setMethod(std::string method, const std::string & options) { void PyGEDEnv::setMethod(std::string method, const std::string & options) {
this->env.set_method(translateMethod(method), options);
env_->set_method(translateMethod(method), options);
} }


void PyGEDEnv::initMethod() { void PyGEDEnv::initMethod() {
this->env.init_method();
env_->init_method();
} }


double PyGEDEnv::getInitime() const { double PyGEDEnv::getInitime() const {
return this->env.get_init_time();
return env_->get_init_time();
} }


void PyGEDEnv::runMethod(std::size_t g, std::size_t h) { void PyGEDEnv::runMethod(std::size_t g, std::size_t h) {
this->env.run_method(g, h);
env_->run_method(g, h);
} }


double PyGEDEnv::getUpperBound(std::size_t g, std::size_t h) const { double PyGEDEnv::getUpperBound(std::size_t g, std::size_t h) const {
return this->env.get_upper_bound(g, h);
return env_->get_upper_bound(g, h);
} }


double PyGEDEnv::getLowerBound(std::size_t g, std::size_t h) const { double PyGEDEnv::getLowerBound(std::size_t g, std::size_t h) const {
return this->env.get_lower_bound(g, h);
return env_->get_lower_bound(g, h);
} }


std::vector<long unsigned int> PyGEDEnv::getForwardMap(std::size_t g, std::size_t h) const { std::vector<long unsigned int> PyGEDEnv::getForwardMap(std::size_t g, std::size_t h) const {
return this->env.get_node_map(g, h).get_forward_map();
return env_->get_node_map(g, h).get_forward_map();
} }


std::vector<long unsigned int> PyGEDEnv::getBackwardMap(std::size_t g, std::size_t h) const { std::vector<long unsigned int> PyGEDEnv::getBackwardMap(std::size_t g, std::size_t h) const {
return this->env.get_node_map(g, h).get_backward_map();
return env_->get_node_map(g, h).get_backward_map();
} }


std::size_t PyGEDEnv::getNodeImage(std::size_t g, std::size_t h, std::size_t nodeId) const { std::size_t PyGEDEnv::getNodeImage(std::size_t g, std::size_t h, std::size_t nodeId) const {
return this->env.get_node_map(g, h).image(nodeId);
return env_->get_node_map(g, h).image(nodeId);
} }


std::size_t PyGEDEnv::getNodePreImage(std::size_t g, std::size_t h, std::size_t nodeId) const { std::size_t PyGEDEnv::getNodePreImage(std::size_t g, std::size_t h, std::size_t nodeId) const {
return this->env.get_node_map(g, h).pre_image(nodeId);
return env_->get_node_map(g, h).pre_image(nodeId);
} }


double PyGEDEnv::getInducedCost(std::size_t g, std::size_t h) const { double PyGEDEnv::getInducedCost(std::size_t g, std::size_t h) const {
return this->env.get_node_map(g, h).induced_cost();
return env_->get_node_map(g, h).induced_cost();
} }


std::vector<pair<std::size_t, std::size_t>> PyGEDEnv::getNodeMap(std::size_t g, std::size_t h) { std::vector<pair<std::size_t, std::size_t>> PyGEDEnv::getNodeMap(std::size_t g, std::size_t h) {
std::vector<pair<std::size_t, std::size_t>> res; std::vector<pair<std::size_t, std::size_t>> res;
std::vector<ged::NodeMap::Assignment> relation; std::vector<ged::NodeMap::Assignment> relation;
this->env.get_node_map(g, h).as_relation(relation);
env_->get_node_map(g, h).as_relation(relation);
for (const auto & assignment : relation) { for (const auto & assignment : relation) {
res.push_back(std::make_pair(assignment.first, assignment.second)); res.push_back(std::make_pair(assignment.first, assignment.second));
} }
@@ -493,11 +502,11 @@ std::vector<std::vector<unsigned long int>> PyGEDEnv::getAllMap(std::size_t g, s
} }


double PyGEDEnv::getRuntime(std::size_t g, std::size_t h) const { double PyGEDEnv::getRuntime(std::size_t g, std::size_t h) const {
return this->env.get_runtime(g, h);
return env_->get_runtime(g, h);
} }


bool PyGEDEnv::quasimetricCosts() const { bool PyGEDEnv::quasimetricCosts() const {
return this->env.quasimetric_costs();
return env_->quasimetric_costs();
} }


std::vector<std::vector<size_t>> PyGEDEnv::hungarianLSAP(std::vector<std::vector<std::size_t>> matrixCost) { std::vector<std::vector<size_t>> PyGEDEnv::hungarianLSAP(std::vector<std::vector<std::size_t>> matrixCost) {
@@ -542,73 +551,99 @@ std::vector<std::vector<double>> PyGEDEnv::hungarianLSAPE(std::vector<std::vecto
} }


std::size_t PyGEDEnv::getNumNodeLabels() const { std::size_t PyGEDEnv::getNumNodeLabels() const {
return this->env.num_node_labels();
return env_->num_node_labels();
} }


std::map<std::string, std::string> PyGEDEnv::getNodeLabel(std::size_t label_id) const { std::map<std::string, std::string> PyGEDEnv::getNodeLabel(std::size_t label_id) const {
return this->env.get_node_label(label_id);
return env_->get_node_label(label_id);
} }


std::size_t PyGEDEnv::getNumEdgeLabels() const { std::size_t PyGEDEnv::getNumEdgeLabels() const {
return this->env.num_edge_labels();
return env_->num_edge_labels();
} }


std::map<std::string, std::string> PyGEDEnv::getEdgeLabel(std::size_t label_id) const { std::map<std::string, std::string> PyGEDEnv::getEdgeLabel(std::size_t label_id) const {
return this->env.get_edge_label(label_id);
return env_->get_edge_label(label_id);
} }


// std::size_t PyGEDEnv::getNumNodes(std::size_t graph_id) const { // std::size_t PyGEDEnv::getNumNodes(std::size_t graph_id) const {
// return this->env.get_num_nodes(graph_id);
// return env_->get_num_nodes(graph_id);
// } // }


double PyGEDEnv::getAvgNumNodes() const { double PyGEDEnv::getAvgNumNodes() const {
return this->env.get_avg_num_nodes();
return env_->get_avg_num_nodes();
} }


double PyGEDEnv::getNodeRelCost(const std::map<std::string, std::string> & node_label_1, const std::map<std::string, std::string> & node_label_2) const { double PyGEDEnv::getNodeRelCost(const std::map<std::string, std::string> & node_label_1, const std::map<std::string, std::string> & node_label_2) const {
return this->env.node_rel_cost(node_label_1, node_label_2);
return env_->node_rel_cost(node_label_1, node_label_2);
} }


double PyGEDEnv::getNodeDelCost(const std::map<std::string, std::string> & node_label) const { double PyGEDEnv::getNodeDelCost(const std::map<std::string, std::string> & node_label) const {
return this->env.node_del_cost(node_label);
return env_->node_del_cost(node_label);
} }


double PyGEDEnv::getNodeInsCost(const std::map<std::string, std::string> & node_label) const { double PyGEDEnv::getNodeInsCost(const std::map<std::string, std::string> & node_label) const {
return this->env.node_ins_cost(node_label);
return env_->node_ins_cost(node_label);
} }


std::map<std::string, std::string> PyGEDEnv::getMedianNodeLabel(const std::vector<std::map<std::string, std::string>> & node_labels) const { std::map<std::string, std::string> PyGEDEnv::getMedianNodeLabel(const std::vector<std::map<std::string, std::string>> & node_labels) const {
return this->env.median_node_label(node_labels);
return env_->median_node_label(node_labels);
} }


double PyGEDEnv::getEdgeRelCost(const std::map<std::string, std::string> & edge_label_1, const std::map<std::string, std::string> & edge_label_2) const { double PyGEDEnv::getEdgeRelCost(const std::map<std::string, std::string> & edge_label_1, const std::map<std::string, std::string> & edge_label_2) const {
return this->env.edge_rel_cost(edge_label_1, edge_label_2);
return env_->edge_rel_cost(edge_label_1, edge_label_2);
} }


double PyGEDEnv::getEdgeDelCost(const std::map<std::string, std::string> & edge_label) const { double PyGEDEnv::getEdgeDelCost(const std::map<std::string, std::string> & edge_label) const {
return this->env.edge_del_cost(edge_label);
return env_->edge_del_cost(edge_label);
} }


double PyGEDEnv::getEdgeInsCost(const std::map<std::string, std::string> & edge_label) const { double PyGEDEnv::getEdgeInsCost(const std::map<std::string, std::string> & edge_label) const {
return this->env.edge_ins_cost(edge_label);
return env_->edge_ins_cost(edge_label);
} }


std::map<std::string, std::string> PyGEDEnv::getMedianEdgeLabel(const std::vector<std::map<std::string, std::string>> & edge_labels) const { std::map<std::string, std::string> PyGEDEnv::getMedianEdgeLabel(const std::vector<std::map<std::string, std::string>> & edge_labels) const {
return this->env.median_edge_label(edge_labels);
return env_->median_edge_label(edge_labels);
} }


std::string PyGEDEnv::getInitType() const { std::string PyGEDEnv::getInitType() const {
return initOptionsToString(this->env.get_init_type());
return initOptionsToString(env_->get_init_type());
} }


void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const {
ged::NodeMap node_map = this->env.get_node_map(g_id, h_id);
this->env.compute_induced_cost(g_id, h_id, node_map);
double PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const {
ged::NodeMap node_map = ged::NodeMap(env_->get_num_nodes(g_id), env_->get_num_nodes(h_id));
for (const auto & assignment : relation) {
node_map.add_assignment(assignment.first, assignment.second);
// std::cout << assignment.first << assignment.second << endl;
}
const std::vector<ged::GEDGraph::NodeID> forward_map = node_map.get_forward_map();
for (std::size_t i{0}; i < node_map.num_source_nodes(); i++) {
if (forward_map.at(i) == ged::GEDGraph::undefined_node()) {
node_map.add_assignment(i, ged::GEDGraph::dummy_node());
}
}
const std::vector<ged::GEDGraph::NodeID> backward_map = node_map.get_backward_map();
for (std::size_t i{0}; i < node_map.num_target_nodes(); i++) {
if (backward_map.at(i) == ged::GEDGraph::undefined_node()) {
node_map.add_assignment(ged::GEDGraph::dummy_node(), i);
}
}
// for (auto & map : node_map.get_forward_map()) {
// std::cout << map << ", ";
// }
// std::cout << endl;
// for (auto & map : node_map.get_backward_map()) {
// std::cout << map << ", ";
// }
env_->compute_induced_cost(g_id, h_id, node_map);
return node_map.induced_cost();
} }






// double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const { // double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const {
// return this->env.ged_data_node_cost(label1, label2);
// return env_->ged_data_node_cost(label1, label2);
// } // }




@@ -630,7 +665,7 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const {


/*loadGXLGraph(pathFolder, pathXML); /*loadGXLGraph(pathFolder, pathXML);
std::vector<std::size_t> graph_ids = getAllGraphIds(); std::vector<std::size_t> graph_ids = getAllGraphIds();
std::size_t median_id = this->env.add_graph("median", "");
std::size_t median_id = env_->add_graph("median", "");


initEnv(initOption); initEnv(initOption);


@@ -640,10 +675,10 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const {
median_estimator.set_options("--init-type RANDOM --randomness PSEUDO --seed " + seed); median_estimator.set_options("--init-type RANDOM --randomness PSEUDO --seed " + seed);
median_estimator.run(graph_ids, median_id); median_estimator.run(graph_ids, median_id);
std::string gxl_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".gxl"); std::string gxl_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".gxl");
this->env.save_as_gxl_graph(median_id, gxl_file_name);*/
env_->save_as_gxl_graph(median_id, gxl_file_name);*/


/*std::string tikz_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".tex"); /*std::string tikz_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".tex");
save_letter_graph_as_tikz_file(this->env.get_graph(median_id), tikz_file_name);*/
save_letter_graph_as_tikz_file(env_->get_graph(median_id), tikz_file_name);*/
//} //}


} }


+ 1
- 1
gklearn/kernels/__init__.py View File

@@ -12,4 +12,4 @@ from gklearn.kernels.structural_sp import StructuralSP
from gklearn.kernels.shortest_path import ShortestPath from gklearn.kernels.shortest_path import ShortestPath
from gklearn.kernels.path_up_to_h import PathUpToH from gklearn.kernels.path_up_to_h import PathUpToH
from gklearn.kernels.treelet import Treelet from gklearn.kernels.treelet import Treelet
from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman
from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman, WLSubtree

+ 9
- 8
gklearn/kernels/path_up_to_h.py View File

@@ -18,6 +18,7 @@ import numpy as np
import networkx as nx import networkx as nx
from collections import Counter from collections import Counter
from functools import partial from functools import partial
from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import GraphKernel from gklearn.kernels import GraphKernel
from gklearn.utils import Trie from gklearn.utils import Trie
@@ -582,11 +583,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
def __add_dummy_labels(self, Gn): def __add_dummy_labels(self, Gn):
if self.__k_func is not None: if self.__k_func is not None:
if len(self.__node_labels) == 0:
for G in Gn:
nx.set_node_attributes(G, '0', 'dummy')
self.__node_labels.append('dummy')
if len(self.__edge_labels) == 0:
for G in Gn:
nx.set_edge_attributes(G, '0', 'dummy')
self.__edge_labels.append('dummy')
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]

+ 9
- 8
gklearn/kernels/treelet.py View File

@@ -18,6 +18,7 @@ import numpy as np
import networkx as nx import networkx as nx
from collections import Counter from collections import Counter
from itertools import chain from itertools import chain
from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs
from gklearn.kernels import GraphKernel from gklearn.kernels import GraphKernel
@@ -495,11 +496,11 @@ class Treelet(GraphKernel):
def __add_dummy_labels(self, Gn): def __add_dummy_labels(self, Gn):
if len(self.__node_labels) == 0:
for G in Gn:
nx.set_node_attributes(G, '0', 'dummy')
self.__node_labels.append('dummy')
if len(self.__edge_labels) == 0:
for G in Gn:
nx.set_edge_attributes(G, '0', 'dummy')
self.__edge_labels.append('dummy')
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]

+ 25
- 7
gklearn/kernels/weisfeiler_lehman.py View File

@@ -16,6 +16,7 @@ import numpy as np
import networkx as nx import networkx as nx
from collections import Counter from collections import Counter
from functools import partial from functools import partial
from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm from gklearn.utils.parallel import parallel_gm
from gklearn.kernels import GraphKernel from gklearn.kernels import GraphKernel


@@ -32,6 +33,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge




def _compute_gm_series(self): def _compute_gm_series(self):
if self._verbose >= 2:
import warnings
warnings.warn('A part of the computation is parallelized.')
self.__add_dummy_node_labels(self._graphs) self.__add_dummy_node_labels(self._graphs)
# for WL subtree kernel # for WL subtree kernel
@@ -55,11 +60,16 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _compute_gm_imap_unordered(self): def _compute_gm_imap_unordered(self):
if self._verbose >= 2: if self._verbose >= 2:
raise Warning('Only a part of the computation is parallelized due to the structure of this kernel.')
import warnings
warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.')
return self._compute_gm_series() return self._compute_gm_series()
def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better. def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better.
if self._verbose >= 2:
import warnings
warnings.warn('A part of the computation is parallelized.')
self.__add_dummy_node_labels(g_list + [g1]) self.__add_dummy_node_labels(g_list + [g1])
# for WL subtree kernel # for WL subtree kernel
@@ -83,8 +93,9 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _compute_kernel_list_imap_unordered(self, g1, g_list): def _compute_kernel_list_imap_unordered(self, g1, g_list):
if self._verbose >= 2: if self._verbose >= 2:
raise Warning('Only a part of the computation is parallelized due to the structure of this kernel.')
return self._compute_gm_imap_unordered()
import warnings
warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.')
return self._compute_kernel_list_series(g1, g_list)
def _wrapper_kernel_list_do(self, itr): def _wrapper_kernel_list_do(self, itr):
@@ -459,7 +470,14 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def __add_dummy_node_labels(self, Gn): def __add_dummy_node_labels(self, Gn):
if len(self.__node_labels) == 0:
for G in Gn:
nx.set_node_attributes(G, '0', 'dummy')
self.__node_labels.append('dummy')
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
class WLSubtree(WeisfeilerLehman):
def __init__(self, **kwargs):
kwargs['base_kernel'] = 'subtree'
super().__init__(**kwargs)

+ 801
- 56
gklearn/preimage/experiments/xp_median_preimage.py
File diff suppressed because it is too large
View File


+ 21
- 47
gklearn/preimage/median_preimage_generator.py View File

@@ -18,6 +18,7 @@ from gklearn.ged.median import MedianGraphEstimator
from gklearn.ged.median import constant_node_costs,mge_options_to_string from gklearn.ged.median import constant_node_costs,mge_options_to_string
from gklearn.gedlib import librariesImport, gedlibpy from gklearn.gedlib import librariesImport, gedlibpy
from gklearn.utils import Timer from gklearn.utils import Timer
from gklearn.utils.utils import get_graph_kernel_by_name
# from gklearn.utils.dataset import Dataset # from gklearn.utils.dataset import Dataset


class MedianPreimageGenerator(PreimageGenerator): class MedianPreimageGenerator(PreimageGenerator):
@@ -81,7 +82,13 @@ class MedianPreimageGenerator(PreimageGenerator):
def run(self): def run(self):
self.__set_graph_kernel_by_name()
self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'],
node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels,
node_attrs=self._dataset.node_attrs,
edge_attrs=self._dataset.edge_attrs,
ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
kernel_options=self._kernel_options)
# record start time. # record start time.
start = time.time() start = time.time()
@@ -180,6 +187,10 @@ class MedianPreimageGenerator(PreimageGenerator):
results['itrs'] = self.__itrs results['itrs'] = self.__itrs
results['converged'] = self.__converged results['converged'] = self.__converged
results['num_updates_ecc'] = self.__num_updates_ecc results['num_updates_ecc'] = self.__num_updates_ecc
results['mge'] = {}
results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased()
results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased()
results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents()
return results return results


@@ -653,27 +664,27 @@ class MedianPreimageGenerator(PreimageGenerator):
ged_env.init(init_option=self.__ged_options['init_option']) ged_env.init(init_option=self.__ged_options['init_option'])
# Set up the madian graph estimator. # Set up the madian graph estimator.
mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
options = self.__mge_options.copy() options = self.__mge_options.copy()
if not 'seed' in options: if not 'seed' in options:
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
# Select the GED algorithm. # Select the GED algorithm.
mge.set_options(mge_options_to_string(options))
mge.set_label_names(node_labels=self._dataset.node_labels,
self.__mge.set_options(mge_options_to_string(options))
self.__mge.set_label_names(node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels, edge_labels=self._dataset.edge_labels,
node_attrs=self._dataset.node_attrs, node_attrs=self._dataset.node_attrs,
edge_attrs=self._dataset.edge_attrs) edge_attrs=self._dataset.edge_attrs)
mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
self.__mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
self.__mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
# Run the estimator. # Run the estimator.
mge.run(graph_ids, set_median_id, gen_median_id)
self.__mge.run(graph_ids, set_median_id, gen_median_id)
# Get SODs. # Get SODs.
self.__sod_set_median = mge.get_sum_of_distances('initialized')
self.__sod_gen_median = mge.get_sum_of_distances('converged')
self.__sod_set_median = self.__mge.get_sum_of_distances('initialized')
self.__sod_gen_median = self.__mge.get_sum_of_distances('converged')
# Get median graphs. # Get median graphs.
self.__set_median = ged_env.get_nx_graph(set_median_id) self.__set_median = ged_env.get_nx_graph(set_median_id)
@@ -722,43 +733,6 @@ class MedianPreimageGenerator(PreimageGenerator):
print('distance in kernel space for generalized median:', self.__k_dis_gen_median) print('distance in kernel space for generalized median:', self.__k_dis_gen_median)
print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
print('distance in kernel space for each graph in median set:', k_dis_median_set) print('distance in kernel space for each graph in median set:', k_dis_median_set)

def __set_graph_kernel_by_name(self):
if self._kernel_options['name'] == 'ShortestPath':
from gklearn.kernels import ShortestPath
self._graph_kernel = ShortestPath(node_labels=self._dataset.node_labels,
node_attrs=self._dataset.node_attrs,
ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
**self._kernel_options)
elif self._kernel_options['name'] == 'StructuralSP':
from gklearn.kernels import StructuralSP
self._graph_kernel = StructuralSP(node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels,
node_attrs=self._dataset.node_attrs,
edge_attrs=self._dataset.edge_attrs,
ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
**self._kernel_options)
elif self._kernel_options['name'] == 'PathUpToH':
from gklearn.kernels import PathUpToH
self._graph_kernel = PathUpToH(node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels,
ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
**self._kernel_options)
elif self._kernel_options['name'] == 'Treelet':
from gklearn.kernels import Treelet
self._graph_kernel = Treelet(node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels,
ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
**self._kernel_options)
elif self._kernel_options['name'] == 'WeisfeilerLehman':
from gklearn.kernels import WeisfeilerLehman
self._graph_kernel = WeisfeilerLehman(node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels,
ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
**self._kernel_options)
else:
raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WeisfeilerLehman".')
# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):


+ 30
- 7
gklearn/preimage/utils.py View File

@@ -25,7 +25,7 @@ import networkx as nx
import os import os




def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False):
def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None):
import os.path import os.path
from gklearn.preimage import MedianPreimageGenerator from gklearn.preimage import MedianPreimageGenerator
from gklearn.utils import split_dataset_by_target from gklearn.utils import split_dataset_by_target
@@ -38,7 +38,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
dataset_all.trim_dataset(edge_required=edge_required) dataset_all.trim_dataset(edge_required=edge_required)
if irrelevant_labels is not None: if irrelevant_labels is not None:
dataset_all.remove_labels(**irrelevant_labels) dataset_all.remove_labels(**irrelevant_labels)
# dataset_all.cut_graphs(range(0, 10))
if cut_range is not None:
dataset_all.cut_graphs(cut_range)
datasets = split_dataset_by_target(dataset_all) datasets = split_dataset_by_target(dataset_all)


if save_results: if save_results:
@@ -57,6 +58,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
itrs_list = [] itrs_list = []
converged_list = [] converged_list = []
num_updates_ecc_list = [] num_updates_ecc_list = []
mge_decrease_order_list = []
mge_increase_order_list = []
mge_converged_order_list = []
nb_sod_sm2gm = [0, 0, 0] nb_sod_sm2gm = [0, 0, 0]
nb_dis_k_sm2gm = [0, 0, 0] nb_dis_k_sm2gm = [0, 0, 0]
nb_dis_k_gi2sm = [0, 0, 0] nb_dis_k_gi2sm = [0, 0, 0]
@@ -148,7 +152,10 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
results['runtime_precompute_gm'], results['runtime_optimize_ec'], results['runtime_precompute_gm'], results['runtime_optimize_ec'],
results['runtime_generate_preimage'], results['runtime_total'], results['runtime_generate_preimage'], results['runtime_total'],
results['itrs'], results['converged'], results['itrs'], results['converged'],
results['num_updates_ecc']])
results['num_updates_ecc'],
results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge
results['mge']['num_increase_order'] > 0,
results['mge']['num_converged_descents'] > 0])
f_detail.close() f_detail.close()
# compute result summary. # compute result summary.
@@ -164,6 +171,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
itrs_list.append(results['itrs']) itrs_list.append(results['itrs'])
converged_list.append(results['converged']) converged_list.append(results['converged'])
num_updates_ecc_list.append(results['num_updates_ecc']) num_updates_ecc_list.append(results['num_updates_ecc'])
mge_decrease_order_list.append(results['mge']['num_decrease_order'] > 0)
mge_increase_order_list.append(results['mge']['num_increase_order'] > 0)
mge_converged_order_list.append(results['mge']['num_converged_descents'] > 0)
# # SOD SM -> GM # # SOD SM -> GM
if results['sod_set_median'] > results['sod_gen_median']: if results['sod_set_median'] > results['sod_gen_median']:
nb_sod_sm2gm[0] += 1 nb_sod_sm2gm[0] += 1
@@ -210,7 +220,11 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
results['runtime_precompute_gm'], results['runtime_optimize_ec'], results['runtime_precompute_gm'], results['runtime_optimize_ec'],
results['runtime_generate_preimage'], results['runtime_total'], results['runtime_generate_preimage'], results['runtime_total'],
results['itrs'], results['converged'], results['itrs'], results['converged'],
results['num_updates_ecc'], nb_sod_sm2gm,
results['num_updates_ecc'],
results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge
results['mge']['num_increase_order'] > 0,
results['mge']['num_converged_descents'] > 0,
nb_sod_sm2gm,
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm]) nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm])
f_summary.close() f_summary.close()
@@ -256,6 +270,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
itrs_mean = np.mean(itrs_list) itrs_mean = np.mean(itrs_list)
num_converged = np.sum(converged_list) num_converged = np.sum(converged_list)
num_updates_ecc_mean = np.mean(num_updates_ecc_list) num_updates_ecc_mean = np.mean(num_updates_ecc_list)
num_mge_decrease_order = np.sum(mge_decrease_order_list)
num_mge_increase_order = np.sum(mge_increase_order_list)
num_mge_converged = np.sum(mge_converged_order_list)
sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean)) sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean))
dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean)) dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean)) dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
@@ -270,7 +287,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
dis_k_gi2sm_mean, dis_k_gi2gm_mean, dis_k_gi2sm_mean, dis_k_gi2gm_mean,
time_precompute_gm_mean, time_optimize_ec_mean, time_precompute_gm_mean, time_optimize_ec_mean,
time_generate_mean, time_total_mean, itrs_mean, time_generate_mean, time_total_mean, itrs_mean,
num_converged, num_updates_ecc_mean])
num_converged, num_updates_ecc_mean,
num_mge_decrease_order, num_mge_increase_order,
num_mge_converged])
f_summary.close() f_summary.close()
# save total pairwise kernel distances. # save total pairwise kernel distances.
@@ -300,7 +319,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output):
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'edit cost constants', 'time precompute gm', 'dis_k gi -> GM', 'edit cost constants', 'time precompute gm',
'time optimize ec', 'time generate preimage', 'time total', 'time optimize ec', 'time generate preimage', 'time total',
'itrs', 'converged', 'num updates ecc'])
'itrs', 'converged', 'num updates ecc', 'mge decrease order',
'mge increase order', 'mge converged'])
f_detail.close() f_detail.close()
# fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' # fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
@@ -312,7 +332,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output):
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'time precompute gm', 'time optimize ec', 'dis_k gi -> GM', 'time precompute gm', 'time optimize ec',
'time generate preimage', 'time total', 'itrs', 'num converged', 'time generate preimage', 'time total', 'itrs', 'num converged',
'num updates ecc', '# SOD SM -> GM', '# dis_k SM -> GM',
'num updates ecc', 'mge num decrease order', 'mge num increase order',
'mge num converged', '# SOD SM -> GM', '# dis_k SM -> GM',
'# dis_k gi -> SM', '# dis_k gi -> GM']) '# dis_k gi -> SM', '# dis_k gi -> GM'])
# 'repeats better SOD SM -> GM', # 'repeats better SOD SM -> GM',
# 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', # 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
@@ -418,6 +439,8 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel='
Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label,
height=4, base_kernel='subtree', parallel=None, height=4, base_kernel='subtree', parallel=None,
n_jobs=multiprocessing.cpu_count(), verbose=verbose) n_jobs=multiprocessing.cpu_count(), verbose=verbose)
else:
raise Exception('The graph kernel "', graph_kernel, '" is not defined.')
# normalization # normalization
Kmatrix_diag = Kmatrix.diagonal().copy() Kmatrix_diag = Kmatrix.diagonal().copy()


+ 6
- 6
gklearn/tests/test_graph_kernels.py View File

@@ -260,20 +260,20 @@ def test_Treelet(ds_name, parallel):
@pytest.mark.parametrize('ds_name', ['Acyclic']) @pytest.mark.parametrize('ds_name', ['Acyclic'])
#@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge'])
@pytest.mark.parametrize('base_kernel', ['subtree'])
# @pytest.mark.parametrize('base_kernel', ['subtree'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None]) @pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_WeisfeilerLehman(ds_name, parallel, base_kernel):
"""Test Weisfeiler-Lehman kernel.
def test_WLSubtree(ds_name, parallel):
"""Test Weisfeiler-Lehman subtree kernel.
""" """
from gklearn.kernels import WeisfeilerLehman
from gklearn.kernels import WLSubtree
dataset = chooseDataset(ds_name) dataset = chooseDataset(ds_name)


try: try:
graph_kernel = WeisfeilerLehman(node_labels=dataset.node_labels,
graph_kernel = WLSubtree(node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels, edge_labels=dataset.edge_labels,
ds_infos=dataset.get_dataset_infos(keys=['directed']), ds_infos=dataset.get_dataset_infos(keys=['directed']),
height=2, base_kernel=base_kernel)
height=2)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],


+ 1
- 0
gklearn/utils/__init__.py View File

@@ -20,4 +20,5 @@ from gklearn.utils.graph_files import load_dataset, save_dataset
from gklearn.utils.timer import Timer from gklearn.utils.timer import Timer
from gklearn.utils.utils import get_graph_kernel_by_name from gklearn.utils.utils import get_graph_kernel_by_name
from gklearn.utils.utils import compute_gram_matrices_by_class from gklearn.utils.utils import compute_gram_matrices_by_class
from gklearn.utils.utils import SpecialLabel
from gklearn.utils.trie import Trie from gklearn.utils.trie import Trie

+ 102
- 30
gklearn/utils/dataset.py View File

@@ -56,13 +56,14 @@ class Dataset(object):
self.__node_attrs = label_names['node_attrs'] self.__node_attrs = label_names['node_attrs']
self.__edge_labels = label_names['edge_labels'] self.__edge_labels = label_names['edge_labels']
self.__edge_attrs = label_names['edge_attrs'] self.__edge_attrs = label_names['edge_attrs']
self.clean_labels()
def load_graphs(self, graphs, targets=None): def load_graphs(self, graphs, targets=None):
# this has to be followed by set_labels(). # this has to be followed by set_labels().
self.__graphs = graphs self.__graphs = graphs
self.__targets = targets self.__targets = targets
# self.set_labels_attrs()
# self.set_labels_attrs() # @todo
def load_predefined_dataset(self, ds_name): def load_predefined_dataset(self, ds_name):
@@ -89,6 +90,9 @@ class Dataset(object):
elif ds_name == 'Cuneiform': elif ds_name == 'Cuneiform':
ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file) self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'DD':
ds_file = current_path + '../../datasets/DD/DD_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'Fingerprint': elif ds_name == 'Fingerprint':
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file) self.__graphs, self.__targets, label_names = load_dataset(ds_file)
@@ -113,6 +117,9 @@ class Dataset(object):
elif ds_name == 'MUTAG': elif ds_name == 'MUTAG':
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file) self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'PAH':
ds_file = current_path + '../../datasets/PAH/dataset.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'SYNTHETIC': elif ds_name == 'SYNTHETIC':
pass pass
elif ds_name == 'SYNTHETICnew': elif ds_name == 'SYNTHETICnew':
@@ -120,11 +127,14 @@ class Dataset(object):
self.__graphs, self.__targets, label_names = load_dataset(ds_file) self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'Synthie': elif ds_name == 'Synthie':
pass pass
else:
raise Exception('The dataset name "', ds_name, '" is not pre-defined.')
self.__node_labels = label_names['node_labels'] self.__node_labels = label_names['node_labels']
self.__node_attrs = label_names['node_attrs'] self.__node_attrs = label_names['node_attrs']
self.__edge_labels = label_names['edge_labels'] self.__edge_labels = label_names['edge_labels']
self.__edge_attrs = label_names['edge_attrs'] self.__edge_attrs = label_names['edge_attrs']
self.clean_labels()


def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]):
@@ -138,27 +148,27 @@ class Dataset(object):
# @todo: remove labels which have only one possible values. # @todo: remove labels which have only one possible values.
if node_labels is None: if node_labels is None:
self.__node_labels = self.__graphs[0].graph['node_labels'] self.__node_labels = self.__graphs[0].graph['node_labels']
# # graphs are considered node unlabeled if all nodes have the same label.
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False})
# # graphs are considered node unlabeled if all nodes have the same label.
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False})
if node_attrs is None: if node_attrs is None:
self.__node_attrs = self.__graphs[0].graph['node_attrs'] self.__node_attrs = self.__graphs[0].graph['node_attrs']
# for G in Gn:
# for n in G.nodes(data=True):
# if 'attributes' in n[1]:
# return len(n[1]['attributes'])
# return 0
# for G in Gn:
# for n in G.nodes(data=True):
# if 'attributes' in n[1]:
# return len(n[1]['attributes'])
# return 0
if edge_labels is None: if edge_labels is None:
self.__edge_labels = self.__graphs[0].graph['edge_labels'] self.__edge_labels = self.__graphs[0].graph['edge_labels']
# # graphs are considered edge unlabeled if all edges have the same label.
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False})
# # graphs are considered edge unlabeled if all edges have the same label.
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False})
if edge_attrs is None: if edge_attrs is None:
self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] self.__edge_attrs = self.__graphs[0].graph['edge_attrs']
# for G in Gn:
# if nx.number_of_edges(G) > 0:
# for e in G.edges(data=True):
# if 'attributes' in e[2]:
# return len(e[2]['attributes'])
# return 0
# for G in Gn:
# if nx.number_of_edges(G) > 0:
# for e in G.edges(data=True):
# if 'attributes' in e[2]:
# return len(e[2]['attributes'])
# return 0
def get_dataset_infos(self, keys=None): def get_dataset_infos(self, keys=None):
@@ -323,7 +333,7 @@ class Dataset(object):
if self.__node_label_nums is None: if self.__node_label_nums is None:
self.__node_label_nums = {} self.__node_label_nums = {}
for node_label in self.__node_labels: for node_label in self.__node_labels:
self.__node_label_nums[node_label] = self.get_node_label_num(node_label)
self.__node_label_nums[node_label] = self.__get_node_label_num(node_label)
infos['node_label_nums'] = self.__node_label_nums infos['node_label_nums'] = self.__node_label_nums
if 'edge_label_dim' in keys: if 'edge_label_dim' in keys:
@@ -335,7 +345,7 @@ class Dataset(object):
if self.__edge_label_nums is None: if self.__edge_label_nums is None:
self.__edge_label_nums = {} self.__edge_label_nums = {}
for edge_label in self.__edge_labels: for edge_label in self.__edge_labels:
self.__edge_label_nums[edge_label] = self.get_edge_label_num(edge_label)
self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label)
infos['edge_label_nums'] = self.__edge_label_nums infos['edge_label_nums'] = self.__edge_label_nums
if 'directed' in keys or 'substructures' in keys: if 'directed' in keys or 'substructures' in keys:
@@ -411,33 +421,95 @@ class Dataset(object):
def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
node_labels = [item for item in node_labels if item in self.__node_labels]
edge_labels = [item for item in edge_labels if item in self.__edge_labels]
node_attrs = [item for item in node_attrs if item in self.__node_attrs]
edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs]

for g in self.__graphs: for g in self.__graphs:
for nd in g.nodes(): for nd in g.nodes():
for nl in node_labels: for nl in node_labels:
del g.nodes[nd][nl]
del g.nodes[nd][nl]
for na in node_attrs: for na in node_attrs:
del g.nodes[nd][na] del g.nodes[nd][na]
for ed in g.edges(): for ed in g.edges():
for el in edge_labels: for el in edge_labels:
del g.edges[ed][el]
del g.edges[ed][el]
for ea in edge_attrs: for ea in edge_attrs:
del g.edges[ed][ea]
del g.edges[ed][ea]
if len(node_labels) > 0: if len(node_labels) > 0:
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels]
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels]
if len(edge_labels) > 0: if len(edge_labels) > 0:
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels]
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels]
if len(node_attrs) > 0: if len(node_attrs) > 0:
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs]
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs]
if len(edge_attrs) > 0: if len(edge_attrs) > 0:
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs]
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs]
def clean_labels(self):
    """Discard label/attribute names that take fewer than two distinct values.

    For each name in the node labels, edge labels, node attributes and edge
    attributes (processed in that order), the values found across all graphs
    of the dataset are collected. A name is kept as soon as two distinct
    values have been seen. Otherwise the name is dropped from the
    corresponding name list and the entry is removed from every node
    (resp. edge) of every graph.

    The graphs are modified in place. Values are gathered into a ``set``,
    so they must be hashable.
    """
    def prune(names, collect, attr_dicts):
        # Return the names worth keeping; strip the others from the graphs.
        kept = []
        for name in names:
            seen = set()
            for graph in self.__graphs:
                seen.update(collect(graph, name).values())
                if len(seen) > 1:
                    # Two distinct values are enough; stop scanning early.
                    kept.append(name)
                    break
            else:
                # Fewer than two distinct values over the whole dataset.
                # ``pop`` with a default is used instead of ``del`` because
                # a node/edge may not carry this name at all (always the
                # case when no value was found), and ``del`` would raise
                # KeyError there.
                for graph in self.__graphs:
                    for data in attr_dicts(graph):
                        data.pop(name, None)
        return kept

    def node_dicts(graph):
        # Attribute dictionaries of all nodes of ``graph``.
        return (graph.nodes[nd] for nd in graph.nodes())

    def edge_dicts(graph):
        # Attribute dictionaries of all edges of ``graph``.
        return (graph.edges[ed] for ed in graph.edges())

    self.__node_labels = prune(self.__node_labels, nx.get_node_attributes, node_dicts)
    self.__edge_labels = prune(self.__edge_labels, nx.get_edge_attributes, edge_dicts)
    self.__node_attrs = prune(self.__node_attrs, nx.get_node_attributes, node_dicts)
    self.__edge_attrs = prune(self.__edge_attrs, nx.get_edge_attributes, edge_dicts)
def cut_graphs(self, range_): def cut_graphs(self, range_):
self.__graphs = [self.__graphs[i] for i in range_] self.__graphs = [self.__graphs[i] for i in range_]
if self.__targets is not None: if self.__targets is not None:
self.__targets = [self.__targets[i] for i in range_] self.__targets = [self.__targets[i] for i in range_]
# @todo
# self.set_labels_attrs()
self.clean_labels()




def trim_dataset(self, edge_required=False): def trim_dataset(self, edge_required=False):
@@ -448,8 +520,7 @@ class Dataset(object):
idx = [p[0] for p in trimed_pairs] idx = [p[0] for p in trimed_pairs]
self.__graphs = [p[1] for p in trimed_pairs] self.__graphs = [p[1] for p in trimed_pairs]
self.__targets = [self.__targets[i] for i in idx] self.__targets = [self.__targets[i] for i in idx]
# @todo
# self.set_labels_attrs()
self.clean_labels()
def __get_dataset_size(self): def __get_dataset_size(self):
@@ -652,4 +723,5 @@ def split_dataset_by_target(dataset):
sub_dataset.load_graphs(sub_graphs, [key] * len(val)) sub_dataset.load_graphs(sub_graphs, [key] * len(val))
sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs) sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs)
datasets.append(sub_dataset) datasets.append(sub_dataset)
# @todo: clean_labels?
return datasets return datasets

+ 6
- 6
gklearn/utils/graph_files.py View File

@@ -63,7 +63,7 @@ def load_dataset(filename, filename_targets=None, gformat=None, **kwargs):
return data, y, label_names return data, y, label_names




def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=None):
def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', **kwargs):
"""Save list of graphs. """Save list of graphs.
""" """
import os import os
@@ -73,22 +73,22 @@ def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=Non
if not os.path.exists(dirname_ds) : if not os.path.exists(dirname_ds) :
os.makedirs(dirname_ds) os.makedirs(dirname_ds)
if xparams is not None and 'graph_dir' in xparams:
graph_dir = xparams['graph_dir'] + '/'
if 'graph_dir' in kwargs:
graph_dir = kwargs['graph_dir'] + '/'
if not os.path.exists(graph_dir): if not os.path.exists(graph_dir):
os.makedirs(graph_dir) os.makedirs(graph_dir)
del kwargs['graph_dir']
else: else:
graph_dir = dirname_ds graph_dir = dirname_ds
if group == 'xml' and gformat == 'gxl': if group == 'xml' and gformat == 'gxl':
kwargs = {'method': xparams['method']} if xparams is not None else {}
with open(filename + '.xml', 'w') as fgroup: with open(filename + '.xml', 'w') as fgroup:
fgroup.write("<?xml version=\"1.0\"?>") fgroup.write("<?xml version=\"1.0\"?>")
fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">") fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">")
fgroup.write("\n<GraphCollection>") fgroup.write("\n<GraphCollection>")
for idx, g in enumerate(Gn): for idx, g in enumerate(Gn):
fname_tmp = "graph" + str(idx) + ".gxl" fname_tmp = "graph" + str(idx) + ".gxl"
saveGXL(g, graph_dir + fname_tmp, **kwargs)
save_gxl(g, graph_dir + fname_tmp, **kwargs)
fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>")
fgroup.write("\n</GraphCollection>") fgroup.write("\n</GraphCollection>")
fgroup.close() fgroup.close()
@@ -226,7 +226,7 @@ def load_gxl(filename): # @todo: directed graphs.
return g, label_names return g, label_names




def saveGXL(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
def save_gxl(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
if method == 'default': if method == 'default':
gxl_file = open(filename, 'w') gxl_file = open(filename, 'w')
gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")


+ 53
- 6
gklearn/utils/utils.py View File

@@ -1,6 +1,7 @@
import networkx as nx import networkx as nx
import numpy as np import numpy as np
from copy import deepcopy from copy import deepcopy
from enum import Enum, auto
#from itertools import product #from itertools import product


# from tqdm import tqdm # from tqdm import tqdm
@@ -299,21 +300,59 @@ def get_edge_labels(Gn, edge_label):




def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}): def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}):
if name == 'structuralspkernel':
if name == 'ShortestPath':
from gklearn.kernels import ShortestPath
graph_kernel = ShortestPath(node_labels=node_labels,
node_attrs=node_attrs,
ds_infos=ds_infos,
**kernel_options)
elif name == 'StructuralSP':
from gklearn.kernels import StructuralSP from gklearn.kernels import StructuralSP
graph_kernel = StructuralSP(node_labels=node_labels, edge_labels=edge_labels,
node_attrs=node_attrs, edge_attrs=edge_attrs,
ds_infos=ds_infos, **kernel_options)
graph_kernel = StructuralSP(node_labels=node_labels,
edge_labels=edge_labels,
node_attrs=node_attrs,
edge_attrs=edge_attrs,
ds_infos=ds_infos,
**kernel_options)
elif name == 'PathUpToH':
from gklearn.kernels import PathUpToH
graph_kernel = PathUpToH(node_labels=node_labels,
edge_labels=edge_labels,
ds_infos=ds_infos,
**kernel_options)
elif name == 'Treelet':
from gklearn.kernels import Treelet
graph_kernel = Treelet(node_labels=node_labels,
edge_labels=edge_labels,
ds_infos=ds_infos,
**kernel_options)
elif name == 'WLSubtree':
from gklearn.kernels import WLSubtree
graph_kernel = WLSubtree(node_labels=node_labels,
edge_labels=edge_labels,
ds_infos=ds_infos,
**kernel_options)
elif name == 'WeisfeilerLehman':
from gklearn.kernels import WeisfeilerLehman
graph_kernel = WeisfeilerLehman(node_labels=node_labels,
edge_labels=edge_labels,
ds_infos=ds_infos,
**kernel_options)
else:
raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WLSubtree", "WeisfeilerLehman".')

return graph_kernel return graph_kernel




def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None):
def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False):
import os
from gklearn.utils import Dataset, split_dataset_by_target from gklearn.utils import Dataset, split_dataset_by_target
# 1. get dataset. # 1. get dataset.
print('1. getting dataset...') print('1. getting dataset...')
dataset_all = Dataset() dataset_all = Dataset()
dataset_all.load_predefined_dataset(ds_name) dataset_all.load_predefined_dataset(ds_name)
dataset_all.trim_dataset(edge_required=edge_required)
if not irrelevant_labels is None: if not irrelevant_labels is None:
dataset_all.remove_labels(**irrelevant_labels) dataset_all.remove_labels(**irrelevant_labels)
# dataset_all.cut_graphs(range(0, 10)) # dataset_all.cut_graphs(range(0, 10))
@@ -349,6 +388,8 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d
print() print()
print('4. saving results...') print('4. saving results...')
if save_results: if save_results:
if not os.path.exists(dir_save):
os.makedirs(dir_save)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)


print('\ncomplete.') print('\ncomplete.')
@@ -424,4 +465,10 @@ def get_mlti_dim_edge_attrs(G, attr_names):
attributes = [] attributes = []
for ed, attrs in G.edges(data=True): for ed, attrs in G.edges(data=True):
attributes.append(tuple(attrs[aname] for aname in attr_names)) attributes.append(tuple(attrs[aname] for aname in attr_names))
return attributes
return attributes


class SpecialLabel(Enum):
    """Special labels that are not regular label values.

    Members get automatically generated values through ``enum.auto()``.
    """
    # ``auto`` must be *called*; assigning the bare name would make the
    # ``enum.auto`` class itself the member's value.
    DUMMY = auto()  # The dummy label.

Loading…
Cancel
Save