@@ -0,0 +1,28 @@ | |||||
environment: | |||||
matrix: | |||||
- PYTHON: "C:\\Python35" | |||||
- PYTHON: "C:\\Python35-x64" | |||||
- PYTHON: "C:\\Python36" | |||||
- PYTHON: "C:\\Python36-x64" | |||||
- PYTHON: "C:\\Python37" | |||||
- PYTHON: "C:\\Python37-x64" | |||||
- PYTHON: "C:\\Python38" | |||||
- PYTHON: "C:\\Python38-x64" | |||||
# skip_commits: | |||||
# files: | |||||
# - "*.yml" | |||||
# - "*.rst" | |||||
# - "LICENSE" | |||||
install: | |||||
- "%PYTHON%\\python.exe -m pip install -U pip" | |||||
- "%PYTHON%\\python.exe -m pip install -U pytest" | |||||
- "%PYTHON%\\python.exe -m pip install -r requirements.txt" | |||||
- "%PYTHON%\\python.exe -m pip install wheel" | |||||
build: off | |||||
test_script: | |||||
- "%PYTHON%\\python.exe setup.py bdist_wheel" | |||||
- "%PYTHON%\\python.exe -m pytest -v gklearn/tests/" |
@@ -1,5 +1,6 @@ | |||||
# graphkit-learn | # graphkit-learn | ||||
[](https://travis-ci.org/jajupmochi/graphkit-learn) | [](https://travis-ci.org/jajupmochi/graphkit-learn) | ||||
[](https://ci.appveyor.com/project/jajupmochi/graphkit-learn) | |||||
[](https://codecov.io/gh/jajupmochi/graphkit-learn) | [](https://codecov.io/gh/jajupmochi/graphkit-learn) | ||||
[](https://graphkit-learn.readthedocs.io/en/master/?badge=master) | [](https://graphkit-learn.readthedocs.io/en/master/?badge=master) | ||||
[](https://badge.fury.io/py/graphkit-learn) | [](https://badge.fury.io/py/graphkit-learn) | ||||
@@ -1 +1,2 @@ | |||||
from gklearn.ged.env.common_types import AlgorithmState | |||||
from gklearn.ged.env.common_types import AlgorithmState | |||||
from gklearn.ged.env.node_map import NodeMap |
@@ -0,0 +1,80 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Wed Apr 22 11:31:26 2020 | |||||
@author: ljia | |||||
""" | |||||
import numpy as np | |||||
class NodeMap(object):
    """Node map between a source graph ``g`` and a target graph ``h``.

    The map is stored as two lists: the forward map gives, for every source
    node index, its image in the target graph; the backward map gives, for
    every target node index, its pre-image in the source graph. ``np.inf``
    marks an entry that has no (real) counterpart. The induced cost of the
    map is stored alongside and starts out as ``np.inf``.
    """

    def __init__(self, num_nodes_g, num_nodes_h):
        """Create an empty node map.

        Parameters
        ----------
        num_nodes_g : int
            Number of nodes in the source graph.
        num_nodes_h : int
            Number of nodes in the target graph.
        """
        self.__forward_map = [np.inf] * num_nodes_g
        self.__backward_map = [np.inf] * num_nodes_h
        self.__induced_cost = np.inf

    def num_source_nodes(self):
        """Return the number of source nodes covered by the map."""
        return len(self.__forward_map)

    def num_target_nodes(self):
        """Return the number of target nodes covered by the map."""
        return len(self.__backward_map)

    def image(self, node):
        """Return the image of source node ``node``.

        Raises
        ------
        Exception
            If ``node`` is not smaller than the number of source nodes.
        """
        if node < len(self.__forward_map):
            return self.__forward_map[node]
        # Build a single message string; passing the pieces as separate
        # arguments would make the exception print as a tuple.
        raise Exception('The node with ID ' + str(node) + ' is not contained in the source nodes of the node map.')

    def pre_image(self, node):
        """Return the pre-image of target node ``node``.

        Raises
        ------
        Exception
            If ``node`` is not smaller than the number of target nodes.
        """
        if node < len(self.__backward_map):
            return self.__backward_map[node]
        raise Exception('The node with ID ' + str(node) + ' is not contained in the target nodes of the node map.')

    def get_forward_map(self):
        """Return the internal forward map list (not a copy)."""
        return self.__forward_map

    def get_backward_map(self):
        """Return the internal backward map list (not a copy)."""
        return self.__backward_map

    def as_relation(self, relation):
        """Fill ``relation`` in place with the assignments of this map.

        ``relation`` is cleared first. Every source node ``i`` whose image
        ``k`` is not ``np.inf`` contributes ``(i, k)``; afterwards every
        target node ``k`` whose pre-image is ``np.inf`` contributes
        ``(np.inf, k)``.

        Parameters
        ----------
        relation : list
            Output list that receives the ``(source, target)`` tuples.
        """
        relation.clear()
        for i, k in enumerate(self.__forward_map):
            # NOTE(review): source nodes mapped to np.inf are omitted here,
            # so they never appear as (i, np.inf) -- confirm this is intended.
            if k != np.inf:
                relation.append((i, k))
        for k, i in enumerate(self.__backward_map):
            if i == np.inf:
                relation.append((i, k))

    def add_assignment(self, i, k):
        """Record that source node ``i`` is assigned to target node ``k``.

        Either side may be ``np.inf``; that side's map is then left untouched.

        Raises
        ------
        Exception
            If ``i`` or ``k`` is not ``np.inf`` and lies outside the
            corresponding map. The forward map is updated before ``k`` is
            checked, so a failure on ``k`` leaves the forward entry set.
        """
        if i != np.inf:
            if i < len(self.__forward_map):
                self.__forward_map[i] = k
            else:
                raise Exception('The node with ID ' + str(i) + ' is not contained in the source nodes of the node map.')
        if k != np.inf:
            if k < len(self.__backward_map):
                self.__backward_map[k] = i
            else:
                raise Exception('The node with ID ' + str(k) + ' is not contained in the target nodes of the node map.')

    def set_induced_cost(self, induced_cost):
        """Store the induced cost of this node map."""
        self.__induced_cost = induced_cost

    def induced_cost(self):
        """Return the stored induced cost (``np.inf`` until it is set)."""
        return self.__induced_cost
@@ -7,11 +7,10 @@ Created on Mon Mar 16 17:26:40 2020 | |||||
""" | """ | ||||
def test_median_graph_estimator(): | def test_median_graph_estimator(): | ||||
from gklearn.utils.graphfiles import loadDataset | |||||
from gklearn.utils import load_dataset | |||||
from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | ||||
from gklearn.gedlib import librariesImport, gedlibpy | from gklearn.gedlib import librariesImport, gedlibpy | ||||
from gklearn.preimage.utils import get_same_item_indices | from gklearn.preimage.utils import get_same_item_indices | ||||
from gklearn.preimage.ged import convertGraph | |||||
import multiprocessing | import multiprocessing | ||||
# estimator parameters. | # estimator parameters. | ||||
@@ -22,17 +21,17 @@ def test_median_graph_estimator(): | |||||
# algorithm parameters. | # algorithm parameters. | ||||
algo = 'IPFP' | algo = 'IPFP' | ||||
initial_solutions = 40 | |||||
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1' | |||||
initial_solutions = 1 | |||||
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE ' | |||||
edit_cost_name = 'LETTER2' | edit_cost_name = 'LETTER2' | ||||
edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001] | edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001] | ||||
ds_name = 'COIL-DEL' | |||||
ds_name = 'Letter_high' | |||||
# Load dataset. | # Load dataset. | ||||
# dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt' | # dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt' | ||||
dataset = '../../../datasets/Letter-high/Letter-high_A.txt' | dataset = '../../../datasets/Letter-high/Letter-high_A.txt' | ||||
Gn, y_all = loadDataset(dataset) | |||||
Gn, y_all, label_names = load_dataset(dataset) | |||||
y_idx = get_same_item_indices(y_all) | y_idx = get_same_item_indices(y_all) | ||||
for i, (y, values) in enumerate(y_idx.items()): | for i, (y, values) in enumerate(y_idx.items()): | ||||
Gn_i = [Gn[val] for val in values] | Gn_i = [Gn[val] for val in values] | ||||
@@ -43,7 +42,7 @@ def test_median_graph_estimator(): | |||||
# gedlibpy.restart_env() | # gedlibpy.restart_env() | ||||
ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | ||||
for G in Gn_i: | for G in Gn_i: | ||||
ged_env.add_nx_graph(convertGraph(G, edit_cost_name), '') | |||||
ged_env.add_nx_graph(G, '') | |||||
graph_ids = ged_env.get_all_graph_ids() | graph_ids = ged_env.get_all_graph_ids() | ||||
set_median_id = ged_env.add_graph('set_median') | set_median_id = ged_env.add_graph('set_median') | ||||
gen_median_id = ged_env.add_graph('gen_median') | gen_median_id = ged_env.add_graph('gen_median') | ||||
@@ -54,11 +53,89 @@ def test_median_graph_estimator(): | |||||
mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | ||||
mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | ||||
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --refine FALSE'# @todo: std::to_string(rng()) | |||||
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO '# @todo: std::to_string(rng()) | |||||
# Select the GED algorithm. | # Select the GED algorithm. | ||||
algo_options = '--threads ' + str(threads) + algo_options_suffix | algo_options = '--threads ' + str(threads) + algo_options_suffix | ||||
mge.set_options(mge_options) | mge.set_options(mge_options) | ||||
mge.set_label_names(node_labels=label_names['node_labels'], | |||||
edge_labels=label_names['edge_labels'], | |||||
node_attrs=label_names['node_attrs'], | |||||
edge_attrs=label_names['edge_attrs']) | |||||
mge.set_init_method(algo, algo_options) | |||||
mge.set_descent_method(algo, algo_options) | |||||
# Run the estimator. | |||||
mge.run(graph_ids, set_median_id, gen_median_id) | |||||
# Get SODs. | |||||
sod_sm = mge.get_sum_of_distances('initialized') | |||||
sod_gm = mge.get_sum_of_distances('converged') | |||||
print('sod_sm, sod_gm: ', sod_sm, sod_gm) | |||||
# Get median graphs. | |||||
set_median = ged_env.get_nx_graph(set_median_id) | |||||
gen_median = ged_env.get_nx_graph(gen_median_id) | |||||
return set_median, gen_median | |||||
def test_median_graph_estimator_symb(): | |||||
from gklearn.utils import load_dataset | |||||
from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | |||||
from gklearn.gedlib import librariesImport, gedlibpy | |||||
from gklearn.preimage.utils import get_same_item_indices | |||||
import multiprocessing | |||||
# estimator parameters. | |||||
init_type = 'MEDOID' | |||||
num_inits = 1 | |||||
threads = multiprocessing.cpu_count() | |||||
time_limit = 60000 | |||||
# algorithm parameters. | |||||
algo = 'IPFP' | |||||
initial_solutions = 1 | |||||
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE ' | |||||
edit_cost_name = 'CONSTANT' | |||||
edit_cost_constants = [4, 4, 2, 1, 1, 1] | |||||
ds_name = 'MUTAG' | |||||
# Load dataset. | |||||
dataset = '../../../datasets/MUTAG/MUTAG_A.txt' | |||||
Gn, y_all, label_names = load_dataset(dataset) | |||||
y_idx = get_same_item_indices(y_all) | |||||
for i, (y, values) in enumerate(y_idx.items()): | |||||
Gn_i = [Gn[val] for val in values] | |||||
break | |||||
Gn_i = Gn_i[0:10] | |||||
# Set up the environment. | |||||
ged_env = gedlibpy.GEDEnv() | |||||
# gedlibpy.restart_env() | |||||
ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | |||||
for G in Gn_i: | |||||
ged_env.add_nx_graph(G, '') | |||||
graph_ids = ged_env.get_all_graph_ids() | |||||
set_median_id = ged_env.add_graph('set_median') | |||||
gen_median_id = ged_env.add_graph('gen_median') | |||||
ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES') | |||||
# Set up the estimator. | |||||
mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name)) | |||||
mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | |||||
mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | |||||
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE'# @todo: std::to_string(rng()) | |||||
# Select the GED algorithm. | |||||
algo_options = '--threads ' + str(threads) + algo_options_suffix | |||||
mge.set_options(mge_options) | |||||
mge.set_label_names(node_labels=label_names['node_labels'], | |||||
edge_labels=label_names['edge_labels'], | |||||
node_attrs=label_names['node_attrs'], | |||||
edge_attrs=label_names['edge_attrs']) | |||||
mge.set_init_method(algo, algo_options) | mge.set_init_method(algo, algo_options) | ||||
mge.set_descent_method(algo, algo_options) | mge.set_descent_method(algo, algo_options) | ||||
@@ -78,4 +155,5 @@ def test_median_graph_estimator(): | |||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
set_median, gen_median = test_median_graph_estimator() | |||||
set_median, gen_median = test_median_graph_estimator() | |||||
# set_median, gen_median = test_median_graph_estimator_symb() |
@@ -30,6 +30,8 @@ def mge_options_to_string(options): | |||||
opt_str += '--randomness ' + str(val) + ' ' | opt_str += '--randomness ' + str(val) + ' ' | ||||
elif key == 'verbose': | elif key == 'verbose': | ||||
opt_str += '--stdout ' + str(val) + ' ' | opt_str += '--stdout ' + str(val) + ' ' | ||||
elif key == 'update_order': | |||||
opt_str += '--update-order ' + ('TRUE' if val else 'FALSE') + ' ' | |||||
elif key == 'refine': | elif key == 'refine': | ||||
opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' ' | opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' ' | ||||
elif key == 'time_limit': | elif key == 'time_limit': | ||||
@@ -35,8 +35,8 @@ from libcpp.pair cimport pair | |||||
from libcpp.list cimport list | from libcpp.list cimport list | ||||
#Long unsigned int equivalent | #Long unsigned int equivalent | ||||
cimport numpy as np | |||||
ctypedef np.npy_uint32 UINT32_t | |||||
cimport numpy as cnp | |||||
ctypedef cnp.npy_uint32 UINT32_t | |||||
from cpython cimport array | from cpython cimport array | ||||
@@ -76,14 +76,14 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged": | |||||
void runMethod(size_t g, size_t h) except + | void runMethod(size_t g, size_t h) except + | ||||
double getUpperBound(size_t g, size_t h) except + | double getUpperBound(size_t g, size_t h) except + | ||||
double getLowerBound(size_t g, size_t h) except + | double getLowerBound(size_t g, size_t h) except + | ||||
vector[np.npy_uint64] getForwardMap(size_t g, size_t h) except + | |||||
vector[np.npy_uint64] getBackwardMap(size_t g, size_t h) except + | |||||
vector[cnp.npy_uint64] getForwardMap(size_t g, size_t h) except + | |||||
vector[cnp.npy_uint64] getBackwardMap(size_t g, size_t h) except + | |||||
size_t getNodeImage(size_t g, size_t h, size_t nodeId) except + | size_t getNodeImage(size_t g, size_t h, size_t nodeId) except + | ||||
size_t getNodePreImage(size_t g, size_t h, size_t nodeId) except + | size_t getNodePreImage(size_t g, size_t h, size_t nodeId) except + | ||||
double getInducedCost(size_t g, size_t h) except + | double getInducedCost(size_t g, size_t h) except + | ||||
vector[pair[size_t,size_t]] getNodeMap(size_t g, size_t h) except + | vector[pair[size_t,size_t]] getNodeMap(size_t g, size_t h) except + | ||||
vector[vector[int]] getAssignmentMatrix(size_t g, size_t h) except + | vector[vector[int]] getAssignmentMatrix(size_t g, size_t h) except + | ||||
vector[vector[np.npy_uint64]] getAllMap(size_t g, size_t h) except + | |||||
vector[vector[cnp.npy_uint64]] getAllMap(size_t g, size_t h) except + | |||||
double getRuntime(size_t g, size_t h) except + | double getRuntime(size_t g, size_t h) except + | ||||
bool quasimetricCosts() except + | bool quasimetricCosts() except + | ||||
vector[vector[size_t]] hungarianLSAP(vector[vector[size_t]] matrixCost) except + | vector[vector[size_t]] hungarianLSAP(vector[vector[size_t]] matrixCost) except + | ||||
@@ -105,14 +105,16 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged": | |||||
map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except + | map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except + | ||||
string getInitType() except + | string getInitType() except + | ||||
# double getNodeCost(size_t label1, size_t label2) except + | # double getNodeCost(size_t label1, size_t label2) except + | ||||
void computeInducedCost(size_t g_id, size_t h_id) except + | |||||
double computeInducedCost(size_t g_id, size_t h_id, vector[pair[size_t,size_t]]) except + | |||||
############################# | ############################# | ||||
##CYTHON WRAPPER INTERFACES## | ##CYTHON WRAPPER INTERFACES## | ||||
############################# | ############################# | ||||
import numpy as np | |||||
import networkx as nx | import networkx as nx | ||||
from gklearn.ged.env import NodeMap | |||||
# import librariesImport | # import librariesImport | ||||
from ctypes import * | from ctypes import * | ||||
@@ -726,13 +728,30 @@ cdef class GEDEnv: | |||||
:type g: size_t | :type g: size_t | ||||
:type h: size_t | :type h: size_t | ||||
:return: The Node Map between the two selected graph. | :return: The Node Map between the two selected graph. | ||||
:rtype: list[tuple(size_t, size_t)] | |||||
:rtype: gklearn.ged.env.NodeMap. | |||||
.. seealso:: run_method(), get_forward_map(), get_backward_map(), get_node_image(), get_node_pre_image(), get_assignment_matrix() | .. seealso:: run_method(), get_forward_map(), get_backward_map(), get_node_image(), get_node_pre_image(), get_assignment_matrix() | ||||
.. warning:: run_method() between the same two graph must be called before this function. | .. warning:: run_method() between the same two graph must be called before this function. | ||||
.. note:: This function creates datas so use it if necessary, however you can understand how assignement works with this example. | .. note:: This function creates datas so use it if necessary, however you can understand how assignement works with this example. | ||||
""" | """ | ||||
return self.c_env.getNodeMap(g, h) | |||||
map_as_relation = self.c_env.getNodeMap(g, h) | |||||
induced_cost = self.c_env.getInducedCost(g, h) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary. | |||||
source_map = [item.first if item.first < len(map_as_relation) else np.inf for item in map_as_relation] # item.first < len(map_as_relation) is not exactly correct. | |||||
# print(source_map) | |||||
target_map = [item.second if item.second < len(map_as_relation) else np.inf for item in map_as_relation] | |||||
# print(target_map) | |||||
num_node_source = len([item for item in source_map if item != np.inf]) | |||||
# print(num_node_source) | |||||
num_node_target = len([item for item in target_map if item != np.inf]) | |||||
# print(num_node_target) | |||||
node_map = NodeMap(num_node_source, num_node_target) | |||||
# print(node_map.get_forward_map(), node_map.get_backward_map()) | |||||
for i in range(len(source_map)): | |||||
node_map.add_assignment(source_map[i], target_map[i]) | |||||
node_map.set_induced_cost(induced_cost) | |||||
return node_map | |||||
def get_assignment_matrix(self, g, h) : | def get_assignment_matrix(self, g, h) : | ||||
@@ -1320,7 +1339,7 @@ cdef class GEDEnv: | |||||
return graph_id | return graph_id | ||||
def compute_induced_cost(self, g_id, h_id): | |||||
def compute_induced_cost(self, g_id, h_id, node_map): | |||||
""" | """ | ||||
Computes the edit cost between two graphs induced by a node map. | Computes the edit cost between two graphs induced by a node map. | ||||
@@ -1330,19 +1349,25 @@ cdef class GEDEnv: | |||||
ID of input graph. | ID of input graph. | ||||
h_id : int | h_id : int | ||||
ID of input graph. | ID of input graph. | ||||
node_map: gklearn.ged.env.NodeMap. | |||||
The NodeMap instance whose induced cost will be computed and re-assigned. | |||||
Returns | Returns | ||||
------- | ------- | ||||
None. | |||||
Notes | |||||
----- | |||||
The induced edit cost of the node map between `g_id` and `h_id` is implictly computed and stored in `GEDEnv::node_maps_`. | |||||
""" | |||||
cost = 0.0 | |||||
self.c_env.computeInducedCost(g_id, h_id) | |||||
None. | |||||
""" | |||||
relation = [] | |||||
node_map.as_relation(relation) | |||||
# print(relation) | |||||
dummy_node = get_dummy_node() | |||||
# print(dummy_node) | |||||
for i, val in enumerate(relation): | |||||
val1 = dummy_node if val[0] == np.inf else val[0] | |||||
val2 = dummy_node if val[1] == np.inf else val[1] | |||||
relation[i] = tuple((val1, val2)) | |||||
# print(relation) | |||||
induced_cost = self.c_env.computeInducedCost(g_id, h_id, relation) | |||||
node_map.set_induced_cost(induced_cost) | |||||
##################################################################### | ##################################################################### | ||||
@@ -475,8 +475,9 @@ public: | |||||
* @brief Computes the edit cost between two graphs induced by a node map. | * @brief Computes the edit cost between two graphs induced by a node map. | ||||
* @param[in] g_id ID of input graph. | * @param[in] g_id ID of input graph. | ||||
* @param[in] h_id ID of input graph. | * @param[in] h_id ID of input graph. | ||||
* @return Computed induced cost. | |||||
*/ | */ | ||||
void computeInducedCost(std::size_t g_id, std::size_t h_id) const; | |||||
double computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const; | |||||
// /*! | // /*! | ||||
// * @brief Returns node relabeling, insertion, or deletion cost. | // * @brief Returns node relabeling, insertion, or deletion cost. | ||||
@@ -492,7 +493,7 @@ public: | |||||
private: | private: | ||||
ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> env; // environment variable | |||||
ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> * env_; // environment variable | |||||
bool initialized; // initialization boolean (because env has one but not accessible) | bool initialized; // initialization boolean (because env has one but not accessible) | ||||
@@ -277,11 +277,16 @@ std::string toStringVectorInt(std::vector<unsigned long int> vector) { | |||||
PyGEDEnv::PyGEDEnv () { | PyGEDEnv::PyGEDEnv () { | ||||
this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||||
env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||||
this->initialized = false; | this->initialized = false; | ||||
} | } | ||||
PyGEDEnv::~PyGEDEnv () {} | |||||
// Destroys the wrapper and frees the heap-allocated GED environment.
PyGEDEnv::~PyGEDEnv () {
	// Applying delete to a null pointer is a no-op, so no explicit guard is needed.
	delete env_;
	env_ = NULL;
}
// bool initialized = false; //Initialization boolean (because Env has one but not accessible). | // bool initialized = false; //Initialization boolean (because Env has one but not accessible). | ||||
@@ -290,64 +295,68 @@ bool PyGEDEnv::isInitialized() { | |||||
} | } | ||||
void PyGEDEnv::restartEnv() { | void PyGEDEnv::restartEnv() { | ||||
this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||||
if (env_ != NULL) { | |||||
delete env_; | |||||
env_ = NULL; | |||||
} | |||||
env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||||
initialized = false; | initialized = false; | ||||
} | } | ||||
void PyGEDEnv::loadGXLGraph(const std::string & pathFolder, const std::string & pathXML, bool node_type, bool edge_type) { | void PyGEDEnv::loadGXLGraph(const std::string & pathFolder, const std::string & pathXML, bool node_type, bool edge_type) { | ||||
std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(this->env.load_gxl_graph(pathFolder, pathXML, | |||||
std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(env_->load_gxl_graph(pathFolder, pathXML, | |||||
(node_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | (node_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | ||||
(edge_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | (edge_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | ||||
std::unordered_set<std::string>(), std::unordered_set<std::string>())); | std::unordered_set<std::string>(), std::unordered_set<std::string>())); | ||||
} | } | ||||
std::pair<std::size_t,std::size_t> PyGEDEnv::getGraphIds() const { | std::pair<std::size_t,std::size_t> PyGEDEnv::getGraphIds() const { | ||||
return this->env.graph_ids(); | |||||
return env_->graph_ids(); | |||||
} | } | ||||
std::vector<std::size_t> PyGEDEnv::getAllGraphIds() { | std::vector<std::size_t> PyGEDEnv::getAllGraphIds() { | ||||
std::vector<std::size_t> listID; | std::vector<std::size_t> listID; | ||||
for (std::size_t i = this->env.graph_ids().first; i != this->env.graph_ids().second; i++) { | |||||
for (std::size_t i = env_->graph_ids().first; i != env_->graph_ids().second; i++) { | |||||
listID.push_back(i); | listID.push_back(i); | ||||
} | } | ||||
return listID; | return listID; | ||||
} | } | ||||
const std::string PyGEDEnv::getGraphClass(std::size_t id) const { | const std::string PyGEDEnv::getGraphClass(std::size_t id) const { | ||||
return this->env.get_graph_class(id); | |||||
return env_->get_graph_class(id); | |||||
} | } | ||||
const std::string PyGEDEnv::getGraphName(std::size_t id) const { | const std::string PyGEDEnv::getGraphName(std::size_t id) const { | ||||
return this->env.get_graph_name(id); | |||||
return env_->get_graph_name(id); | |||||
} | } | ||||
std::size_t PyGEDEnv::addGraph(const std::string & graph_name, const std::string & graph_class) { | std::size_t PyGEDEnv::addGraph(const std::string & graph_name, const std::string & graph_class) { | ||||
ged::GEDGraph::GraphID newId = this->env.add_graph(graph_name, graph_class); | |||||
ged::GEDGraph::GraphID newId = env_->add_graph(graph_name, graph_class); | |||||
initialized = false; | initialized = false; | ||||
return std::stoi(std::to_string(newId)); | return std::stoi(std::to_string(newId)); | ||||
} | } | ||||
void PyGEDEnv::addNode(std::size_t graphId, const std::string & nodeId, const std::map<std::string, std::string> & nodeLabel) { | void PyGEDEnv::addNode(std::size_t graphId, const std::string & nodeId, const std::map<std::string, std::string> & nodeLabel) { | ||||
this->env.add_node(graphId, nodeId, nodeLabel); | |||||
env_->add_node(graphId, nodeId, nodeLabel); | |||||
initialized = false; | initialized = false; | ||||
} | } | ||||
/*void addEdge(std::size_t graphId, ged::GXLNodeID tail, ged::GXLNodeID head, ged::GXLLabel edgeLabel) { | /*void addEdge(std::size_t graphId, ged::GXLNodeID tail, ged::GXLNodeID head, ged::GXLLabel edgeLabel) { | ||||
this->env.add_edge(graphId, tail, head, edgeLabel); | |||||
env_->add_edge(graphId, tail, head, edgeLabel); | |||||
}*/ | }*/ | ||||
void PyGEDEnv::addEdge(std::size_t graphId, const std::string & tail, const std::string & head, const std::map<std::string, std::string> & edgeLabel, bool ignoreDuplicates) { | void PyGEDEnv::addEdge(std::size_t graphId, const std::string & tail, const std::string & head, const std::map<std::string, std::string> & edgeLabel, bool ignoreDuplicates) { | ||||
this->env.add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates); | |||||
env_->add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates); | |||||
initialized = false; | initialized = false; | ||||
} | } | ||||
void PyGEDEnv::clearGraph(std::size_t graphId) { | void PyGEDEnv::clearGraph(std::size_t graphId) { | ||||
this->env.clear_graph(graphId); | |||||
env_->clear_graph(graphId); | |||||
initialized = false; | initialized = false; | ||||
} | } | ||||
ged::ExchangeGraph<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> PyGEDEnv::getGraph(std::size_t graphId) const { | ged::ExchangeGraph<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> PyGEDEnv::getGraph(std::size_t graphId) const { | ||||
return this->env.get_graph(graphId); | |||||
return env_->get_graph(graphId); | |||||
} | } | ||||
std::size_t PyGEDEnv::getGraphInternalId(std::size_t graphId) { | std::size_t PyGEDEnv::getGraphInternalId(std::size_t graphId) { | ||||
@@ -379,71 +388,71 @@ std::vector<std::vector<std::size_t>> PyGEDEnv::getGraphAdjacenceMatrix(std::siz | |||||
} | } | ||||
void PyGEDEnv::setEditCost(std::string editCost, std::vector<double> editCostConstants) { | void PyGEDEnv::setEditCost(std::string editCost, std::vector<double> editCostConstants) { | ||||
this->env.set_edit_costs(translateEditCost(editCost), editCostConstants); | |||||
env_->set_edit_costs(translateEditCost(editCost), editCostConstants); | |||||
} | } | ||||
void PyGEDEnv::setPersonalEditCost(std::vector<double> editCostConstants) { | void PyGEDEnv::setPersonalEditCost(std::vector<double> editCostConstants) { | ||||
//this->env.set_edit_costs(Your EditCost Class(editCostConstants)); | |||||
//env_->set_edit_costs(Your EditCost Class(editCostConstants)); | |||||
} | } | ||||
// void PyGEDEnv::initEnv() { | // void PyGEDEnv::initEnv() { | ||||
// this->env.init(); | |||||
// env_->init(); | |||||
// initialized = true; | // initialized = true; | ||||
// } | // } | ||||
void PyGEDEnv::initEnv(std::string initOption, bool print_to_stdout) { | void PyGEDEnv::initEnv(std::string initOption, bool print_to_stdout) { | ||||
this->env.init(translateInitOptions(initOption), print_to_stdout); | |||||
env_->init(translateInitOptions(initOption), print_to_stdout); | |||||
initialized = true; | initialized = true; | ||||
} | } | ||||
void PyGEDEnv::setMethod(std::string method, const std::string & options) { | void PyGEDEnv::setMethod(std::string method, const std::string & options) { | ||||
this->env.set_method(translateMethod(method), options); | |||||
env_->set_method(translateMethod(method), options); | |||||
} | } | ||||
void PyGEDEnv::initMethod() { | void PyGEDEnv::initMethod() { | ||||
this->env.init_method(); | |||||
env_->init_method(); | |||||
} | } | ||||
double PyGEDEnv::getInitime() const { | double PyGEDEnv::getInitime() const { | ||||
return this->env.get_init_time(); | |||||
return env_->get_init_time(); | |||||
} | } | ||||
void PyGEDEnv::runMethod(std::size_t g, std::size_t h) { | void PyGEDEnv::runMethod(std::size_t g, std::size_t h) { | ||||
this->env.run_method(g, h); | |||||
env_->run_method(g, h); | |||||
} | } | ||||
double PyGEDEnv::getUpperBound(std::size_t g, std::size_t h) const { | double PyGEDEnv::getUpperBound(std::size_t g, std::size_t h) const { | ||||
return this->env.get_upper_bound(g, h); | |||||
return env_->get_upper_bound(g, h); | |||||
} | } | ||||
double PyGEDEnv::getLowerBound(std::size_t g, std::size_t h) const { | double PyGEDEnv::getLowerBound(std::size_t g, std::size_t h) const { | ||||
return this->env.get_lower_bound(g, h); | |||||
return env_->get_lower_bound(g, h); | |||||
} | } | ||||
std::vector<long unsigned int> PyGEDEnv::getForwardMap(std::size_t g, std::size_t h) const { | std::vector<long unsigned int> PyGEDEnv::getForwardMap(std::size_t g, std::size_t h) const { | ||||
return this->env.get_node_map(g, h).get_forward_map(); | |||||
return env_->get_node_map(g, h).get_forward_map(); | |||||
} | } | ||||
std::vector<long unsigned int> PyGEDEnv::getBackwardMap(std::size_t g, std::size_t h) const { | std::vector<long unsigned int> PyGEDEnv::getBackwardMap(std::size_t g, std::size_t h) const { | ||||
return this->env.get_node_map(g, h).get_backward_map(); | |||||
return env_->get_node_map(g, h).get_backward_map(); | |||||
} | } | ||||
std::size_t PyGEDEnv::getNodeImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | std::size_t PyGEDEnv::getNodeImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | ||||
return this->env.get_node_map(g, h).image(nodeId); | |||||
return env_->get_node_map(g, h).image(nodeId); | |||||
} | } | ||||
std::size_t PyGEDEnv::getNodePreImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | std::size_t PyGEDEnv::getNodePreImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | ||||
return this->env.get_node_map(g, h).pre_image(nodeId); | |||||
return env_->get_node_map(g, h).pre_image(nodeId); | |||||
} | } | ||||
double PyGEDEnv::getInducedCost(std::size_t g, std::size_t h) const { | double PyGEDEnv::getInducedCost(std::size_t g, std::size_t h) const { | ||||
return this->env.get_node_map(g, h).induced_cost(); | |||||
return env_->get_node_map(g, h).induced_cost(); | |||||
} | } | ||||
std::vector<pair<std::size_t, std::size_t>> PyGEDEnv::getNodeMap(std::size_t g, std::size_t h) { | std::vector<pair<std::size_t, std::size_t>> PyGEDEnv::getNodeMap(std::size_t g, std::size_t h) { | ||||
std::vector<pair<std::size_t, std::size_t>> res; | std::vector<pair<std::size_t, std::size_t>> res; | ||||
std::vector<ged::NodeMap::Assignment> relation; | std::vector<ged::NodeMap::Assignment> relation; | ||||
this->env.get_node_map(g, h).as_relation(relation); | |||||
env_->get_node_map(g, h).as_relation(relation); | |||||
for (const auto & assignment : relation) { | for (const auto & assignment : relation) { | ||||
res.push_back(std::make_pair(assignment.first, assignment.second)); | res.push_back(std::make_pair(assignment.first, assignment.second)); | ||||
} | } | ||||
@@ -493,11 +502,11 @@ std::vector<std::vector<unsigned long int>> PyGEDEnv::getAllMap(std::size_t g, s | |||||
} | } | ||||
double PyGEDEnv::getRuntime(std::size_t g, std::size_t h) const { | double PyGEDEnv::getRuntime(std::size_t g, std::size_t h) const { | ||||
return this->env.get_runtime(g, h); | |||||
return env_->get_runtime(g, h); | |||||
} | } | ||||
bool PyGEDEnv::quasimetricCosts() const { | bool PyGEDEnv::quasimetricCosts() const { | ||||
return this->env.quasimetric_costs(); | |||||
return env_->quasimetric_costs(); | |||||
} | } | ||||
std::vector<std::vector<size_t>> PyGEDEnv::hungarianLSAP(std::vector<std::vector<std::size_t>> matrixCost) { | std::vector<std::vector<size_t>> PyGEDEnv::hungarianLSAP(std::vector<std::vector<std::size_t>> matrixCost) { | ||||
@@ -542,73 +551,99 @@ std::vector<std::vector<double>> PyGEDEnv::hungarianLSAPE(std::vector<std::vecto | |||||
} | } | ||||
std::size_t PyGEDEnv::getNumNodeLabels() const { | std::size_t PyGEDEnv::getNumNodeLabels() const { | ||||
return this->env.num_node_labels(); | |||||
return env_->num_node_labels(); | |||||
} | } | ||||
std::map<std::string, std::string> PyGEDEnv::getNodeLabel(std::size_t label_id) const { | std::map<std::string, std::string> PyGEDEnv::getNodeLabel(std::size_t label_id) const { | ||||
return this->env.get_node_label(label_id); | |||||
return env_->get_node_label(label_id); | |||||
} | } | ||||
std::size_t PyGEDEnv::getNumEdgeLabels() const { | std::size_t PyGEDEnv::getNumEdgeLabels() const { | ||||
return this->env.num_edge_labels(); | |||||
return env_->num_edge_labels(); | |||||
} | } | ||||
std::map<std::string, std::string> PyGEDEnv::getEdgeLabel(std::size_t label_id) const { | std::map<std::string, std::string> PyGEDEnv::getEdgeLabel(std::size_t label_id) const { | ||||
return this->env.get_edge_label(label_id); | |||||
return env_->get_edge_label(label_id); | |||||
} | } | ||||
// std::size_t PyGEDEnv::getNumNodes(std::size_t graph_id) const { | // std::size_t PyGEDEnv::getNumNodes(std::size_t graph_id) const { | ||||
// return this->env.get_num_nodes(graph_id); | |||||
// return env_->get_num_nodes(graph_id); | |||||
// } | // } | ||||
double PyGEDEnv::getAvgNumNodes() const { | double PyGEDEnv::getAvgNumNodes() const { | ||||
return this->env.get_avg_num_nodes(); | |||||
return env_->get_avg_num_nodes(); | |||||
} | } | ||||
double PyGEDEnv::getNodeRelCost(const std::map<std::string, std::string> & node_label_1, const std::map<std::string, std::string> & node_label_2) const { | double PyGEDEnv::getNodeRelCost(const std::map<std::string, std::string> & node_label_1, const std::map<std::string, std::string> & node_label_2) const { | ||||
return this->env.node_rel_cost(node_label_1, node_label_2); | |||||
return env_->node_rel_cost(node_label_1, node_label_2); | |||||
} | } | ||||
double PyGEDEnv::getNodeDelCost(const std::map<std::string, std::string> & node_label) const { | double PyGEDEnv::getNodeDelCost(const std::map<std::string, std::string> & node_label) const { | ||||
return this->env.node_del_cost(node_label); | |||||
return env_->node_del_cost(node_label); | |||||
} | } | ||||
double PyGEDEnv::getNodeInsCost(const std::map<std::string, std::string> & node_label) const { | double PyGEDEnv::getNodeInsCost(const std::map<std::string, std::string> & node_label) const { | ||||
return this->env.node_ins_cost(node_label); | |||||
return env_->node_ins_cost(node_label); | |||||
} | } | ||||
std::map<std::string, std::string> PyGEDEnv::getMedianNodeLabel(const std::vector<std::map<std::string, std::string>> & node_labels) const { | std::map<std::string, std::string> PyGEDEnv::getMedianNodeLabel(const std::vector<std::map<std::string, std::string>> & node_labels) const { | ||||
return this->env.median_node_label(node_labels); | |||||
return env_->median_node_label(node_labels); | |||||
} | } | ||||
double PyGEDEnv::getEdgeRelCost(const std::map<std::string, std::string> & edge_label_1, const std::map<std::string, std::string> & edge_label_2) const { | double PyGEDEnv::getEdgeRelCost(const std::map<std::string, std::string> & edge_label_1, const std::map<std::string, std::string> & edge_label_2) const { | ||||
return this->env.edge_rel_cost(edge_label_1, edge_label_2); | |||||
return env_->edge_rel_cost(edge_label_1, edge_label_2); | |||||
} | } | ||||
double PyGEDEnv::getEdgeDelCost(const std::map<std::string, std::string> & edge_label) const { | double PyGEDEnv::getEdgeDelCost(const std::map<std::string, std::string> & edge_label) const { | ||||
return this->env.edge_del_cost(edge_label); | |||||
return env_->edge_del_cost(edge_label); | |||||
} | } | ||||
double PyGEDEnv::getEdgeInsCost(const std::map<std::string, std::string> & edge_label) const { | double PyGEDEnv::getEdgeInsCost(const std::map<std::string, std::string> & edge_label) const { | ||||
return this->env.edge_ins_cost(edge_label); | |||||
return env_->edge_ins_cost(edge_label); | |||||
} | } | ||||
std::map<std::string, std::string> PyGEDEnv::getMedianEdgeLabel(const std::vector<std::map<std::string, std::string>> & edge_labels) const { | std::map<std::string, std::string> PyGEDEnv::getMedianEdgeLabel(const std::vector<std::map<std::string, std::string>> & edge_labels) const { | ||||
return this->env.median_edge_label(edge_labels); | |||||
return env_->median_edge_label(edge_labels); | |||||
} | } | ||||
std::string PyGEDEnv::getInitType() const { | std::string PyGEDEnv::getInitType() const { | ||||
return initOptionsToString(this->env.get_init_type()); | |||||
return initOptionsToString(env_->get_init_type()); | |||||
} | } | ||||
void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||||
ged::NodeMap node_map = this->env.get_node_map(g_id, h_id); | |||||
this->env.compute_induced_cost(g_id, h_id, node_map); | |||||
double PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const { | |||||
ged::NodeMap node_map = ged::NodeMap(env_->get_num_nodes(g_id), env_->get_num_nodes(h_id)); | |||||
for (const auto & assignment : relation) { | |||||
node_map.add_assignment(assignment.first, assignment.second); | |||||
// std::cout << assignment.first << assignment.second << endl; | |||||
} | |||||
const std::vector<ged::GEDGraph::NodeID> forward_map = node_map.get_forward_map(); | |||||
for (std::size_t i{0}; i < node_map.num_source_nodes(); i++) { | |||||
if (forward_map.at(i) == ged::GEDGraph::undefined_node()) { | |||||
node_map.add_assignment(i, ged::GEDGraph::dummy_node()); | |||||
} | |||||
} | |||||
const std::vector<ged::GEDGraph::NodeID> backward_map = node_map.get_backward_map(); | |||||
for (std::size_t i{0}; i < node_map.num_target_nodes(); i++) { | |||||
if (backward_map.at(i) == ged::GEDGraph::undefined_node()) { | |||||
node_map.add_assignment(ged::GEDGraph::dummy_node(), i); | |||||
} | |||||
} | |||||
// for (auto & map : node_map.get_forward_map()) { | |||||
// std::cout << map << ", "; | |||||
// } | |||||
// std::cout << endl; | |||||
// for (auto & map : node_map.get_backward_map()) { | |||||
// std::cout << map << ", "; | |||||
// } | |||||
env_->compute_induced_cost(g_id, h_id, node_map); | |||||
return node_map.induced_cost(); | |||||
} | } | ||||
// double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const { | // double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const { | ||||
// return this->env.ged_data_node_cost(label1, label2); | |||||
// return env_->ged_data_node_cost(label1, label2); | |||||
// } | // } | ||||
@@ -630,7 +665,7 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||||
/*loadGXLGraph(pathFolder, pathXML); | /*loadGXLGraph(pathFolder, pathXML); | ||||
std::vector<std::size_t> graph_ids = getAllGraphIds(); | std::vector<std::size_t> graph_ids = getAllGraphIds(); | ||||
std::size_t median_id = this->env.add_graph("median", ""); | |||||
std::size_t median_id = env_->add_graph("median", ""); | |||||
initEnv(initOption); | initEnv(initOption); | ||||
@@ -640,10 +675,10 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||||
median_estimator.set_options("--init-type RANDOM --randomness PSEUDO --seed " + seed); | median_estimator.set_options("--init-type RANDOM --randomness PSEUDO --seed " + seed); | ||||
median_estimator.run(graph_ids, median_id); | median_estimator.run(graph_ids, median_id); | ||||
std::string gxl_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".gxl"); | std::string gxl_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".gxl"); | ||||
this->env.save_as_gxl_graph(median_id, gxl_file_name);*/ | |||||
env_->save_as_gxl_graph(median_id, gxl_file_name);*/ | |||||
/*std::string tikz_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".tex"); | /*std::string tikz_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".tex"); | ||||
save_letter_graph_as_tikz_file(this->env.get_graph(median_id), tikz_file_name);*/ | |||||
save_letter_graph_as_tikz_file(env_->get_graph(median_id), tikz_file_name);*/ | |||||
//} | //} | ||||
} | } | ||||
@@ -12,4 +12,4 @@ from gklearn.kernels.structural_sp import StructuralSP | |||||
from gklearn.kernels.shortest_path import ShortestPath | from gklearn.kernels.shortest_path import ShortestPath | ||||
from gklearn.kernels.path_up_to_h import PathUpToH | from gklearn.kernels.path_up_to_h import PathUpToH | ||||
from gklearn.kernels.treelet import Treelet | from gklearn.kernels.treelet import Treelet | ||||
from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman | |||||
from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman, WLSubtree |
@@ -18,6 +18,7 @@ import numpy as np | |||||
import networkx as nx | import networkx as nx | ||||
from collections import Counter | from collections import Counter | ||||
from functools import partial | from functools import partial | ||||
from gklearn.utils import SpecialLabel | |||||
from gklearn.utils.parallel import parallel_gm, parallel_me | from gklearn.utils.parallel import parallel_gm, parallel_me | ||||
from gklearn.kernels import GraphKernel | from gklearn.kernels import GraphKernel | ||||
from gklearn.utils import Trie | from gklearn.utils import Trie | ||||
@@ -582,11 +583,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
def __add_dummy_labels(self, Gn):
    """Give every graph in ``Gn`` a dummy node/edge label where none is configured.

    Nothing happens when ``self.__k_func`` is None. Otherwise, when the
    configured node labels are empty or consist solely of
    ``SpecialLabel.DUMMY``, every node of every graph gets the attribute
    ``SpecialLabel.DUMMY`` set to ``'0'`` and the node-label list becomes
    ``[SpecialLabel.DUMMY]``. Edge labels are handled the same way.
    """
    # NOTE(review): the nesting below was reconstructed from a listing that
    # carried no indentation -- confirm that the edge-label branch belongs
    # under the ``__k_func`` check as well.
    if self.__k_func is not None:
        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
            for graph in Gn:
                nx.set_node_attributes(graph, '0', SpecialLabel.DUMMY)
            self.__node_labels = [SpecialLabel.DUMMY]
        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
            for graph in Gn:
                nx.set_edge_attributes(graph, '0', SpecialLabel.DUMMY)
            self.__edge_labels = [SpecialLabel.DUMMY]
@@ -18,6 +18,7 @@ import numpy as np | |||||
import networkx as nx | import networkx as nx | ||||
from collections import Counter | from collections import Counter | ||||
from itertools import chain | from itertools import chain | ||||
from gklearn.utils import SpecialLabel | |||||
from gklearn.utils.parallel import parallel_gm, parallel_me | from gklearn.utils.parallel import parallel_gm, parallel_me | ||||
from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs | from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs | ||||
from gklearn.kernels import GraphKernel | from gklearn.kernels import GraphKernel | ||||
@@ -495,11 +496,11 @@ class Treelet(GraphKernel): | |||||
def __add_dummy_labels(self, Gn):
    """Give every graph in ``Gn`` a dummy node/edge label where none is configured.

    When the configured node labels are empty or consist solely of
    ``SpecialLabel.DUMMY``, every node of every graph gets the attribute
    ``SpecialLabel.DUMMY`` set to ``'0'`` and the node-label list becomes
    ``[SpecialLabel.DUMMY]``. Edge labels are handled the same way.
    """
    if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
        for graph in Gn:
            nx.set_node_attributes(graph, '0', SpecialLabel.DUMMY)
        self.__node_labels = [SpecialLabel.DUMMY]
    if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
        for graph in Gn:
            nx.set_edge_attributes(graph, '0', SpecialLabel.DUMMY)
        self.__edge_labels = [SpecialLabel.DUMMY]
@@ -16,6 +16,7 @@ import numpy as np | |||||
import networkx as nx | import networkx as nx | ||||
from collections import Counter | from collections import Counter | ||||
from functools import partial | from functools import partial | ||||
from gklearn.utils import SpecialLabel | |||||
from gklearn.utils.parallel import parallel_gm | from gklearn.utils.parallel import parallel_gm | ||||
from gklearn.kernels import GraphKernel | from gklearn.kernels import GraphKernel | ||||
@@ -32,6 +33,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
if self._verbose >= 2: | |||||
import warnings | |||||
warnings.warn('A part of the computation is parallelized.') | |||||
self.__add_dummy_node_labels(self._graphs) | self.__add_dummy_node_labels(self._graphs) | ||||
# for WL subtree kernel | # for WL subtree kernel | ||||
@@ -55,11 +60,16 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
def _compute_gm_imap_unordered(self):
    """Compute the Gram matrix by delegating to ``_compute_gm_series``.

    When ``self._verbose >= 2`` a warning is issued (not raised, so the
    computation still runs) stating that only part of the work is
    parallelized.

    Returns whatever ``self._compute_gm_series()`` returns.
    """
    if self._verbose >= 2:
        import warnings
        warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.')
    return self._compute_gm_series()
def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better. | def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better. | ||||
if self._verbose >= 2: | |||||
import warnings | |||||
warnings.warn('A part of the computation is parallelized.') | |||||
self.__add_dummy_node_labels(g_list + [g1]) | self.__add_dummy_node_labels(g_list + [g1]) | ||||
# for WL subtree kernel | # for WL subtree kernel | ||||
@@ -83,8 +93,9 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
def _compute_kernel_list_imap_unordered(self, g1, g_list):
    """Compute the kernels between ``g1`` and ``g_list`` via the series routine.

    When ``self._verbose >= 2`` a warning is issued (not raised, so the
    computation still runs) stating that only part of the work is
    parallelized.

    Returns whatever ``self._compute_kernel_list_series(g1, g_list)`` returns.
    """
    if self._verbose >= 2:
        import warnings
        warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.')
    # Delegate to the kernel-list routine, not the Gram-matrix one, so that
    # g1 and g_list are actually used.
    return self._compute_kernel_list_series(g1, g_list)
def _wrapper_kernel_list_do(self, itr): | def _wrapper_kernel_list_do(self, itr): | ||||
@@ -459,7 +470,14 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
def __add_dummy_node_labels(self, Gn):
    """Give every graph in ``Gn`` a dummy node label where none is configured.

    When the configured node labels are empty or consist solely of
    ``SpecialLabel.DUMMY``, every node of every graph gets the attribute
    ``SpecialLabel.DUMMY`` set to ``'0'`` and the node-label list becomes
    ``[SpecialLabel.DUMMY]``.
    """
    if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
        for graph in Gn:
            nx.set_node_attributes(graph, '0', SpecialLabel.DUMMY)
        self.__node_labels = [SpecialLabel.DUMMY]
class WLSubtree(WeisfeilerLehman):
    """Weisfeiler-Lehman kernel with ``base_kernel`` fixed to ``'subtree'``."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``WeisfeilerLehman``, overriding ``base_kernel``."""
        # Any caller-supplied 'base_kernel' is replaced by 'subtree'.
        kwargs.update(base_kernel='subtree')
        super().__init__(**kwargs)
@@ -18,6 +18,7 @@ from gklearn.ged.median import MedianGraphEstimator | |||||
from gklearn.ged.median import constant_node_costs,mge_options_to_string | from gklearn.ged.median import constant_node_costs,mge_options_to_string | ||||
from gklearn.gedlib import librariesImport, gedlibpy | from gklearn.gedlib import librariesImport, gedlibpy | ||||
from gklearn.utils import Timer | from gklearn.utils import Timer | ||||
from gklearn.utils.utils import get_graph_kernel_by_name | |||||
# from gklearn.utils.dataset import Dataset | # from gklearn.utils.dataset import Dataset | ||||
class MedianPreimageGenerator(PreimageGenerator): | class MedianPreimageGenerator(PreimageGenerator): | ||||
@@ -81,7 +82,13 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
def run(self): | def run(self): | ||||
self.__set_graph_kernel_by_name() | |||||
self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'], | |||||
node_labels=self._dataset.node_labels, | |||||
edge_labels=self._dataset.edge_labels, | |||||
node_attrs=self._dataset.node_attrs, | |||||
edge_attrs=self._dataset.edge_attrs, | |||||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
kernel_options=self._kernel_options) | |||||
# record start time. | # record start time. | ||||
start = time.time() | start = time.time() | ||||
@@ -180,6 +187,10 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
results['itrs'] = self.__itrs | results['itrs'] = self.__itrs | ||||
results['converged'] = self.__converged | results['converged'] = self.__converged | ||||
results['num_updates_ecc'] = self.__num_updates_ecc | results['num_updates_ecc'] = self.__num_updates_ecc | ||||
results['mge'] = {} | |||||
results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased() | |||||
results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased() | |||||
results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents() | |||||
return results | return results | ||||
@@ -653,27 +664,27 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
ged_env.init(init_option=self.__ged_options['init_option']) | ged_env.init(init_option=self.__ged_options['init_option']) | ||||
# Set up the median graph estimator. | # Set up the median graph estimator. | ||||
mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||||
mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||||
self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
options = self.__mge_options.copy() | options = self.__mge_options.copy() | ||||
if not 'seed' in options: | if not 'seed' in options: | ||||
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. | options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. | ||||
# Select the GED algorithm. | # Select the GED algorithm. | ||||
mge.set_options(mge_options_to_string(options)) | |||||
mge.set_label_names(node_labels=self._dataset.node_labels, | |||||
self.__mge.set_options(mge_options_to_string(options)) | |||||
self.__mge.set_label_names(node_labels=self._dataset.node_labels, | |||||
edge_labels=self._dataset.edge_labels, | edge_labels=self._dataset.edge_labels, | ||||
node_attrs=self._dataset.node_attrs, | node_attrs=self._dataset.node_attrs, | ||||
edge_attrs=self._dataset.edge_attrs) | edge_attrs=self._dataset.edge_attrs) | ||||
mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
self.__mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
self.__mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||||
# Run the estimator. | # Run the estimator. | ||||
mge.run(graph_ids, set_median_id, gen_median_id) | |||||
self.__mge.run(graph_ids, set_median_id, gen_median_id) | |||||
# Get SODs. | # Get SODs. | ||||
self.__sod_set_median = mge.get_sum_of_distances('initialized') | |||||
self.__sod_gen_median = mge.get_sum_of_distances('converged') | |||||
self.__sod_set_median = self.__mge.get_sum_of_distances('initialized') | |||||
self.__sod_gen_median = self.__mge.get_sum_of_distances('converged') | |||||
# Get median graphs. | # Get median graphs. | ||||
self.__set_median = ged_env.get_nx_graph(set_median_id) | self.__set_median = ged_env.get_nx_graph(set_median_id) | ||||
@@ -722,43 +733,6 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
print('distance in kernel space for generalized median:', self.__k_dis_gen_median) | print('distance in kernel space for generalized median:', self.__k_dis_gen_median) | ||||
print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) | print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) | ||||
print('distance in kernel space for each graph in median set:', k_dis_median_set) | print('distance in kernel space for each graph in median set:', k_dis_median_set) | ||||
def __set_graph_kernel_by_name(self): | |||||
if self._kernel_options['name'] == 'ShortestPath': | |||||
from gklearn.kernels import ShortestPath | |||||
self._graph_kernel = ShortestPath(node_labels=self._dataset.node_labels, | |||||
node_attrs=self._dataset.node_attrs, | |||||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
**self._kernel_options) | |||||
elif self._kernel_options['name'] == 'StructuralSP': | |||||
from gklearn.kernels import StructuralSP | |||||
self._graph_kernel = StructuralSP(node_labels=self._dataset.node_labels, | |||||
edge_labels=self._dataset.edge_labels, | |||||
node_attrs=self._dataset.node_attrs, | |||||
edge_attrs=self._dataset.edge_attrs, | |||||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
**self._kernel_options) | |||||
elif self._kernel_options['name'] == 'PathUpToH': | |||||
from gklearn.kernels import PathUpToH | |||||
self._graph_kernel = PathUpToH(node_labels=self._dataset.node_labels, | |||||
edge_labels=self._dataset.edge_labels, | |||||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
**self._kernel_options) | |||||
elif self._kernel_options['name'] == 'Treelet': | |||||
from gklearn.kernels import Treelet | |||||
self._graph_kernel = Treelet(node_labels=self._dataset.node_labels, | |||||
edge_labels=self._dataset.edge_labels, | |||||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
**self._kernel_options) | |||||
elif self._kernel_options['name'] == 'WeisfeilerLehman': | |||||
from gklearn.kernels import WeisfeilerLehman | |||||
self._graph_kernel = WeisfeilerLehman(node_labels=self._dataset.node_labels, | |||||
edge_labels=self._dataset.edge_labels, | |||||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||||
**self._kernel_options) | |||||
else: | |||||
raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WeisfeilerLehman".') | |||||
# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | ||||
@@ -25,7 +25,7 @@ import networkx as nx | |||||
import os | import os | ||||
def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False): | |||||
def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None): | |||||
import os.path | import os.path | ||||
from gklearn.preimage import MedianPreimageGenerator | from gklearn.preimage import MedianPreimageGenerator | ||||
from gklearn.utils import split_dataset_by_target | from gklearn.utils import split_dataset_by_target | ||||
@@ -38,7 +38,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
dataset_all.trim_dataset(edge_required=edge_required) | dataset_all.trim_dataset(edge_required=edge_required) | ||||
if irrelevant_labels is not None: | if irrelevant_labels is not None: | ||||
dataset_all.remove_labels(**irrelevant_labels) | dataset_all.remove_labels(**irrelevant_labels) | ||||
# dataset_all.cut_graphs(range(0, 10)) | |||||
if cut_range is not None: | |||||
dataset_all.cut_graphs(cut_range) | |||||
datasets = split_dataset_by_target(dataset_all) | datasets = split_dataset_by_target(dataset_all) | ||||
if save_results: | if save_results: | ||||
@@ -57,6 +58,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
itrs_list = [] | itrs_list = [] | ||||
converged_list = [] | converged_list = [] | ||||
num_updates_ecc_list = [] | num_updates_ecc_list = [] | ||||
mge_decrease_order_list = [] | |||||
mge_increase_order_list = [] | |||||
mge_converged_order_list = [] | |||||
nb_sod_sm2gm = [0, 0, 0] | nb_sod_sm2gm = [0, 0, 0] | ||||
nb_dis_k_sm2gm = [0, 0, 0] | nb_dis_k_sm2gm = [0, 0, 0] | ||||
nb_dis_k_gi2sm = [0, 0, 0] | nb_dis_k_gi2sm = [0, 0, 0] | ||||
@@ -148,7 +152,10 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
results['runtime_precompute_gm'], results['runtime_optimize_ec'], | results['runtime_precompute_gm'], results['runtime_optimize_ec'], | ||||
results['runtime_generate_preimage'], results['runtime_total'], | results['runtime_generate_preimage'], results['runtime_total'], | ||||
results['itrs'], results['converged'], | results['itrs'], results['converged'], | ||||
results['num_updates_ecc']]) | |||||
results['num_updates_ecc'], | |||||
results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge | |||||
results['mge']['num_increase_order'] > 0, | |||||
results['mge']['num_converged_descents'] > 0]) | |||||
f_detail.close() | f_detail.close() | ||||
# compute result summary. | # compute result summary. | ||||
@@ -164,6 +171,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
itrs_list.append(results['itrs']) | itrs_list.append(results['itrs']) | ||||
converged_list.append(results['converged']) | converged_list.append(results['converged']) | ||||
num_updates_ecc_list.append(results['num_updates_ecc']) | num_updates_ecc_list.append(results['num_updates_ecc']) | ||||
mge_decrease_order_list.append(results['mge']['num_decrease_order'] > 0) | |||||
mge_increase_order_list.append(results['mge']['num_increase_order'] > 0) | |||||
mge_converged_order_list.append(results['mge']['num_converged_descents'] > 0) | |||||
# # SOD SM -> GM | # # SOD SM -> GM | ||||
if results['sod_set_median'] > results['sod_gen_median']: | if results['sod_set_median'] > results['sod_gen_median']: | ||||
nb_sod_sm2gm[0] += 1 | nb_sod_sm2gm[0] += 1 | ||||
@@ -210,7 +220,11 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
results['runtime_precompute_gm'], results['runtime_optimize_ec'], | results['runtime_precompute_gm'], results['runtime_optimize_ec'], | ||||
results['runtime_generate_preimage'], results['runtime_total'], | results['runtime_generate_preimage'], results['runtime_total'], | ||||
results['itrs'], results['converged'], | results['itrs'], results['converged'], | ||||
results['num_updates_ecc'], nb_sod_sm2gm, | |||||
results['num_updates_ecc'], | |||||
results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge | |||||
results['mge']['num_increase_order'] > 0, | |||||
results['mge']['num_converged_descents'] > 0, | |||||
nb_sod_sm2gm, | |||||
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm]) | nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm]) | ||||
f_summary.close() | f_summary.close() | ||||
@@ -256,6 +270,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
itrs_mean = np.mean(itrs_list) | itrs_mean = np.mean(itrs_list) | ||||
num_converged = np.sum(converged_list) | num_converged = np.sum(converged_list) | ||||
num_updates_ecc_mean = np.mean(num_updates_ecc_list) | num_updates_ecc_mean = np.mean(num_updates_ecc_list) | ||||
num_mge_decrease_order = np.sum(mge_decrease_order_list) | |||||
num_mge_increase_order = np.sum(mge_increase_order_list) | |||||
num_mge_converged = np.sum(mge_converged_order_list) | |||||
sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean)) | sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean)) | ||||
dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean)) | dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean)) | ||||
dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean)) | dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean)) | ||||
@@ -270,7 +287,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
dis_k_gi2sm_mean, dis_k_gi2gm_mean, | dis_k_gi2sm_mean, dis_k_gi2gm_mean, | ||||
time_precompute_gm_mean, time_optimize_ec_mean, | time_precompute_gm_mean, time_optimize_ec_mean, | ||||
time_generate_mean, time_total_mean, itrs_mean, | time_generate_mean, time_total_mean, itrs_mean, | ||||
num_converged, num_updates_ecc_mean]) | |||||
num_converged, num_updates_ecc_mean, | |||||
num_mge_decrease_order, num_mge_increase_order, | |||||
num_mge_converged]) | |||||
f_summary.close() | f_summary.close() | ||||
# save total pairwise kernel distances. | # save total pairwise kernel distances. | ||||
@@ -300,7 +319,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||||
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | ||||
'dis_k gi -> GM', 'edit cost constants', 'time precompute gm', | 'dis_k gi -> GM', 'edit cost constants', 'time precompute gm', | ||||
'time optimize ec', 'time generate preimage', 'time total', | 'time optimize ec', 'time generate preimage', 'time total', | ||||
'itrs', 'converged', 'num updates ecc']) | |||||
'itrs', 'converged', 'num updates ecc', 'mge decrease order', | |||||
'mge increase order', 'mge converged']) | |||||
f_detail.close() | f_detail.close() | ||||
# fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | # fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | ||||
@@ -312,7 +332,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||||
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | ||||
'dis_k gi -> GM', 'time precompute gm', 'time optimize ec', | 'dis_k gi -> GM', 'time precompute gm', 'time optimize ec', | ||||
'time generate preimage', 'time total', 'itrs', 'num converged', | 'time generate preimage', 'time total', 'itrs', 'num converged', | ||||
'num updates ecc', '# SOD SM -> GM', '# dis_k SM -> GM', | |||||
'num updates ecc', 'mge num decrease order', 'mge num increase order', | |||||
'mge num converged', '# SOD SM -> GM', '# dis_k SM -> GM', | |||||
'# dis_k gi -> SM', '# dis_k gi -> GM']) | '# dis_k gi -> SM', '# dis_k gi -> GM']) | ||||
# 'repeats better SOD SM -> GM', | # 'repeats better SOD SM -> GM', | ||||
# 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', | # 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', | ||||
@@ -418,6 +439,8 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel=' | |||||
Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | ||||
height=4, base_kernel='subtree', parallel=None, | height=4, base_kernel='subtree', parallel=None, | ||||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
else: | |||||
raise Exception('The graph kernel "', graph_kernel, '" is not defined.') | |||||
# normalization | # normalization | ||||
Kmatrix_diag = Kmatrix.diagonal().copy() | Kmatrix_diag = Kmatrix.diagonal().copy() | ||||
@@ -260,20 +260,20 @@ def test_Treelet(ds_name, parallel): | |||||
@pytest.mark.parametrize('ds_name', ['Acyclic']) | @pytest.mark.parametrize('ds_name', ['Acyclic']) | ||||
#@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) | #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) | ||||
@pytest.mark.parametrize('base_kernel', ['subtree']) | |||||
# @pytest.mark.parametrize('base_kernel', ['subtree']) | |||||
@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | ||||
def test_WeisfeilerLehman(ds_name, parallel, base_kernel): | |||||
"""Test Weisfeiler-Lehman kernel. | |||||
def test_WLSubtree(ds_name, parallel): | |||||
"""Test Weisfeiler-Lehman subtree kernel. | |||||
""" | """ | ||||
from gklearn.kernels import WeisfeilerLehman | |||||
from gklearn.kernels import WLSubtree | |||||
dataset = chooseDataset(ds_name) | dataset = chooseDataset(ds_name) | ||||
try: | try: | ||||
graph_kernel = WeisfeilerLehman(node_labels=dataset.node_labels, | |||||
graph_kernel = WLSubtree(node_labels=dataset.node_labels, | |||||
edge_labels=dataset.edge_labels, | edge_labels=dataset.edge_labels, | ||||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | ds_infos=dataset.get_dataset_infos(keys=['directed']), | ||||
height=2, base_kernel=base_kernel) | |||||
height=2) | |||||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | ||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | ||||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | ||||
@@ -20,4 +20,5 @@ from gklearn.utils.graph_files import load_dataset, save_dataset | |||||
from gklearn.utils.timer import Timer | from gklearn.utils.timer import Timer | ||||
from gklearn.utils.utils import get_graph_kernel_by_name | from gklearn.utils.utils import get_graph_kernel_by_name | ||||
from gklearn.utils.utils import compute_gram_matrices_by_class | from gklearn.utils.utils import compute_gram_matrices_by_class | ||||
from gklearn.utils.utils import SpecialLabel | |||||
from gklearn.utils.trie import Trie | from gklearn.utils.trie import Trie |
@@ -56,13 +56,14 @@ class Dataset(object): | |||||
self.__node_attrs = label_names['node_attrs'] | self.__node_attrs = label_names['node_attrs'] | ||||
self.__edge_labels = label_names['edge_labels'] | self.__edge_labels = label_names['edge_labels'] | ||||
self.__edge_attrs = label_names['edge_attrs'] | self.__edge_attrs = label_names['edge_attrs'] | ||||
self.clean_labels() | |||||
def load_graphs(self, graphs, targets=None):
    """Store the given graphs and, optionally, their targets.

    Only the graphs and targets are replaced here; the label names are
    left untouched, so this call has to be followed by set_labels().
    """
    self.__graphs, self.__targets = graphs, targets
    # @todo: self.set_labels_attrs()
def load_predefined_dataset(self, ds_name): | def load_predefined_dataset(self, ds_name): | ||||
@@ -89,6 +90,9 @@ class Dataset(object): | |||||
elif ds_name == 'Cuneiform': | elif ds_name == 'Cuneiform': | ||||
ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
elif ds_name == 'DD': | |||||
ds_file = current_path + '../../datasets/DD/DD_A.txt' | |||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'Fingerprint': | elif ds_name == 'Fingerprint': | ||||
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
@@ -113,6 +117,9 @@ class Dataset(object): | |||||
elif ds_name == 'MUTAG': | elif ds_name == 'MUTAG': | ||||
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' | ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
elif ds_name == 'PAH': | |||||
ds_file = current_path + '../../datasets/PAH/dataset.ds' | |||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'SYNTHETIC': | elif ds_name == 'SYNTHETIC': | ||||
pass | pass | ||||
elif ds_name == 'SYNTHETICnew': | elif ds_name == 'SYNTHETICnew': | ||||
@@ -120,11 +127,14 @@ class Dataset(object): | |||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | self.__graphs, self.__targets, label_names = load_dataset(ds_file) | ||||
elif ds_name == 'Synthie': | elif ds_name == 'Synthie': | ||||
pass | pass | ||||
else: | |||||
raise Exception('The dataset name "', ds_name, '" is not pre-defined.') | |||||
self.__node_labels = label_names['node_labels'] | self.__node_labels = label_names['node_labels'] | ||||
self.__node_attrs = label_names['node_attrs'] | self.__node_attrs = label_names['node_attrs'] | ||||
self.__edge_labels = label_names['edge_labels'] | self.__edge_labels = label_names['edge_labels'] | ||||
self.__edge_attrs = label_names['edge_attrs'] | self.__edge_attrs = label_names['edge_attrs'] | ||||
self.clean_labels() | |||||
def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | ||||
@@ -138,27 +148,27 @@ class Dataset(object): | |||||
# @todo: remove labels which have only one possible values. | # @todo: remove labels which have only one possible values. | ||||
if node_labels is None: | if node_labels is None: | ||||
self.__node_labels = self.__graphs[0].graph['node_labels'] | self.__node_labels = self.__graphs[0].graph['node_labels'] | ||||
# # graphs are considered node unlabeled if all nodes have the same label. | |||||
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||||
# # graphs are considered node unlabeled if all nodes have the same label. | |||||
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||||
if node_attrs is None: | if node_attrs is None: | ||||
self.__node_attrs = self.__graphs[0].graph['node_attrs'] | self.__node_attrs = self.__graphs[0].graph['node_attrs'] | ||||
# for G in Gn: | |||||
# for n in G.nodes(data=True): | |||||
# if 'attributes' in n[1]: | |||||
# return len(n[1]['attributes']) | |||||
# return 0 | |||||
# for G in Gn: | |||||
# for n in G.nodes(data=True): | |||||
# if 'attributes' in n[1]: | |||||
# return len(n[1]['attributes']) | |||||
# return 0 | |||||
if edge_labels is None: | if edge_labels is None: | ||||
self.__edge_labels = self.__graphs[0].graph['edge_labels'] | self.__edge_labels = self.__graphs[0].graph['edge_labels'] | ||||
# # graphs are considered edge unlabeled if all edges have the same label. | |||||
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||||
# # graphs are considered edge unlabeled if all edges have the same label. | |||||
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||||
if edge_attrs is None: | if edge_attrs is None: | ||||
self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] | self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] | ||||
# for G in Gn: | |||||
# if nx.number_of_edges(G) > 0: | |||||
# for e in G.edges(data=True): | |||||
# if 'attributes' in e[2]: | |||||
# return len(e[2]['attributes']) | |||||
# return 0 | |||||
# for G in Gn: | |||||
# if nx.number_of_edges(G) > 0: | |||||
# for e in G.edges(data=True): | |||||
# if 'attributes' in e[2]: | |||||
# return len(e[2]['attributes']) | |||||
# return 0 | |||||
def get_dataset_infos(self, keys=None): | def get_dataset_infos(self, keys=None): | ||||
@@ -323,7 +333,7 @@ class Dataset(object): | |||||
if self.__node_label_nums is None: | if self.__node_label_nums is None: | ||||
self.__node_label_nums = {} | self.__node_label_nums = {} | ||||
for node_label in self.__node_labels: | for node_label in self.__node_labels: | ||||
self.__node_label_nums[node_label] = self.get_node_label_num(node_label) | |||||
self.__node_label_nums[node_label] = self.__get_node_label_num(node_label) | |||||
infos['node_label_nums'] = self.__node_label_nums | infos['node_label_nums'] = self.__node_label_nums | ||||
if 'edge_label_dim' in keys: | if 'edge_label_dim' in keys: | ||||
@@ -335,7 +345,7 @@ class Dataset(object): | |||||
if self.__edge_label_nums is None: | if self.__edge_label_nums is None: | ||||
self.__edge_label_nums = {} | self.__edge_label_nums = {} | ||||
for edge_label in self.__edge_labels: | for edge_label in self.__edge_labels: | ||||
self.__edge_label_nums[edge_label] = self.get_edge_label_num(edge_label) | |||||
self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label) | |||||
infos['edge_label_nums'] = self.__edge_label_nums | infos['edge_label_nums'] = self.__edge_label_nums | ||||
if 'directed' in keys or 'substructures' in keys: | if 'directed' in keys or 'substructures' in keys: | ||||
@@ -411,33 +421,95 @@ class Dataset(object): | |||||
def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
    """Remove the given labels and attributes from every graph of the dataset.

    Names that are not currently registered in the dataset are ignored.
    The names that are removed are also dropped from the dataset's
    label / attribute name lists.

    Parameters
    ----------
    node_labels, edge_labels, node_attrs, edge_attrs : list
        Names of the node labels, edge labels, node attributes and edge
        attributes to remove. The defaults are never mutated.
    """
    # Restrict the request to names the dataset actually knows about.
    node_labels = [item for item in node_labels if item in self.__node_labels]
    edge_labels = [item for item in edge_labels if item in self.__edge_labels]
    node_attrs = [item for item in node_attrs if item in self.__node_attrs]
    edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs]

    node_names = node_labels + node_attrs
    edge_names = edge_labels + edge_attrs
    for g in self.__graphs:
        for nd in g.nodes():
            for name in node_names:
                # pop() instead of del: a registered name is not guaranteed
                # to be present on every single node.
                g.nodes[nd].pop(name, None)
        for ed in g.edges():
            for name in edge_names:
                g.edges[ed].pop(name, None)

    # Only rebuild the name lists that are actually affected.
    if node_labels:
        self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels]
    if edge_labels:
        self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels]
    if node_attrs:
        self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs]
    if edge_attrs:
        self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs]
def clean_labels(self):
    """Drop label and attribute names that do not vary over the dataset.

    A name is kept only if it takes at least two distinct values over all
    graphs. Any other name (a single constant value, or no value at all)
    is removed from every graph and from the corresponding name list.
    Node labels, edge labels, node attributes and edge attributes are
    processed in that order.
    """
    def _keep_varying(names, get_values, get_view):
        # Return the names with >= 2 distinct values; strip the others
        # from the graphs. `get_values(G, name)` maps elements to values,
        # `get_view(G)` gives the node or edge view of a graph.
        kept = []
        for name in names:
            seen = set()
            for G in self.__graphs:
                seen |= set(get_values(G, name).values())
                if len(seen) > 1:
                    # Two distinct values are enough; stop scanning.
                    kept.append(name)
                    break
            else:
                # Fewer than two distinct values: the name is useless.
                for G in self.__graphs:
                    view = get_view(G)
                    for key in view():
                        # pop() instead of del: the name may be missing
                        # on some (or all) elements.
                        view[key].pop(name, None)
        return kept

    def _node_view(G):
        return G.nodes

    def _edge_view(G):
        return G.edges

    self.__node_labels = _keep_varying(self.__node_labels, nx.get_node_attributes, _node_view)
    self.__edge_labels = _keep_varying(self.__edge_labels, nx.get_edge_attributes, _edge_view)
    self.__node_attrs = _keep_varying(self.__node_attrs, nx.get_node_attributes, _node_view)
    self.__edge_attrs = _keep_varying(self.__edge_attrs, nx.get_edge_attributes, _edge_view)
def cut_graphs(self, range_):
    """Keep only the graphs (and targets) at the given indices.

    Parameters
    ----------
    range_ : iterable of int
        Indices of the graphs to keep, in the order they should appear.

    After the selection, clean_labels() is called on the reduced dataset.
    """
    # Materialize once: a one-shot iterator would be exhausted by the graph
    # selection and would leave the targets empty.
    indices = list(range_)
    self.__graphs = [self.__graphs[i] for i in indices]
    if self.__targets is not None:
        self.__targets = [self.__targets[i] for i in indices]
    self.clean_labels()
def trim_dataset(self, edge_required=False): | def trim_dataset(self, edge_required=False): | ||||
@@ -448,8 +520,7 @@ class Dataset(object): | |||||
idx = [p[0] for p in trimed_pairs] | idx = [p[0] for p in trimed_pairs] | ||||
self.__graphs = [p[1] for p in trimed_pairs] | self.__graphs = [p[1] for p in trimed_pairs] | ||||
self.__targets = [self.__targets[i] for i in idx] | self.__targets = [self.__targets[i] for i in idx] | ||||
# @todo | |||||
# self.set_labels_attrs() | |||||
self.clean_labels() | |||||
def __get_dataset_size(self): | def __get_dataset_size(self): | ||||
@@ -652,4 +723,5 @@ def split_dataset_by_target(dataset): | |||||
sub_dataset.load_graphs(sub_graphs, [key] * len(val)) | sub_dataset.load_graphs(sub_graphs, [key] * len(val)) | ||||
sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs) | sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs) | ||||
datasets.append(sub_dataset) | datasets.append(sub_dataset) | ||||
# @todo: clean_labels? | |||||
return datasets | return datasets |
@@ -63,7 +63,7 @@ def load_dataset(filename, filename_targets=None, gformat=None, **kwargs): | |||||
return data, y, label_names | return data, y, label_names | ||||
def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=None): | |||||
def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', **kwargs): | |||||
"""Save list of graphs. | """Save list of graphs. | ||||
""" | """ | ||||
import os | import os | ||||
@@ -73,22 +73,22 @@ def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=Non | |||||
if not os.path.exists(dirname_ds) : | if not os.path.exists(dirname_ds) : | ||||
os.makedirs(dirname_ds) | os.makedirs(dirname_ds) | ||||
if xparams is not None and 'graph_dir' in xparams: | |||||
graph_dir = xparams['graph_dir'] + '/' | |||||
if 'graph_dir' in kwargs: | |||||
graph_dir = kwargs['graph_dir'] + '/' | |||||
if not os.path.exists(graph_dir): | if not os.path.exists(graph_dir): | ||||
os.makedirs(graph_dir) | os.makedirs(graph_dir) | ||||
del kwargs['graph_dir'] | |||||
else: | else: | ||||
graph_dir = dirname_ds | graph_dir = dirname_ds | ||||
if group == 'xml' and gformat == 'gxl': | if group == 'xml' and gformat == 'gxl': | ||||
kwargs = {'method': xparams['method']} if xparams is not None else {} | |||||
with open(filename + '.xml', 'w') as fgroup: | with open(filename + '.xml', 'w') as fgroup: | ||||
fgroup.write("<?xml version=\"1.0\"?>") | fgroup.write("<?xml version=\"1.0\"?>") | ||||
fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">") | fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">") | ||||
fgroup.write("\n<GraphCollection>") | fgroup.write("\n<GraphCollection>") | ||||
for idx, g in enumerate(Gn): | for idx, g in enumerate(Gn): | ||||
fname_tmp = "graph" + str(idx) + ".gxl" | fname_tmp = "graph" + str(idx) + ".gxl" | ||||
saveGXL(g, graph_dir + fname_tmp, **kwargs) | |||||
save_gxl(g, graph_dir + fname_tmp, **kwargs) | |||||
fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") | fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") | ||||
fgroup.write("\n</GraphCollection>") | fgroup.write("\n</GraphCollection>") | ||||
fgroup.close() | fgroup.close() | ||||
@@ -226,7 +226,7 @@ def load_gxl(filename): # @todo: directed graphs. | |||||
return g, label_names | return g, label_names | ||||
def saveGXL(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||||
def save_gxl(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||||
if method == 'default': | if method == 'default': | ||||
gxl_file = open(filename, 'w') | gxl_file = open(filename, 'w') | ||||
gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") | gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") | ||||
@@ -1,6 +1,7 @@ | |||||
import networkx as nx | import networkx as nx | ||||
import numpy as np | import numpy as np | ||||
from copy import deepcopy | from copy import deepcopy | ||||
from enum import Enum, auto | |||||
#from itertools import product | #from itertools import product | ||||
# from tqdm import tqdm | # from tqdm import tqdm | ||||
@@ -299,21 +300,59 @@ def get_edge_labels(Gn, edge_label): | |||||
def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}):
    """Build the graph kernel object whose class is called `name`.

    The kernel class is imported lazily from gklearn.kernels and
    instantiated with the label / attribute names it accepts, `ds_infos`,
    and every entry of `kernel_options` as extra keyword arguments.

    Raises
    ------
    Exception
        If `name` is not one of the supported kernel names.
    """
    if name == 'ShortestPath':
        from gklearn.kernels import ShortestPath
        return ShortestPath(node_labels=node_labels, node_attrs=node_attrs,
                            ds_infos=ds_infos, **kernel_options)

    if name == 'StructuralSP':
        from gklearn.kernels import StructuralSP
        return StructuralSP(node_labels=node_labels, edge_labels=edge_labels,
                            node_attrs=node_attrs, edge_attrs=edge_attrs,
                            ds_infos=ds_infos, **kernel_options)

    if name == 'PathUpToH':
        from gklearn.kernels import PathUpToH
        return PathUpToH(node_labels=node_labels, edge_labels=edge_labels,
                         ds_infos=ds_infos, **kernel_options)

    if name == 'Treelet':
        from gklearn.kernels import Treelet
        return Treelet(node_labels=node_labels, edge_labels=edge_labels,
                       ds_infos=ds_infos, **kernel_options)

    if name == 'WLSubtree':
        from gklearn.kernels import WLSubtree
        return WLSubtree(node_labels=node_labels, edge_labels=edge_labels,
                         ds_infos=ds_infos, **kernel_options)

    if name == 'WeisfeilerLehman':
        from gklearn.kernels import WeisfeilerLehman
        return WeisfeilerLehman(node_labels=node_labels, edge_labels=edge_labels,
                                ds_infos=ds_infos, **kernel_options)

    # No known kernel matched.
    raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WLSubtree", "WeisfeilerLehman".')
def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None): | |||||
def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False): | |||||
import os | |||||
from gklearn.utils import Dataset, split_dataset_by_target | from gklearn.utils import Dataset, split_dataset_by_target | ||||
# 1. get dataset. | # 1. get dataset. | ||||
print('1. getting dataset...') | print('1. getting dataset...') | ||||
dataset_all = Dataset() | dataset_all = Dataset() | ||||
dataset_all.load_predefined_dataset(ds_name) | dataset_all.load_predefined_dataset(ds_name) | ||||
dataset_all.trim_dataset(edge_required=edge_required) | |||||
if not irrelevant_labels is None: | if not irrelevant_labels is None: | ||||
dataset_all.remove_labels(**irrelevant_labels) | dataset_all.remove_labels(**irrelevant_labels) | ||||
# dataset_all.cut_graphs(range(0, 10)) | # dataset_all.cut_graphs(range(0, 10)) | ||||
@@ -349,6 +388,8 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d | |||||
print() | print() | ||||
print('4. saving results...') | print('4. saving results...') | ||||
if save_results: | if save_results: | ||||
if not os.path.exists(dir_save): | |||||
os.makedirs(dir_save) | |||||
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) | np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) | ||||
print('\ncomplete.') | print('\ncomplete.') | ||||
@@ -424,4 +465,10 @@ def get_mlti_dim_edge_attrs(G, attr_names): | |||||
attributes = [] | attributes = [] | ||||
for ed, attrs in G.edges(data=True): | for ed, attrs in G.edges(data=True): | ||||
attributes.append(tuple(attrs[aname] for aname in attr_names)) | attributes.append(tuple(attrs[aname] for aname in attr_names)) | ||||
return attributes | |||||
return attributes | |||||
class SpecialLabel(Enum):
    """Can be used to define special labels.
    """
    # auto must be *called*; the bare name would make the `auto` class itself
    # the member value instead of an automatically assigned integer.
    DUMMY = auto()  # The dummy label.