@@ -0,0 +1,28 @@ | |||
environment: | |||
matrix: | |||
- PYTHON: "C:\\Python35" | |||
- PYTHON: "C:\\Python35-x64" | |||
- PYTHON: "C:\\Python36" | |||
- PYTHON: "C:\\Python36-x64" | |||
- PYTHON: "C:\\Python37" | |||
- PYTHON: "C:\\Python37-x64" | |||
- PYTHON: "C:\\Python38" | |||
- PYTHON: "C:\\Python38-x64" | |||
# skip_commits: | |||
# files: | |||
# - "*.yml" | |||
# - "*.rst" | |||
# - "LICENSE" | |||
install: | |||
- "%PYTHON%\\python.exe -m pip install -U pip" | |||
- "%PYTHON%\\python.exe -m pip install -U pytest" | |||
- "%PYTHON%\\python.exe -m pip install -r requirements.txt" | |||
- "%PYTHON%\\python.exe -m pip install wheel" | |||
build: off | |||
test_script: | |||
- "%PYTHON%\\python.exe setup.py bdist_wheel" | |||
- "%PYTHON%\\python.exe -m pytest -v gklearn/tests/" |
@@ -1,5 +1,6 @@ | |||
# graphkit-learn | |||
[](https://travis-ci.org/jajupmochi/graphkit-learn) | |||
[](https://ci.appveyor.com/project/jajupmochi/graphkit-learn) | |||
[](https://codecov.io/gh/jajupmochi/graphkit-learn) | |||
[](https://graphkit-learn.readthedocs.io/en/master/?badge=master) | |||
[](https://badge.fury.io/py/graphkit-learn) | |||
@@ -1 +1,2 @@ | |||
from gklearn.ged.env.common_types import AlgorithmState | |||
from gklearn.ged.env.common_types import AlgorithmState | |||
from gklearn.ged.env.node_map import NodeMap |
@@ -0,0 +1,80 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Apr 22 11:31:26 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
class NodeMap(object):
    """A node map (assignment) between a source graph g and a target graph h.

    Mirrors gedlib's C++ ``ged::NodeMap``. Unassigned nodes are marked with
    the sentinel ``np.inf``. The map also stores the edit cost induced by
    the assignment (``np.inf`` until explicitly set).
    """

    def __init__(self, num_nodes_g, num_nodes_h):
        # forward_map[i]: image in h of source node i (np.inf if unassigned).
        self.__forward_map = [np.inf] * num_nodes_g
        # backward_map[k]: pre-image in g of target node k (np.inf if unassigned).
        self.__backward_map = [np.inf] * num_nodes_h
        self.__induced_cost = np.inf

    def num_source_nodes(self):
        """Return the number of nodes in the source graph g."""
        return len(self.__forward_map)

    def num_target_nodes(self):
        """Return the number of nodes in the target graph h."""
        return len(self.__backward_map)

    def image(self, node):
        """Return the image of source node ``node`` (np.inf if unassigned).

        Raises an Exception if ``node`` is not a valid source node ID.
        """
        if node < len(self.__forward_map):
            return self.__forward_map[node]
        # Fix: build one message string instead of passing the fragments as
        # separate Exception arguments (which rendered as a tuple); the old
        # `return np.inf` after the raise was unreachable and is removed.
        raise Exception('The node with ID ' + str(node) + ' is not contained in the source nodes of the node map.')

    def pre_image(self, node):
        """Return the pre-image of target node ``node`` (np.inf if unassigned).

        Raises an Exception if ``node`` is not a valid target node ID.
        """
        if node < len(self.__backward_map):
            return self.__backward_map[node]
        raise Exception('The node with ID ' + str(node) + ' is not contained in the target nodes of the node map.')

    def get_forward_map(self):
        """Return the forward map (list indexed by source node ID)."""
        return self.__forward_map

    def get_backward_map(self):
        """Return the backward map (list indexed by target node ID)."""
        return self.__backward_map

    def as_relation(self, relation):
        """Fill ``relation`` (cleared first) with the map as (i, k) pairs.

        Substitutions/deletions come from the forward map; node insertions
        are appended as (np.inf, k) pairs, matching gedlib's semantics.
        """
        relation.clear()
        for i in range(0, len(self.__forward_map)):
            k = self.__forward_map[i]
            if k != np.inf:
                relation.append(tuple((i, k)))
        for k in range(0, len(self.__backward_map)):
            i = self.__backward_map[k]
            # Insertions: target nodes with no (finite) pre-image.
            if i == np.inf:
                relation.append(tuple((i, k)))

    def add_assignment(self, i, k):
        """Record the assignment of source node ``i`` to target node ``k``.

        Either side may be np.inf (deletion / insertion). Raises an
        Exception if a finite ID is out of range for its graph.
        """
        if i != np.inf:
            if i < len(self.__forward_map):
                self.__forward_map[i] = k
            else:
                raise Exception('The node with ID ' + str(i) + ' is not contained in the source nodes of the node map.')
        if k != np.inf:
            if k < len(self.__backward_map):
                self.__backward_map[k] = i
            else:
                raise Exception('The node with ID ' + str(k) + ' is not contained in the target nodes of the node map.')

    def set_induced_cost(self, induced_cost):
        """Store the edit cost induced by this node map."""
        self.__induced_cost = induced_cost

    def induced_cost(self):
        """Return the stored induced edit cost (np.inf if never set)."""
        return self.__induced_cost
@@ -7,11 +7,10 @@ Created on Mon Mar 16 17:26:40 2020 | |||
""" | |||
def test_median_graph_estimator(): | |||
from gklearn.utils.graphfiles import loadDataset | |||
from gklearn.utils import load_dataset | |||
from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | |||
from gklearn.gedlib import librariesImport, gedlibpy | |||
from gklearn.preimage.utils import get_same_item_indices | |||
from gklearn.preimage.ged import convertGraph | |||
import multiprocessing | |||
# estimator parameters. | |||
@@ -22,17 +21,17 @@ def test_median_graph_estimator(): | |||
# algorithm parameters. | |||
algo = 'IPFP' | |||
initial_solutions = 40 | |||
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1' | |||
initial_solutions = 1 | |||
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE ' | |||
edit_cost_name = 'LETTER2' | |||
edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001] | |||
ds_name = 'COIL-DEL' | |||
ds_name = 'Letter_high' | |||
# Load dataset. | |||
# dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt' | |||
dataset = '../../../datasets/Letter-high/Letter-high_A.txt' | |||
Gn, y_all = loadDataset(dataset) | |||
Gn, y_all, label_names = load_dataset(dataset) | |||
y_idx = get_same_item_indices(y_all) | |||
for i, (y, values) in enumerate(y_idx.items()): | |||
Gn_i = [Gn[val] for val in values] | |||
@@ -43,7 +42,7 @@ def test_median_graph_estimator(): | |||
# gedlibpy.restart_env() | |||
ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | |||
for G in Gn_i: | |||
ged_env.add_nx_graph(convertGraph(G, edit_cost_name), '') | |||
ged_env.add_nx_graph(G, '') | |||
graph_ids = ged_env.get_all_graph_ids() | |||
set_median_id = ged_env.add_graph('set_median') | |||
gen_median_id = ged_env.add_graph('gen_median') | |||
@@ -54,11 +53,89 @@ def test_median_graph_estimator(): | |||
mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | |||
mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | |||
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --refine FALSE'# @todo: std::to_string(rng()) | |||
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO '# @todo: std::to_string(rng()) | |||
# Select the GED algorithm. | |||
algo_options = '--threads ' + str(threads) + algo_options_suffix | |||
mge.set_options(mge_options) | |||
mge.set_label_names(node_labels=label_names['node_labels'], | |||
edge_labels=label_names['edge_labels'], | |||
node_attrs=label_names['node_attrs'], | |||
edge_attrs=label_names['edge_attrs']) | |||
mge.set_init_method(algo, algo_options) | |||
mge.set_descent_method(algo, algo_options) | |||
# Run the estimator. | |||
mge.run(graph_ids, set_median_id, gen_median_id) | |||
# Get SODs. | |||
sod_sm = mge.get_sum_of_distances('initialized') | |||
sod_gm = mge.get_sum_of_distances('converged') | |||
print('sod_sm, sod_gm: ', sod_sm, sod_gm) | |||
# Get median graphs. | |||
set_median = ged_env.get_nx_graph(set_median_id) | |||
gen_median = ged_env.get_nx_graph(gen_median_id) | |||
return set_median, gen_median | |||
def test_median_graph_estimator_symb(): | |||
from gklearn.utils import load_dataset | |||
from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | |||
from gklearn.gedlib import librariesImport, gedlibpy | |||
from gklearn.preimage.utils import get_same_item_indices | |||
import multiprocessing | |||
# estimator parameters. | |||
init_type = 'MEDOID' | |||
num_inits = 1 | |||
threads = multiprocessing.cpu_count() | |||
time_limit = 60000 | |||
# algorithm parameters. | |||
algo = 'IPFP' | |||
initial_solutions = 1 | |||
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE ' | |||
edit_cost_name = 'CONSTANT' | |||
edit_cost_constants = [4, 4, 2, 1, 1, 1] | |||
ds_name = 'MUTAG' | |||
# Load dataset. | |||
dataset = '../../../datasets/MUTAG/MUTAG_A.txt' | |||
Gn, y_all, label_names = load_dataset(dataset) | |||
y_idx = get_same_item_indices(y_all) | |||
for i, (y, values) in enumerate(y_idx.items()): | |||
Gn_i = [Gn[val] for val in values] | |||
break | |||
Gn_i = Gn_i[0:10] | |||
# Set up the environment. | |||
ged_env = gedlibpy.GEDEnv() | |||
# gedlibpy.restart_env() | |||
ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | |||
for G in Gn_i: | |||
ged_env.add_nx_graph(G, '') | |||
graph_ids = ged_env.get_all_graph_ids() | |||
set_median_id = ged_env.add_graph('set_median') | |||
gen_median_id = ged_env.add_graph('gen_median') | |||
ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES') | |||
# Set up the estimator. | |||
mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name)) | |||
mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | |||
mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | |||
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE'# @todo: std::to_string(rng()) | |||
# Select the GED algorithm. | |||
algo_options = '--threads ' + str(threads) + algo_options_suffix | |||
mge.set_options(mge_options) | |||
mge.set_label_names(node_labels=label_names['node_labels'], | |||
edge_labels=label_names['edge_labels'], | |||
node_attrs=label_names['node_attrs'], | |||
edge_attrs=label_names['edge_attrs']) | |||
mge.set_init_method(algo, algo_options) | |||
mge.set_descent_method(algo, algo_options) | |||
@@ -78,4 +155,5 @@ def test_median_graph_estimator(): | |||
if __name__ == '__main__': | |||
set_median, gen_median = test_median_graph_estimator() | |||
set_median, gen_median = test_median_graph_estimator() | |||
# set_median, gen_median = test_median_graph_estimator_symb() |
@@ -30,6 +30,8 @@ def mge_options_to_string(options): | |||
opt_str += '--randomness ' + str(val) + ' ' | |||
elif key == 'verbose': | |||
opt_str += '--stdout ' + str(val) + ' ' | |||
elif key == 'update_order': | |||
opt_str += '--update-order ' + ('TRUE' if val else 'FALSE') + ' ' | |||
elif key == 'refine': | |||
opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' ' | |||
elif key == 'time_limit': | |||
@@ -35,8 +35,8 @@ from libcpp.pair cimport pair | |||
from libcpp.list cimport list | |||
#Long unsigned int equivalent | |||
cimport numpy as np | |||
ctypedef np.npy_uint32 UINT32_t | |||
cimport numpy as cnp | |||
ctypedef cnp.npy_uint32 UINT32_t | |||
from cpython cimport array | |||
@@ -76,14 +76,14 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged": | |||
void runMethod(size_t g, size_t h) except + | |||
double getUpperBound(size_t g, size_t h) except + | |||
double getLowerBound(size_t g, size_t h) except + | |||
vector[np.npy_uint64] getForwardMap(size_t g, size_t h) except + | |||
vector[np.npy_uint64] getBackwardMap(size_t g, size_t h) except + | |||
vector[cnp.npy_uint64] getForwardMap(size_t g, size_t h) except + | |||
vector[cnp.npy_uint64] getBackwardMap(size_t g, size_t h) except + | |||
size_t getNodeImage(size_t g, size_t h, size_t nodeId) except + | |||
size_t getNodePreImage(size_t g, size_t h, size_t nodeId) except + | |||
double getInducedCost(size_t g, size_t h) except + | |||
vector[pair[size_t,size_t]] getNodeMap(size_t g, size_t h) except + | |||
vector[vector[int]] getAssignmentMatrix(size_t g, size_t h) except + | |||
vector[vector[np.npy_uint64]] getAllMap(size_t g, size_t h) except + | |||
vector[vector[cnp.npy_uint64]] getAllMap(size_t g, size_t h) except + | |||
double getRuntime(size_t g, size_t h) except + | |||
bool quasimetricCosts() except + | |||
vector[vector[size_t]] hungarianLSAP(vector[vector[size_t]] matrixCost) except + | |||
@@ -105,14 +105,16 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged": | |||
map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except + | |||
string getInitType() except + | |||
# double getNodeCost(size_t label1, size_t label2) except + | |||
void computeInducedCost(size_t g_id, size_t h_id) except + | |||
double computeInducedCost(size_t g_id, size_t h_id, vector[pair[size_t,size_t]]) except + | |||
############################# | |||
##CYTHON WRAPPER INTERFACES## | |||
############################# | |||
import numpy as np | |||
import networkx as nx | |||
from gklearn.ged.env import NodeMap | |||
# import librariesImport | |||
from ctypes import * | |||
@@ -726,13 +728,30 @@ cdef class GEDEnv: | |||
:type g: size_t | |||
:type h: size_t | |||
:return: The Node Map between the two selected graph. | |||
:rtype: list[tuple(size_t, size_t)] | |||
:rtype: gklearn.ged.env.NodeMap. | |||
.. seealso:: run_method(), get_forward_map(), get_backward_map(), get_node_image(), get_node_pre_image(), get_assignment_matrix() | |||
.. warning:: run_method() between the same two graph must be called before this function. | |||
.. note:: This function creates datas so use it if necessary, however you can understand how assignement works with this example. | |||
""" | |||
return self.c_env.getNodeMap(g, h) | |||
map_as_relation = self.c_env.getNodeMap(g, h) | |||
induced_cost = self.c_env.getInducedCost(g, h) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary. | |||
source_map = [item.first if item.first < len(map_as_relation) else np.inf for item in map_as_relation] # item.first < len(map_as_relation) is not exactly correct. | |||
# print(source_map) | |||
target_map = [item.second if item.second < len(map_as_relation) else np.inf for item in map_as_relation] | |||
# print(target_map) | |||
num_node_source = len([item for item in source_map if item != np.inf]) | |||
# print(num_node_source) | |||
num_node_target = len([item for item in target_map if item != np.inf]) | |||
# print(num_node_target) | |||
node_map = NodeMap(num_node_source, num_node_target) | |||
# print(node_map.get_forward_map(), node_map.get_backward_map()) | |||
for i in range(len(source_map)): | |||
node_map.add_assignment(source_map[i], target_map[i]) | |||
node_map.set_induced_cost(induced_cost) | |||
return node_map | |||
def get_assignment_matrix(self, g, h) : | |||
@@ -1320,7 +1339,7 @@ cdef class GEDEnv: | |||
return graph_id | |||
def compute_induced_cost(self, g_id, h_id): | |||
def compute_induced_cost(self, g_id, h_id, node_map): | |||
""" | |||
Computes the edit cost between two graphs induced by a node map. | |||
@@ -1330,19 +1349,25 @@ cdef class GEDEnv: | |||
ID of input graph. | |||
h_id : int | |||
ID of input graph. | |||
node_map: gklearn.ged.env.NodeMap. | |||
The NodeMap instance whose reduced cost will be computed and re-assigned. | |||
Returns | |||
------- | |||
None. | |||
Notes | |||
----- | |||
The induced edit cost of the node map between `g_id` and `h_id` is implictly computed and stored in `GEDEnv::node_maps_`. | |||
""" | |||
cost = 0.0 | |||
self.c_env.computeInducedCost(g_id, h_id) | |||
None. | |||
""" | |||
relation = [] | |||
node_map.as_relation(relation) | |||
# print(relation) | |||
dummy_node = get_dummy_node() | |||
# print(dummy_node) | |||
for i, val in enumerate(relation): | |||
val1 = dummy_node if val[0] == np.inf else val[0] | |||
val2 = dummy_node if val[1] == np.inf else val[1] | |||
relation[i] = tuple((val1, val2)) | |||
# print(relation) | |||
induced_cost = self.c_env.computeInducedCost(g_id, h_id, relation) | |||
node_map.set_induced_cost(induced_cost) | |||
##################################################################### | |||
@@ -475,8 +475,9 @@ public: | |||
* @brief Computes the edit cost between two graphs induced by a node map. | |||
* @param[in] g_id ID of input graph. | |||
* @param[in] h_id ID of input graph. | |||
* @return Computed induced cost. | |||
*/ | |||
void computeInducedCost(std::size_t g_id, std::size_t h_id) const; | |||
double computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const; | |||
// /*! | |||
// * @brief Returns node relabeling, insertion, or deletion cost. | |||
@@ -492,7 +493,7 @@ public: | |||
private: | |||
ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> env; // environment variable | |||
ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> * env_; // environment variable | |||
bool initialized; // initialization boolean (because env has one but not accessible) | |||
@@ -277,11 +277,16 @@ std::string toStringVectorInt(std::vector<unsigned long int> vector) { | |||
PyGEDEnv::PyGEDEnv () { | |||
this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
this->initialized = false; | |||
} | |||
PyGEDEnv::~PyGEDEnv () {} | |||
PyGEDEnv::~PyGEDEnv () { | |||
if (env_ != NULL) { | |||
delete env_; | |||
env_ = NULL; | |||
} | |||
} | |||
// bool initialized = false; //Initialization boolean (because Env has one but not accessible). | |||
@@ -290,64 +295,68 @@ bool PyGEDEnv::isInitialized() { | |||
} | |||
void PyGEDEnv::restartEnv() { | |||
this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
if (env_ != NULL) { | |||
delete env_; | |||
env_ = NULL; | |||
} | |||
env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
initialized = false; | |||
} | |||
void PyGEDEnv::loadGXLGraph(const std::string & pathFolder, const std::string & pathXML, bool node_type, bool edge_type) { | |||
std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(this->env.load_gxl_graph(pathFolder, pathXML, | |||
std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(env_->load_gxl_graph(pathFolder, pathXML, | |||
(node_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | |||
(edge_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | |||
std::unordered_set<std::string>(), std::unordered_set<std::string>())); | |||
} | |||
std::pair<std::size_t,std::size_t> PyGEDEnv::getGraphIds() const { | |||
return this->env.graph_ids(); | |||
return env_->graph_ids(); | |||
} | |||
std::vector<std::size_t> PyGEDEnv::getAllGraphIds() { | |||
std::vector<std::size_t> listID; | |||
for (std::size_t i = this->env.graph_ids().first; i != this->env.graph_ids().second; i++) { | |||
for (std::size_t i = env_->graph_ids().first; i != env_->graph_ids().second; i++) { | |||
listID.push_back(i); | |||
} | |||
return listID; | |||
} | |||
const std::string PyGEDEnv::getGraphClass(std::size_t id) const { | |||
return this->env.get_graph_class(id); | |||
return env_->get_graph_class(id); | |||
} | |||
const std::string PyGEDEnv::getGraphName(std::size_t id) const { | |||
return this->env.get_graph_name(id); | |||
return env_->get_graph_name(id); | |||
} | |||
std::size_t PyGEDEnv::addGraph(const std::string & graph_name, const std::string & graph_class) { | |||
ged::GEDGraph::GraphID newId = this->env.add_graph(graph_name, graph_class); | |||
ged::GEDGraph::GraphID newId = env_->add_graph(graph_name, graph_class); | |||
initialized = false; | |||
return std::stoi(std::to_string(newId)); | |||
} | |||
void PyGEDEnv::addNode(std::size_t graphId, const std::string & nodeId, const std::map<std::string, std::string> & nodeLabel) { | |||
this->env.add_node(graphId, nodeId, nodeLabel); | |||
env_->add_node(graphId, nodeId, nodeLabel); | |||
initialized = false; | |||
} | |||
/*void addEdge(std::size_t graphId, ged::GXLNodeID tail, ged::GXLNodeID head, ged::GXLLabel edgeLabel) { | |||
this->env.add_edge(graphId, tail, head, edgeLabel); | |||
env_->add_edge(graphId, tail, head, edgeLabel); | |||
}*/ | |||
void PyGEDEnv::addEdge(std::size_t graphId, const std::string & tail, const std::string & head, const std::map<std::string, std::string> & edgeLabel, bool ignoreDuplicates) { | |||
this->env.add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates); | |||
env_->add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates); | |||
initialized = false; | |||
} | |||
void PyGEDEnv::clearGraph(std::size_t graphId) { | |||
this->env.clear_graph(graphId); | |||
env_->clear_graph(graphId); | |||
initialized = false; | |||
} | |||
ged::ExchangeGraph<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> PyGEDEnv::getGraph(std::size_t graphId) const { | |||
return this->env.get_graph(graphId); | |||
return env_->get_graph(graphId); | |||
} | |||
std::size_t PyGEDEnv::getGraphInternalId(std::size_t graphId) { | |||
@@ -379,71 +388,71 @@ std::vector<std::vector<std::size_t>> PyGEDEnv::getGraphAdjacenceMatrix(std::siz | |||
} | |||
void PyGEDEnv::setEditCost(std::string editCost, std::vector<double> editCostConstants) { | |||
this->env.set_edit_costs(translateEditCost(editCost), editCostConstants); | |||
env_->set_edit_costs(translateEditCost(editCost), editCostConstants); | |||
} | |||
void PyGEDEnv::setPersonalEditCost(std::vector<double> editCostConstants) { | |||
//this->env.set_edit_costs(Your EditCost Class(editCostConstants)); | |||
//env_->set_edit_costs(Your EditCost Class(editCostConstants)); | |||
} | |||
// void PyGEDEnv::initEnv() { | |||
// this->env.init(); | |||
// env_->init(); | |||
// initialized = true; | |||
// } | |||
void PyGEDEnv::initEnv(std::string initOption, bool print_to_stdout) { | |||
this->env.init(translateInitOptions(initOption), print_to_stdout); | |||
env_->init(translateInitOptions(initOption), print_to_stdout); | |||
initialized = true; | |||
} | |||
void PyGEDEnv::setMethod(std::string method, const std::string & options) { | |||
this->env.set_method(translateMethod(method), options); | |||
env_->set_method(translateMethod(method), options); | |||
} | |||
void PyGEDEnv::initMethod() { | |||
this->env.init_method(); | |||
env_->init_method(); | |||
} | |||
double PyGEDEnv::getInitime() const { | |||
return this->env.get_init_time(); | |||
return env_->get_init_time(); | |||
} | |||
void PyGEDEnv::runMethod(std::size_t g, std::size_t h) { | |||
this->env.run_method(g, h); | |||
env_->run_method(g, h); | |||
} | |||
double PyGEDEnv::getUpperBound(std::size_t g, std::size_t h) const { | |||
return this->env.get_upper_bound(g, h); | |||
return env_->get_upper_bound(g, h); | |||
} | |||
double PyGEDEnv::getLowerBound(std::size_t g, std::size_t h) const { | |||
return this->env.get_lower_bound(g, h); | |||
return env_->get_lower_bound(g, h); | |||
} | |||
std::vector<long unsigned int> PyGEDEnv::getForwardMap(std::size_t g, std::size_t h) const { | |||
return this->env.get_node_map(g, h).get_forward_map(); | |||
return env_->get_node_map(g, h).get_forward_map(); | |||
} | |||
std::vector<long unsigned int> PyGEDEnv::getBackwardMap(std::size_t g, std::size_t h) const { | |||
return this->env.get_node_map(g, h).get_backward_map(); | |||
return env_->get_node_map(g, h).get_backward_map(); | |||
} | |||
std::size_t PyGEDEnv::getNodeImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | |||
return this->env.get_node_map(g, h).image(nodeId); | |||
return env_->get_node_map(g, h).image(nodeId); | |||
} | |||
std::size_t PyGEDEnv::getNodePreImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | |||
return this->env.get_node_map(g, h).pre_image(nodeId); | |||
return env_->get_node_map(g, h).pre_image(nodeId); | |||
} | |||
double PyGEDEnv::getInducedCost(std::size_t g, std::size_t h) const { | |||
return this->env.get_node_map(g, h).induced_cost(); | |||
return env_->get_node_map(g, h).induced_cost(); | |||
} | |||
std::vector<pair<std::size_t, std::size_t>> PyGEDEnv::getNodeMap(std::size_t g, std::size_t h) { | |||
std::vector<pair<std::size_t, std::size_t>> res; | |||
std::vector<ged::NodeMap::Assignment> relation; | |||
this->env.get_node_map(g, h).as_relation(relation); | |||
env_->get_node_map(g, h).as_relation(relation); | |||
for (const auto & assignment : relation) { | |||
res.push_back(std::make_pair(assignment.first, assignment.second)); | |||
} | |||
@@ -493,11 +502,11 @@ std::vector<std::vector<unsigned long int>> PyGEDEnv::getAllMap(std::size_t g, s | |||
} | |||
double PyGEDEnv::getRuntime(std::size_t g, std::size_t h) const { | |||
return this->env.get_runtime(g, h); | |||
return env_->get_runtime(g, h); | |||
} | |||
bool PyGEDEnv::quasimetricCosts() const { | |||
return this->env.quasimetric_costs(); | |||
return env_->quasimetric_costs(); | |||
} | |||
std::vector<std::vector<size_t>> PyGEDEnv::hungarianLSAP(std::vector<std::vector<std::size_t>> matrixCost) { | |||
@@ -542,73 +551,99 @@ std::vector<std::vector<double>> PyGEDEnv::hungarianLSAPE(std::vector<std::vecto | |||
} | |||
std::size_t PyGEDEnv::getNumNodeLabels() const { | |||
return this->env.num_node_labels(); | |||
return env_->num_node_labels(); | |||
} | |||
std::map<std::string, std::string> PyGEDEnv::getNodeLabel(std::size_t label_id) const { | |||
return this->env.get_node_label(label_id); | |||
return env_->get_node_label(label_id); | |||
} | |||
std::size_t PyGEDEnv::getNumEdgeLabels() const { | |||
return this->env.num_edge_labels(); | |||
return env_->num_edge_labels(); | |||
} | |||
std::map<std::string, std::string> PyGEDEnv::getEdgeLabel(std::size_t label_id) const { | |||
return this->env.get_edge_label(label_id); | |||
return env_->get_edge_label(label_id); | |||
} | |||
// std::size_t PyGEDEnv::getNumNodes(std::size_t graph_id) const { | |||
// return this->env.get_num_nodes(graph_id); | |||
// return env_->get_num_nodes(graph_id); | |||
// } | |||
double PyGEDEnv::getAvgNumNodes() const { | |||
return this->env.get_avg_num_nodes(); | |||
return env_->get_avg_num_nodes(); | |||
} | |||
double PyGEDEnv::getNodeRelCost(const std::map<std::string, std::string> & node_label_1, const std::map<std::string, std::string> & node_label_2) const { | |||
return this->env.node_rel_cost(node_label_1, node_label_2); | |||
return env_->node_rel_cost(node_label_1, node_label_2); | |||
} | |||
double PyGEDEnv::getNodeDelCost(const std::map<std::string, std::string> & node_label) const { | |||
return this->env.node_del_cost(node_label); | |||
return env_->node_del_cost(node_label); | |||
} | |||
double PyGEDEnv::getNodeInsCost(const std::map<std::string, std::string> & node_label) const { | |||
return this->env.node_ins_cost(node_label); | |||
return env_->node_ins_cost(node_label); | |||
} | |||
std::map<std::string, std::string> PyGEDEnv::getMedianNodeLabel(const std::vector<std::map<std::string, std::string>> & node_labels) const { | |||
return this->env.median_node_label(node_labels); | |||
return env_->median_node_label(node_labels); | |||
} | |||
double PyGEDEnv::getEdgeRelCost(const std::map<std::string, std::string> & edge_label_1, const std::map<std::string, std::string> & edge_label_2) const { | |||
return this->env.edge_rel_cost(edge_label_1, edge_label_2); | |||
return env_->edge_rel_cost(edge_label_1, edge_label_2); | |||
} | |||
double PyGEDEnv::getEdgeDelCost(const std::map<std::string, std::string> & edge_label) const { | |||
return this->env.edge_del_cost(edge_label); | |||
return env_->edge_del_cost(edge_label); | |||
} | |||
double PyGEDEnv::getEdgeInsCost(const std::map<std::string, std::string> & edge_label) const { | |||
return this->env.edge_ins_cost(edge_label); | |||
return env_->edge_ins_cost(edge_label); | |||
} | |||
std::map<std::string, std::string> PyGEDEnv::getMedianEdgeLabel(const std::vector<std::map<std::string, std::string>> & edge_labels) const { | |||
return this->env.median_edge_label(edge_labels); | |||
return env_->median_edge_label(edge_labels); | |||
} | |||
std::string PyGEDEnv::getInitType() const { | |||
return initOptionsToString(this->env.get_init_type()); | |||
return initOptionsToString(env_->get_init_type()); | |||
} | |||
void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||
ged::NodeMap node_map = this->env.get_node_map(g_id, h_id); | |||
this->env.compute_induced_cost(g_id, h_id, node_map); | |||
double PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const { | |||
ged::NodeMap node_map = ged::NodeMap(env_->get_num_nodes(g_id), env_->get_num_nodes(h_id)); | |||
for (const auto & assignment : relation) { | |||
node_map.add_assignment(assignment.first, assignment.second); | |||
// std::cout << assignment.first << assignment.second << endl; | |||
} | |||
const std::vector<ged::GEDGraph::NodeID> forward_map = node_map.get_forward_map(); | |||
for (std::size_t i{0}; i < node_map.num_source_nodes(); i++) { | |||
if (forward_map.at(i) == ged::GEDGraph::undefined_node()) { | |||
node_map.add_assignment(i, ged::GEDGraph::dummy_node()); | |||
} | |||
} | |||
const std::vector<ged::GEDGraph::NodeID> backward_map = node_map.get_backward_map(); | |||
for (std::size_t i{0}; i < node_map.num_target_nodes(); i++) { | |||
if (backward_map.at(i) == ged::GEDGraph::undefined_node()) { | |||
node_map.add_assignment(ged::GEDGraph::dummy_node(), i); | |||
} | |||
} | |||
// for (auto & map : node_map.get_forward_map()) { | |||
// std::cout << map << ", "; | |||
// } | |||
// std::cout << endl; | |||
// for (auto & map : node_map.get_backward_map()) { | |||
// std::cout << map << ", "; | |||
// } | |||
env_->compute_induced_cost(g_id, h_id, node_map); | |||
return node_map.induced_cost(); | |||
} | |||
// double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const { | |||
// return this->env.ged_data_node_cost(label1, label2); | |||
// return env_->ged_data_node_cost(label1, label2); | |||
// } | |||
@@ -630,7 +665,7 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||
/*loadGXLGraph(pathFolder, pathXML); | |||
std::vector<std::size_t> graph_ids = getAllGraphIds(); | |||
std::size_t median_id = this->env.add_graph("median", ""); | |||
std::size_t median_id = env_->add_graph("median", ""); | |||
initEnv(initOption); | |||
@@ -640,10 +675,10 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||
median_estimator.set_options("--init-type RANDOM --randomness PSEUDO --seed " + seed); | |||
median_estimator.run(graph_ids, median_id); | |||
std::string gxl_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".gxl"); | |||
this->env.save_as_gxl_graph(median_id, gxl_file_name);*/ | |||
env_->save_as_gxl_graph(median_id, gxl_file_name);*/ | |||
/*std::string tikz_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".tex"); | |||
save_letter_graph_as_tikz_file(this->env.get_graph(median_id), tikz_file_name);*/ | |||
save_letter_graph_as_tikz_file(env_->get_graph(median_id), tikz_file_name);*/ | |||
//} | |||
} | |||
@@ -12,4 +12,4 @@ from gklearn.kernels.structural_sp import StructuralSP | |||
from gklearn.kernels.shortest_path import ShortestPath | |||
from gklearn.kernels.path_up_to_h import PathUpToH | |||
from gklearn.kernels.treelet import Treelet | |||
from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman | |||
from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman, WLSubtree |
@@ -18,6 +18,7 @@ import numpy as np | |||
import networkx as nx | |||
from collections import Counter | |||
from functools import partial | |||
from gklearn.utils import SpecialLabel | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.kernels import GraphKernel | |||
from gklearn.utils import Trie | |||
@@ -582,11 +583,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||
def __add_dummy_labels(self, Gn): | |||
if self.__k_func is not None: | |||
if len(self.__node_labels) == 0: | |||
for G in Gn: | |||
nx.set_node_attributes(G, '0', 'dummy') | |||
self.__node_labels.append('dummy') | |||
if len(self.__edge_labels) == 0: | |||
for G in Gn: | |||
nx.set_edge_attributes(G, '0', 'dummy') | |||
self.__edge_labels.append('dummy') | |||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__node_labels = [SpecialLabel.DUMMY] | |||
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__edge_labels = [SpecialLabel.DUMMY] |
@@ -18,6 +18,7 @@ import numpy as np | |||
import networkx as nx | |||
from collections import Counter | |||
from itertools import chain | |||
from gklearn.utils import SpecialLabel | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs | |||
from gklearn.kernels import GraphKernel | |||
@@ -495,11 +496,11 @@ class Treelet(GraphKernel): | |||
def __add_dummy_labels(self, Gn): | |||
if len(self.__node_labels) == 0: | |||
for G in Gn: | |||
nx.set_node_attributes(G, '0', 'dummy') | |||
self.__node_labels.append('dummy') | |||
if len(self.__edge_labels) == 0: | |||
for G in Gn: | |||
nx.set_edge_attributes(G, '0', 'dummy') | |||
self.__edge_labels.append('dummy') | |||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__node_labels = [SpecialLabel.DUMMY] | |||
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__edge_labels = [SpecialLabel.DUMMY] |
@@ -16,6 +16,7 @@ import numpy as np | |||
import networkx as nx | |||
from collections import Counter | |||
from functools import partial | |||
from gklearn.utils import SpecialLabel | |||
from gklearn.utils.parallel import parallel_gm | |||
from gklearn.kernels import GraphKernel | |||
@@ -32,6 +33,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def _compute_gm_series(self): | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('A part of the computation is parallelized.') | |||
self.__add_dummy_node_labels(self._graphs) | |||
# for WL subtree kernel | |||
@@ -55,11 +60,16 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def _compute_gm_imap_unordered(self): | |||
if self._verbose >= 2: | |||
raise Warning('Only a part of the computation is parallelized due to the structure of this kernel.') | |||
import warnings | |||
warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.') | |||
return self._compute_gm_series() | |||
def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better. | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('A part of the computation is parallelized.') | |||
self.__add_dummy_node_labels(g_list + [g1]) | |||
# for WL subtree kernel | |||
@@ -83,8 +93,9 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
if self._verbose >= 2: | |||
raise Warning('Only a part of the computation is parallelized due to the structure of this kernel.') | |||
return self._compute_gm_imap_unordered() | |||
import warnings | |||
warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.') | |||
return self._compute_kernel_list_series(g1, g_list) | |||
def _wrapper_kernel_list_do(self, itr): | |||
@@ -459,7 +470,14 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def __add_dummy_node_labels(self, Gn): | |||
if len(self.__node_labels) == 0: | |||
for G in Gn: | |||
nx.set_node_attributes(G, '0', 'dummy') | |||
self.__node_labels.append('dummy') | |||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__node_labels = [SpecialLabel.DUMMY] | |||
class WLSubtree(WeisfeilerLehman): | |||
def __init__(self, **kwargs): | |||
kwargs['base_kernel'] = 'subtree' | |||
super().__init__(**kwargs) |
@@ -18,6 +18,7 @@ from gklearn.ged.median import MedianGraphEstimator | |||
from gklearn.ged.median import constant_node_costs,mge_options_to_string | |||
from gklearn.gedlib import librariesImport, gedlibpy | |||
from gklearn.utils import Timer | |||
from gklearn.utils.utils import get_graph_kernel_by_name | |||
# from gklearn.utils.dataset import Dataset | |||
class MedianPreimageGenerator(PreimageGenerator): | |||
@@ -81,7 +82,13 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
def run(self): | |||
self.__set_graph_kernel_by_name() | |||
self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'], | |||
node_labels=self._dataset.node_labels, | |||
edge_labels=self._dataset.edge_labels, | |||
node_attrs=self._dataset.node_attrs, | |||
edge_attrs=self._dataset.edge_attrs, | |||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
kernel_options=self._kernel_options) | |||
# record start time. | |||
start = time.time() | |||
@@ -180,6 +187,10 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
results['itrs'] = self.__itrs | |||
results['converged'] = self.__converged | |||
results['num_updates_ecc'] = self.__num_updates_ecc | |||
results['mge'] = {} | |||
results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased() | |||
results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased() | |||
results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents() | |||
return results | |||
@@ -653,27 +664,27 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
ged_env.init(init_option=self.__ged_options['init_option']) | |||
# Set up the madian graph estimator. | |||
mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||
mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||
self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
options = self.__mge_options.copy() | |||
if not 'seed' in options: | |||
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. | |||
# Select the GED algorithm. | |||
mge.set_options(mge_options_to_string(options)) | |||
mge.set_label_names(node_labels=self._dataset.node_labels, | |||
self.__mge.set_options(mge_options_to_string(options)) | |||
self.__mge.set_label_names(node_labels=self._dataset.node_labels, | |||
edge_labels=self._dataset.edge_labels, | |||
node_attrs=self._dataset.node_attrs, | |||
edge_attrs=self._dataset.edge_attrs) | |||
mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
self.__mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
self.__mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
# Run the estimator. | |||
mge.run(graph_ids, set_median_id, gen_median_id) | |||
self.__mge.run(graph_ids, set_median_id, gen_median_id) | |||
# Get SODs. | |||
self.__sod_set_median = mge.get_sum_of_distances('initialized') | |||
self.__sod_gen_median = mge.get_sum_of_distances('converged') | |||
self.__sod_set_median = self.__mge.get_sum_of_distances('initialized') | |||
self.__sod_gen_median = self.__mge.get_sum_of_distances('converged') | |||
# Get median graphs. | |||
self.__set_median = ged_env.get_nx_graph(set_median_id) | |||
@@ -722,43 +733,6 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
print('distance in kernel space for generalized median:', self.__k_dis_gen_median) | |||
print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) | |||
print('distance in kernel space for each graph in median set:', k_dis_median_set) | |||
def __set_graph_kernel_by_name(self): | |||
if self._kernel_options['name'] == 'ShortestPath': | |||
from gklearn.kernels import ShortestPath | |||
self._graph_kernel = ShortestPath(node_labels=self._dataset.node_labels, | |||
node_attrs=self._dataset.node_attrs, | |||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
**self._kernel_options) | |||
elif self._kernel_options['name'] == 'StructuralSP': | |||
from gklearn.kernels import StructuralSP | |||
self._graph_kernel = StructuralSP(node_labels=self._dataset.node_labels, | |||
edge_labels=self._dataset.edge_labels, | |||
node_attrs=self._dataset.node_attrs, | |||
edge_attrs=self._dataset.edge_attrs, | |||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
**self._kernel_options) | |||
elif self._kernel_options['name'] == 'PathUpToH': | |||
from gklearn.kernels import PathUpToH | |||
self._graph_kernel = PathUpToH(node_labels=self._dataset.node_labels, | |||
edge_labels=self._dataset.edge_labels, | |||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
**self._kernel_options) | |||
elif self._kernel_options['name'] == 'Treelet': | |||
from gklearn.kernels import Treelet | |||
self._graph_kernel = Treelet(node_labels=self._dataset.node_labels, | |||
edge_labels=self._dataset.edge_labels, | |||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
**self._kernel_options) | |||
elif self._kernel_options['name'] == 'WeisfeilerLehman': | |||
from gklearn.kernels import WeisfeilerLehman | |||
self._graph_kernel = WeisfeilerLehman(node_labels=self._dataset.node_labels, | |||
edge_labels=self._dataset.edge_labels, | |||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
**self._kernel_options) | |||
else: | |||
raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WeisfeilerLehman".') | |||
# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
@@ -25,7 +25,7 @@ import networkx as nx | |||
import os | |||
def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False): | |||
def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None): | |||
import os.path | |||
from gklearn.preimage import MedianPreimageGenerator | |||
from gklearn.utils import split_dataset_by_target | |||
@@ -38,7 +38,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
dataset_all.trim_dataset(edge_required=edge_required) | |||
if irrelevant_labels is not None: | |||
dataset_all.remove_labels(**irrelevant_labels) | |||
# dataset_all.cut_graphs(range(0, 10)) | |||
if cut_range is not None: | |||
dataset_all.cut_graphs(cut_range) | |||
datasets = split_dataset_by_target(dataset_all) | |||
if save_results: | |||
@@ -57,6 +58,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
itrs_list = [] | |||
converged_list = [] | |||
num_updates_ecc_list = [] | |||
mge_decrease_order_list = [] | |||
mge_increase_order_list = [] | |||
mge_converged_order_list = [] | |||
nb_sod_sm2gm = [0, 0, 0] | |||
nb_dis_k_sm2gm = [0, 0, 0] | |||
nb_dis_k_gi2sm = [0, 0, 0] | |||
@@ -148,7 +152,10 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
results['runtime_precompute_gm'], results['runtime_optimize_ec'], | |||
results['runtime_generate_preimage'], results['runtime_total'], | |||
results['itrs'], results['converged'], | |||
results['num_updates_ecc']]) | |||
results['num_updates_ecc'], | |||
results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge | |||
results['mge']['num_increase_order'] > 0, | |||
results['mge']['num_converged_descents'] > 0]) | |||
f_detail.close() | |||
# compute result summary. | |||
@@ -164,6 +171,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
itrs_list.append(results['itrs']) | |||
converged_list.append(results['converged']) | |||
num_updates_ecc_list.append(results['num_updates_ecc']) | |||
mge_decrease_order_list.append(results['mge']['num_decrease_order'] > 0) | |||
mge_increase_order_list.append(results['mge']['num_increase_order'] > 0) | |||
mge_converged_order_list.append(results['mge']['num_converged_descents'] > 0) | |||
# # SOD SM -> GM | |||
if results['sod_set_median'] > results['sod_gen_median']: | |||
nb_sod_sm2gm[0] += 1 | |||
@@ -210,7 +220,11 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
results['runtime_precompute_gm'], results['runtime_optimize_ec'], | |||
results['runtime_generate_preimage'], results['runtime_total'], | |||
results['itrs'], results['converged'], | |||
results['num_updates_ecc'], nb_sod_sm2gm, | |||
results['num_updates_ecc'], | |||
results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge | |||
results['mge']['num_increase_order'] > 0, | |||
results['mge']['num_converged_descents'] > 0, | |||
nb_sod_sm2gm, | |||
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm]) | |||
f_summary.close() | |||
@@ -256,6 +270,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
itrs_mean = np.mean(itrs_list) | |||
num_converged = np.sum(converged_list) | |||
num_updates_ecc_mean = np.mean(num_updates_ecc_list) | |||
num_mge_decrease_order = np.sum(mge_decrease_order_list) | |||
num_mge_increase_order = np.sum(mge_increase_order_list) | |||
num_mge_converged = np.sum(mge_converged_order_list) | |||
sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean)) | |||
dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean)) | |||
dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean)) | |||
@@ -270,7 +287,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
dis_k_gi2sm_mean, dis_k_gi2gm_mean, | |||
time_precompute_gm_mean, time_optimize_ec_mean, | |||
time_generate_mean, time_total_mean, itrs_mean, | |||
num_converged, num_updates_ecc_mean]) | |||
num_converged, num_updates_ecc_mean, | |||
num_mge_decrease_order, num_mge_increase_order, | |||
num_mge_converged]) | |||
f_summary.close() | |||
# save total pairwise kernel distances. | |||
@@ -300,7 +319,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | |||
'dis_k gi -> GM', 'edit cost constants', 'time precompute gm', | |||
'time optimize ec', 'time generate preimage', 'time total', | |||
'itrs', 'converged', 'num updates ecc']) | |||
'itrs', 'converged', 'num updates ecc', 'mge decrease order', | |||
'mge increase order', 'mge converged']) | |||
f_detail.close() | |||
# fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | |||
@@ -312,7 +332,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | |||
'dis_k gi -> GM', 'time precompute gm', 'time optimize ec', | |||
'time generate preimage', 'time total', 'itrs', 'num converged', | |||
'num updates ecc', '# SOD SM -> GM', '# dis_k SM -> GM', | |||
'num updates ecc', 'mge num decrease order', 'mge num increase order', | |||
'mge num converged', '# SOD SM -> GM', '# dis_k SM -> GM', | |||
'# dis_k gi -> SM', '# dis_k gi -> GM']) | |||
# 'repeats better SOD SM -> GM', | |||
# 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', | |||
@@ -418,6 +439,8 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel=' | |||
Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | |||
height=4, base_kernel='subtree', parallel=None, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
else: | |||
raise Exception('The graph kernel "', graph_kernel, '" is not defined.') | |||
# normalization | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
@@ -260,20 +260,20 @@ def test_Treelet(ds_name, parallel): | |||
@pytest.mark.parametrize('ds_name', ['Acyclic']) | |||
#@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) | |||
@pytest.mark.parametrize('base_kernel', ['subtree']) | |||
# @pytest.mark.parametrize('base_kernel', ['subtree']) | |||
@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
def test_WeisfeilerLehman(ds_name, parallel, base_kernel): | |||
"""Test Weisfeiler-Lehman kernel. | |||
def test_WLSubtree(ds_name, parallel): | |||
"""Test Weisfeiler-Lehman subtree kernel. | |||
""" | |||
from gklearn.kernels import WeisfeilerLehman | |||
from gklearn.kernels import WLSubtree | |||
dataset = chooseDataset(ds_name) | |||
try: | |||
graph_kernel = WeisfeilerLehman(node_labels=dataset.node_labels, | |||
graph_kernel = WLSubtree(node_labels=dataset.node_labels, | |||
edge_labels=dataset.edge_labels, | |||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
height=2, base_kernel=base_kernel) | |||
height=2) | |||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
@@ -20,4 +20,5 @@ from gklearn.utils.graph_files import load_dataset, save_dataset | |||
from gklearn.utils.timer import Timer | |||
from gklearn.utils.utils import get_graph_kernel_by_name | |||
from gklearn.utils.utils import compute_gram_matrices_by_class | |||
from gklearn.utils.utils import SpecialLabel | |||
from gklearn.utils.trie import Trie |
@@ -56,13 +56,14 @@ class Dataset(object): | |||
self.__node_attrs = label_names['node_attrs'] | |||
self.__edge_labels = label_names['edge_labels'] | |||
self.__edge_attrs = label_names['edge_attrs'] | |||
self.clean_labels() | |||
def load_graphs(self, graphs, targets=None): | |||
# this has to be followed by set_labels(). | |||
self.__graphs = graphs | |||
self.__targets = targets | |||
# self.set_labels_attrs() | |||
# self.set_labels_attrs() # @todo | |||
def load_predefined_dataset(self, ds_name): | |||
@@ -89,6 +90,9 @@ class Dataset(object): | |||
elif ds_name == 'Cuneiform': | |||
ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'DD': | |||
ds_file = current_path + '../../datasets/DD/DD_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Fingerprint': | |||
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
@@ -113,6 +117,9 @@ class Dataset(object): | |||
elif ds_name == 'MUTAG': | |||
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'PAH': | |||
ds_file = current_path + '../../datasets/PAH/dataset.ds' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'SYNTHETIC': | |||
pass | |||
elif ds_name == 'SYNTHETICnew': | |||
@@ -120,11 +127,14 @@ class Dataset(object): | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Synthie': | |||
pass | |||
else: | |||
raise Exception('The dataset name "', ds_name, '" is not pre-defined.') | |||
self.__node_labels = label_names['node_labels'] | |||
self.__node_attrs = label_names['node_attrs'] | |||
self.__edge_labels = label_names['edge_labels'] | |||
self.__edge_attrs = label_names['edge_attrs'] | |||
self.clean_labels() | |||
def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | |||
@@ -138,27 +148,27 @@ class Dataset(object): | |||
# @todo: remove labels which have only one possible values. | |||
if node_labels is None: | |||
self.__node_labels = self.__graphs[0].graph['node_labels'] | |||
# # graphs are considered node unlabeled if all nodes have the same label. | |||
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||
# # graphs are considered node unlabeled if all nodes have the same label. | |||
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||
if node_attrs is None: | |||
self.__node_attrs = self.__graphs[0].graph['node_attrs'] | |||
# for G in Gn: | |||
# for n in G.nodes(data=True): | |||
# if 'attributes' in n[1]: | |||
# return len(n[1]['attributes']) | |||
# return 0 | |||
# for G in Gn: | |||
# for n in G.nodes(data=True): | |||
# if 'attributes' in n[1]: | |||
# return len(n[1]['attributes']) | |||
# return 0 | |||
if edge_labels is None: | |||
self.__edge_labels = self.__graphs[0].graph['edge_labels'] | |||
# # graphs are considered edge unlabeled if all edges have the same label. | |||
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||
# # graphs are considered edge unlabeled if all edges have the same label. | |||
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||
if edge_attrs is None: | |||
self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] | |||
# for G in Gn: | |||
# if nx.number_of_edges(G) > 0: | |||
# for e in G.edges(data=True): | |||
# if 'attributes' in e[2]: | |||
# return len(e[2]['attributes']) | |||
# return 0 | |||
# for G in Gn: | |||
# if nx.number_of_edges(G) > 0: | |||
# for e in G.edges(data=True): | |||
# if 'attributes' in e[2]: | |||
# return len(e[2]['attributes']) | |||
# return 0 | |||
def get_dataset_infos(self, keys=None): | |||
@@ -323,7 +333,7 @@ class Dataset(object): | |||
if self.__node_label_nums is None: | |||
self.__node_label_nums = {} | |||
for node_label in self.__node_labels: | |||
self.__node_label_nums[node_label] = self.get_node_label_num(node_label) | |||
self.__node_label_nums[node_label] = self.__get_node_label_num(node_label) | |||
infos['node_label_nums'] = self.__node_label_nums | |||
if 'edge_label_dim' in keys: | |||
@@ -335,7 +345,7 @@ class Dataset(object): | |||
if self.__edge_label_nums is None: | |||
self.__edge_label_nums = {} | |||
for edge_label in self.__edge_labels: | |||
self.__edge_label_nums[edge_label] = self.get_edge_label_num(edge_label) | |||
self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label) | |||
infos['edge_label_nums'] = self.__edge_label_nums | |||
if 'directed' in keys or 'substructures' in keys: | |||
@@ -411,33 +421,95 @@ class Dataset(object): | |||
def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
node_labels = [item for item in node_labels if item in self.__node_labels] | |||
edge_labels = [item for item in edge_labels if item in self.__edge_labels] | |||
node_attrs = [item for item in node_attrs if item in self.__node_attrs] | |||
edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs] | |||
for g in self.__graphs: | |||
for nd in g.nodes(): | |||
for nl in node_labels: | |||
del g.nodes[nd][nl] | |||
del g.nodes[nd][nl] | |||
for na in node_attrs: | |||
del g.nodes[nd][na] | |||
for ed in g.edges(): | |||
for el in edge_labels: | |||
del g.edges[ed][el] | |||
del g.edges[ed][el] | |||
for ea in edge_attrs: | |||
del g.edges[ed][ea] | |||
del g.edges[ed][ea] | |||
if len(node_labels) > 0: | |||
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||
if len(edge_labels) > 0: | |||
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||
if len(node_attrs) > 0: | |||
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||
if len(edge_attrs) > 0: | |||
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||
def clean_labels(self): | |||
labels = [] | |||
for name in self.__node_labels: | |||
label = set() | |||
for G in self.__graphs: | |||
label = label | set(nx.get_node_attributes(G, name).values()) | |||
if len(label) > 1: | |||
labels.append(name) | |||
break | |||
if len(label) < 2: | |||
for G in self.__graphs: | |||
for nd in G.nodes(): | |||
del G.nodes[nd][name] | |||
self.__node_labels = labels | |||
labels = [] | |||
for name in self.__edge_labels: | |||
label = set() | |||
for G in self.__graphs: | |||
label = label | set(nx.get_edge_attributes(G, name).values()) | |||
if len(label) > 1: | |||
labels.append(name) | |||
break | |||
if len(label) < 2: | |||
for G in self.__graphs: | |||
for ed in G.edges(): | |||
del G.edges[ed][name] | |||
self.__edge_labels = labels | |||
labels = [] | |||
for name in self.__node_attrs: | |||
label = set() | |||
for G in self.__graphs: | |||
label = label | set(nx.get_node_attributes(G, name).values()) | |||
if len(label) > 1: | |||
labels.append(name) | |||
break | |||
if len(label) < 2: | |||
for G in self.__graphs: | |||
for nd in G.nodes(): | |||
del G.nodes[nd][name] | |||
self.__node_attrs = labels | |||
labels = [] | |||
for name in self.__edge_attrs: | |||
label = set() | |||
for G in self.__graphs: | |||
label = label | set(nx.get_edge_attributes(G, name).values()) | |||
if len(label) > 1: | |||
labels.append(name) | |||
break | |||
if len(label) < 2: | |||
for G in self.__graphs: | |||
for ed in G.edges(): | |||
del G.edges[ed][name] | |||
self.__edge_attrs = labels | |||
def cut_graphs(self, range_): | |||
self.__graphs = [self.__graphs[i] for i in range_] | |||
if self.__targets is not None: | |||
self.__targets = [self.__targets[i] for i in range_] | |||
# @todo | |||
# self.set_labels_attrs() | |||
self.clean_labels() | |||
def trim_dataset(self, edge_required=False): | |||
@@ -448,8 +520,7 @@ class Dataset(object): | |||
idx = [p[0] for p in trimed_pairs] | |||
self.__graphs = [p[1] for p in trimed_pairs] | |||
self.__targets = [self.__targets[i] for i in idx] | |||
# @todo | |||
# self.set_labels_attrs() | |||
self.clean_labels() | |||
def __get_dataset_size(self): | |||
@@ -652,4 +723,5 @@ def split_dataset_by_target(dataset): | |||
sub_dataset.load_graphs(sub_graphs, [key] * len(val)) | |||
sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs) | |||
datasets.append(sub_dataset) | |||
# @todo: clean_labels? | |||
return datasets |
@@ -63,7 +63,7 @@ def load_dataset(filename, filename_targets=None, gformat=None, **kwargs): | |||
return data, y, label_names | |||
def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=None): | |||
def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', **kwargs): | |||
"""Save list of graphs. | |||
""" | |||
import os | |||
@@ -73,22 +73,22 @@ def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=Non | |||
if not os.path.exists(dirname_ds) : | |||
os.makedirs(dirname_ds) | |||
if xparams is not None and 'graph_dir' in xparams: | |||
graph_dir = xparams['graph_dir'] + '/' | |||
if 'graph_dir' in kwargs: | |||
graph_dir = kwargs['graph_dir'] + '/' | |||
if not os.path.exists(graph_dir): | |||
os.makedirs(graph_dir) | |||
del kwargs['graph_dir'] | |||
else: | |||
graph_dir = dirname_ds | |||
if group == 'xml' and gformat == 'gxl': | |||
kwargs = {'method': xparams['method']} if xparams is not None else {} | |||
with open(filename + '.xml', 'w') as fgroup: | |||
fgroup.write("<?xml version=\"1.0\"?>") | |||
fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">") | |||
fgroup.write("\n<GraphCollection>") | |||
for idx, g in enumerate(Gn): | |||
fname_tmp = "graph" + str(idx) + ".gxl" | |||
saveGXL(g, graph_dir + fname_tmp, **kwargs) | |||
save_gxl(g, graph_dir + fname_tmp, **kwargs) | |||
fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") | |||
fgroup.write("\n</GraphCollection>") | |||
fgroup.close() | |||
@@ -226,7 +226,7 @@ def load_gxl(filename): # @todo: directed graphs. | |||
return g, label_names | |||
def saveGXL(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
def save_gxl(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
if method == 'default': | |||
gxl_file = open(filename, 'w') | |||
gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") | |||
@@ -1,6 +1,7 @@ | |||
import networkx as nx | |||
import numpy as np | |||
from copy import deepcopy | |||
from enum import Enum, auto | |||
#from itertools import product | |||
# from tqdm import tqdm | |||
@@ -299,21 +300,59 @@ def get_edge_labels(Gn, edge_label): | |||
def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}): | |||
if name == 'structuralspkernel': | |||
if name == 'ShortestPath': | |||
from gklearn.kernels import ShortestPath | |||
graph_kernel = ShortestPath(node_labels=node_labels, | |||
node_attrs=node_attrs, | |||
ds_infos=ds_infos, | |||
**kernel_options) | |||
elif name == 'StructuralSP': | |||
from gklearn.kernels import StructuralSP | |||
graph_kernel = StructuralSP(node_labels=node_labels, edge_labels=edge_labels, | |||
node_attrs=node_attrs, edge_attrs=edge_attrs, | |||
ds_infos=ds_infos, **kernel_options) | |||
graph_kernel = StructuralSP(node_labels=node_labels, | |||
edge_labels=edge_labels, | |||
node_attrs=node_attrs, | |||
edge_attrs=edge_attrs, | |||
ds_infos=ds_infos, | |||
**kernel_options) | |||
elif name == 'PathUpToH': | |||
from gklearn.kernels import PathUpToH | |||
graph_kernel = PathUpToH(node_labels=node_labels, | |||
edge_labels=edge_labels, | |||
ds_infos=ds_infos, | |||
**kernel_options) | |||
elif name == 'Treelet': | |||
from gklearn.kernels import Treelet | |||
graph_kernel = Treelet(node_labels=node_labels, | |||
edge_labels=edge_labels, | |||
ds_infos=ds_infos, | |||
**kernel_options) | |||
elif name == 'WLSubtree': | |||
from gklearn.kernels import WLSubtree | |||
graph_kernel = WLSubtree(node_labels=node_labels, | |||
edge_labels=edge_labels, | |||
ds_infos=ds_infos, | |||
**kernel_options) | |||
elif name == 'WeisfeilerLehman': | |||
from gklearn.kernels import WeisfeilerLehman | |||
graph_kernel = WeisfeilerLehman(node_labels=node_labels, | |||
edge_labels=edge_labels, | |||
ds_infos=ds_infos, | |||
**kernel_options) | |||
else: | |||
raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet", "WLSubtree", "WeisfeilerLehman".') | |||
return graph_kernel | |||
def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None): | |||
def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False): | |||
import os | |||
from gklearn.utils import Dataset, split_dataset_by_target | |||
# 1. get dataset. | |||
print('1. getting dataset...') | |||
dataset_all = Dataset() | |||
dataset_all.load_predefined_dataset(ds_name) | |||
dataset_all.trim_dataset(edge_required=edge_required) | |||
if not irrelevant_labels is None: | |||
dataset_all.remove_labels(**irrelevant_labels) | |||
# dataset_all.cut_graphs(range(0, 10)) | |||
@@ -349,6 +388,8 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d | |||
print() | |||
print('4. saving results...') | |||
if save_results: | |||
if not os.path.exists(dir_save): | |||
os.makedirs(dir_save) | |||
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) | |||
print('\ncomplete.') | |||
@@ -424,4 +465,10 @@ def get_mlti_dim_edge_attrs(G, attr_names): | |||
attributes = [] | |||
for ed, attrs in G.edges(data=True): | |||
attributes.append(tuple(attrs[aname] for aname in attr_names)) | |||
return attributes | |||
return attributes | |||
class SpecialLabel(Enum): | |||
"""can be used to define special labels. | |||
""" | |||
DUMMY = auto # The dummy label. |