@@ -1,9 +1,5 @@ | |||
environment: | |||
matrix: | |||
- PYTHON: "C:\\Python33" | |||
- PYTHON: "C:\\Python33-x64" | |||
- PYTHON: "C:\\Python34" | |||
- PYTHON: "C:\\Python34-x64" | |||
- PYTHON: "C:\\Python35" | |||
- PYTHON: "C:\\Python35-x64" | |||
- PYTHON: "C:\\Python36" | |||
@@ -1 +1,2 @@ | |||
from gklearn.ged.env.common_types import AlgorithmState | |||
from gklearn.ged.env.common_types import AlgorithmState | |||
from gklearn.ged.env.node_map import NodeMap |
@@ -0,0 +1,68 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Apr 22 11:31:26 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
class NodeMap(object): | |||
def __init__(self, num_nodes_g, num_nodes_h): | |||
self.__forward_map = [np.inf] * num_nodes_g | |||
self.__backward_map = [np.inf] * num_nodes_h | |||
self.__induced_cost = np.inf | |||
def num_source_nodes(self): | |||
return len(self.__forward_map) | |||
def num_target_nodes(self): | |||
return len(self.__backward_map) | |||
def image(self, node): | |||
if node < len(self.__forward_map): | |||
return self.__forward_map[node] | |||
else: | |||
raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.') | |||
return np.inf | |||
def pre_image(self, node): | |||
if node < len(self.__backward_map): | |||
return self.__backward_map[node] | |||
else: | |||
raise Exception('The node with ID ', str(node), ' is not contained in the target nodes of the node map.') | |||
return np.inf | |||
def get_forward_map(self): | |||
return self.__forward_map | |||
def get_backward_map(self): | |||
return self.__backward_map | |||
def add_assignment(self, i, k): | |||
if i != np.inf: | |||
if i < len(self.__forward_map): | |||
self.__forward_map[i] = k | |||
else: | |||
raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.') | |||
if k != np.inf: | |||
if k < len(self.__backward_map): | |||
self.__backward_map[k] = i | |||
else: | |||
raise Exception('The node with ID ', str(k), ' is not contained in the target nodes of the node map.') | |||
def set_induced_cost(self, induced_cost): | |||
self.__induced_cost = induced_cost | |||
def induced_cost(self): | |||
return self.__induced_cost |
@@ -47,6 +47,7 @@ class MedianGraphEstimator(object): | |||
self.__desired_num_random_inits = 10 | |||
self.__use_real_randomness = True | |||
self.__seed = 0 | |||
self.__update_order = True | |||
self.__refine = True | |||
self.__time_limit_in_sec = 0 | |||
self.__epsilon = 0.0001 | |||
@@ -126,6 +127,16 @@ class MedianGraphEstimator(object): | |||
else: | |||
raise Exception('Invalid argument "' + opt_val + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"') | |||
elif opt_name == 'update-order': | |||
if opt_val == 'TRUE': | |||
self.__update_order = True | |||
elif opt_val == 'FALSE': | |||
self.__update_order = False | |||
else: | |||
raise Exception('Invalid argument "' + opt_val + '" for option update-order. Usage: options = "[--update-order TRUE|FALSE] [...]"') | |||
elif opt_name == 'refine': | |||
if opt_val == 'TRUE': | |||
self.__refine = True | |||
@@ -298,11 +309,11 @@ class MedianGraphEstimator(object): | |||
for graph_id in graph_ids: | |||
# @todo: get_nx_graph() function may need to be modified according to the coming code. | |||
graphs[graph_id] = self.__ged_env.get_nx_graph(graph_id, True, True, False) | |||
# print(self.__ged_env.get_graph_internal_id(0)) | |||
# print(graphs[0].graph) | |||
# print(graphs[0].nodes(data=True)) | |||
# print(graphs[0].edges(data=True)) | |||
# print(nx.adjacency_matrix(graphs[0])) | |||
# print(self.__ged_env.get_graph_internal_id(0)) | |||
# print(graphs[0].graph) | |||
# print(graphs[0].nodes(data=True)) | |||
# print(graphs[0].edges(data=True)) | |||
# print(nx.adjacency_matrix(graphs[0])) | |||
# Construct initial medians. | |||
@@ -310,10 +321,10 @@ class MedianGraphEstimator(object): | |||
self.__construct_initial_medians(graph_ids, timer, medians) | |||
end_init = time.time() | |||
self.__runtime_initialized = end_init - start | |||
# print(medians[0].graph) | |||
# print(medians[0].nodes(data=True)) | |||
# print(medians[0].edges(data=True)) | |||
# print(nx.adjacency_matrix(medians[0])) | |||
# print(medians[0].graph) | |||
# print(medians[0].nodes(data=True)) | |||
# print(medians[0].edges(data=True)) | |||
# print(nx.adjacency_matrix(medians[0])) | |||
# Reset information about iterations and number of times the median decreases and increases. | |||
self.__itrs = [0] * len(medians) | |||
@@ -353,12 +364,12 @@ class MedianGraphEstimator(object): | |||
# Compute node maps and sum of distances for initial median. | |||
self.__sum_of_distances = 0 | |||
self.__node_maps_from_median.clear() # @todo | |||
self.__node_maps_from_median.clear() | |||
for graph_id in graph_ids: | |||
self.__ged_env.run_method(gen_median_id, graph_id) | |||
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id) | |||
# print(self.__node_maps_from_median[graph_id]) | |||
self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary. | |||
self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost() | |||
# print(self.__sum_of_distances) | |||
# Print information about current iteration. | |||
if self.__print_to_stdout == 2: | |||
@@ -366,7 +377,7 @@ class MedianGraphEstimator(object): | |||
self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances) | |||
self.__ged_env.load_nx_graph(median, set_median_id) | |||
# print(self.__best_init_sum_of_distances) | |||
print(self.__best_init_sum_of_distances) | |||
# Print information about current iteration. | |||
if self.__print_to_stdout == 2: | |||
@@ -391,10 +402,11 @@ class MedianGraphEstimator(object): | |||
# Update the median. # @todo!!!!!!!!!!!!!!!!!!!!!! | |||
median_modified = self.__update_median(graphs, median) | |||
if not median_modified or self.__itrs[median_pos] == 0: | |||
decreased_order = self.__decrease_order(graphs, median) | |||
if not decreased_order or self.__itrs[median_pos] == 0: | |||
increased_order = False | |||
if self.__update_order: | |||
if not median_modified or self.__itrs[median_pos] == 0: | |||
decreased_order = self.__decrease_order(graphs, median) | |||
if not decreased_order or self.__itrs[median_pos] == 0: | |||
increased_order = False | |||
# Update the number of iterations without update of the median. | |||
if median_modified or decreased_order or increased_order: | |||
@@ -421,11 +433,11 @@ class MedianGraphEstimator(object): | |||
# Compute induced costs of the old node maps w.r.t. the updated median. | |||
for graph_id in graph_ids: | |||
# print(self.__ged_env.get_induced_cost(gen_median_id, graph_id)) | |||
# @todo: watch out if compute_induced_cost is correct, this may influence: increase/decrease order, induced_cost() in the following code.!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |||
self.__ged_env.compute_induced_cost(gen_median_id, graph_id) | |||
# print('---------------------------------------') | |||
# print(self.__ged_env.get_induced_cost(gen_median_id, graph_id)) | |||
# print(self.__node_maps_from_median[graph_id].induced_cost()) | |||
self.__ged_env.compute_induced_cost(gen_median_id, graph_id, self.__node_maps_from_median[graph_id]) | |||
# print('---------------------------------------') | |||
# print(self.__node_maps_from_median[graph_id].induced_cost()) | |||
# @todo:!!!!!!!!!!!!!!!!!!!!!!!!!!!!This value is a slight different from the c++ program, which might be a bug! Use it very carefully! | |||
# Print information about current iteration. | |||
if self.__print_to_stdout == 2: | |||
@@ -439,8 +451,9 @@ class MedianGraphEstimator(object): | |||
# Update the sum of distances. | |||
old_sum_of_distances = self.__sum_of_distances | |||
self.__sum_of_distances = 0 | |||
for graph_id in self.__node_maps_from_median: | |||
self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: see above. | |||
for graph_id, node_map in self.__node_maps_from_median.items(): | |||
self.__sum_of_distances += node_map.induced_cost() | |||
# print(self.__sum_of_distances) | |||
# Print information about current iteration. | |||
if self.__print_to_stdout == 2: | |||
@@ -460,7 +473,7 @@ class MedianGraphEstimator(object): | |||
# Update the best median. | |||
if self.__sum_of_distances < best_sum_of_distances: | |||
best_sum_of_distances = self.__sum_of_distances | |||
node_maps_from_best_median = self.__node_maps_from_median | |||
node_maps_from_best_median = self.__node_maps_from_median.copy() # @todo: this is a shallow copy, not sure if it is enough. | |||
best_median = median | |||
# Update the number of converged descents. | |||
@@ -543,6 +556,7 @@ class MedianGraphEstimator(object): | |||
self.__desired_num_random_inits = 10 | |||
self.__use_real_randomness = True | |||
self.__seed = 0 | |||
self.__update_order = True | |||
self.__refine = True | |||
self.__time_limit_in_sec = 0 | |||
self.__epsilon = 0.0001 | |||
@@ -568,16 +582,16 @@ class MedianGraphEstimator(object): | |||
self.__compute_medoid(graph_ids, timer, initial_medians) | |||
elif self.__init_type == 'MAX': | |||
pass # @todo | |||
# compute_max_order_graph_(graph_ids, initial_medians) | |||
# compute_max_order_graph_(graph_ids, initial_medians) | |||
elif self.__init_type == 'MIN': | |||
pass # @todo | |||
# compute_min_order_graph_(graph_ids, initial_medians) | |||
# compute_min_order_graph_(graph_ids, initial_medians) | |||
elif self.__init_type == 'MEAN': | |||
pass # @todo | |||
# compute_mean_order_graph_(graph_ids, initial_medians) | |||
# compute_mean_order_graph_(graph_ids, initial_medians) | |||
else: | |||
pass # @todo | |||
# sample_initial_medians_(graph_ids, initial_medians) | |||
# sample_initial_medians_(graph_ids, initial_medians) | |||
# Print information about current iteration. | |||
if self.__print_to_stdout == 2: | |||
@@ -655,20 +669,20 @@ class MedianGraphEstimator(object): | |||
# Iterate through all nodes of the median. | |||
for i in range(0, nx.number_of_nodes(median)): | |||
# print('i: ', i) | |||
# print('i: ', i) | |||
# Collect the labels of the substituted nodes. | |||
node_labels = [] | |||
for graph_id, graph in graphs.items(): | |||
# print('graph_id: ', graph_id) | |||
# print(self.__node_maps_from_median[graph_id]) | |||
k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i) | |||
# print('k: ', k) | |||
# print('graph_id: ', graph_id) | |||
# print(self.__node_maps_from_median[graph_id]) | |||
k = self.__node_maps_from_median[graph_id].image(i) | |||
# print('k: ', k) | |||
if k != np.inf: | |||
node_labels.append(graph.nodes[k]) | |||
# Compute the median label and update the median. | |||
if len(node_labels) > 0: | |||
# median_label = self.__ged_env.get_median_node_label(node_labels) | |||
# median_label = self.__ged_env.get_median_node_label(node_labels) | |||
median_label = self.__get_median_node_label(node_labels) | |||
if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon: | |||
nx.set_node_attributes(median, {i: median_label}) | |||
@@ -679,10 +693,10 @@ class MedianGraphEstimator(object): | |||
if self.__print_to_stdout == 2: | |||
print('edges ... ', end='') | |||
# Clear the adjacency lists of the median and reset number of edges to 0. | |||
median_edges = list(median.edges) | |||
for (head, tail) in median_edges: | |||
median.remove_edge(head, tail) | |||
# # Clear the adjacency lists of the median and reset number of edges to 0. | |||
# median_edges = list(median.edges) | |||
# for (head, tail) in median_edges: | |||
# median.remove_edge(head, tail) | |||
# @todo: what if edge is not labeled? | |||
# Iterate through all possible edges (i,j) of the median. | |||
@@ -692,8 +706,8 @@ class MedianGraphEstimator(object): | |||
# Collect the labels of the edges to which (i,j) is mapped by the node maps. | |||
edge_labels = [] | |||
for graph_id, graph in graphs.items(): | |||
k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i) | |||
l = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], j) | |||
k = self.__node_maps_from_median[graph_id].image(i) | |||
l = self.__node_maps_from_median[graph_id].image(j) | |||
if k != np.inf and l != np.inf: | |||
if graph.has_edge(k, l): | |||
edge_labels.append(graph.edges[(k, l)]) | |||
@@ -711,11 +725,13 @@ class MedianGraphEstimator(object): | |||
rel_cost += self.__ged_env.get_edge_rel_cost(median_label, edge_label) | |||
# Update the median. | |||
if median.has_edge(i, j): | |||
median.remove_edge(i, j) | |||
if rel_cost < (self.__edge_ins_cost + self.__edge_del_cost) * len(edge_labels) - self.__edge_del_cost * len(graphs): | |||
median.add_edge(i, j, **median_label) | |||
else: | |||
if median.has_edge(i, j): | |||
median.remove_edge(i, j) | |||
# else: | |||
# if median.has_edge(i, j): | |||
# median.remove_edge(i, j) | |||
def __update_node_maps(self): | |||
@@ -725,10 +741,12 @@ class MedianGraphEstimator(object): | |||
# Update the node maps. | |||
node_maps_were_modified = False | |||
for graph_id in self.__node_maps_from_median: | |||
for graph_id, node_map in self.__node_maps_from_median.items(): | |||
self.__ged_env.run_method(self.__median_id, graph_id) | |||
if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < self.__ged_env.get_induced_cost(self.__median_id, graph_id) - self.__epsilon: # @todo: see above. | |||
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id) # @todo: node_map may not assigned. | |||
if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < node_map.induced_cost() - self.__epsilon: | |||
# xxx = self.__node_maps_from_median[graph_id] | |||
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id) | |||
# yyy = self.__node_maps_from_median[graph_id] | |||
node_maps_were_modified = True | |||
# Print information about current iteration. | |||
if self.__print_to_stdout == 2: | |||
@@ -748,13 +766,13 @@ class MedianGraphEstimator(object): | |||
print('Trying to decrease order: ... ', end='') | |||
# Initialize ID of the node that is to be deleted. | |||
id_deleted_node = None # @todo: or np.inf | |||
id_deleted_node = [None] # @todo: or np.inf | |||
decreased_order = False | |||
# Decrease the order as long as the best deletion delta is negative. | |||
while self.__compute_best_deletion_delta(graphs, median, [id_deleted_node]) < -self.__epsilon: | |||
while self.__compute_best_deletion_delta(graphs, median, id_deleted_node) < -self.__epsilon: # @todo | |||
decreased_order = True | |||
self.__delete_node_from_median(id_deleted_node, median) | |||
self.__delete_node_from_median(id_deleted_node[0], median) | |||
# Print information about current iteration. | |||
if self.__print_to_stdout == 2: | |||
@@ -777,7 +795,7 @@ class MedianGraphEstimator(object): | |||
delta -= self.__node_del_cost | |||
else: | |||
delta += self.__node_ins_cost - self.__ged_env.get_node_rel_cost(median.nodes[i], graph.nodes[k]) | |||
for j, j_label in median[i]: | |||
for j, j_label in median[i].items(): | |||
l = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], j) | |||
if k == np.inf or l == np.inf: | |||
delta -= self.__edge_del_cost | |||
@@ -790,32 +808,34 @@ class MedianGraphEstimator(object): | |||
if delta < best_delta - self.__epsilon: | |||
best_delta = delta | |||
id_deleted_node[0] = i | |||
id_deleted_node[0] = i # @todo: | |||
return best_delta | |||
def __delete_node_from_median(self, id_deleted_node, median): | |||
# Update the nodes of the median. | |||
median.remove_node(id_deleted_node) # @todo: test if it is right. | |||
def __delete_node_from_median(self, id_deleted_node, median): # @todo: update env.node_map? | |||
# Update the median. | |||
median.remove_node(id_deleted_node) | |||
# Update the node maps. | |||
for _, node_map in self.__node_maps_from_median.items(): | |||
new_node_map = {nx.number_of_nodes(median): ''} # @todo | |||
is_unassigned_target_node = ['', True] | |||
new_node_map = [] # @todo | |||
is_unassigned_target_node = [True] * len(node_map) | |||
for i in range(0, nx.number_of_nodes(median)): | |||
if i != id_deleted_node: | |||
new_i = (i if i < id_deleted_node else i - 1) | |||
k = self.__get_node_image_from_map(node_map, i) | |||
new_node_map["ds"] # @todo | |||
new_node_map.append((new_i, k)) # @todo | |||
if k != np.inf: | |||
is_unassigned_target_node[k] = False | |||
for k in range(0, ''): | |||
for k in range(0, len(node_map)): | |||
if is_unassigned_target_node[k]: | |||
new_node_map.sdf[] | |||
node_map = new_node_map | |||
new_node_map.append(np.inf, k) | |||
node_map = new_node_map # @todo | |||
# Increase overall number of decreases. | |||
self.__num_decrease_order += 1 | |||
def __improve_sum_of_distances(self, timer): | |||
pass | |||
@@ -825,37 +845,37 @@ class MedianGraphEstimator(object): | |||
return self.__median_id != np.inf | |||
def __get_node_image_from_map(self, node_map, node): | |||
""" | |||
Return ID of the node mapping of `node` in `node_map`. | |||
# def __get_node_image_from_map(self, node_map, node): | |||
# """ | |||
# Return ID of the node mapping of `node` in `node_map`. | |||
Parameters | |||
---------- | |||
node_map : list[tuple(int, int)] | |||
List of node maps where the mapping node is found. | |||
node : int | |||
The mapping node of this node is returned | |||
# Parameters | |||
# ---------- | |||
# node_map : list[tuple(int, int)] | |||
# List of node maps where the mapping node is found. | |||
# | |||
# node : int | |||
# The mapping node of this node is returned | |||
Raises | |||
------ | |||
Exception | |||
If the node with ID `node` is not contained in the source nodes of the node map. | |||
# Raises | |||
# ------ | |||
# Exception | |||
# If the node with ID `node` is not contained in the source nodes of the node map. | |||
Returns | |||
------- | |||
int | |||
ID of the mapping of `node`. | |||
Notes | |||
----- | |||
This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function. | |||
""" | |||
if node < len(node_map): | |||
return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf | |||
else: | |||
raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.') | |||
return np.inf | |||
# Returns | |||
# ------- | |||
# int | |||
# ID of the mapping of `node`. | |||
# | |||
# Notes | |||
# ----- | |||
# This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function. | |||
# """ | |||
# if node < len(node_map): | |||
# return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf | |||
# else: | |||
# raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.') | |||
# return np.inf | |||
def __are_graphs_equal(self, g1, g2): | |||
@@ -958,9 +978,9 @@ class MedianGraphEstimator(object): | |||
for label in labels: | |||
coords = {} | |||
for key, val in label.items(): | |||
label = float(val) | |||
sums[key] += label | |||
coords[key] = label | |||
label_f = float(val) | |||
sums[key] += label_f | |||
coords[key] = label_f | |||
labels_as_coords.append(coords) | |||
median = {} | |||
for key, val in sums.items(): | |||
@@ -980,7 +1000,7 @@ class MedianGraphEstimator(object): | |||
norm = 0 | |||
for key, val in label_as_coord.items(): | |||
norm += (val - median[key]) ** 2 | |||
norm += np.sqrt(norm) | |||
norm = np.sqrt(norm) | |||
if norm > 0: | |||
for key, val in label_as_coord.items(): | |||
numerator[key] += val / norm | |||
@@ -1005,64 +1025,64 @@ class MedianGraphEstimator(object): | |||
return median_label | |||
# def __get_median_edge_label_symbolic(self, edge_labels): | |||
# pass | |||
# def __get_median_edge_label_symbolic(self, edge_labels): | |||
# pass | |||
# def __get_median_edge_label_nonsymbolic(self, edge_labels): | |||
# if len(edge_labels) == 0: | |||
# return {} | |||
# else: | |||
# # Transform the labels into coordinates and compute mean label as initial solution. | |||
# edge_labels_as_coords = [] | |||
# sums = {} | |||
# for key, val in edge_labels[0].items(): | |||
# sums[key] = 0 | |||
# for edge_label in edge_labels: | |||
# coords = {} | |||
# for key, val in edge_label.items(): | |||
# label = float(val) | |||
# sums[key] += label | |||
# coords[key] = label | |||
# edge_labels_as_coords.append(coords) | |||
# median = {} | |||
# for key, val in sums.items(): | |||
# median[key] = val / len(edge_labels) | |||
# | |||
# # Run main loop of Weiszfeld's Algorithm. | |||
# epsilon = 0.0001 | |||
# delta = 1.0 | |||
# num_itrs = 0 | |||
# all_equal = False | |||
# while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)): | |||
# numerator = {} | |||
# for key, val in sums.items(): | |||
# numerator[key] = 0 | |||
# denominator = 0 | |||
# for edge_label_as_coord in edge_labels_as_coords: | |||
# norm = 0 | |||
# for key, val in edge_label_as_coord.items(): | |||
# norm += (val - median[key]) ** 2 | |||
# norm += np.sqrt(norm) | |||
# if norm > 0: | |||
# for key, val in edge_label_as_coord.items(): | |||
# numerator[key] += val / norm | |||
# denominator += 1.0 / norm | |||
# if denominator == 0: | |||
# all_equal = True | |||
# else: | |||
# new_median = {} | |||
# delta = 0.0 | |||
# for key, val in numerator.items(): | |||
# this_median = val / denominator | |||
# new_median[key] = this_median | |||
# delta += np.abs(median[key] - this_median) | |||
# median = new_median | |||
# | |||
# num_itrs += 1 | |||
# | |||
# # Transform the solution to ged::GXLLabel and return it. | |||
# median_label = {} | |||
# for key, val in median.items(): | |||
# median_label[key] = str(val) | |||
# return median_label | |||
# def __get_median_edge_label_nonsymbolic(self, edge_labels): | |||
# if len(edge_labels) == 0: | |||
# return {} | |||
# else: | |||
# # Transform the labels into coordinates and compute mean label as initial solution. | |||
# edge_labels_as_coords = [] | |||
# sums = {} | |||
# for key, val in edge_labels[0].items(): | |||
# sums[key] = 0 | |||
# for edge_label in edge_labels: | |||
# coords = {} | |||
# for key, val in edge_label.items(): | |||
# label = float(val) | |||
# sums[key] += label | |||
# coords[key] = label | |||
# edge_labels_as_coords.append(coords) | |||
# median = {} | |||
# for key, val in sums.items(): | |||
# median[key] = val / len(edge_labels) | |||
# | |||
# # Run main loop of Weiszfeld's Algorithm. | |||
# epsilon = 0.0001 | |||
# delta = 1.0 | |||
# num_itrs = 0 | |||
# all_equal = False | |||
# while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)): | |||
# numerator = {} | |||
# for key, val in sums.items(): | |||
# numerator[key] = 0 | |||
# denominator = 0 | |||
# for edge_label_as_coord in edge_labels_as_coords: | |||
# norm = 0 | |||
# for key, val in edge_label_as_coord.items(): | |||
# norm += (val - median[key]) ** 2 | |||
# norm += np.sqrt(norm) | |||
# if norm > 0: | |||
# for key, val in edge_label_as_coord.items(): | |||
# numerator[key] += val / norm | |||
# denominator += 1.0 / norm | |||
# if denominator == 0: | |||
# all_equal = True | |||
# else: | |||
# new_median = {} | |||
# delta = 0.0 | |||
# for key, val in numerator.items(): | |||
# this_median = val / denominator | |||
# new_median[key] = this_median | |||
# delta += np.abs(median[key] - this_median) | |||
# median = new_median | |||
# | |||
# num_itrs += 1 | |||
# | |||
# # Transform the solution to ged::GXLLabel and return it. | |||
# median_label = {} | |||
# for key, val in median.items(): | |||
# median_label[key] = str(val) | |||
# return median_label |
@@ -7,11 +7,10 @@ Created on Mon Mar 16 17:26:40 2020 | |||
""" | |||
def test_median_graph_estimator(): | |||
from gklearn.utils.graphfiles import loadDataset | |||
from gklearn.utils import load_dataset | |||
from gklearn.ged.median import MedianGraphEstimator, constant_node_costs | |||
from gklearn.gedlib import librariesImport, gedlibpy | |||
from gklearn.preimage.utils import get_same_item_indices | |||
from gklearn.preimage.ged import convertGraph | |||
import multiprocessing | |||
# estimator parameters. | |||
@@ -22,17 +21,20 @@ def test_median_graph_estimator(): | |||
# algorithm parameters. | |||
algo = 'IPFP' | |||
initial_solutions = 40 | |||
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1' | |||
initial_solutions = 1 | |||
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE ' | |||
edit_cost_name = 'LETTER2' | |||
# edit_cost_name = 'CONSTANT' | |||
edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001] | |||
# edit_cost_constants = [4, 4, 2, 1, 1, 1] | |||
ds_name = 'COIL-DEL' | |||
# Load dataset. | |||
# dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt' | |||
dataset = '../../../datasets/Letter-high/Letter-high_A.txt' | |||
Gn, y_all = loadDataset(dataset) | |||
# dataset = '../../../datasets/MUTAG/MUTAG_A.txt' | |||
Gn, y_all, _ = load_dataset(dataset) | |||
y_idx = get_same_item_indices(y_all) | |||
for i, (y, values) in enumerate(y_idx.items()): | |||
Gn_i = [Gn[val] for val in values] | |||
@@ -43,7 +45,7 @@ def test_median_graph_estimator(): | |||
# gedlibpy.restart_env() | |||
ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants) | |||
for G in Gn_i: | |||
ged_env.add_nx_graph(convertGraph(G, edit_cost_name), '') | |||
ged_env.add_nx_graph(G, '') | |||
graph_ids = ged_env.get_all_graph_ids() | |||
set_median_id = ged_env.add_graph('set_median') | |||
gen_median_id = ged_env.add_graph('gen_median') | |||
@@ -54,11 +56,13 @@ def test_median_graph_estimator(): | |||
mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1') | |||
mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type | |||
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --refine FALSE'# @todo: std::to_string(rng()) | |||
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order FALSE --refine FALSE'# @todo: std::to_string(rng()) | |||
# Select the GED algorithm. | |||
algo_options = '--threads ' + str(threads) + algo_options_suffix | |||
mge.set_options(mge_options) | |||
mge.set_label_names(node_labels=[], edge_labels=[], | |||
node_attrs=['x', 'y'], edge_attrs=[]) | |||
mge.set_init_method(algo, algo_options) | |||
mge.set_descent_method(algo, algo_options) | |||
@@ -30,6 +30,8 @@ def mge_options_to_string(options): | |||
opt_str += '--randomness ' + str(val) + ' ' | |||
elif key == 'verbose': | |||
opt_str += '--stdout ' + str(val) + ' ' | |||
elif key == 'update_order': | |||
opt_str += '--update-order ' + ('TRUE' if val else 'FALSE') + ' ' | |||
elif key == 'refine': | |||
opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' ' | |||
elif key == 'time_limit': | |||
@@ -35,8 +35,8 @@ from libcpp.pair cimport pair | |||
from libcpp.list cimport list | |||
#Long unsigned int equivalent | |||
cimport numpy as np | |||
ctypedef np.npy_uint32 UINT32_t | |||
cimport numpy as cnp | |||
ctypedef cnp.npy_uint32 UINT32_t | |||
from cpython cimport array | |||
@@ -76,14 +76,14 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged": | |||
void runMethod(size_t g, size_t h) except + | |||
double getUpperBound(size_t g, size_t h) except + | |||
double getLowerBound(size_t g, size_t h) except + | |||
vector[np.npy_uint64] getForwardMap(size_t g, size_t h) except + | |||
vector[np.npy_uint64] getBackwardMap(size_t g, size_t h) except + | |||
vector[cnp.npy_uint64] getForwardMap(size_t g, size_t h) except + | |||
vector[cnp.npy_uint64] getBackwardMap(size_t g, size_t h) except + | |||
size_t getNodeImage(size_t g, size_t h, size_t nodeId) except + | |||
size_t getNodePreImage(size_t g, size_t h, size_t nodeId) except + | |||
double getInducedCost(size_t g, size_t h) except + | |||
vector[pair[size_t,size_t]] getNodeMap(size_t g, size_t h) except + | |||
vector[vector[int]] getAssignmentMatrix(size_t g, size_t h) except + | |||
vector[vector[np.npy_uint64]] getAllMap(size_t g, size_t h) except + | |||
vector[vector[cnp.npy_uint64]] getAllMap(size_t g, size_t h) except + | |||
double getRuntime(size_t g, size_t h) except + | |||
bool quasimetricCosts() except + | |||
vector[vector[size_t]] hungarianLSAP(vector[vector[size_t]] matrixCost) except + | |||
@@ -105,14 +105,16 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged": | |||
map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except + | |||
string getInitType() except + | |||
# double getNodeCost(size_t label1, size_t label2) except + | |||
void computeInducedCost(size_t g_id, size_t h_id) except + | |||
double computeInducedCost(size_t g_id, size_t h_id) except + | |||
############################# | |||
##CYTHON WRAPPER INTERFACES## | |||
############################# | |||
import numpy as np | |||
import networkx as nx | |||
from gklearn.ged.env import NodeMap | |||
# import librariesImport | |||
from ctypes import * | |||
@@ -726,13 +728,30 @@ cdef class GEDEnv: | |||
:type g: size_t | |||
:type h: size_t | |||
:return: The Node Map between the two selected graph. | |||
:rtype: list[tuple(size_t, size_t)] | |||
:rtype: gklearn.ged.env.NodeMap. | |||
.. seealso:: run_method(), get_forward_map(), get_backward_map(), get_node_image(), get_node_pre_image(), get_assignment_matrix() | |||
.. warning:: run_method() between the same two graph must be called before this function. | |||
.. note:: This function creates datas so use it if necessary, however you can understand how assignement works with this example. | |||
""" | |||
return self.c_env.getNodeMap(g, h) | |||
map_as_relation = self.c_env.getNodeMap(g, h) | |||
induced_cost = self.c_env.getInducedCost(g, h) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary. | |||
source_map = [item.first if item.first < len(map_as_relation) else np.inf for item in map_as_relation] # item.first < len(map_as_relation) is not exactly correct. | |||
# print(source_map) | |||
target_map = [item.second if item.second < len(map_as_relation) else np.inf for item in map_as_relation] | |||
# print(target_map) | |||
num_node_source = len([item for item in source_map if item != np.inf]) | |||
# print(num_node_source) | |||
num_node_target = len([item for item in target_map if item != np.inf]) | |||
# print(num_node_target) | |||
node_map = NodeMap(num_node_source, num_node_target) | |||
# print(node_map.get_forward_map(), node_map.get_backward_map()) | |||
for i in range(len(source_map)): | |||
node_map.add_assignment(source_map[i], target_map[i]) | |||
node_map.set_induced_cost(induced_cost) | |||
return node_map | |||
def get_assignment_matrix(self, g, h) : | |||
@@ -1320,7 +1339,7 @@ cdef class GEDEnv: | |||
return graph_id | |||
def compute_induced_cost(self, g_id, h_id): | |||
def compute_induced_cost(self, g_id, h_id, node_map): | |||
""" | |||
Computes the edit cost between two graphs induced by a node map. | |||
@@ -1330,19 +1349,15 @@ cdef class GEDEnv: | |||
ID of input graph. | |||
h_id : int | |||
ID of input graph. | |||
node_map: gklearn.ged.env.NodeMap. | |||
The NodeMap instance whose reduced cost will be computed and re-assigned. | |||
Returns | |||
------- | |||
None. | |||
Notes | |||
----- | |||
The induced edit cost of the node map between `g_id` and `h_id` is implictly computed and stored in `GEDEnv::node_maps_`. | |||
None. | |||
""" | |||
cost = 0.0 | |||
self.c_env.computeInducedCost(g_id, h_id) | |||
induced_cost = self.c_env.computeInducedCost(g_id, h_id) | |||
node_map.set_induced_cost(induced_cost) | |||
##################################################################### | |||
@@ -475,8 +475,9 @@ public: | |||
* @brief Computes the edit cost between two graphs induced by a node map. | |||
* @param[in] g_id ID of input graph. | |||
* @param[in] h_id ID of input graph. | |||
* @return Computed induced cost. | |||
*/ | |||
void computeInducedCost(std::size_t g_id, std::size_t h_id) const; | |||
double computeInducedCost(std::size_t g_id, std::size_t h_id) const; | |||
// /*! | |||
// * @brief Returns node relabeling, insertion, or deletion cost. | |||
@@ -492,7 +493,7 @@ public: | |||
private: | |||
ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> env; // environment variable | |||
ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> * env_; // environment variable | |||
bool initialized; // initialization boolean (because env has one but not accessible) | |||
@@ -277,11 +277,16 @@ std::string toStringVectorInt(std::vector<unsigned long int> vector) { | |||
PyGEDEnv::PyGEDEnv () { | |||
this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
this->initialized = false; | |||
} | |||
PyGEDEnv::~PyGEDEnv () {} | |||
PyGEDEnv::~PyGEDEnv () { | |||
if (env_ != NULL) { | |||
delete env_; | |||
env_ = NULL; | |||
} | |||
} | |||
// bool initialized = false; //Initialization boolean (because Env has one but not accessible). | |||
@@ -290,64 +295,68 @@ bool PyGEDEnv::isInitialized() { | |||
} | |||
void PyGEDEnv::restartEnv() { | |||
this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
if (env_ != NULL) { | |||
delete env_; | |||
env_ = NULL; | |||
} | |||
env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>(); | |||
initialized = false; | |||
} | |||
void PyGEDEnv::loadGXLGraph(const std::string & pathFolder, const std::string & pathXML, bool node_type, bool edge_type) { | |||
std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(this->env.load_gxl_graph(pathFolder, pathXML, | |||
std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(env_->load_gxl_graph(pathFolder, pathXML, | |||
(node_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | |||
(edge_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED), | |||
std::unordered_set<std::string>(), std::unordered_set<std::string>())); | |||
} | |||
std::pair<std::size_t,std::size_t> PyGEDEnv::getGraphIds() const { | |||
return this->env.graph_ids(); | |||
return env_->graph_ids(); | |||
} | |||
std::vector<std::size_t> PyGEDEnv::getAllGraphIds() { | |||
std::vector<std::size_t> listID; | |||
for (std::size_t i = this->env.graph_ids().first; i != this->env.graph_ids().second; i++) { | |||
for (std::size_t i = env_->graph_ids().first; i != env_->graph_ids().second; i++) { | |||
listID.push_back(i); | |||
} | |||
return listID; | |||
} | |||
const std::string PyGEDEnv::getGraphClass(std::size_t id) const { | |||
return this->env.get_graph_class(id); | |||
return env_->get_graph_class(id); | |||
} | |||
const std::string PyGEDEnv::getGraphName(std::size_t id) const { | |||
return this->env.get_graph_name(id); | |||
return env_->get_graph_name(id); | |||
} | |||
std::size_t PyGEDEnv::addGraph(const std::string & graph_name, const std::string & graph_class) { | |||
ged::GEDGraph::GraphID newId = this->env.add_graph(graph_name, graph_class); | |||
ged::GEDGraph::GraphID newId = env_->add_graph(graph_name, graph_class); | |||
initialized = false; | |||
return std::stoi(std::to_string(newId)); | |||
} | |||
void PyGEDEnv::addNode(std::size_t graphId, const std::string & nodeId, const std::map<std::string, std::string> & nodeLabel) { | |||
this->env.add_node(graphId, nodeId, nodeLabel); | |||
env_->add_node(graphId, nodeId, nodeLabel); | |||
initialized = false; | |||
} | |||
/*void addEdge(std::size_t graphId, ged::GXLNodeID tail, ged::GXLNodeID head, ged::GXLLabel edgeLabel) { | |||
this->env.add_edge(graphId, tail, head, edgeLabel); | |||
env_->add_edge(graphId, tail, head, edgeLabel); | |||
}*/ | |||
void PyGEDEnv::addEdge(std::size_t graphId, const std::string & tail, const std::string & head, const std::map<std::string, std::string> & edgeLabel, bool ignoreDuplicates) { | |||
this->env.add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates); | |||
env_->add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates); | |||
initialized = false; | |||
} | |||
void PyGEDEnv::clearGraph(std::size_t graphId) { | |||
this->env.clear_graph(graphId); | |||
env_->clear_graph(graphId); | |||
initialized = false; | |||
} | |||
ged::ExchangeGraph<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> PyGEDEnv::getGraph(std::size_t graphId) const { | |||
return this->env.get_graph(graphId); | |||
return env_->get_graph(graphId); | |||
} | |||
std::size_t PyGEDEnv::getGraphInternalId(std::size_t graphId) { | |||
@@ -379,71 +388,71 @@ std::vector<std::vector<std::size_t>> PyGEDEnv::getGraphAdjacenceMatrix(std::siz | |||
} | |||
void PyGEDEnv::setEditCost(std::string editCost, std::vector<double> editCostConstants) { | |||
this->env.set_edit_costs(translateEditCost(editCost), editCostConstants); | |||
env_->set_edit_costs(translateEditCost(editCost), editCostConstants); | |||
} | |||
void PyGEDEnv::setPersonalEditCost(std::vector<double> editCostConstants) { | |||
//this->env.set_edit_costs(Your EditCost Class(editCostConstants)); | |||
//env_->set_edit_costs(Your EditCost Class(editCostConstants)); | |||
} | |||
// void PyGEDEnv::initEnv() { | |||
// this->env.init(); | |||
// env_->init(); | |||
// initialized = true; | |||
// } | |||
void PyGEDEnv::initEnv(std::string initOption, bool print_to_stdout) { | |||
this->env.init(translateInitOptions(initOption), print_to_stdout); | |||
env_->init(translateInitOptions(initOption), print_to_stdout); | |||
initialized = true; | |||
} | |||
void PyGEDEnv::setMethod(std::string method, const std::string & options) { | |||
this->env.set_method(translateMethod(method), options); | |||
env_->set_method(translateMethod(method), options); | |||
} | |||
void PyGEDEnv::initMethod() { | |||
this->env.init_method(); | |||
env_->init_method(); | |||
} | |||
double PyGEDEnv::getInitime() const { | |||
return this->env.get_init_time(); | |||
return env_->get_init_time(); | |||
} | |||
void PyGEDEnv::runMethod(std::size_t g, std::size_t h) { | |||
this->env.run_method(g, h); | |||
env_->run_method(g, h); | |||
} | |||
double PyGEDEnv::getUpperBound(std::size_t g, std::size_t h) const { | |||
return this->env.get_upper_bound(g, h); | |||
return env_->get_upper_bound(g, h); | |||
} | |||
double PyGEDEnv::getLowerBound(std::size_t g, std::size_t h) const { | |||
return this->env.get_lower_bound(g, h); | |||
return env_->get_lower_bound(g, h); | |||
} | |||
std::vector<long unsigned int> PyGEDEnv::getForwardMap(std::size_t g, std::size_t h) const { | |||
return this->env.get_node_map(g, h).get_forward_map(); | |||
return env_->get_node_map(g, h).get_forward_map(); | |||
} | |||
std::vector<long unsigned int> PyGEDEnv::getBackwardMap(std::size_t g, std::size_t h) const { | |||
return this->env.get_node_map(g, h).get_backward_map(); | |||
return env_->get_node_map(g, h).get_backward_map(); | |||
} | |||
std::size_t PyGEDEnv::getNodeImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | |||
return this->env.get_node_map(g, h).image(nodeId); | |||
return env_->get_node_map(g, h).image(nodeId); | |||
} | |||
std::size_t PyGEDEnv::getNodePreImage(std::size_t g, std::size_t h, std::size_t nodeId) const { | |||
return this->env.get_node_map(g, h).pre_image(nodeId); | |||
return env_->get_node_map(g, h).pre_image(nodeId); | |||
} | |||
double PyGEDEnv::getInducedCost(std::size_t g, std::size_t h) const { | |||
return this->env.get_node_map(g, h).induced_cost(); | |||
return env_->get_node_map(g, h).induced_cost(); | |||
} | |||
std::vector<pair<std::size_t, std::size_t>> PyGEDEnv::getNodeMap(std::size_t g, std::size_t h) { | |||
std::vector<pair<std::size_t, std::size_t>> res; | |||
std::vector<ged::NodeMap::Assignment> relation; | |||
this->env.get_node_map(g, h).as_relation(relation); | |||
env_->get_node_map(g, h).as_relation(relation); | |||
for (const auto & assignment : relation) { | |||
res.push_back(std::make_pair(assignment.first, assignment.second)); | |||
} | |||
@@ -493,11 +502,11 @@ std::vector<std::vector<unsigned long int>> PyGEDEnv::getAllMap(std::size_t g, s | |||
} | |||
double PyGEDEnv::getRuntime(std::size_t g, std::size_t h) const { | |||
return this->env.get_runtime(g, h); | |||
return env_->get_runtime(g, h); | |||
} | |||
bool PyGEDEnv::quasimetricCosts() const { | |||
return this->env.quasimetric_costs(); | |||
return env_->quasimetric_costs(); | |||
} | |||
std::vector<std::vector<size_t>> PyGEDEnv::hungarianLSAP(std::vector<std::vector<std::size_t>> matrixCost) { | |||
@@ -542,73 +551,74 @@ std::vector<std::vector<double>> PyGEDEnv::hungarianLSAPE(std::vector<std::vecto | |||
} | |||
std::size_t PyGEDEnv::getNumNodeLabels() const { | |||
return this->env.num_node_labels(); | |||
return env_->num_node_labels(); | |||
} | |||
std::map<std::string, std::string> PyGEDEnv::getNodeLabel(std::size_t label_id) const { | |||
return this->env.get_node_label(label_id); | |||
return env_->get_node_label(label_id); | |||
} | |||
std::size_t PyGEDEnv::getNumEdgeLabels() const { | |||
return this->env.num_edge_labels(); | |||
return env_->num_edge_labels(); | |||
} | |||
std::map<std::string, std::string> PyGEDEnv::getEdgeLabel(std::size_t label_id) const { | |||
return this->env.get_edge_label(label_id); | |||
return env_->get_edge_label(label_id); | |||
} | |||
// std::size_t PyGEDEnv::getNumNodes(std::size_t graph_id) const { | |||
// return this->env.get_num_nodes(graph_id); | |||
// return env_->get_num_nodes(graph_id); | |||
// } | |||
double PyGEDEnv::getAvgNumNodes() const { | |||
return this->env.get_avg_num_nodes(); | |||
return env_->get_avg_num_nodes(); | |||
} | |||
double PyGEDEnv::getNodeRelCost(const std::map<std::string, std::string> & node_label_1, const std::map<std::string, std::string> & node_label_2) const { | |||
return this->env.node_rel_cost(node_label_1, node_label_2); | |||
return env_->node_rel_cost(node_label_1, node_label_2); | |||
} | |||
double PyGEDEnv::getNodeDelCost(const std::map<std::string, std::string> & node_label) const { | |||
return this->env.node_del_cost(node_label); | |||
return env_->node_del_cost(node_label); | |||
} | |||
double PyGEDEnv::getNodeInsCost(const std::map<std::string, std::string> & node_label) const { | |||
return this->env.node_ins_cost(node_label); | |||
return env_->node_ins_cost(node_label); | |||
} | |||
std::map<std::string, std::string> PyGEDEnv::getMedianNodeLabel(const std::vector<std::map<std::string, std::string>> & node_labels) const { | |||
return this->env.median_node_label(node_labels); | |||
return env_->median_node_label(node_labels); | |||
} | |||
double PyGEDEnv::getEdgeRelCost(const std::map<std::string, std::string> & edge_label_1, const std::map<std::string, std::string> & edge_label_2) const { | |||
return this->env.edge_rel_cost(edge_label_1, edge_label_2); | |||
return env_->edge_rel_cost(edge_label_1, edge_label_2); | |||
} | |||
double PyGEDEnv::getEdgeDelCost(const std::map<std::string, std::string> & edge_label) const { | |||
return this->env.edge_del_cost(edge_label); | |||
return env_->edge_del_cost(edge_label); | |||
} | |||
double PyGEDEnv::getEdgeInsCost(const std::map<std::string, std::string> & edge_label) const { | |||
return this->env.edge_ins_cost(edge_label); | |||
return env_->edge_ins_cost(edge_label); | |||
} | |||
std::map<std::string, std::string> PyGEDEnv::getMedianEdgeLabel(const std::vector<std::map<std::string, std::string>> & edge_labels) const { | |||
return this->env.median_edge_label(edge_labels); | |||
return env_->median_edge_label(edge_labels); | |||
} | |||
std::string PyGEDEnv::getInitType() const { | |||
return initOptionsToString(this->env.get_init_type()); | |||
return initOptionsToString(env_->get_init_type()); | |||
} | |||
void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||
ged::NodeMap node_map = this->env.get_node_map(g_id, h_id); | |||
this->env.compute_induced_cost(g_id, h_id, node_map); | |||
double PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||
ged::NodeMap node_map = env_->get_node_map(g_id, h_id); | |||
env_->compute_induced_cost(g_id, h_id, node_map); | |||
return node_map.induced_cost(); | |||
} | |||
// double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const { | |||
// return this->env.ged_data_node_cost(label1, label2); | |||
// return env_->ged_data_node_cost(label1, label2); | |||
// } | |||
@@ -630,7 +640,7 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||
/*loadGXLGraph(pathFolder, pathXML); | |||
std::vector<std::size_t> graph_ids = getAllGraphIds(); | |||
std::size_t median_id = this->env.add_graph("median", ""); | |||
std::size_t median_id = env_->add_graph("median", ""); | |||
initEnv(initOption); | |||
@@ -640,10 +650,10 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const { | |||
median_estimator.set_options("--init-type RANDOM --randomness PSEUDO --seed " + seed); | |||
median_estimator.run(graph_ids, median_id); | |||
std::string gxl_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".gxl"); | |||
this->env.save_as_gxl_graph(median_id, gxl_file_name);*/ | |||
env_->save_as_gxl_graph(median_id, gxl_file_name);*/ | |||
/*std::string tikz_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".tex"); | |||
save_letter_graph_as_tikz_file(this->env.get_graph(median_id), tikz_file_name);*/ | |||
save_letter_graph_as_tikz_file(env_->get_graph(median_id), tikz_file_name);*/ | |||
//} | |||
} | |||
@@ -12,6 +12,132 @@ from gklearn.preimage.utils import generate_median_preimages_by_class | |||
from gklearn.utils import compute_gram_matrices_by_class | |||
def xp_median_preimage_13_1():
	"""xp 13_1: PAH, StructuralSP, using NON_SYMBOLIC.
	"""
	# Experiment configuration.
	ds_name = 'PAH'
	mpg_options = dict(
		fit_method='k-graphs',
		init_ecc=[3, 3, 1, 3, 3, 0],
		ds_name=ds_name,
		parallel=True,  # False
		time_limit_in_sec=0,
		max_itrs=100,
		max_itrs_without_update=3,
		epsilon_residual=0.01,
		epsilon_ec=0.1,
		verbose=2,
	)
	mixed_kernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
	sub_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mixed_kernel)
	kernel_options = dict(
		name='StructuralSP',
		edge_weight=None,
		node_kernels=sub_kernels,
		edge_kernels=sub_kernels,
		compute_method='naive',
		parallel='imap_unordered',  # or None
		n_jobs=multiprocessing.cpu_count(),
		normalize=True,
		verbose=2,
	)
	ged_options = dict(
		method='IPFP',
		initialization_method='RANDOM',  # 'NODE'
		initial_solutions=10,  # 1
		edit_cost='NON_SYMBOLIC',
		attr_distance='euclidean',
		ratio_runs_from_initial_solutions=1,
		threads=multiprocessing.cpu_count(),
		init_option='EAGER_WITHOUT_SHUFFLED_COPIES',
	)
	mge_options = dict(
		init_type='MEDOID',
		random_inits=10,
		time_limit=600,
		verbose=2,
		refine=False,
	)
	save_results = True
	dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
	irrelevant_labels = None
	edge_required = False

	# Echo the configuration before the (long-running) computation starts.
	print('parameters:')
	print('dataset name:', ds_name)
	print('mpg_options:', mpg_options)
	print('kernel_options:', kernel_options)
	print('ged_options:', ged_options)
	print('mge_options:', mge_options)
	print('save_results:', save_results)
	print('irrelevant_labels:', irrelevant_labels)
	print()

	# One run fitted on the k graphs, followed by five random baselines.
	fit_methods = ['k-graphs'] + ['random'] * 5
	for fit_method in fit_methods:
		print('\n-------------------------------------')
		print('fit method:', fit_method, '\n')
		mpg_options['fit_method'] = fit_method
		generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
def xp_median_preimage_13_2():
	"""xp 13_2: PAH, ShortestPath, using NON_SYMBOLIC.
	"""
	# Experiment configuration.
	ds_name = 'PAH'
	mpg_options = dict(
		fit_method='k-graphs',
		init_ecc=[3, 3, 1, 3, 3, 0],
		ds_name=ds_name,
		parallel=True,  # False
		time_limit_in_sec=0,
		max_itrs=100,
		max_itrs_without_update=3,
		epsilon_residual=0.01,
		epsilon_ec=0.1,
		verbose=2,
	)
	mixed_kernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
	sub_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mixed_kernel)
	kernel_options = dict(
		name='ShortestPath',
		edge_weight=None,
		node_kernels=sub_kernels,
		parallel='imap_unordered',  # or None
		n_jobs=multiprocessing.cpu_count(),
		normalize=True,
		verbose=2,
	)
	ged_options = dict(
		method='IPFP',
		initialization_method='RANDOM',  # 'NODE'
		initial_solutions=10,  # 1
		edit_cost='NON_SYMBOLIC',
		attr_distance='euclidean',
		ratio_runs_from_initial_solutions=1,
		threads=multiprocessing.cpu_count(),
		init_option='EAGER_WITHOUT_SHUFFLED_COPIES',
	)
	mge_options = dict(
		init_type='MEDOID',
		random_inits=10,
		time_limit=600,
		verbose=2,
		refine=False,
	)
	save_results = True
	dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
	irrelevant_labels = None
	edge_required = True

	# Echo the configuration before the (long-running) computation starts.
	print('parameters:')
	print('dataset name:', ds_name)
	print('mpg_options:', mpg_options)
	print('kernel_options:', kernel_options)
	print('ged_options:', ged_options)
	print('mge_options:', mge_options)
	print('save_results:', save_results)
	print('irrelevant_labels:', irrelevant_labels)
	print()

	# One run fitted on the k graphs, followed by five random baselines.
	fit_methods = ['k-graphs'] + ['random'] * 5
	for fit_method in fit_methods:
		print('\n-------------------------------------')
		print('fit method:', fit_method, '\n')
		mpg_options['fit_method'] = fit_method
		generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
def xp_median_preimage_12_1(): | |||
"""xp 12_1: PAH, StructuralSP, using NON_SYMBOLIC, unlabeled. | |||
""" | |||
@@ -1969,7 +2095,13 @@ if __name__ == "__main__": | |||
# xp_median_preimage_12_4() | |||
#### xp 12_5: PAH, ShortestPath, using NON_SYMBOLIC, unlabeled. | |||
xp_median_preimage_12_5() | |||
# xp_median_preimage_12_5() | |||
#### xp 13_1: PAH, StructuralSP, using NON_SYMBOLIC. | |||
xp_median_preimage_13_1() | |||
#### xp 13_2: PAH, ShortestPath, using NON_SYMBOLIC. | |||
# xp_median_preimage_13_2() | |||
@@ -88,7 +88,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
node_attrs=self._dataset.node_attrs, | |||
edge_attrs=self._dataset.edge_attrs, | |||
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), | |||
**self._kernel_options) | |||
kernel_options=self._kernel_options) | |||
# record start time. | |||
start = time.time() | |||
@@ -25,7 +25,7 @@ import networkx as nx | |||
import os | |||
def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False): | |||
def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None): | |||
import os.path | |||
from gklearn.preimage import MedianPreimageGenerator | |||
from gklearn.utils import split_dataset_by_target | |||
@@ -38,7 +38,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
dataset_all.trim_dataset(edge_required=edge_required) | |||
if irrelevant_labels is not None: | |||
dataset_all.remove_labels(**irrelevant_labels) | |||
# dataset_all.cut_graphs(range(0, 10)) | |||
if cut_range is not None: | |||
dataset_all.cut_graphs(cut_range) | |||
datasets = split_dataset_by_target(dataset_all) | |||
if save_results: | |||
@@ -56,13 +56,14 @@ class Dataset(object): | |||
self.__node_attrs = label_names['node_attrs'] | |||
self.__edge_labels = label_names['edge_labels'] | |||
self.__edge_attrs = label_names['edge_attrs'] | |||
self.clean_labels() | |||
def load_graphs(self, graphs, targets=None): | |||
# this has to be followed by set_labels(). | |||
self.__graphs = graphs | |||
self.__targets = targets | |||
# self.set_labels_attrs() | |||
# self.set_labels_attrs() # @todo | |||
def load_predefined_dataset(self, ds_name): | |||
@@ -128,6 +129,7 @@ class Dataset(object): | |||
self.__node_attrs = label_names['node_attrs'] | |||
self.__edge_labels = label_names['edge_labels'] | |||
self.__edge_attrs = label_names['edge_attrs'] | |||
self.clean_labels() | |||
def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | |||
@@ -141,27 +143,27 @@ class Dataset(object): | |||
# @todo: remove labels which have only one possible values. | |||
if node_labels is None: | |||
self.__node_labels = self.__graphs[0].graph['node_labels'] | |||
# # graphs are considered node unlabeled if all nodes have the same label. | |||
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||
# # graphs are considered node unlabeled if all nodes have the same label. | |||
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||
if node_attrs is None: | |||
self.__node_attrs = self.__graphs[0].graph['node_attrs'] | |||
# for G in Gn: | |||
# for n in G.nodes(data=True): | |||
# if 'attributes' in n[1]: | |||
# return len(n[1]['attributes']) | |||
# return 0 | |||
# for G in Gn: | |||
# for n in G.nodes(data=True): | |||
# if 'attributes' in n[1]: | |||
# return len(n[1]['attributes']) | |||
# return 0 | |||
if edge_labels is None: | |||
self.__edge_labels = self.__graphs[0].graph['edge_labels'] | |||
# # graphs are considered edge unlabeled if all edges have the same label. | |||
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||
# # graphs are considered edge unlabeled if all edges have the same label. | |||
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||
if edge_attrs is None: | |||
self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] | |||
# for G in Gn: | |||
# if nx.number_of_edges(G) > 0: | |||
# for e in G.edges(data=True): | |||
# if 'attributes' in e[2]: | |||
# return len(e[2]['attributes']) | |||
# return 0 | |||
# for G in Gn: | |||
# if nx.number_of_edges(G) > 0: | |||
# for e in G.edges(data=True): | |||
# if 'attributes' in e[2]: | |||
# return len(e[2]['attributes']) | |||
# return 0 | |||
def get_dataset_infos(self, keys=None): | |||
@@ -326,7 +328,7 @@ class Dataset(object): | |||
if self.__node_label_nums is None: | |||
self.__node_label_nums = {} | |||
for node_label in self.__node_labels: | |||
self.__node_label_nums[node_label] = self.get_node_label_num(node_label) | |||
self.__node_label_nums[node_label] = self.__get_node_label_num(node_label) | |||
infos['node_label_nums'] = self.__node_label_nums | |||
if 'edge_label_dim' in keys: | |||
@@ -338,7 +340,7 @@ class Dataset(object): | |||
if self.__edge_label_nums is None: | |||
self.__edge_label_nums = {} | |||
for edge_label in self.__edge_labels: | |||
self.__edge_label_nums[edge_label] = self.get_edge_label_num(edge_label) | |||
self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label) | |||
infos['edge_label_nums'] = self.__edge_label_nums | |||
if 'directed' in keys or 'substructures' in keys: | |||
@@ -417,30 +419,87 @@ class Dataset(object): | |||
for g in self.__graphs: | |||
for nd in g.nodes(): | |||
for nl in node_labels: | |||
del g.nodes[nd][nl] | |||
del g.nodes[nd][nl] | |||
for na in node_attrs: | |||
del g.nodes[nd][na] | |||
for ed in g.edges(): | |||
for el in edge_labels: | |||
del g.edges[ed][el] | |||
del g.edges[ed][el] | |||
for ea in edge_attrs: | |||
del g.edges[ed][ea] | |||
del g.edges[ed][ea] | |||
if len(node_labels) > 0: | |||
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||
if len(edge_labels) > 0: | |||
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||
if len(node_attrs) > 0: | |||
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||
if len(edge_attrs) > 0: | |||
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||
def clean_labels(self):
	"""Drop uninformative node/edge label and attribute names.

	A name is informative only if it takes at least two distinct values
	over all graphs in the dataset. Uninformative names are removed from
	every node/edge that carries them and dropped from the corresponding
	name list (``__node_labels``, ``__edge_labels``, ``__node_attrs``,
	``__edge_attrs``).

	The original implementation repeated the same scan-and-strip logic
	four times; it is factored into ``__clean_label_names``.
	"""
	self.__node_labels = self.__clean_label_names(
		self.__node_labels, nx.get_node_attributes, lambda g: g.nodes)
	self.__edge_labels = self.__clean_label_names(
		self.__edge_labels, nx.get_edge_attributes, lambda g: g.edges)
	self.__node_attrs = self.__clean_label_names(
		self.__node_attrs, nx.get_node_attributes, lambda g: g.nodes)
	self.__edge_attrs = self.__clean_label_names(
		self.__edge_attrs, nx.get_edge_attributes, lambda g: g.edges)

def __clean_label_names(self, names, get_attributes, get_view):
	"""Return the informative subset of ``names``; strip the rest in place.

	Parameters
	----------
	names : list of str
		Candidate label/attribute names.
	get_attributes : callable
		``nx.get_node_attributes`` or ``nx.get_edge_attributes``.
	get_view : callable
		Maps a graph to its node or edge view, used to delete an
		uninformative name from every element that carries it.

	Returns
	-------
	list of str
		Names with at least two distinct values, in their original order.
	"""
	kept = []
	for name in names:
		values = set()
		for G in self.__graphs:
			values |= set(get_attributes(G, name).values())
			if len(values) > 1:
				# At least two distinct values: the name is informative.
				kept.append(name)
				break
		if len(values) < 2:
			# Constant (or absent) over the whole dataset: remove it
			# from every node/edge, matching the original behavior.
			for G in self.__graphs:
				view = get_view(G)
				for key in view:
					del view[key][name]
	return kept
def cut_graphs(self, range_):
	"""Restrict the dataset to the graphs at the indices in ``range_``.

	Targets are filtered with the same indices when present; label name
	lists are then re-cleaned, since some names may have become
	uninformative on the reduced graph set.
	"""
	graphs = self.__graphs
	self.__graphs = [graphs[idx] for idx in range_]
	targets = self.__targets
	if targets is not None:
		self.__targets = [targets[idx] for idx in range_]
	self.clean_labels()
def trim_dataset(self, edge_required=False): | |||
@@ -451,8 +510,7 @@ class Dataset(object): | |||
idx = [p[0] for p in trimed_pairs] | |||
self.__graphs = [p[1] for p in trimed_pairs] | |||
self.__targets = [self.__targets[i] for i in idx] | |||
# @todo | |||
# self.set_labels_attrs() | |||
self.clean_labels() | |||
def __get_dataset_size(self): | |||
@@ -655,4 +713,5 @@ def split_dataset_by_target(dataset): | |||
sub_dataset.load_graphs(sub_graphs, [key] * len(val)) | |||
sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs) | |||
datasets.append(sub_dataset) | |||
# @todo: clean_labels? | |||
return datasets |