Browse Source

Fix bugs in MedianGraphEstimator related to __node_maps_from_median and the updating of nodes/edges.

v0.2.x
jajupmochi 5 years ago
parent
commit
25aa64a7dc
15 changed files with 2811 additions and 1972 deletions
  1. +0
    -4
      .appveyor.yml
  2. +2
    -1
      gklearn/ged/env/__init__.py
  3. +68
    -0
      gklearn/ged/env/node_map.py
  4. +170
    -150
      gklearn/ged/median/median_graph_estimator.py
  5. +11
    -7
      gklearn/ged/median/test_median_graph_estimator.py
  6. +2
    -0
      gklearn/ged/median/utils.py
  7. +2232
    -1702
      gklearn/gedlib/gedlibpy.cpp
  8. BIN
      gklearn/gedlib/gedlibpy.cpython-36m-x86_64-linux-gnu.so
  9. +33
    -18
      gklearn/gedlib/gedlibpy.pyx
  10. +3
    -2
      gklearn/gedlib/src/GedLibBind.hpp
  11. +64
    -54
      gklearn/gedlib/src/GedLibBind.ipp
  12. +133
    -1
      gklearn/preimage/experiments/xp_median_preimage.py
  13. +1
    -1
      gklearn/preimage/median_preimage_generator.py
  14. +3
    -2
      gklearn/preimage/utils.py
  15. +89
    -30
      gklearn/utils/dataset.py

+ 0
- 4
.appveyor.yml View File

@@ -1,9 +1,5 @@
environment:
matrix:
- PYTHON: "C:\\Python33"
- PYTHON: "C:\\Python33-x64"
- PYTHON: "C:\\Python34"
- PYTHON: "C:\\Python34-x64"
- PYTHON: "C:\\Python35"
- PYTHON: "C:\\Python35-x64"
- PYTHON: "C:\\Python36"


+ 2
- 1
gklearn/ged/env/__init__.py View File

@@ -1 +1,2 @@
from gklearn.ged.env.common_types import AlgorithmState
from gklearn.ged.env.common_types import AlgorithmState
from gklearn.ged.env.node_map import NodeMap

+ 68
- 0
gklearn/ged/env/node_map.py View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 22 11:31:26 2020

@author: ljia
"""
import numpy as np

class NodeMap(object):
    """A node map (node-to-node assignment) between a source and a target graph.

    For each source node the map stores the ID of the target node it is
    substituted by, and for each target node the ID of the source node mapped
    to it. The value ``np.inf`` marks an unassigned node (a deletion on the
    source side, an insertion on the target side). The edit cost induced by
    the map is stored alongside it.

    Python port of the ``ged::NodeMap`` class of the GEDLIB library.
    """

    def __init__(self, num_nodes_g, num_nodes_h):
        """Initialize an empty node map.

        Parameters
        ----------
        num_nodes_g : int
            Number of nodes in the source graph.
        num_nodes_h : int
            Number of nodes in the target graph.
        """
        # np.inf means "not assigned yet"; assignments are filled in later
        # via add_assignment().
        self.__forward_map = [np.inf] * num_nodes_g
        self.__backward_map = [np.inf] * num_nodes_h
        self.__induced_cost = np.inf

    def num_source_nodes(self):
        """Return the number of source nodes covered by this node map."""
        return len(self.__forward_map)

    def num_target_nodes(self):
        """Return the number of target nodes covered by this node map."""
        return len(self.__backward_map)

    def image(self, node):
        """Return the ID of the target node that `node` is mapped to.

        Parameters
        ----------
        node : int
            ID of a source node.

        Returns
        -------
        int or float
            ID of the target node `node` is mapped to, or ``np.inf`` if
            `node` is deleted (unassigned).

        Raises
        ------
        Exception
            If `node` is not contained in the source nodes of the node map.
        """
        if node < len(self.__forward_map):
            return self.__forward_map[node]
        # Out-of-range query is an error; no fallback value is returned.
        raise Exception('The node with ID ' + str(node) + ' is not contained in the source nodes of the node map.')

    def pre_image(self, node):
        """Return the ID of the source node that is mapped to `node`.

        Parameters
        ----------
        node : int
            ID of a target node.

        Returns
        -------
        int or float
            ID of the source node mapped to `node`, or ``np.inf`` if `node`
            is inserted (unassigned).

        Raises
        ------
        Exception
            If `node` is not contained in the target nodes of the node map.
        """
        if node < len(self.__backward_map):
            return self.__backward_map[node]
        raise Exception('The node with ID ' + str(node) + ' is not contained in the target nodes of the node map.')

    def get_forward_map(self):
        """Return the forward map (list indexed by source node ID)."""
        return self.__forward_map

    def get_backward_map(self):
        """Return the backward map (list indexed by target node ID)."""
        return self.__backward_map

    def add_assignment(self, i, k):
        """Add the assignment of source node `i` to target node `k`.

        Either side may be ``np.inf``: ``add_assignment(i, np.inf)`` records
        the deletion of source node `i`, ``add_assignment(np.inf, k)`` records
        the insertion of target node `k`.

        Raises
        ------
        Exception
            If a non-inf `i` (resp. `k`) is not contained in the source
            (resp. target) nodes of the node map.
        """
        if i != np.inf:
            if i < len(self.__forward_map):
                self.__forward_map[i] = k
            else:
                raise Exception('The node with ID ' + str(i) + ' is not contained in the source nodes of the node map.')
        if k != np.inf:
            if k < len(self.__backward_map):
                self.__backward_map[k] = i
            else:
                raise Exception('The node with ID ' + str(k) + ' is not contained in the target nodes of the node map.')

    def set_induced_cost(self, induced_cost):
        """Set the edit cost induced by this node map."""
        self.__induced_cost = induced_cost

    def induced_cost(self):
        """Return the edit cost induced by this node map (np.inf if unset)."""
        return self.__induced_cost

+ 170
- 150
gklearn/ged/median/median_graph_estimator.py View File

@@ -47,6 +47,7 @@ class MedianGraphEstimator(object):
self.__desired_num_random_inits = 10
self.__use_real_randomness = True
self.__seed = 0
self.__update_order = True
self.__refine = True
self.__time_limit_in_sec = 0
self.__epsilon = 0.0001
@@ -126,6 +127,16 @@ class MedianGraphEstimator(object):
else:
raise Exception('Invalid argument "' + opt_val + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"')
elif opt_name == 'update-order':
if opt_val == 'TRUE':
self.__update_order = True
elif opt_val == 'FALSE':
self.__update_order = False
else:
raise Exception('Invalid argument "' + opt_val + '" for option update-order. Usage: options = "[--update-order TRUE|FALSE] [...]"')
elif opt_name == 'refine':
if opt_val == 'TRUE':
self.__refine = True
@@ -298,11 +309,11 @@ class MedianGraphEstimator(object):
for graph_id in graph_ids:
# @todo: get_nx_graph() function may need to be modified according to the coming code.
graphs[graph_id] = self.__ged_env.get_nx_graph(graph_id, True, True, False)
# print(self.__ged_env.get_graph_internal_id(0))
# print(graphs[0].graph)
# print(graphs[0].nodes(data=True))
# print(graphs[0].edges(data=True))
# print(nx.adjacency_matrix(graphs[0]))
# print(self.__ged_env.get_graph_internal_id(0))
# print(graphs[0].graph)
# print(graphs[0].nodes(data=True))
# print(graphs[0].edges(data=True))
# print(nx.adjacency_matrix(graphs[0]))

# Construct initial medians.
@@ -310,10 +321,10 @@ class MedianGraphEstimator(object):
self.__construct_initial_medians(graph_ids, timer, medians)
end_init = time.time()
self.__runtime_initialized = end_init - start
# print(medians[0].graph)
# print(medians[0].nodes(data=True))
# print(medians[0].edges(data=True))
# print(nx.adjacency_matrix(medians[0]))
# print(medians[0].graph)
# print(medians[0].nodes(data=True))
# print(medians[0].edges(data=True))
# print(nx.adjacency_matrix(medians[0]))
# Reset information about iterations and number of times the median decreases and increases.
self.__itrs = [0] * len(medians)
@@ -353,12 +364,12 @@ class MedianGraphEstimator(object):
# Compute node maps and sum of distances for initial median.
self.__sum_of_distances = 0
self.__node_maps_from_median.clear() # @todo
self.__node_maps_from_median.clear()
for graph_id in graph_ids:
self.__ged_env.run_method(gen_median_id, graph_id)
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id)
# print(self.__node_maps_from_median[graph_id])
self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: the C++ implementation of this function in GedLibBind.ipp re-calls get_node_map() once more; this is not necessary.
self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost()
# print(self.__sum_of_distances)
# Print information about current iteration.
if self.__print_to_stdout == 2:
@@ -366,7 +377,7 @@ class MedianGraphEstimator(object):
self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances)
self.__ged_env.load_nx_graph(median, set_median_id)
# print(self.__best_init_sum_of_distances)
print(self.__best_init_sum_of_distances)
# Print information about current iteration.
if self.__print_to_stdout == 2:
@@ -391,10 +402,11 @@ class MedianGraphEstimator(object):
# Update the median. # @todo!!!!!!!!!!!!!!!!!!!!!!
median_modified = self.__update_median(graphs, median)
if not median_modified or self.__itrs[median_pos] == 0:
decreased_order = self.__decrease_order(graphs, median)
if not decreased_order or self.__itrs[median_pos] == 0:
increased_order = False
if self.__update_order:
if not median_modified or self.__itrs[median_pos] == 0:
decreased_order = self.__decrease_order(graphs, median)
if not decreased_order or self.__itrs[median_pos] == 0:
increased_order = False
# Update the number of iterations without update of the median.
if median_modified or decreased_order or increased_order:
@@ -421,11 +433,11 @@ class MedianGraphEstimator(object):

# Compute induced costs of the old node maps w.r.t. the updated median.
for graph_id in graph_ids:
# print(self.__ged_env.get_induced_cost(gen_median_id, graph_id))
# @todo: watch out if compute_induced_cost is correct, this may influence: increase/decrease order, induced_cost() in the following code.!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
self.__ged_env.compute_induced_cost(gen_median_id, graph_id)
# print('---------------------------------------')
# print(self.__ged_env.get_induced_cost(gen_median_id, graph_id))
# print(self.__node_maps_from_median[graph_id].induced_cost())
self.__ged_env.compute_induced_cost(gen_median_id, graph_id, self.__node_maps_from_median[graph_id])
# print('---------------------------------------')
# print(self.__node_maps_from_median[graph_id].induced_cost())
# @todo:!!!!!!!!!!!!!!!!!!!!!!!!!!!!This value is a slight different from the c++ program, which might be a bug! Use it very carefully!
# Print information about current iteration.
if self.__print_to_stdout == 2:
@@ -439,8 +451,9 @@ class MedianGraphEstimator(object):
# Update the sum of distances.
old_sum_of_distances = self.__sum_of_distances
self.__sum_of_distances = 0
for graph_id in self.__node_maps_from_median:
self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: see above.
for graph_id, node_map in self.__node_maps_from_median.items():
self.__sum_of_distances += node_map.induced_cost()
# print(self.__sum_of_distances)
# Print information about current iteration.
if self.__print_to_stdout == 2:
@@ -460,7 +473,7 @@ class MedianGraphEstimator(object):
# Update the best median.
if self.__sum_of_distances < best_sum_of_distances:
best_sum_of_distances = self.__sum_of_distances
node_maps_from_best_median = self.__node_maps_from_median
node_maps_from_best_median = self.__node_maps_from_median.copy() # @todo: this is a shallow copy, not sure if it is enough.
best_median = median
# Update the number of converged descents.
@@ -543,6 +556,7 @@ class MedianGraphEstimator(object):
self.__desired_num_random_inits = 10
self.__use_real_randomness = True
self.__seed = 0
self.__update_order = True
self.__refine = True
self.__time_limit_in_sec = 0
self.__epsilon = 0.0001
@@ -568,16 +582,16 @@ class MedianGraphEstimator(object):
self.__compute_medoid(graph_ids, timer, initial_medians)
elif self.__init_type == 'MAX':
pass # @todo
# compute_max_order_graph_(graph_ids, initial_medians)
# compute_max_order_graph_(graph_ids, initial_medians)
elif self.__init_type == 'MIN':
pass # @todo
# compute_min_order_graph_(graph_ids, initial_medians)
# compute_min_order_graph_(graph_ids, initial_medians)
elif self.__init_type == 'MEAN':
pass # @todo
# compute_mean_order_graph_(graph_ids, initial_medians)
# compute_mean_order_graph_(graph_ids, initial_medians)
else:
pass # @todo
# sample_initial_medians_(graph_ids, initial_medians)
# sample_initial_medians_(graph_ids, initial_medians)

# Print information about current iteration.
if self.__print_to_stdout == 2:
@@ -655,20 +669,20 @@ class MedianGraphEstimator(object):
# Iterate through all nodes of the median.
for i in range(0, nx.number_of_nodes(median)):
# print('i: ', i)
# print('i: ', i)
# Collect the labels of the substituted nodes.
node_labels = []
for graph_id, graph in graphs.items():
# print('graph_id: ', graph_id)
# print(self.__node_maps_from_median[graph_id])
k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i)
# print('k: ', k)
# print('graph_id: ', graph_id)
# print(self.__node_maps_from_median[graph_id])
k = self.__node_maps_from_median[graph_id].image(i)
# print('k: ', k)
if k != np.inf:
node_labels.append(graph.nodes[k])
# Compute the median label and update the median.
if len(node_labels) > 0:
# median_label = self.__ged_env.get_median_node_label(node_labels)
# median_label = self.__ged_env.get_median_node_label(node_labels)
median_label = self.__get_median_node_label(node_labels)
if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon:
nx.set_node_attributes(median, {i: median_label})
@@ -679,10 +693,10 @@ class MedianGraphEstimator(object):
if self.__print_to_stdout == 2:
print('edges ... ', end='')
# Clear the adjacency lists of the median and reset number of edges to 0.
median_edges = list(median.edges)
for (head, tail) in median_edges:
median.remove_edge(head, tail)
# # Clear the adjacency lists of the median and reset number of edges to 0.
# median_edges = list(median.edges)
# for (head, tail) in median_edges:
# median.remove_edge(head, tail)
# @todo: what if edge is not labeled?
# Iterate through all possible edges (i,j) of the median.
@@ -692,8 +706,8 @@ class MedianGraphEstimator(object):
# Collect the labels of the edges to which (i,j) is mapped by the node maps.
edge_labels = []
for graph_id, graph in graphs.items():
k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i)
l = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], j)
k = self.__node_maps_from_median[graph_id].image(i)
l = self.__node_maps_from_median[graph_id].image(j)
if k != np.inf and l != np.inf:
if graph.has_edge(k, l):
edge_labels.append(graph.edges[(k, l)])
@@ -711,11 +725,13 @@ class MedianGraphEstimator(object):
rel_cost += self.__ged_env.get_edge_rel_cost(median_label, edge_label)
# Update the median.
if median.has_edge(i, j):
median.remove_edge(i, j)
if rel_cost < (self.__edge_ins_cost + self.__edge_del_cost) * len(edge_labels) - self.__edge_del_cost * len(graphs):
median.add_edge(i, j, **median_label)
else:
if median.has_edge(i, j):
median.remove_edge(i, j)
# else:
# if median.has_edge(i, j):
# median.remove_edge(i, j)


def __update_node_maps(self):
@@ -725,10 +741,12 @@ class MedianGraphEstimator(object):
# Update the node maps.
node_maps_were_modified = False
for graph_id in self.__node_maps_from_median:
for graph_id, node_map in self.__node_maps_from_median.items():
self.__ged_env.run_method(self.__median_id, graph_id)
if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < self.__ged_env.get_induced_cost(self.__median_id, graph_id) - self.__epsilon: # @todo: see above.
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id) # @todo: node_map may not assigned.
if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < node_map.induced_cost() - self.__epsilon:
# xxx = self.__node_maps_from_median[graph_id]
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id)
# yyy = self.__node_maps_from_median[graph_id]
node_maps_were_modified = True
# Print information about current iteration.
if self.__print_to_stdout == 2:
@@ -748,13 +766,13 @@ class MedianGraphEstimator(object):
print('Trying to decrease order: ... ', end='')
# Initialize ID of the node that is to be deleted.
id_deleted_node = None # @todo: or np.inf
id_deleted_node = [None] # @todo: or np.inf
decreased_order = False
# Decrease the order as long as the best deletion delta is negative.
while self.__compute_best_deletion_delta(graphs, median, [id_deleted_node]) < -self.__epsilon:
while self.__compute_best_deletion_delta(graphs, median, id_deleted_node) < -self.__epsilon: # @todo
decreased_order = True
self.__delete_node_from_median(id_deleted_node, median)
self.__delete_node_from_median(id_deleted_node[0], median)
# Print information about current iteration.
if self.__print_to_stdout == 2:
@@ -777,7 +795,7 @@ class MedianGraphEstimator(object):
delta -= self.__node_del_cost
else:
delta += self.__node_ins_cost - self.__ged_env.get_node_rel_cost(median.nodes[i], graph.nodes[k])
for j, j_label in median[i]:
for j, j_label in median[i].items():
l = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], j)
if k == np.inf or l == np.inf:
delta -= self.__edge_del_cost
@@ -790,32 +808,34 @@ class MedianGraphEstimator(object):
if delta < best_delta - self.__epsilon:
best_delta = delta
id_deleted_node[0] = i
id_deleted_node[0] = i # @todo:
return best_delta
def __delete_node_from_median(self, id_deleted_node, median):
# Update the nodes of the median.
median.remove_node(id_deleted_node) # @todo: test if it is right.
def __delete_node_from_median(self, id_deleted_node, median): # @todo: update env.node_map?
# Update the median.
median.remove_node(id_deleted_node)
# Update the node maps.
for _, node_map in self.__node_maps_from_median.items():
new_node_map = {nx.number_of_nodes(median): ''} # @todo
is_unassigned_target_node = ['', True]
new_node_map = [] # @todo
is_unassigned_target_node = [True] * len(node_map)
for i in range(0, nx.number_of_nodes(median)):
if i != id_deleted_node:
new_i = (i if i < id_deleted_node else i - 1)
k = self.__get_node_image_from_map(node_map, i)
new_node_map["ds"] # @todo
new_node_map.append((new_i, k)) # @todo
if k != np.inf:
is_unassigned_target_node[k] = False
for k in range(0, ''):
for k in range(0, len(node_map)):
if is_unassigned_target_node[k]:
new_node_map.sdf[]
node_map = new_node_map
new_node_map.append(np.inf, k)
node_map = new_node_map # @todo
# Increase overall number of decreases.
self.__num_decrease_order += 1
def __improve_sum_of_distances(self, timer):
pass
@@ -825,37 +845,37 @@ class MedianGraphEstimator(object):
return self.__median_id != np.inf
def __get_node_image_from_map(self, node_map, node):
"""
Return ID of the node mapping of `node` in `node_map`.
# def __get_node_image_from_map(self, node_map, node):
# """
# Return ID of the node mapping of `node` in `node_map`.

Parameters
----------
node_map : list[tuple(int, int)]
List of node maps where the mapping node is found.
node : int
The mapping node of this node is returned
# Parameters
# ----------
# node_map : list[tuple(int, int)]
# List of node maps where the mapping node is found.
#
# node : int
# The mapping node of this node is returned

Raises
------
Exception
If the node with ID `node` is not contained in the source nodes of the node map.
# Raises
# ------
# Exception
# If the node with ID `node` is not contained in the source nodes of the node map.

Returns
-------
int
ID of the mapping of `node`.
Notes
-----
This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function.
"""
if node < len(node_map):
return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf
else:
raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.')
return np.inf
# Returns
# -------
# int
# ID of the mapping of `node`.
#
# Notes
# -----
# This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function.
# """
# if node < len(node_map):
# return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf
# else:
# raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.')
# return np.inf
def __are_graphs_equal(self, g1, g2):
@@ -958,9 +978,9 @@ class MedianGraphEstimator(object):
for label in labels:
coords = {}
for key, val in label.items():
label = float(val)
sums[key] += label
coords[key] = label
label_f = float(val)
sums[key] += label_f
coords[key] = label_f
labels_as_coords.append(coords)
median = {}
for key, val in sums.items():
@@ -980,7 +1000,7 @@ class MedianGraphEstimator(object):
norm = 0
for key, val in label_as_coord.items():
norm += (val - median[key]) ** 2
norm += np.sqrt(norm)
norm = np.sqrt(norm)
if norm > 0:
for key, val in label_as_coord.items():
numerator[key] += val / norm
@@ -1005,64 +1025,64 @@ class MedianGraphEstimator(object):
return median_label

# def __get_median_edge_label_symbolic(self, edge_labels):
# pass
# def __get_median_edge_label_symbolic(self, edge_labels):
# pass
# def __get_median_edge_label_nonsymbolic(self, edge_labels):
# if len(edge_labels) == 0:
# return {}
# else:
# # Transform the labels into coordinates and compute mean label as initial solution.
# edge_labels_as_coords = []
# sums = {}
# for key, val in edge_labels[0].items():
# sums[key] = 0
# for edge_label in edge_labels:
# coords = {}
# for key, val in edge_label.items():
# label = float(val)
# sums[key] += label
# coords[key] = label
# edge_labels_as_coords.append(coords)
# median = {}
# for key, val in sums.items():
# median[key] = val / len(edge_labels)
#
# # Run main loop of Weiszfeld's Algorithm.
# epsilon = 0.0001
# delta = 1.0
# num_itrs = 0
# all_equal = False
# while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)):
# numerator = {}
# for key, val in sums.items():
# numerator[key] = 0
# denominator = 0
# for edge_label_as_coord in edge_labels_as_coords:
# norm = 0
# for key, val in edge_label_as_coord.items():
# norm += (val - median[key]) ** 2
# norm += np.sqrt(norm)
# if norm > 0:
# for key, val in edge_label_as_coord.items():
# numerator[key] += val / norm
# denominator += 1.0 / norm
# if denominator == 0:
# all_equal = True
# else:
# new_median = {}
# delta = 0.0
# for key, val in numerator.items():
# this_median = val / denominator
# new_median[key] = this_median
# delta += np.abs(median[key] - this_median)
# median = new_median
#
# num_itrs += 1
#
# # Transform the solution to ged::GXLLabel and return it.
# median_label = {}
# for key, val in median.items():
# median_label[key] = str(val)
# return median_label
# def __get_median_edge_label_nonsymbolic(self, edge_labels):
# if len(edge_labels) == 0:
# return {}
# else:
# # Transform the labels into coordinates and compute mean label as initial solution.
# edge_labels_as_coords = []
# sums = {}
# for key, val in edge_labels[0].items():
# sums[key] = 0
# for edge_label in edge_labels:
# coords = {}
# for key, val in edge_label.items():
# label = float(val)
# sums[key] += label
# coords[key] = label
# edge_labels_as_coords.append(coords)
# median = {}
# for key, val in sums.items():
# median[key] = val / len(edge_labels)
#
# # Run main loop of Weiszfeld's Algorithm.
# epsilon = 0.0001
# delta = 1.0
# num_itrs = 0
# all_equal = False
# while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)):
# numerator = {}
# for key, val in sums.items():
# numerator[key] = 0
# denominator = 0
# for edge_label_as_coord in edge_labels_as_coords:
# norm = 0
# for key, val in edge_label_as_coord.items():
# norm += (val - median[key]) ** 2
# norm += np.sqrt(norm)
# if norm > 0:
# for key, val in edge_label_as_coord.items():
# numerator[key] += val / norm
# denominator += 1.0 / norm
# if denominator == 0:
# all_equal = True
# else:
# new_median = {}
# delta = 0.0
# for key, val in numerator.items():
# this_median = val / denominator
# new_median[key] = this_median
# delta += np.abs(median[key] - this_median)
# median = new_median
#
# num_itrs += 1
#
# # Transform the solution to ged::GXLLabel and return it.
# median_label = {}
# for key, val in median.items():
# median_label[key] = str(val)
# return median_label

+ 11
- 7
gklearn/ged/median/test_median_graph_estimator.py View File

@@ -7,11 +7,10 @@ Created on Mon Mar 16 17:26:40 2020
"""
def test_median_graph_estimator():
from gklearn.utils.graphfiles import loadDataset
from gklearn.utils import load_dataset
from gklearn.ged.median import MedianGraphEstimator, constant_node_costs
from gklearn.gedlib import librariesImport, gedlibpy
from gklearn.preimage.utils import get_same_item_indices
from gklearn.preimage.ged import convertGraph
import multiprocessing

# estimator parameters.
@@ -22,17 +21,20 @@ def test_median_graph_estimator():
# algorithm parameters.
algo = 'IPFP'
initial_solutions = 40
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1'
initial_solutions = 1
algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1 --initialization-method NODE '

edit_cost_name = 'LETTER2'
# edit_cost_name = 'CONSTANT'
edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001]
# edit_cost_constants = [4, 4, 2, 1, 1, 1]
ds_name = 'COIL-DEL'
# Load dataset.
# dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt'
dataset = '../../../datasets/Letter-high/Letter-high_A.txt'
Gn, y_all = loadDataset(dataset)
# dataset = '../../../datasets/MUTAG/MUTAG_A.txt'
Gn, y_all, _ = load_dataset(dataset)
y_idx = get_same_item_indices(y_all)
for i, (y, values) in enumerate(y_idx.items()):
Gn_i = [Gn[val] for val in values]
@@ -43,7 +45,7 @@ def test_median_graph_estimator():
# gedlibpy.restart_env()
ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants)
for G in Gn_i:
ged_env.add_nx_graph(convertGraph(G, edit_cost_name), '')
ged_env.add_nx_graph(G, '')
graph_ids = ged_env.get_all_graph_ids()
set_median_id = ged_env.add_graph('set_median')
gen_median_id = ged_env.add_graph('gen_median')
@@ -54,11 +56,13 @@ def test_median_graph_estimator():
mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --refine FALSE'# @todo: std::to_string(rng())
mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order FALSE --refine FALSE'# @todo: std::to_string(rng())
# Select the GED algorithm.
algo_options = '--threads ' + str(threads) + algo_options_suffix
mge.set_options(mge_options)
mge.set_label_names(node_labels=[], edge_labels=[],
node_attrs=['x', 'y'], edge_attrs=[])
mge.set_init_method(algo, algo_options)
mge.set_descent_method(algo, algo_options)


+ 2
- 0
gklearn/ged/median/utils.py View File

@@ -30,6 +30,8 @@ def mge_options_to_string(options):
opt_str += '--randomness ' + str(val) + ' '
elif key == 'verbose':
opt_str += '--stdout ' + str(val) + ' '
elif key == 'update_order':
opt_str += '--update-order ' + ('TRUE' if val else 'FALSE') + ' '
elif key == 'refine':
opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' '
elif key == 'time_limit':


+ 2232
- 1702
gklearn/gedlib/gedlibpy.cpp
File diff suppressed because it is too large
View File


BIN
gklearn/gedlib/gedlibpy.cpython-36m-x86_64-linux-gnu.so View File


+ 33
- 18
gklearn/gedlib/gedlibpy.pyx View File

@@ -35,8 +35,8 @@ from libcpp.pair cimport pair
from libcpp.list cimport list

#Long unsigned int equivalent
cimport numpy as np
ctypedef np.npy_uint32 UINT32_t
cimport numpy as cnp
ctypedef cnp.npy_uint32 UINT32_t
from cpython cimport array

@@ -76,14 +76,14 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged":
void runMethod(size_t g, size_t h) except +
double getUpperBound(size_t g, size_t h) except +
double getLowerBound(size_t g, size_t h) except +
vector[np.npy_uint64] getForwardMap(size_t g, size_t h) except +
vector[np.npy_uint64] getBackwardMap(size_t g, size_t h) except +
vector[cnp.npy_uint64] getForwardMap(size_t g, size_t h) except +
vector[cnp.npy_uint64] getBackwardMap(size_t g, size_t h) except +
size_t getNodeImage(size_t g, size_t h, size_t nodeId) except +
size_t getNodePreImage(size_t g, size_t h, size_t nodeId) except +
double getInducedCost(size_t g, size_t h) except +
vector[pair[size_t,size_t]] getNodeMap(size_t g, size_t h) except +
vector[vector[int]] getAssignmentMatrix(size_t g, size_t h) except +
vector[vector[np.npy_uint64]] getAllMap(size_t g, size_t h) except +
vector[vector[cnp.npy_uint64]] getAllMap(size_t g, size_t h) except +
double getRuntime(size_t g, size_t h) except +
bool quasimetricCosts() except +
vector[vector[size_t]] hungarianLSAP(vector[vector[size_t]] matrixCost) except +
@@ -105,14 +105,16 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged":
map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except +
string getInitType() except +
# double getNodeCost(size_t label1, size_t label2) except +
void computeInducedCost(size_t g_id, size_t h_id) except +
double computeInducedCost(size_t g_id, size_t h_id) except +
#############################
##CYTHON WRAPPER INTERFACES##
#############################

import numpy as np
import networkx as nx
from gklearn.ged.env import NodeMap

# import librariesImport
from ctypes import *
@@ -726,13 +728,30 @@ cdef class GEDEnv:
:type g: size_t
:type h: size_t
:return: The Node Map between the two selected graph.
:rtype: list[tuple(size_t, size_t)]
:rtype: gklearn.ged.env.NodeMap.
.. seealso:: run_method(), get_forward_map(), get_backward_map(), get_node_image(), get_node_pre_image(), get_assignment_matrix()
.. warning:: run_method() between the same two graph must be called before this function.
.. note:: This function creates data, so use it only if necessary; however, this example helps you understand how the assignment works.
"""
return self.c_env.getNodeMap(g, h)
map_as_relation = self.c_env.getNodeMap(g, h)
induced_cost = self.c_env.getInducedCost(g, h) # @todo: the C++ implementation of this function in GedLibBind.ipp re-calls get_node_map() once more; this is not necessary.
source_map = [item.first if item.first < len(map_as_relation) else np.inf for item in map_as_relation] # item.first < len(map_as_relation) is not exactly correct.
# print(source_map)
target_map = [item.second if item.second < len(map_as_relation) else np.inf for item in map_as_relation]
# print(target_map)
num_node_source = len([item for item in source_map if item != np.inf])
# print(num_node_source)
num_node_target = len([item for item in target_map if item != np.inf])
# print(num_node_target)
node_map = NodeMap(num_node_source, num_node_target)
# print(node_map.get_forward_map(), node_map.get_backward_map())
for i in range(len(source_map)):
node_map.add_assignment(source_map[i], target_map[i])
node_map.set_induced_cost(induced_cost)
return node_map
def get_assignment_matrix(self, g, h) :
@@ -1320,7 +1339,7 @@ cdef class GEDEnv:
return graph_id
def compute_induced_cost(self, g_id, h_id):
def compute_induced_cost(self, g_id, h_id, node_map):
"""
Computes the edit cost between two graphs induced by a node map.

@@ -1330,19 +1349,15 @@ cdef class GEDEnv:
ID of input graph.
h_id : int
ID of input graph.
node_map: gklearn.ged.env.NodeMap.
The NodeMap instance whose reduced cost will be computed and re-assigned.

Returns
-------
None.
Notes
-----
The induced edit cost of the node map between `g_id` and `h_id` is implicitly computed and stored in `GEDEnv::node_maps_`.

None.
"""
cost = 0.0
self.c_env.computeInducedCost(g_id, h_id)
induced_cost = self.c_env.computeInducedCost(g_id, h_id)
node_map.set_induced_cost(induced_cost)

#####################################################################


+ 3
- 2
gklearn/gedlib/src/GedLibBind.hpp View File

@@ -475,8 +475,9 @@ public:
* @brief Computes the edit cost between two graphs induced by a node map.
* @param[in] g_id ID of input graph.
* @param[in] h_id ID of input graph.
* @return Computed induced cost.
*/
void computeInducedCost(std::size_t g_id, std::size_t h_id) const;
double computeInducedCost(std::size_t g_id, std::size_t h_id) const;

// /*!
// * @brief Returns node relabeling, insertion, or deletion cost.
@@ -492,7 +493,7 @@ public:

private:

ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> env; // environment variable
ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> * env_; // environment variable

bool initialized; // initialization boolean (because env has one but not accessible)



+ 64
- 54
gklearn/gedlib/src/GedLibBind.ipp View File

@@ -277,11 +277,16 @@ std::string toStringVectorInt(std::vector<unsigned long int> vector) {


PyGEDEnv::PyGEDEnv () {
this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>();
env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>();
this->initialized = false;
}

PyGEDEnv::~PyGEDEnv () {}
PyGEDEnv::~PyGEDEnv () {
if (env_ != NULL) {
delete env_;
env_ = NULL;
}
}

// bool initialized = false; //Initialization boolean (because Env has one but not accessible).

@@ -290,64 +295,68 @@ bool PyGEDEnv::isInitialized() {
}

void PyGEDEnv::restartEnv() {
this->env = ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>();
if (env_ != NULL) {
delete env_;
env_ = NULL;
}
env_ = new ged::GEDEnv<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel>();
initialized = false;
}

void PyGEDEnv::loadGXLGraph(const std::string & pathFolder, const std::string & pathXML, bool node_type, bool edge_type) {
std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(this->env.load_gxl_graph(pathFolder, pathXML,
std::vector<ged::GEDGraph::GraphID> tmp_graph_ids(env_->load_gxl_graph(pathFolder, pathXML,
(node_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED),
(edge_type ? ged::Options::GXLNodeEdgeType::LABELED : ged::Options::GXLNodeEdgeType::UNLABELED),
std::unordered_set<std::string>(), std::unordered_set<std::string>()));
}

std::pair<std::size_t,std::size_t> PyGEDEnv::getGraphIds() const {
return this->env.graph_ids();
return env_->graph_ids();
}

std::vector<std::size_t> PyGEDEnv::getAllGraphIds() {
std::vector<std::size_t> listID;
for (std::size_t i = this->env.graph_ids().first; i != this->env.graph_ids().second; i++) {
for (std::size_t i = env_->graph_ids().first; i != env_->graph_ids().second; i++) {
listID.push_back(i);
}
return listID;
}

const std::string PyGEDEnv::getGraphClass(std::size_t id) const {
return this->env.get_graph_class(id);
return env_->get_graph_class(id);
}

const std::string PyGEDEnv::getGraphName(std::size_t id) const {
return this->env.get_graph_name(id);
return env_->get_graph_name(id);
}

std::size_t PyGEDEnv::addGraph(const std::string & graph_name, const std::string & graph_class) {
ged::GEDGraph::GraphID newId = this->env.add_graph(graph_name, graph_class);
ged::GEDGraph::GraphID newId = env_->add_graph(graph_name, graph_class);
initialized = false;
return std::stoi(std::to_string(newId));
}

void PyGEDEnv::addNode(std::size_t graphId, const std::string & nodeId, const std::map<std::string, std::string> & nodeLabel) {
this->env.add_node(graphId, nodeId, nodeLabel);
env_->add_node(graphId, nodeId, nodeLabel);
initialized = false;
}

/*void addEdge(std::size_t graphId, ged::GXLNodeID tail, ged::GXLNodeID head, ged::GXLLabel edgeLabel) {
this->env.add_edge(graphId, tail, head, edgeLabel);
env_->add_edge(graphId, tail, head, edgeLabel);
}*/

void PyGEDEnv::addEdge(std::size_t graphId, const std::string & tail, const std::string & head, const std::map<std::string, std::string> & edgeLabel, bool ignoreDuplicates) {
this->env.add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates);
env_->add_edge(graphId, tail, head, edgeLabel, ignoreDuplicates);
initialized = false;
}

void PyGEDEnv::clearGraph(std::size_t graphId) {
this->env.clear_graph(graphId);
env_->clear_graph(graphId);
initialized = false;
}

ged::ExchangeGraph<ged::GXLNodeID, ged::GXLLabel, ged::GXLLabel> PyGEDEnv::getGraph(std::size_t graphId) const {
return this->env.get_graph(graphId);
return env_->get_graph(graphId);
}

std::size_t PyGEDEnv::getGraphInternalId(std::size_t graphId) {
@@ -379,71 +388,71 @@ std::vector<std::vector<std::size_t>> PyGEDEnv::getGraphAdjacenceMatrix(std::siz
}

void PyGEDEnv::setEditCost(std::string editCost, std::vector<double> editCostConstants) {
this->env.set_edit_costs(translateEditCost(editCost), editCostConstants);
env_->set_edit_costs(translateEditCost(editCost), editCostConstants);
}

void PyGEDEnv::setPersonalEditCost(std::vector<double> editCostConstants) {
//this->env.set_edit_costs(Your EditCost Class(editCostConstants));
//env_->set_edit_costs(Your EditCost Class(editCostConstants));
}

// void PyGEDEnv::initEnv() {
// this->env.init();
// env_->init();
// initialized = true;
// }

void PyGEDEnv::initEnv(std::string initOption, bool print_to_stdout) {
this->env.init(translateInitOptions(initOption), print_to_stdout);
env_->init(translateInitOptions(initOption), print_to_stdout);
initialized = true;
}

void PyGEDEnv::setMethod(std::string method, const std::string & options) {
this->env.set_method(translateMethod(method), options);
env_->set_method(translateMethod(method), options);
}

void PyGEDEnv::initMethod() {
this->env.init_method();
env_->init_method();
}

double PyGEDEnv::getInitime() const {
return this->env.get_init_time();
return env_->get_init_time();
}

void PyGEDEnv::runMethod(std::size_t g, std::size_t h) {
this->env.run_method(g, h);
env_->run_method(g, h);
}

double PyGEDEnv::getUpperBound(std::size_t g, std::size_t h) const {
return this->env.get_upper_bound(g, h);
return env_->get_upper_bound(g, h);
}

double PyGEDEnv::getLowerBound(std::size_t g, std::size_t h) const {
return this->env.get_lower_bound(g, h);
return env_->get_lower_bound(g, h);
}

std::vector<long unsigned int> PyGEDEnv::getForwardMap(std::size_t g, std::size_t h) const {
return this->env.get_node_map(g, h).get_forward_map();
return env_->get_node_map(g, h).get_forward_map();
}

std::vector<long unsigned int> PyGEDEnv::getBackwardMap(std::size_t g, std::size_t h) const {
return this->env.get_node_map(g, h).get_backward_map();
return env_->get_node_map(g, h).get_backward_map();
}

std::size_t PyGEDEnv::getNodeImage(std::size_t g, std::size_t h, std::size_t nodeId) const {
return this->env.get_node_map(g, h).image(nodeId);
return env_->get_node_map(g, h).image(nodeId);
}

std::size_t PyGEDEnv::getNodePreImage(std::size_t g, std::size_t h, std::size_t nodeId) const {
return this->env.get_node_map(g, h).pre_image(nodeId);
return env_->get_node_map(g, h).pre_image(nodeId);
}

double PyGEDEnv::getInducedCost(std::size_t g, std::size_t h) const {
return this->env.get_node_map(g, h).induced_cost();
return env_->get_node_map(g, h).induced_cost();
}

std::vector<pair<std::size_t, std::size_t>> PyGEDEnv::getNodeMap(std::size_t g, std::size_t h) {
std::vector<pair<std::size_t, std::size_t>> res;
std::vector<ged::NodeMap::Assignment> relation;
this->env.get_node_map(g, h).as_relation(relation);
env_->get_node_map(g, h).as_relation(relation);
for (const auto & assignment : relation) {
res.push_back(std::make_pair(assignment.first, assignment.second));
}
@@ -493,11 +502,11 @@ std::vector<std::vector<unsigned long int>> PyGEDEnv::getAllMap(std::size_t g, s
}

double PyGEDEnv::getRuntime(std::size_t g, std::size_t h) const {
return this->env.get_runtime(g, h);
return env_->get_runtime(g, h);
}

bool PyGEDEnv::quasimetricCosts() const {
return this->env.quasimetric_costs();
return env_->quasimetric_costs();
}

std::vector<std::vector<size_t>> PyGEDEnv::hungarianLSAP(std::vector<std::vector<std::size_t>> matrixCost) {
@@ -542,73 +551,74 @@ std::vector<std::vector<double>> PyGEDEnv::hungarianLSAPE(std::vector<std::vecto
}

std::size_t PyGEDEnv::getNumNodeLabels() const {
return this->env.num_node_labels();
return env_->num_node_labels();
}

std::map<std::string, std::string> PyGEDEnv::getNodeLabel(std::size_t label_id) const {
return this->env.get_node_label(label_id);
return env_->get_node_label(label_id);
}

std::size_t PyGEDEnv::getNumEdgeLabels() const {
return this->env.num_edge_labels();
return env_->num_edge_labels();
}

std::map<std::string, std::string> PyGEDEnv::getEdgeLabel(std::size_t label_id) const {
return this->env.get_edge_label(label_id);
return env_->get_edge_label(label_id);
}

// std::size_t PyGEDEnv::getNumNodes(std::size_t graph_id) const {
// return this->env.get_num_nodes(graph_id);
// return env_->get_num_nodes(graph_id);
// }

double PyGEDEnv::getAvgNumNodes() const {
return this->env.get_avg_num_nodes();
return env_->get_avg_num_nodes();
}

double PyGEDEnv::getNodeRelCost(const std::map<std::string, std::string> & node_label_1, const std::map<std::string, std::string> & node_label_2) const {
return this->env.node_rel_cost(node_label_1, node_label_2);
return env_->node_rel_cost(node_label_1, node_label_2);
}

double PyGEDEnv::getNodeDelCost(const std::map<std::string, std::string> & node_label) const {
return this->env.node_del_cost(node_label);
return env_->node_del_cost(node_label);
}

double PyGEDEnv::getNodeInsCost(const std::map<std::string, std::string> & node_label) const {
return this->env.node_ins_cost(node_label);
return env_->node_ins_cost(node_label);
}

std::map<std::string, std::string> PyGEDEnv::getMedianNodeLabel(const std::vector<std::map<std::string, std::string>> & node_labels) const {
return this->env.median_node_label(node_labels);
return env_->median_node_label(node_labels);
}

double PyGEDEnv::getEdgeRelCost(const std::map<std::string, std::string> & edge_label_1, const std::map<std::string, std::string> & edge_label_2) const {
return this->env.edge_rel_cost(edge_label_1, edge_label_2);
return env_->edge_rel_cost(edge_label_1, edge_label_2);
}

double PyGEDEnv::getEdgeDelCost(const std::map<std::string, std::string> & edge_label) const {
return this->env.edge_del_cost(edge_label);
return env_->edge_del_cost(edge_label);
}

double PyGEDEnv::getEdgeInsCost(const std::map<std::string, std::string> & edge_label) const {
return this->env.edge_ins_cost(edge_label);
return env_->edge_ins_cost(edge_label);
}

std::map<std::string, std::string> PyGEDEnv::getMedianEdgeLabel(const std::vector<std::map<std::string, std::string>> & edge_labels) const {
return this->env.median_edge_label(edge_labels);
return env_->median_edge_label(edge_labels);
}

std::string PyGEDEnv::getInitType() const {
return initOptionsToString(this->env.get_init_type());
return initOptionsToString(env_->get_init_type());
}

void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const {
ged::NodeMap node_map = this->env.get_node_map(g_id, h_id);
this->env.compute_induced_cost(g_id, h_id, node_map);
double PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const {
ged::NodeMap node_map = env_->get_node_map(g_id, h_id);
env_->compute_induced_cost(g_id, h_id, node_map);
return node_map.induced_cost();
}


// double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const {
// return this->env.ged_data_node_cost(label1, label2);
// return env_->ged_data_node_cost(label1, label2);
// }


@@ -630,7 +640,7 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const {

/*loadGXLGraph(pathFolder, pathXML);
std::vector<std::size_t> graph_ids = getAllGraphIds();
std::size_t median_id = this->env.add_graph("median", "");
std::size_t median_id = env_->add_graph("median", "");

initEnv(initOption);

@@ -640,10 +650,10 @@ void PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const {
median_estimator.set_options("--init-type RANDOM --randomness PSEUDO --seed " + seed);
median_estimator.run(graph_ids, median_id);
std::string gxl_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".gxl");
this->env.save_as_gxl_graph(median_id, gxl_file_name);*/
env_->save_as_gxl_graph(median_id, gxl_file_name);*/

/*std::string tikz_file_name("../output/gen_median_Letter_HIGH_" + letter_class + ".tex");
save_letter_graph_as_tikz_file(this->env.get_graph(median_id), tikz_file_name);*/
save_letter_graph_as_tikz_file(env_->get_graph(median_id), tikz_file_name);*/
//}

}


+ 133
- 1
gklearn/preimage/experiments/xp_median_preimage.py View File

@@ -12,6 +12,132 @@ from gklearn.preimage.utils import generate_median_preimages_by_class
from gklearn.utils import compute_gram_matrices_by_class


def xp_median_preimage_13_1():
    """xp 13_1: PAH, StructuralSP, using NON_SYMBOLIC.
    """
    # Experiment configuration.
    ds_name = 'PAH'
    mpg_options = {
        'fit_method': 'k-graphs',
        'init_ecc': [3, 3, 1, 3, 3, 0],
        'ds_name': ds_name,
        'parallel': True,  # False
        'time_limit_in_sec': 0,
        'max_itrs': 100,
        'max_itrs_without_update': 3,
        'epsilon_residual': 0.01,
        'epsilon_ec': 0.1,
        'verbose': 2,
    }
    # Node/edge base kernels: delta for symbolic labels, Gaussian for
    # non-symbolic attributes, and their product for mixed labels.
    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
    kernel_options = {
        'name': 'StructuralSP',
        'edge_weight': None,
        'node_kernels': sub_kernels,
        'edge_kernels': sub_kernels,
        'compute_method': 'naive',
        'parallel': 'imap_unordered',
        # 'parallel': None,
        'n_jobs': multiprocessing.cpu_count(),
        'normalize': True,
        'verbose': 2,
    }
    ged_options = {
        'method': 'IPFP',
        'initialization_method': 'RANDOM',  # 'NODE'
        'initial_solutions': 10,  # 1
        'edit_cost': 'NON_SYMBOLIC',
        'attr_distance': 'euclidean',
        'ratio_runs_from_initial_solutions': 1,
        'threads': multiprocessing.cpu_count(),
        'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES',
    }
    mge_options = {
        'init_type': 'MEDOID',
        'random_inits': 10,
        'time_limit': 600,
        'verbose': 2,
        'refine': False,
    }
    save_results = True
    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
    irrelevant_labels = None
    edge_required = False

    # Echo the settings before running.
    print('parameters:')
    print('dataset name:', ds_name)
    for header, opts in (('mpg_options:', mpg_options),
                         ('kernel_options:', kernel_options),
                         ('ged_options:', ged_options),
                         ('mge_options:', mge_options)):
        print(header, opts)
    print('save_results:', save_results)
    print('irrelevant_labels:', irrelevant_labels)
    print()

    # Generate preimages: one fit on the k graphs, then 5 random baselines.
    for fit_method in ['k-graphs'] + ['random'] * 5:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(
            ds_name, mpg_options, kernel_options, ged_options, mge_options,
            save_results=save_results, save_medians=True, plot_medians=True,
            load_gm='auto', dir_save=dir_save,
            irrelevant_labels=irrelevant_labels, edge_required=edge_required)
def xp_median_preimage_13_2():
    """xp 13_2: PAH, ShortestPath, using NON_SYMBOLIC.
    """
    # Experiment configuration.
    ds_name = 'PAH'
    mpg_options = {
        'fit_method': 'k-graphs',
        'init_ecc': [3, 3, 1, 3, 3, 0],
        'ds_name': ds_name,
        'parallel': True,  # False
        'time_limit_in_sec': 0,
        'max_itrs': 100,
        'max_itrs_without_update': 3,
        'epsilon_residual': 0.01,
        'epsilon_ec': 0.1,
        'verbose': 2,
    }
    # Node base kernels: delta for symbolic labels, Gaussian for
    # non-symbolic attributes, and their product for mixed labels.
    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
    kernel_options = {
        'name': 'ShortestPath',
        'edge_weight': None,
        'node_kernels': sub_kernels,
        'parallel': 'imap_unordered',
        # 'parallel': None,
        'n_jobs': multiprocessing.cpu_count(),
        'normalize': True,
        'verbose': 2,
    }
    ged_options = {
        'method': 'IPFP',
        'initialization_method': 'RANDOM',  # 'NODE'
        'initial_solutions': 10,  # 1
        'edit_cost': 'NON_SYMBOLIC',
        'attr_distance': 'euclidean',
        'ratio_runs_from_initial_solutions': 1,
        'threads': multiprocessing.cpu_count(),
        'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES',
    }
    mge_options = {
        'init_type': 'MEDOID',
        'random_inits': 10,
        'time_limit': 600,
        'verbose': 2,
        'refine': False,
    }
    save_results = True
    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
    irrelevant_labels = None
    edge_required = True  # ShortestPath needs edges on every graph.

    # Echo the settings before running.
    print('parameters:')
    print('dataset name:', ds_name)
    for header, opts in (('mpg_options:', mpg_options),
                         ('kernel_options:', kernel_options),
                         ('ged_options:', ged_options),
                         ('mge_options:', mge_options)):
        print(header, opts)
    print('save_results:', save_results)
    print('irrelevant_labels:', irrelevant_labels)
    print()

    # Generate preimages: one fit on the k graphs, then 5 random baselines.
    for fit_method in ['k-graphs'] + ['random'] * 5:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(
            ds_name, mpg_options, kernel_options, ged_options, mge_options,
            save_results=save_results, save_medians=True, plot_medians=True,
            load_gm='auto', dir_save=dir_save,
            irrelevant_labels=irrelevant_labels, edge_required=edge_required)


def xp_median_preimage_12_1():
"""xp 12_1: PAH, StructuralSP, using NON_SYMBOLIC, unlabeled.
"""
@@ -1969,7 +2095,13 @@ if __name__ == "__main__":
# xp_median_preimage_12_4()

#### xp 12_5: PAH, ShortestPath, using NON_SYMBOLIC, unlabeled.
xp_median_preimage_12_5()
# xp_median_preimage_12_5()

#### xp 13_1: PAH, StructuralSP, using NON_SYMBOLIC.
xp_median_preimage_13_1()

#### xp 13_2: PAH, ShortestPath, using NON_SYMBOLIC.
# xp_median_preimage_13_2()





+ 1
- 1
gklearn/preimage/median_preimage_generator.py View File

@@ -88,7 +88,7 @@ class MedianPreimageGenerator(PreimageGenerator):
node_attrs=self._dataset.node_attrs,
edge_attrs=self._dataset.edge_attrs,
ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
**self._kernel_options)
kernel_options=self._kernel_options)
# record start time.
start = time.time()


+ 3
- 2
gklearn/preimage/utils.py View File

@@ -25,7 +25,7 @@ import networkx as nx
import os


def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False):
def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None):
import os.path
from gklearn.preimage import MedianPreimageGenerator
from gklearn.utils import split_dataset_by_target
@@ -38,7 +38,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
dataset_all.trim_dataset(edge_required=edge_required)
if irrelevant_labels is not None:
dataset_all.remove_labels(**irrelevant_labels)
# dataset_all.cut_graphs(range(0, 10))
if cut_range is not None:
dataset_all.cut_graphs(cut_range)
datasets = split_dataset_by_target(dataset_all)

if save_results:


+ 89
- 30
gklearn/utils/dataset.py View File

@@ -56,13 +56,14 @@ class Dataset(object):
self.__node_attrs = label_names['node_attrs']
self.__edge_labels = label_names['edge_labels']
self.__edge_attrs = label_names['edge_attrs']
self.clean_labels()
def load_graphs(self, graphs, targets=None):
# this has to be followed by set_labels().
self.__graphs = graphs
self.__targets = targets
# self.set_labels_attrs()
# self.set_labels_attrs() # @todo
def load_predefined_dataset(self, ds_name):
@@ -128,6 +129,7 @@ class Dataset(object):
self.__node_attrs = label_names['node_attrs']
self.__edge_labels = label_names['edge_labels']
self.__edge_attrs = label_names['edge_attrs']
self.clean_labels()

def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]):
@@ -141,27 +143,27 @@ class Dataset(object):
# @todo: remove labels which have only one possible values.
if node_labels is None:
self.__node_labels = self.__graphs[0].graph['node_labels']
# # graphs are considered node unlabeled if all nodes have the same label.
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False})
# # graphs are considered node unlabeled if all nodes have the same label.
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False})
if node_attrs is None:
self.__node_attrs = self.__graphs[0].graph['node_attrs']
# for G in Gn:
# for n in G.nodes(data=True):
# if 'attributes' in n[1]:
# return len(n[1]['attributes'])
# return 0
# for G in Gn:
# for n in G.nodes(data=True):
# if 'attributes' in n[1]:
# return len(n[1]['attributes'])
# return 0
if edge_labels is None:
self.__edge_labels = self.__graphs[0].graph['edge_labels']
# # graphs are considered edge unlabeled if all edges have the same label.
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False})
# # graphs are considered edge unlabeled if all edges have the same label.
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False})
if edge_attrs is None:
self.__edge_attrs = self.__graphs[0].graph['edge_attrs']
# for G in Gn:
# if nx.number_of_edges(G) > 0:
# for e in G.edges(data=True):
# if 'attributes' in e[2]:
# return len(e[2]['attributes'])
# return 0
# for G in Gn:
# if nx.number_of_edges(G) > 0:
# for e in G.edges(data=True):
# if 'attributes' in e[2]:
# return len(e[2]['attributes'])
# return 0
def get_dataset_infos(self, keys=None):
@@ -326,7 +328,7 @@ class Dataset(object):
if self.__node_label_nums is None:
self.__node_label_nums = {}
for node_label in self.__node_labels:
self.__node_label_nums[node_label] = self.get_node_label_num(node_label)
self.__node_label_nums[node_label] = self.__get_node_label_num(node_label)
infos['node_label_nums'] = self.__node_label_nums
if 'edge_label_dim' in keys:
@@ -338,7 +340,7 @@ class Dataset(object):
if self.__edge_label_nums is None:
self.__edge_label_nums = {}
for edge_label in self.__edge_labels:
self.__edge_label_nums[edge_label] = self.get_edge_label_num(edge_label)
self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label)
infos['edge_label_nums'] = self.__edge_label_nums
if 'directed' in keys or 'substructures' in keys:
@@ -417,30 +419,87 @@ class Dataset(object):
for g in self.__graphs:
for nd in g.nodes():
for nl in node_labels:
del g.nodes[nd][nl]
del g.nodes[nd][nl]
for na in node_attrs:
del g.nodes[nd][na]
for ed in g.edges():
for el in edge_labels:
del g.edges[ed][el]
del g.edges[ed][el]
for ea in edge_attrs:
del g.edges[ed][ea]
del g.edges[ed][ea]
if len(node_labels) > 0:
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels]
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels]
if len(edge_labels) > 0:
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels]
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels]
if len(node_attrs) > 0:
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs]
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs]
if len(edge_attrs) > 0:
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs]
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs]
def clean_labels(self):
    """Remove uninformative node/edge labels and attributes from the dataset.

    A label (or attribute) is uninformative when it takes fewer than two
    distinct values across all graphs: it is deleted from every node/edge
    that carries it, and dropped from the corresponding label-name list
    (``__node_labels``, ``__edge_labels``, ``__node_attrs``, ``__edge_attrs``).

    The original implementation repeated the same scan-and-prune loop four
    times; it is factored into the single nested helper below.

    Returns
    -------
    None.
    """
    def _prune(names, collect_values, attr_dicts):
        """Return the informative subset of `names`; delete the rest in place.

        Parameters
        ----------
        names : list of str
            Candidate label/attribute names.
        collect_values : callable(G, name) -> set
            Distinct values of `name` over one graph.
        attr_dicts : callable(G) -> iterable of dict
            The per-node or per-edge attribute dictionaries of one graph.
        """
        kept = []
        for name in names:
            seen = set()
            for G in self.__graphs:
                seen |= collect_values(G, name)
                if len(seen) > 1:
                    # More than one distinct value -> informative; stop scanning.
                    kept.append(name)
                    break
            if len(seen) < 2:
                # Constant (or absent) everywhere: remove from all graphs.
                # NOTE(review): as in the original code, this raises KeyError
                # if some node/edge lacks the key — presumably labels are
                # present uniformly; confirm against the loaders.
                for G in self.__graphs:
                    for d in attr_dicts(G):
                        del d[name]
        return kept

    node_values = lambda G, name: set(nx.get_node_attributes(G, name).values())
    edge_values = lambda G, name: set(nx.get_edge_attributes(G, name).values())
    node_dicts = lambda G: (G.nodes[nd] for nd in G.nodes())
    edge_dicts = lambda G: (G.edges[ed] for ed in G.edges())

    # Keep the original processing order.
    self.__node_labels = _prune(self.__node_labels, node_values, node_dicts)
    self.__edge_labels = _prune(self.__edge_labels, edge_values, edge_dicts)
    self.__node_attrs = _prune(self.__node_attrs, node_values, node_dicts)
    self.__edge_attrs = _prune(self.__edge_attrs, edge_values, edge_dicts)
def cut_graphs(self, range_):
    """Keep only the graphs at the indices in `range_`.

    The targets list, when present, is filtered with the same indices, and
    the label lists are re-cleaned on the reduced dataset.
    """
    self.__graphs = [self.__graphs[idx] for idx in range_]
    if self.__targets is not None:
        self.__targets = [self.__targets[idx] for idx in range_]
    self.clean_labels()


def trim_dataset(self, edge_required=False):
@@ -451,8 +510,7 @@ class Dataset(object):
idx = [p[0] for p in trimed_pairs]
self.__graphs = [p[1] for p in trimed_pairs]
self.__targets = [self.__targets[i] for i in idx]
# @todo
# self.set_labels_attrs()
self.clean_labels()
def __get_dataset_size(self):
@@ -655,4 +713,5 @@ def split_dataset_by_target(dataset):
sub_dataset.load_graphs(sub_graphs, [key] * len(val))
sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs)
datasets.append(sub_dataset)
# @todo: clean_labels?
return datasets

Loading…
Cancel
Save