Browse Source

Update and fix bugs in MGE:

1. avoid deleting all nodes of the median when decreasing its order (stop once only one node remains).
2. in __compute_initial_node_labels(), stop appending elements to median_labels when all node_labels are already selected.
3. return False directly in __update_node_label() when node_labels is empty.
4. fix the bug that the median graph was not modified in place in __delete_node_from_median().
v0.2.x
jajupmochi 5 years ago
parent
commit
7293234d6c
5 changed files with 59 additions and 68 deletions
  1. +45
    -59
      gklearn/ged/median/median_graph_estimator.py
  2. +8
    -2
      gklearn/preimage/experiments/xp_1nn.py
  3. +3
    -4
      gklearn/preimage/kernel_knn_cv.py
  4. +2
    -2
      gklearn/preimage/median_preimage_generator.py
  5. +1
    -1
      gklearn/utils/dataset.py

+ 45
- 59
gklearn/ged/median/median_graph_estimator.py View File

@@ -370,7 +370,9 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
self.__ged_env.init(self.__ged_env.get_init_type())
# Compute node maps and sum of distances for initial median.
# xxx = self.__node_maps_from_median
self.__compute_init_node_maps(graph_ids, gen_median_id)
# yyy = self.__node_maps_from_median
self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances)
self.__ged_env.load_nx_graph(median, set_median_id)
@@ -557,7 +559,7 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
def __median_available(self):
return self.__gen_median_id != np.inf
return self.__median_id != np.inf
def get_state(self):
@@ -827,6 +829,7 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
def __update_node_labels(self, graphs, median):
# print('----------------------------')
# Print information about current iteration.
if self.__print_to_stdout == 2:
@@ -834,14 +837,15 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
# Iterate through all nodes of the median.
for i in range(0, nx.number_of_nodes(median)):
# print('i: ', i)
# print('i: ', i)
# Collect the labels of the substituted nodes.
node_labels = []
for graph_id, graph in graphs.items():
# print('graph_id: ', graph_id)
# print(self.__node_maps_from_median[graph_id])
# print('graph_id: ', graph_id)
# print(self.__node_maps_from_median[graph_id])
# print(self.__node_maps_from_median[graph_id].get_forward_map(), self.__node_maps_from_median[graph_id].get_backward_map())
k = self.__node_maps_from_median[graph_id].image(i)
# print('k: ', k)
# print('k: ', k)
if k != np.inf:
node_labels.append(graph.nodes[k])
@@ -961,6 +965,11 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
if self.__print_to_stdout == 2:
print('Trying to decrease order: ... ', end='')
if nx.number_of_nodes(median) <= 1:
if self.__print_to_stdout == 2:
print('median graph has only 1 node, skip decrease.')
return False
# Initialize ID of the node that is to be deleted.
id_deleted_node = [None] # @todo: or np.inf
decreased_order = False
@@ -968,7 +977,11 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
# Decrease the order as long as the best deletion delta is negative.
while self.__compute_best_deletion_delta(graphs, median, id_deleted_node) < -self.__epsilon:
decreased_order = True
median = self.__delete_node_from_median(id_deleted_node[0], median)
self.__delete_node_from_median(id_deleted_node[0], median)
if nx.number_of_nodes(median) <= 1:
if self.__print_to_stdout == 2:
print('decrease stopped because median graph remains only 1 node. ', end='')
break
# Print information about current iteration.
if self.__print_to_stdout == 2:
@@ -1011,16 +1024,22 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
def __delete_node_from_median(self, id_deleted_node, median):
# Update the median.
mapping = {}
for i in range(0, nx.number_of_nodes(median)):
if i != id_deleted_node:
new_i = (i if i < id_deleted_node else (i - 1))
mapping[i] = new_i
median.remove_node(id_deleted_node)
median = nx.convert_node_labels_to_integers(median, first_label=0, ordering='default', label_attribute=None) # @todo: This doesn't guarantee that the order is the same as in G.
nx.relabel_nodes(median, mapping, copy=False)
# Update the node maps.
# xxx = self.__node_maps_from_median
for key, node_map in self.__node_maps_from_median.items():
new_node_map = NodeMap(nx.number_of_nodes(median), node_map.num_target_nodes())
is_unassigned_target_node = [True] * node_map.num_target_nodes()
for i in range(0, nx.number_of_nodes(median) + 1):
if i != id_deleted_node:
new_i = (i if i < id_deleted_node else i - 1)
new_i = (i if i < id_deleted_node else (i - 1))
k = node_map.image(i)
new_node_map.add_assignment(new_i, k)
if k != np.inf:
@@ -1028,13 +1047,12 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
for k in range(0, node_map.num_target_nodes()):
if is_unassigned_target_node[k]:
new_node_map.add_assignment(np.inf, k)
# print(self.__node_maps_from_median[key].get_forward_map(), self.__node_maps_from_median[key].get_backward_map())
# print(new_node_map.get_forward_map(), new_node_map.get_backward_map())
self.__node_maps_from_median[key] = new_node_map
# Increase overall number of decreases.
self.__num_decrease_order += 1
return median
def __increase_order(self, graphs, median):
@@ -1230,15 +1248,22 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
continue
for label in median_labels:
weights[label_id] = min(weights[label_id], self.__ged_env.get_node_rel_cost(dict(label), dict(node_labels[label_id])))
sum_weight = np.sum(weights)
if sum_weight == 0:
p = np.array([1 / len(weights)] * len(weights))
else:
p = np.array(weights) / np.sum(weights)
selected_label_id = urng.choice(range(0, len(weights)), size=1, p=p)[0] # for c++ test: xxx[iii]
# get non-zero weights.
weights_p, idx_p = [], []
for i, w in enumerate(weights):
if w != 0:
weights_p.append(w)
idx_p.append(i)
if len(weights_p) > 0:
p = np.array(weights_p) / np.sum(weights_p)
selected_label_id = urng.choice(range(0, len(weights_p)), size=1, p=p)[0] # for c++ test: xxx[iii]
selected_label_id = idx_p[selected_label_id]
# iii += 1 for c++ test
median_labels.append(node_labels[selected_label_id])
already_selected[selected_label_id] = True
median_labels.append(node_labels[selected_label_id])
already_selected[selected_label_id] = True
else: # skip the loop when all node_labels are selected. This happens when len(node_labels) <= self.__num_inits_increase_order.
break
else:
# Compute the initial node medians as the medians of randomly generated clusters of (roughly) equal size.
# @todo: go through and test.
@@ -1315,6 +1340,8 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
def __update_node_label(self, node_labels, node_label):
if len(node_labels) == 0: # @todo: check if this is the correct solution. Especially after calling __update_config().
return False
new_node_label = self.__get_median_node_label(node_labels)
if self.__ged_env.get_node_rel_cost(new_node_label, node_label) > self.__epsilon:
node_label.clear()
@@ -1360,47 +1387,6 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
# Increase overall number of increases.
self.__num_increase_order += 1
def __improve_sum_of_distances(self, timer):
pass
def __median_available(self):
return self.__median_id != np.inf
# def __get_node_image_from_map(self, node_map, node):
# """
# Return ID of the node mapping of `node` in `node_map`.

# Parameters
# ----------
# node_map : list[tuple(int, int)]
# List of node maps where the mapping node is found.
#
# node : int
# The mapping node of this node is returned

# Raises
# ------
# Exception
# If the node with ID `node` is not contained in the source nodes of the node map.

# Returns
# -------
# int
# ID of the mapping of `node`.
#
# Notes
# -----
# This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function.
# """
# if node < len(node_map):
# return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf
# else:
# raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.')
# return np.inf
def __are_graphs_equal(self, g1, g2):


+ 8
- 2
gklearn/preimage/experiments/xp_1nn.py View File

@@ -31,7 +31,7 @@ def xp_knn_1_1():
mpg_options = {'fit_method': 'k-graphs',
'init_ecc': [0.675, 0.675, 0.75, 0.425, 0.425],
'ds_name': ds_name,
'parallel': True, # False
'parallel': False, # False
'time_limit_in_sec': 0,
'max_itrs': 100,
'max_itrs_without_update': 3,
@@ -100,4 +100,10 @@ def xp_knn_1_1():


if __name__ == '__main__':
xp_knn_1_1()
import pdb, traceback
try:
xp_knn_1_1()
except:
extype, value, tb = sys.exc_info()
traceback.print_exc()
pdb.post_mortem(tb)

+ 3
- 4
gklearn/preimage/kernel_knn_cv.py View File

@@ -29,7 +29,6 @@ def kernel_knn_cv(ds_name, train_examples, knn_options, mpg_options, kernel_opti
dataset_all.remove_labels(**irrelevant_labels)
if cut_range is not None:
dataset_all.cut_graphs(cut_range)
# datasets = split_dataset_by_target(dataset_all)

if save_results:
# create result files.
@@ -86,7 +85,7 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
median_set = G_app[i_start:i_end]
dataset = dataset_all.copy()
dataset.load_graphs(median_set.copy(), targets=None)
dataset.load_graphs([g.copy() for g in median_set], targets=None)
mge_options['update_order'] = True
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy()
mpg_options['runtime_precompute_gm'] = 0
@@ -104,7 +103,7 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
for i_app, G_app in enumerate(medians):
# compute dis_mat between medians.
dataset = dataset_all.copy()
dataset.load_graphs(G_app.copy(), targets=None)
dataset.load_graphs([g.copy() for g in G_app], targets=None)
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
# compute the entire Gram matrix.
@@ -204,7 +203,7 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g
print('\nperforming k-nn...')
# compute dis_mat between medians.
dataset = dataset_all.copy()
dataset.load_graphs(best_graphs.copy(), targets=None)
dataset.load_graphs([g.copy() for g in best_graphs], targets=None)
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
# compute the entire Gram matrix.


+ 2
- 2
gklearn/preimage/median_preimage_generator.py View File

@@ -891,8 +891,8 @@ class MedianPreimageGenerator(PreimageGenerator):
ged_options = self.__ged_options.copy()
if self.__parallel:
ged_options['threads'] = 1
self.__mge.set_init_method(self.__ged_options['method'], ged_options_to_string(ged_options))
self.__mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(ged_options))
self.__mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options))
self.__mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options))
# Run the estimator.
self.__mge.run(graph_ids, set_median_id, gen_median_id)


+ 1
- 1
gklearn/utils/dataset.py View File

@@ -525,7 +525,7 @@ class Dataset(object):
def copy(self):
dataset = Dataset()
graphs = self.__graphs.copy() if self.__graphs is not None else None
graphs = [g.copy() for g in self.__graphs] if self.__graphs is not None else None
target = self.__targets.copy() if self.__targets is not None else None
node_labels = self.__node_labels.copy() if self.__node_labels is not None else None
node_attrs = self.__node_attrs.copy() if self.__node_attrs is not None else None


Loading…
Cancel
Save