Browse Source

Fix bugs when computing reduced costs in MGE.

v0.2.x
jajupmochi 5 years ago
parent
commit
61d96d52d1
9 changed files with 877 additions and 201 deletions
  1. +45
    -6
      gklearn/ged/median/median_graph_estimator.py
  2. +2
    -2
      gklearn/ged/median/test_median_graph_estimator.py
  3. +752
    -174
      gklearn/gedlib/gedlibpy.cpp
  4. BIN
      gklearn/gedlib/gedlibpy.cpython-36m-x86_64-linux-gnu.so
  5. +12
    -2
      gklearn/gedlib/gedlibpy.pyx
  6. +1
    -1
      gklearn/gedlib/src/GedLibBind.hpp
  7. +27
    -2
      gklearn/gedlib/src/GedLibBind.ipp
  8. +13
    -9
      gklearn/preimage/median_preimage_generator.py
  9. +25
    -5
      gklearn/preimage/utils.py

+ 45
- 6
gklearn/ged/median/median_graph_estimator.py View File

@@ -15,7 +15,7 @@ import sys
import networkx as nx import networkx as nx




class MedianGraphEstimator(object):
class MedianGraphEstimator(object): # @todo: differ dummy_node from undefined node?
def __init__(self, ged_env, constant_node_costs): def __init__(self, ged_env, constant_node_costs):
"""Constructor. """Constructor.
@@ -377,7 +377,7 @@ class MedianGraphEstimator(object):
self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances) self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances)
self.__ged_env.load_nx_graph(median, set_median_id) self.__ged_env.load_nx_graph(median, set_median_id)
print(self.__best_init_sum_of_distances)
# print(self.__best_init_sum_of_distances)
# Print information about current iteration. # Print information about current iteration.
if self.__print_to_stdout == 2: if self.__print_to_stdout == 2:
@@ -400,7 +400,7 @@ class MedianGraphEstimator(object):
decreased_order = False decreased_order = False
increased_order = False increased_order = False
# Update the median. # @todo!!!!!!!!!!!!!!!!!!!!!!
# Update the median.
median_modified = self.__update_median(graphs, median) median_modified = self.__update_median(graphs, median)
if self.__update_order: if self.__update_order:
if not median_modified or self.__itrs[median_pos] == 0: if not median_modified or self.__itrs[median_pos] == 0:
@@ -434,6 +434,7 @@ class MedianGraphEstimator(object):
# Compute induced costs of the old node maps w.r.t. the updated median. # Compute induced costs of the old node maps w.r.t. the updated median.
for graph_id in graph_ids: for graph_id in graph_ids:
# print(self.__node_maps_from_median[graph_id].induced_cost()) # print(self.__node_maps_from_median[graph_id].induced_cost())
# xxx = self.__node_maps_from_median[graph_id]
self.__ged_env.compute_induced_cost(gen_median_id, graph_id, self.__node_maps_from_median[graph_id]) self.__ged_env.compute_induced_cost(gen_median_id, graph_id, self.__node_maps_from_median[graph_id])
# print('---------------------------------------') # print('---------------------------------------')
# print(self.__node_maps_from_median[graph_id].induced_cost()) # print(self.__node_maps_from_median[graph_id].induced_cost())
@@ -444,7 +445,7 @@ class MedianGraphEstimator(object):
print('done.') print('done.')
# Update the node maps. # Update the node maps.
node_maps_modified = self.__update_node_maps() # @todo
node_maps_modified = self.__update_node_maps()


# Update the order of the median if no improvement can be found with the current order. # Update the order of the median if no improvement can be found with the current order.
@@ -592,6 +593,44 @@ class MedianGraphEstimator(object):
if state == 'converged': if state == 'converged':
return self.__converged_sum_of_distances return self.__converged_sum_of_distances
return self.__sum_of_distances return self.__sum_of_distances


# Return the runtime stored for the requested algorithm state.
# Raises Exception when self.__median_available() is false; the message asks
# the caller to call run() first.
def get_runtime(self, state):
if not self.__median_available():
raise Exception('No median has been computed. Call run() before calling get_runtime().')
# NOTE(review): the neighbouring sum-of-distances getter compares `state`
# against the string 'converged', while this method compares against
# AlgorithmState members. Confirm AlgorithmState is in scope in this module
# and that callers pass those members rather than strings.
if state == AlgorithmState.INITIALIZED:
return self.__runtime_initialized
if state == AlgorithmState.CONVERGED:
return self.__runtime_converged
# Any other state value returns self.__runtime.
return self.__runtime

# Return self.__itrs.
# Raises Exception when self.__median_available() is false.
# NOTE(review): elsewhere in this class self.__itrs is indexed by median_pos
# (self.__itrs[median_pos]), so this presumably returns a per-start sequence
# rather than a single count - confirm against callers.
def get_num_itrs(self):
if not self.__median_available():
raise Exception('No median has been computed. Call run() before calling get_num_itrs().')
return self.__itrs


# Return the counter stored in self.__num_decrease_order.
# Raises Exception when self.__median_available() is false.
def get_num_times_order_decreased(self):
if not self.__median_available():
raise Exception('No median has been computed. Call run() before calling get_num_times_order_decreased().')
return self.__num_decrease_order
# Return the counter stored in self.__num_increase_order.
# Raises Exception when self.__median_available() is false.
def get_num_times_order_increased(self):
if not self.__median_available():
raise Exception('No median has been computed. Call run() before calling get_num_times_order_increased().')
return self.__num_increase_order
# Return the counter stored in self.__num_converged_descents.
# Raises Exception when self.__median_available() is false.
def get_num_converged_descents(self):
if not self.__median_available():
raise Exception('No median has been computed. Call run() before calling get_num_converged_descents().')
return self.__num_converged_descents
# Return the object held in self.__ged_env. Unlike the other getters next to
# it, this one performs no median-availability check.
def get_ged_env(self):
return self.__ged_env
def __set_default_options(self): def __set_default_options(self):
@@ -814,7 +853,7 @@ class MedianGraphEstimator(object):
decreased_order = False decreased_order = False
# Decrease the order as long as the best deletion delta is negative. # Decrease the order as long as the best deletion delta is negative.
while self.__compute_best_deletion_delta(graphs, median, id_deleted_node) < -self.__epsilon: # @todo
while self.__compute_best_deletion_delta(graphs, median, id_deleted_node) < -self.__epsilon:
decreased_order = True decreased_order = True
median = self.__delete_node_from_median(id_deleted_node[0], median) median = self.__delete_node_from_median(id_deleted_node[0], median)
@@ -896,7 +935,7 @@ class MedianGraphEstimator(object):
increased_order = False increased_order = False
# Increase the order as long as the best insertion delta is negative. # Increase the order as long as the best insertion delta is negative.
while self.__compute_best_insertion_delta(graphs, best_config, best_label) > - self.__epsilon:
while self.__compute_best_insertion_delta(graphs, best_config, best_label) < - self.__epsilon: # @todo
increased_order = True increased_order = True
self.__add_node_to_median(best_config, best_label, median) self.__add_node_to_median(best_config, best_label, median)


+ 2
- 2
gklearn/ged/median/test_median_graph_estimator.py View File

@@ -155,5 +155,5 @@ def test_median_graph_estimator_symb():




if __name__ == '__main__': if __name__ == '__main__':
set_median, gen_median = test_median_graph_estimator()
# set_median, gen_median = test_median_graph_estimator_symb()
# set_median, gen_median = test_median_graph_estimator()
set_median, gen_median = test_median_graph_estimator_symb()

+ 752
- 174
gklearn/gedlib/gedlibpy.cpp
File diff suppressed because it is too large
View File


BIN
gklearn/gedlib/gedlibpy.cpython-36m-x86_64-linux-gnu.so View File


+ 12
- 2
gklearn/gedlib/gedlibpy.pyx View File

@@ -105,7 +105,7 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged":
map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except + map[string, string] getMedianEdgeLabel(vector[map[string, string]] & edge_labels) except +
string getInitType() except + string getInitType() except +
# double getNodeCost(size_t label1, size_t label2) except + # double getNodeCost(size_t label1, size_t label2) except +
double computeInducedCost(size_t g_id, size_t h_id) except +
double computeInducedCost(size_t g_id, size_t h_id, vector[pair[size_t,size_t]]) except +
############################# #############################
@@ -1356,7 +1356,17 @@ cdef class GEDEnv:
------- -------
None. None.
""" """
induced_cost = self.c_env.computeInducedCost(g_id, h_id)
relation = []
node_map.as_relation(relation)
# print(relation)
dummy_node = get_dummy_node()
# print(dummy_node)
for i, val in enumerate(relation):
val1 = dummy_node if val[0] == np.inf else val[0]
val2 = dummy_node if val[1] == np.inf else val[1]
relation[i] = tuple((val1, val2))
# print(relation)
induced_cost = self.c_env.computeInducedCost(g_id, h_id, relation)
node_map.set_induced_cost(induced_cost) node_map.set_induced_cost(induced_cost)




+ 1
- 1
gklearn/gedlib/src/GedLibBind.hpp View File

@@ -477,7 +477,7 @@ public:
* @param[in] h_id ID of input graph. * @param[in] h_id ID of input graph.
* @return Computed induced cost. * @return Computed induced cost.
*/ */
double computeInducedCost(std::size_t g_id, std::size_t h_id) const;
double computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const;


// /*! // /*!
// * @brief Returns node relabeling, insertion, or deletion cost. // * @brief Returns node relabeling, insertion, or deletion cost.


+ 27
- 2
gklearn/gedlib/src/GedLibBind.ipp View File

@@ -610,13 +610,38 @@ std::string PyGEDEnv::getInitType() const {
return initOptionsToString(env_->get_init_type()); return initOptionsToString(env_->get_init_type());
} }


double PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id) const {
ged::NodeMap node_map = env_->get_node_map(g_id, h_id);
// Builds a node map between graphs g_id and h_id from the caller-supplied
// list of (source, target) pairs, asks the environment to compute the induced
// cost of that map, and returns the resulting cost.
double PyGEDEnv::computeInducedCost(std::size_t g_id, std::size_t h_id, std::vector<pair<std::size_t, std::size_t>> relation) const {
// Start from a fresh node map sized by the node counts of the two graphs.
ged::NodeMap node_map = ged::NodeMap(env_->get_num_nodes(g_id), env_->get_num_nodes(h_id));
// Copy every pair of the supplied relation into the node map.
for (const auto & assignment : relation) {
node_map.add_assignment(assignment.first, assignment.second);
// std::cout << assignment.first << assignment.second << endl;
}
// forward_map is declared as a value (not a reference) and is taken before
// the loop below adds further assignments.
const std::vector<ged::GEDGraph::NodeID> forward_map = node_map.get_forward_map();
// Source nodes still marked undefined are assigned to the dummy node.
for (std::size_t i{0}; i < node_map.num_source_nodes(); i++) {
if (forward_map.at(i) == ged::GEDGraph::undefined_node()) {
node_map.add_assignment(i, ged::GEDGraph::dummy_node());
}
}
// Same treatment for the target side: target nodes still marked undefined
// get the dummy node as their source.
const std::vector<ged::GEDGraph::NodeID> backward_map = node_map.get_backward_map();
for (std::size_t i{0}; i < node_map.num_target_nodes(); i++) {
if (backward_map.at(i) == ged::GEDGraph::undefined_node()) {
node_map.add_assignment(ged::GEDGraph::dummy_node(), i);
}
}
// for (auto & map : node_map.get_forward_map()) {
// std::cout << map << ", ";
// }
// std::cout << endl;
// for (auto & map : node_map.get_backward_map()) {
// std::cout << map << ", ";
// }
env_->compute_induced_cost(g_id, h_id, node_map); env_->compute_induced_cost(g_id, h_id, node_map);
return node_map.induced_cost(); return node_map.induced_cost();
} }






// double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const { // double PyGEDEnv::getNodeCost(std::size_t label1, std::size_t label2) const {
// return env_->ged_data_node_cost(label1, label2); // return env_->ged_data_node_cost(label1, label2);
// } // }


+ 13
- 9
gklearn/preimage/median_preimage_generator.py View File

@@ -187,6 +187,10 @@ class MedianPreimageGenerator(PreimageGenerator):
results['itrs'] = self.__itrs results['itrs'] = self.__itrs
results['converged'] = self.__converged results['converged'] = self.__converged
results['num_updates_ecc'] = self.__num_updates_ecc results['num_updates_ecc'] = self.__num_updates_ecc
results['mge'] = {}
results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased()
results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased()
results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents()
return results return results


@@ -660,27 +664,27 @@ class MedianPreimageGenerator(PreimageGenerator):
ged_env.init(init_option=self.__ged_options['init_option']) ged_env.init(init_option=self.__ged_options['init_option'])
# Set up the median graph estimator. # Set up the median graph estimator.
mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
options = self.__mge_options.copy() options = self.__mge_options.copy()
if not 'seed' in options: if not 'seed' in options:
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
# Select the GED algorithm. # Select the GED algorithm.
mge.set_options(mge_options_to_string(options))
mge.set_label_names(node_labels=self._dataset.node_labels,
self.__mge.set_options(mge_options_to_string(options))
self.__mge.set_label_names(node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels, edge_labels=self._dataset.edge_labels,
node_attrs=self._dataset.node_attrs, node_attrs=self._dataset.node_attrs,
edge_attrs=self._dataset.edge_attrs) edge_attrs=self._dataset.edge_attrs)
mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
self.__mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
self.__mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
# Run the estimator. # Run the estimator.
mge.run(graph_ids, set_median_id, gen_median_id)
self.__mge.run(graph_ids, set_median_id, gen_median_id)
# Get SODs. # Get SODs.
self.__sod_set_median = mge.get_sum_of_distances('initialized')
self.__sod_gen_median = mge.get_sum_of_distances('converged')
self.__sod_set_median = self.__mge.get_sum_of_distances('initialized')
self.__sod_gen_median = self.__mge.get_sum_of_distances('converged')
# Get median graphs. # Get median graphs.
self.__set_median = ged_env.get_nx_graph(set_median_id) self.__set_median = ged_env.get_nx_graph(set_median_id)


+ 25
- 5
gklearn/preimage/utils.py View File

@@ -58,6 +58,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
itrs_list = [] itrs_list = []
converged_list = [] converged_list = []
num_updates_ecc_list = [] num_updates_ecc_list = []
mge_decrease_order_list = []
mge_increase_order_list = []
mge_converged_order_list = []
nb_sod_sm2gm = [0, 0, 0] nb_sod_sm2gm = [0, 0, 0]
nb_dis_k_sm2gm = [0, 0, 0] nb_dis_k_sm2gm = [0, 0, 0]
nb_dis_k_gi2sm = [0, 0, 0] nb_dis_k_gi2sm = [0, 0, 0]
@@ -149,7 +152,10 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
results['runtime_precompute_gm'], results['runtime_optimize_ec'], results['runtime_precompute_gm'], results['runtime_optimize_ec'],
results['runtime_generate_preimage'], results['runtime_total'], results['runtime_generate_preimage'], results['runtime_total'],
results['itrs'], results['converged'], results['itrs'], results['converged'],
results['num_updates_ecc']])
results['num_updates_ecc'],
results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge
results['mge']['num_increase_order'] > 0,
results['mge']['num_converged_descents'] > 0])
f_detail.close() f_detail.close()
# compute result summary. # compute result summary.
@@ -165,6 +171,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
itrs_list.append(results['itrs']) itrs_list.append(results['itrs'])
converged_list.append(results['converged']) converged_list.append(results['converged'])
num_updates_ecc_list.append(results['num_updates_ecc']) num_updates_ecc_list.append(results['num_updates_ecc'])
mge_decrease_order_list.append(results['mge']['num_decrease_order'] > 0)
mge_increase_order_list.append(results['mge']['num_increase_order'] > 0)
mge_converged_order_list.append(results['mge']['num_converged_descents'] > 0)
# # SOD SM -> GM # # SOD SM -> GM
if results['sod_set_median'] > results['sod_gen_median']: if results['sod_set_median'] > results['sod_gen_median']:
nb_sod_sm2gm[0] += 1 nb_sod_sm2gm[0] += 1
@@ -211,7 +220,11 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
results['runtime_precompute_gm'], results['runtime_optimize_ec'], results['runtime_precompute_gm'], results['runtime_optimize_ec'],
results['runtime_generate_preimage'], results['runtime_total'], results['runtime_generate_preimage'], results['runtime_total'],
results['itrs'], results['converged'], results['itrs'], results['converged'],
results['num_updates_ecc'], nb_sod_sm2gm,
results['num_updates_ecc'],
results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge
results['mge']['num_increase_order'] > 0,
results['mge']['num_converged_descents'] > 0,
nb_sod_sm2gm,
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm]) nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm])
f_summary.close() f_summary.close()
@@ -257,6 +270,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
itrs_mean = np.mean(itrs_list) itrs_mean = np.mean(itrs_list)
num_converged = np.sum(converged_list) num_converged = np.sum(converged_list)
num_updates_ecc_mean = np.mean(num_updates_ecc_list) num_updates_ecc_mean = np.mean(num_updates_ecc_list)
num_mge_decrease_order = np.sum(mge_decrease_order_list)
num_mge_increase_order = np.sum(mge_increase_order_list)
num_mge_converged = np.sum(mge_converged_order_list)
sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean)) sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean))
dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean)) dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean)) dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
@@ -271,7 +287,9 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
dis_k_gi2sm_mean, dis_k_gi2gm_mean, dis_k_gi2sm_mean, dis_k_gi2gm_mean,
time_precompute_gm_mean, time_optimize_ec_mean, time_precompute_gm_mean, time_optimize_ec_mean,
time_generate_mean, time_total_mean, itrs_mean, time_generate_mean, time_total_mean, itrs_mean,
num_converged, num_updates_ecc_mean])
num_converged, num_updates_ecc_mean,
num_mge_decrease_order, num_mge_increase_order,
num_mge_converged])
f_summary.close() f_summary.close()
# save total pairwise kernel distances. # save total pairwise kernel distances.
@@ -301,7 +319,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output):
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'edit cost constants', 'time precompute gm', 'dis_k gi -> GM', 'edit cost constants', 'time precompute gm',
'time optimize ec', 'time generate preimage', 'time total', 'time optimize ec', 'time generate preimage', 'time total',
'itrs', 'converged', 'num updates ecc'])
'itrs', 'converged', 'num updates ecc', 'mge decrease order',
'mge increase order', 'mge converged'])
f_detail.close() f_detail.close()
# fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' # fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
@@ -313,7 +332,8 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output):
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'time precompute gm', 'time optimize ec', 'dis_k gi -> GM', 'time precompute gm', 'time optimize ec',
'time generate preimage', 'time total', 'itrs', 'num converged', 'time generate preimage', 'time total', 'itrs', 'num converged',
'num updates ecc', '# SOD SM -> GM', '# dis_k SM -> GM',
'num updates ecc', 'mge num decrease order', 'mge num increase order',
'mge num converged', '# SOD SM -> GM', '# dis_k SM -> GM',
'# dis_k gi -> SM', '# dis_k gi -> GM']) '# dis_k gi -> SM', '# dis_k gi -> GM'])
# 'repeats better SOD SM -> GM', # 'repeats better SOD SM -> GM',
# 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', # 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',


Loading…
Cancel
Save