
New translations median_preimage_generator_cml.py (French)

l10n_v0.2.x
linlin 4 years ago
commit 9005ceeb75
1 changed file with 221 additions and 221 deletions

+221  -221  lang/fr/gklearn/preimage/median_preimage_generator_cml.py

@@ -27,69 +27,69 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset)
### arguments to set.
self.__mge = None
self.__ged_options = {}
self.__mge_options = {}
# self.__fit_method = 'k-graphs'
self.__init_method = 'random'
self.__init_ecc = None
self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count()
self.__ds_name = None
self._mge = None
self._ged_options = {}
self._mge_options = {}
# self._fit_method = 'k-graphs'
self._init_method = 'random'
self._init_ecc = None
self._parallel = True
self._n_jobs = multiprocessing.cpu_count()
self._ds_name = None
# for cml.
self.__time_limit_in_sec = 0
self.__max_itrs = 100
self.__max_itrs_without_update = 3
self.__epsilon_residual = 0.01
self.__epsilon_ec = 0.1
self.__allow_zeros = True
# self.__triangle_rule = True
self._time_limit_in_sec = 0
self._max_itrs = 100
self._max_itrs_without_update = 3
self._epsilon_residual = 0.01
self._epsilon_ec = 0.1
self._allow_zeros = True
# self._triangle_rule = True
### values to compute.
self.__runtime_optimize_ec = None
self.__runtime_generate_preimage = None
self.__runtime_total = None
self.__set_median = None
self.__gen_median = None
self.__best_from_dataset = None
self.__sod_set_median = None
self.__sod_gen_median = None
self.__k_dis_set_median = None
self.__k_dis_gen_median = None
self.__k_dis_dataset = None
self.__node_label_costs = None
self.__edge_label_costs = None
self._runtime_optimize_ec = None
self._runtime_generate_preimage = None
self._runtime_total = None
self._set_median = None
self._gen_median = None
self._best_from_dataset = None
self._sod_set_median = None
self._sod_gen_median = None
self._k_dis_set_median = None
self._k_dis_gen_median = None
self._k_dis_dataset = None
self._node_label_costs = None
self._edge_label_costs = None
# for cml.
self.__itrs = 0
self.__converged = False
self.__num_updates_ecs = 0
self._itrs = 0
self._converged = False
self._num_updates_ecs = 0
### values that can be set or to be computed.
self.__edit_cost_constants = []
self.__gram_matrix_unnorm = None
self.__runtime_precompute_gm = None
self._edit_cost_constants = []
self._gram_matrix_unnorm = None
self._runtime_precompute_gm = None

def set_options(self, **kwargs):
self._kernel_options = kwargs.get('kernel_options', {})
self._graph_kernel = kwargs.get('graph_kernel', None)
self._verbose = kwargs.get('verbose', 2)
self.__ged_options = kwargs.get('ged_options', {})
self.__mge_options = kwargs.get('mge_options', {})
# self.__fit_method = kwargs.get('fit_method', 'k-graphs')
self.__init_method = kwargs.get('init_method', 'random')
self.__init_ecc = kwargs.get('init_ecc', None)
self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
self.__parallel = kwargs.get('parallel', True)
self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self.__ds_name = kwargs.get('ds_name', None)
self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self.__max_itrs = kwargs.get('max_itrs', 100)
self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01)
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self.__allow_zeros = kwargs.get('allow_zeros', True)
# self.__triangle_rule = kwargs.get('triangle_rule', True)
self._ged_options = kwargs.get('ged_options', {})
self._mge_options = kwargs.get('mge_options', {})
# self._fit_method = kwargs.get('fit_method', 'k-graphs')
self._init_method = kwargs.get('init_method', 'random')
self._init_ecc = kwargs.get('init_ecc', None)
self._edit_cost_constants = kwargs.get('edit_cost_constants', [])
self._parallel = kwargs.get('parallel', True)
self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self._ds_name = kwargs.get('ds_name', None)
self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self._max_itrs = kwargs.get('max_itrs', 100)
self._max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
self._epsilon_residual = kwargs.get('epsilon_residual', 0.01)
self._epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self._allow_zeros = kwargs.get('allow_zeros', True)
# self._triangle_rule = kwargs.get('triangle_rule', True)
def run(self):
@@ -105,48 +105,48 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
start = time.time()
# 1. precompute gram matrix.
if self.__gram_matrix_unnorm is None:
if self._gram_matrix_unnorm is None:
gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
end_precompute_gm = time.time()
self.__runtime_precompute_gm = end_precompute_gm - start
self._runtime_precompute_gm = end_precompute_gm - start
else:
if self.__runtime_precompute_gm is None:
if self._runtime_precompute_gm is None:
raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm
if self._kernel_options['normalize']:
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm))
else:
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm)
end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm
start -= self._runtime_precompute_gm
# if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
# if self._fit_method != 'k-graphs' and self._fit_method != 'whole-dataset':
# start = time.time()
# self.__runtime_precompute_gm = 0
# self._runtime_precompute_gm = 0
# end_precompute_gm = start
# 2. optimize edit cost constants.
self.__optimize_edit_cost_vector()
self._optimize_edit_cost_vector()
end_optimize_ec = time.time()
self.__runtime_optimize_ec = end_optimize_ec - end_precompute_gm
self._runtime_optimize_ec = end_optimize_ec - end_precompute_gm
# 3. compute set median and gen median using optimized edit costs.
if self._verbose >= 2:
print('\nstart computing set median and gen median using optimized edit costs...\n')
self.__gmg_bcu()
self._gmg_bcu()
end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec
self.__runtime_total = end_generate_preimage - start
self._runtime_generate_preimage = end_generate_preimage - end_optimize_ec
self._runtime_total = end_generate_preimage - start
if self._verbose >= 2:
print('medians computed.')
print('SOD of the set median: ', self.__sod_set_median)
print('SOD of the generalized median: ', self.__sod_gen_median)
print('SOD of the set median: ', self._sod_set_median)
print('SOD of the generalized median: ', self._sod_gen_median)
# 4. compute kernel distances to the true median.
if self._verbose >= 2:
print('\nstart computing distances to true median....\n')
self.__compute_distances_to_true_median()
self._compute_distances_to_true_median()

# 5. print out results.
if self._verbose:
@@ -154,145 +154,145 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
print('================================================================================')
print('Finished generation of preimages.')
print('--------------------------------------------------------------------------------')
print('The optimized edit costs:', self.__edit_cost_constants)
print('SOD of the set median:', self.__sod_set_median)
print('SOD of the generalized median:', self.__sod_gen_median)
print('Distance in kernel space for set median:', self.__k_dis_set_median)
print('Distance in kernel space for generalized median:', self.__k_dis_gen_median)
print('Minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
print('Time to optimize edit costs:', self.__runtime_optimize_ec)
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('Total number of iterations for optimizing:', self.__itrs)
print('Total number of updating edit costs:', self.__num_updates_ecs)
print('Is optimization of edit costs converged:', self.__converged)
print('The optimized edit costs:', self._edit_cost_constants)
print('SOD of the set median:', self._sod_set_median)
print('SOD of the generalized median:', self._sod_gen_median)
print('Distance in kernel space for set median:', self._k_dis_set_median)
print('Distance in kernel space for generalized median:', self._k_dis_gen_median)
print('Minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
print('Time to optimize edit costs:', self._runtime_optimize_ec)
print('Time to generate pre-images:', self._runtime_generate_preimage)
print('Total time:', self._runtime_total)
print('Total number of iterations for optimizing:', self._itrs)
print('Total number of updating edit costs:', self._num_updates_ecs)
print('Is optimization of edit costs converged:', self._converged)
print('================================================================================')
print()


def get_results(self):
results = {}
results['edit_cost_constants'] = self.__edit_cost_constants
results['runtime_precompute_gm'] = self.__runtime_precompute_gm
results['runtime_optimize_ec'] = self.__runtime_optimize_ec
results['runtime_generate_preimage'] = self.__runtime_generate_preimage
results['runtime_total'] = self.__runtime_total
results['sod_set_median'] = self.__sod_set_median
results['sod_gen_median'] = self.__sod_gen_median
results['k_dis_set_median'] = self.__k_dis_set_median
results['k_dis_gen_median'] = self.__k_dis_gen_median
results['k_dis_dataset'] = self.__k_dis_dataset
results['itrs'] = self.__itrs
results['converged'] = self.__converged
results['num_updates_ecc'] = self.__num_updates_ecs
results['edit_cost_constants'] = self._edit_cost_constants
results['runtime_precompute_gm'] = self._runtime_precompute_gm
results['runtime_optimize_ec'] = self._runtime_optimize_ec
results['runtime_generate_preimage'] = self._runtime_generate_preimage
results['runtime_total'] = self._runtime_total
results['sod_set_median'] = self._sod_set_median
results['sod_gen_median'] = self._sod_gen_median
results['k_dis_set_median'] = self._k_dis_set_median
results['k_dis_gen_median'] = self._k_dis_gen_median
results['k_dis_dataset'] = self._k_dis_dataset
results['itrs'] = self._itrs
results['converged'] = self._converged
results['num_updates_ecc'] = self._num_updates_ecs
results['mge'] = {}
results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased()
results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased()
results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents()
results['mge']['num_decrease_order'] = self._mge.get_num_times_order_decreased()
results['mge']['num_increase_order'] = self._mge.get_num_times_order_increased()
results['mge']['num_converged_descents'] = self._mge.get_num_converged_descents()
return results

def __optimize_edit_cost_vector(self):
def _optimize_edit_cost_vector(self):
"""Learn edit cost vector.
"""
# Initialize label costs randomly.
if self.__init_method == 'random':
if self._init_method == 'random':
# Initialize label costs.
self.__initialize_label_costs()
self._initialize_label_costs()
# Optimize edit cost matrices.
self.__optimize_ecm_by_kernel_distances()
self._optimize_ecm_by_kernel_distances()
# Initialize all label costs with the same value.
elif self.__init_method == 'uniform': # random
elif self._init_method == 'uniform': # random
pass
elif self.__fit_method == 'random': # random
if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = random.sample(range(1, 1000), 3)
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
elif self.__ged_options['edit_cost'] == 'LETTER2':
elif self._fit_method == 'random': # random
if self._ged_options['edit_cost'] == 'LETTER':
self._edit_cost_constants = random.sample(range(1, 1000), 3)
self._edit_cost_constants = [item * 0.001 for item in self._edit_cost_constants]
elif self._ged_options['edit_cost'] == 'LETTER2':
random.seed(time.time())
self.__edit_cost_constants = random.sample(range(1, 1000), 5)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
self.__edit_cost_constants = random.sample(range(1, 1000), 6)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
self._edit_cost_constants = random.sample(range(1, 1000), 5)
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
self._edit_cost_constants = random.sample(range(1, 1000), 6)
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
if self._dataset.node_attrs == []:
self.__edit_cost_constants[2] = 0
self._edit_cost_constants[2] = 0
if self._dataset.edge_attrs == []:
self.__edit_cost_constants[5] = 0
self._edit_cost_constants[5] = 0
else:
self.__edit_cost_constants = random.sample(range(1, 1000), 6)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
self._edit_cost_constants = random.sample(range(1, 1000), 6)
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
if self._verbose >= 2:
print('edit cost constants used:', self.__edit_cost_constants)
elif self.__fit_method == 'expert': # expert
if self.__init_ecc is None:
if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = [0.9, 1.7, 0.75]
elif self.__ged_options['edit_cost'] == 'LETTER2':
self.__edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
print('edit cost constants used:', self._edit_cost_constants)
elif self._fit_method == 'expert': # expert
if self._init_ecc is None:
if self._ged_options['edit_cost'] == 'LETTER':
self._edit_cost_constants = [0.9, 1.7, 0.75]
elif self._ged_options['edit_cost'] == 'LETTER2':
self._edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
else:
self.__edit_cost_constants = [3, 3, 1, 3, 3, 1]
self._edit_cost_constants = [3, 3, 1, 3, 3, 1]
else:
self.__edit_cost_constants = self.__init_ecc
elif self.__fit_method == 'k-graphs':
if self.__init_ecc is None:
if self.__ged_options['edit_cost'] == 'LETTER':
self.__init_ecc = [0.9, 1.7, 0.75]
elif self.__ged_options['edit_cost'] == 'LETTER2':
self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
self.__init_ecc = [0, 0, 1, 1, 1, 0]
self._edit_cost_constants = self._init_ecc
elif self._fit_method == 'k-graphs':
if self._init_ecc is None:
if self._ged_options['edit_cost'] == 'LETTER':
self._init_ecc = [0.9, 1.7, 0.75]
elif self._ged_options['edit_cost'] == 'LETTER2':
self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
self._init_ecc = [0, 0, 1, 1, 1, 0]
if self._dataset.node_attrs == []:
self.__init_ecc[2] = 0
self._init_ecc[2] = 0
if self._dataset.edge_attrs == []:
self.__init_ecc[5] = 0
self._init_ecc[5] = 0
else:
self.__init_ecc = [3, 3, 1, 3, 3, 1]
self._init_ecc = [3, 3, 1, 3, 3, 1]
# optimize on the k-graph subset.
self.__optimize_ecm_by_kernel_distances()
elif self.__fit_method == 'whole-dataset':
if self.__init_ecc is None:
if self.__ged_options['edit_cost'] == 'LETTER':
self.__init_ecc = [0.9, 1.7, 0.75]
elif self.__ged_options['edit_cost'] == 'LETTER2':
self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
self._optimize_ecm_by_kernel_distances()
elif self._fit_method == 'whole-dataset':
if self._init_ecc is None:
if self._ged_options['edit_cost'] == 'LETTER':
self._init_ecc = [0.9, 1.7, 0.75]
elif self._ged_options['edit_cost'] == 'LETTER2':
self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
else:
self.__init_ecc = [3, 3, 1, 3, 3, 1]
self._init_ecc = [3, 3, 1, 3, 3, 1]
# optimize on the whole set.
self.__optimize_ecc_by_kernel_distances()
elif self.__fit_method == 'precomputed':
self._optimize_ecc_by_kernel_distances()
elif self._fit_method == 'precomputed':
pass
def __initialize_label_costs(self):
self.__initialize_node_label_costs()
self.__initialize_edge_label_costs()
def _initialize_label_costs(self):
self._initialize_node_label_costs()
self._initialize_edge_label_costs()
def __initialize_node_label_costs(self):
def _initialize_node_label_costs(self):
# Get list of node labels.
nls = self._dataset.get_all_node_labels()
# Generate random costs.
nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls))
rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl)
rand_costs /= np.max(rand_costs) # @todo: maybe not needed.
self.__node_label_costs = rand_costs
self._node_label_costs = rand_costs


def __initialize_edge_label_costs(self):
def _initialize_edge_label_costs(self):
# Get list of edge labels.
els = self._dataset.get_all_edge_labels()
# Generate random costs.
nb_el = int((len(els) * (len(els) - 1)) / 2 + 2 * len(els))
rand_costs = random.sample(range(1, 10 * nb_el + 1), nb_el)
rand_costs /= np.max(rand_costs) # @todo: maybe not needed.
self.__edge_label_costs = rand_costs
self._edge_label_costs = rand_costs
def __optimize_ecm_by_kernel_distances(self):
def _optimize_ecm_by_kernel_distances(self):
# compute distances in feature space.
dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix()
dis_k_vec = []
@@ -303,35 +303,35 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
dis_k_vec = np.array(dis_k_vec)
# Set GEDEnv options.
# graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
# self.__edit_cost_constants = self.__init_ecc
options = self.__ged_options.copy()
options['edit_cost_constants'] = self.__edit_cost_constants # @todo: not needed.
# graphs = [self._clean_graph(g) for g in self._dataset.graphs]
# self._edit_cost_constants = self._init_ecc
options = self._ged_options.copy()
options['edit_cost_constants'] = self._edit_cost_constants # @todo: not needed.
options['node_labels'] = self._dataset.node_labels
options['edge_labels'] = self._dataset.edge_labels
# options['node_attrs'] = self._dataset.node_attrs
# options['edge_attrs'] = self._dataset.edge_attrs
options['node_label_costs'] = self.__node_label_costs
options['edge_label_costs'] = self.__edge_label_costs
options['node_label_costs'] = self._node_label_costs
options['edge_label_costs'] = self._edge_label_costs
# Learn cost matrices.
# Initialize cost learner.
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self.__parallel, verbose=self._verbose) # @todo
cml.set_update_params(time_limit_in_sec=self.__time_limit_in_sec, max_itrs=self.__max_itrs, max_itrs_without_update=self.__max_itrs_without_update, epsilon_residual=self.__epsilon_residual, epsilon_ec=self.__epsilon_ec)
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self._parallel, verbose=self._verbose) # @todo
cml.set_update_params(time_limit_in_sec=self._time_limit_in_sec, max_itrs=self._max_itrs, max_itrs_without_update=self._max_itrs_without_update, epsilon_residual=self._epsilon_residual, epsilon_ec=self._epsilon_ec)
# Run cost learner.
cml.update(dis_k_vec, self._dataset.graphs, options)
# Get results.
results = cml.get_results()
self.__converged = results['converged']
self.__itrs = results['itrs']
self.__num_updates_ecs = results['num_updates_ecs']
self._converged = results['converged']
self._itrs = results['itrs']
self._num_updates_ecs = results['num_updates_ecs']
cost_list = results['cost_list']
self.__node_label_costs = cost_list[-1][0:len(self.__node_label_costs)]
self.__edge_label_costs = cost_list[-1][len(self.__node_label_costs):]
self._node_label_costs = cost_list[-1][0:len(self._node_label_costs)]
self._edge_label_costs = cost_list[-1][len(self._node_label_costs):]

def __gmg_bcu(self):
def _gmg_bcu(self):
"""
The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG).

@@ -343,77 +343,77 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
# Set up the ged environment.
ged_env = GEDEnv() # @todo: maybe create a ged_env as a private variable.
# gedlibpy.restart_env()
ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constants=self.__edit_cost_constants)
graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constants=self._edit_cost_constants)
graphs = [self._clean_graph(g) for g in self._dataset.graphs]
for g in graphs:
ged_env.add_nx_graph(g, '')
graph_ids = ged_env.get_all_graph_ids()
node_labels = ged_env.get_all_node_labels()
edge_labels = ged_env.get_all_edge_labels()
node_label_costs = label_costs_to_matrix(self.__node_label_costs, len(node_labels))
edge_label_costs = label_costs_to_matrix(self.__edge_label_costs, len(edge_labels))
node_label_costs = label_costs_to_matrix(self._node_label_costs, len(node_labels))
edge_label_costs = label_costs_to_matrix(self._edge_label_costs, len(edge_labels))
ged_env.set_label_costs(node_label_costs, edge_label_costs)
set_median_id = ged_env.add_graph('set_median')
gen_median_id = ged_env.add_graph('gen_median')
ged_env.init(init_type=self.__ged_options['init_option'])
ged_env.init(init_type=self._ged_options['init_option'])
# Set up the median graph estimator.
self.__mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options)
options = self.__mge_options.copy()
self._mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self._ged_options['edit_cost']))
self._mge.set_refine_method(self._ged_options['method'], self._ged_options)
options = self._mge_options.copy()
if not 'seed' in options:
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
options['parallel'] = self.__parallel
options['parallel'] = self._parallel
# Select the GED algorithm.
self.__mge.set_options(mge_options_to_string(options))
self.__mge.set_label_names(node_labels=self._dataset.node_labels,
self._mge.set_options(mge_options_to_string(options))
self._mge.set_label_names(node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels,
node_attrs=self._dataset.node_attrs,
edge_attrs=self._dataset.edge_attrs)
ged_options = self.__ged_options.copy()
if self.__parallel:
ged_options = self._ged_options.copy()
if self._parallel:
ged_options['threads'] = 1
self.__mge.set_init_method(ged_options['method'], ged_options)
self.__mge.set_descent_method(ged_options['method'], ged_options)
self._mge.set_init_method(ged_options['method'], ged_options)
self._mge.set_descent_method(ged_options['method'], ged_options)
# Run the estimator.
self.__mge.run(graph_ids, set_median_id, gen_median_id)
self._mge.run(graph_ids, set_median_id, gen_median_id)
# Get SODs.
self.__sod_set_median = self.__mge.get_sum_of_distances('initialized')
self.__sod_gen_median = self.__mge.get_sum_of_distances('converged')
self._sod_set_median = self._mge.get_sum_of_distances('initialized')
self._sod_gen_median = self._mge.get_sum_of_distances('converged')
# Get median graphs.
self.__set_median = ged_env.get_nx_graph(set_median_id)
self.__gen_median = ged_env.get_nx_graph(gen_median_id)
self._set_median = ged_env.get_nx_graph(set_median_id)
self._gen_median = ged_env.get_nx_graph(gen_median_id)
def __compute_distances_to_true_median(self):
def _compute_distances_to_true_median(self):
# compute distance in kernel space for set median.
kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options)
kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options)
kernels_to_sm, _ = self._graph_kernel.compute(self._set_median, self._dataset.graphs, **self._kernel_options)
kernel_sm, _ = self._graph_kernel.compute(self._set_median, self._set_median, **self._kernel_options)
if self._kernel_options['normalize']:
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
kernel_sm = 1
# @todo: not correct kernel value
gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1)
self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
self._k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_sm, withterm3=False)
# compute distance in kernel space for generalized median.
kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options)
kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options)
kernels_to_gm, _ = self._graph_kernel.compute(self._gen_median, self._dataset.graphs, **self._kernel_options)
kernel_gm, _ = self._graph_kernel.compute(self._gen_median, self._gen_median, **self._kernel_options)
if self._kernel_options['normalize']:
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
kernel_gm = 1
gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1)
self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
self._k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_gm, withterm3=False)
@@ -424,19 +424,19 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_gm, withterm3=False))
idx_k_dis_median_set_min = np.argmin(k_dis_median_set)
self.__k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min]
self.__best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy()
self._k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min]
self._best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy()
if self._verbose >= 2:
print()
print('distance in kernel space for set median:', self.__k_dis_set_median)
print('distance in kernel space for generalized median:', self.__k_dis_gen_median)
print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
print('distance in kernel space for set median:', self._k_dis_set_median)
print('distance in kernel space for generalized median:', self._k_dis_gen_median)
print('minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
print('distance in kernel space for each graph in median set:', k_dis_median_set)
# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
def __clean_graph(self, G): # @todo: this may not be needed when datafile is updated.
# def _clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
def _clean_graph(self, G): # @todo: this may not be needed when datafile is updated.
"""
Cleans node and edge labels and attributes of the given graph.
"""
@@ -458,63 +458,63 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
@property
def mge(self):
return self.__mge
return self._mge
@property
def ged_options(self):
return self.__ged_options
return self._ged_options

@ged_options.setter
def ged_options(self, value):
self.__ged_options = value
self._ged_options = value

@property
def mge_options(self):
return self.__mge_options
return self._mge_options

@mge_options.setter
def mge_options(self, value):
self.__mge_options = value
self._mge_options = value


@property
def fit_method(self):
return self.__fit_method
return self._fit_method

@fit_method.setter
def fit_method(self, value):
self.__fit_method = value
self._fit_method = value
@property
def init_ecc(self):
return self.__init_ecc
return self._init_ecc

@init_ecc.setter
def init_ecc(self, value):
self.__init_ecc = value
self._init_ecc = value
@property
def set_median(self):
return self.__set_median
return self._set_median


@property
def gen_median(self):
return self.__gen_median
return self._gen_median
@property
def best_from_dataset(self):
return self.__best_from_dataset
return self._best_from_dataset
@property
def gram_matrix_unnorm(self):
return self.__gram_matrix_unnorm
return self._gram_matrix_unnorm
@gram_matrix_unnorm.setter
def gram_matrix_unnorm(self, value):
self.__gram_matrix_unnorm = value
self._gram_matrix_unnorm = value
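
Every hunk in this diff makes the same substitution: attributes and methods written with two leading underscores (self.__mge, self.__ged_options, self.__optimize_edit_cost_vector, ...) are renamed to single-underscore names (self._mge, self._ged_options, self._optimize_edit_cost_vector, ...). The two spellings behave differently in Python: double-underscore names are name-mangled per class, while a single underscore is only a naming convention. Below is a minimal standalone sketch of that difference; the Parent/Child classes are hypothetical and not part of gklearn.

class Parent:
    def __init__(self):
        self.__mangled = 1   # stored on the instance as _Parent__mangled
        self._plain = 2      # stored as _plain, visible to subclasses unchanged

class Child(Parent):
    def read_plain(self):
        return self._plain       # works: no mangling involved

    def read_mangled(self):
        return self.__mangled    # compiles to self._Child__mangled -> AttributeError

c = Child()
print(c.read_plain())            # 2
print(c._Parent__mangled)        # 1; the mangled name is tied to the defining class
# c.read_mangled()               # would raise AttributeError

In the same way, a subclass of MedianPreimageGeneratorCML can read self._mge directly, whereas self.__mge would resolve to a different mangled name in each class.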
