Browse Source

1. add function to get median node/edge label in MedianGraphEstimator.

2. update load_tud function.
3. update MedianPreimageGenerator.
v0.2.x
jajupmochi 5 years ago
parent
commit
66e18c93e1
5 changed files with 201 additions and 45 deletions
  1. +144
    -3
      gklearn/ged/median/median_graph_estimator.py
  2. +34
    -29
      gklearn/preimage/median_preimage_generator.py
  3. +6
    -4
      gklearn/preimage/utils.py
  4. +4
    -2
      gklearn/utils/dataset.py
  5. +13
    -7
      gklearn/utils/graph_files.py

+ 144
- 3
gklearn/ged/median/median_graph_estimator.py View File

@@ -666,7 +666,8 @@ class MedianGraphEstimator(object):
# Compute the median label and update the median. # Compute the median label and update the median.
if len(node_labels) > 0: if len(node_labels) > 0:
median_label = self.__ged_env.get_median_node_label(node_labels)
# median_label = self.__ged_env.get_median_node_label(node_labels)
median_label = self.__get_median_node_label(node_labels)
if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon: if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon:
nx.set_node_attributes(median, {i: median_label}) nx.set_node_attributes(median, {i: median_label})
@@ -701,7 +702,7 @@ class MedianGraphEstimator(object):
if median.has_edge(i, j): if median.has_edge(i, j):
median_label = median.edges[(i, j)] median_label = median.edges[(i, j)]
if self.__labeled_edges and len(edge_labels) > 0: if self.__labeled_edges and len(edge_labels) > 0:
new_median_label = self.__ged_env.median_edge_label(edge_labels)
new_median_label = self.__get_median_edge_label(edge_labels)
if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon: if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon:
median_label = new_median_label median_label = new_median_label
for edge_label in edge_labels: for edge_label in edge_labels:
@@ -821,4 +822,144 @@ class MedianGraphEstimator(object):
def compute_my_cost(g, h, node_map): def compute_my_cost(g, h, node_map):
cost = 0.0 cost = 0.0
for node in g.nodes: for node in g.nodes:
cost += 0
cost += 0
def __get_median_node_label(self, node_labels):
	"""Compute the median of a list of node labels.

	Parameters
	----------
	node_labels : list of dict
		The node labels to aggregate.

	Returns
	-------
	dict
		The median node label.
	"""
	# @todo: dispatch on the label type once the symbolic variant
	# (__get_median_node_label_symbolic) is implemented; the original
	# `if True:` guard made that branch unreachable, so only the
	# non-symbolic (numeric) median is computed for now.
	return self.__get_median_label_nonsymbolic(node_labels)
def __get_median_edge_label(self, edge_labels):
	"""Compute the median of a list of edge labels.

	Parameters
	----------
	edge_labels : list of dict
		The edge labels to aggregate.

	Returns
	-------
	dict
		The median edge label.
	"""
	# @todo: dispatch on the label type once the symbolic variant
	# (__get_median_edge_label_symbolic) is implemented; the original
	# `if True:` guard made that branch unreachable, so only the
	# non-symbolic (numeric) median is computed for now.
	return self.__get_median_label_nonsymbolic(edge_labels)
def __get_median_label_nonsymbolic(self, labels):
	"""Compute the geometric median of a list of non-symbolic (numeric) labels.

	Uses Weiszfeld's algorithm, initialized at the component-wise mean.

	Parameters
	----------
	labels : list of dict
		Labels whose values are string representations of numbers. All
		labels are assumed to share the keys of ``labels[0]``.

	Returns
	-------
	dict
		The median label, with values converted back to strings (labels
		are stored as strings throughout). ``{}`` if ``labels`` is empty.
	"""
	if len(labels) == 0:
		return {}  # @todo: is an empty label the right result here?
	# Transform the labels into coordinates and compute the mean label
	# as the initial solution.
	labels_as_coords = []
	sums = {key: 0 for key in labels[0]}
	for label in labels:
		coords = {}
		for key, val in label.items():
			# Bug fix: the original rebound the loop variable `label`
			# here, shadowing the outer iteration variable.
			val_f = float(val)
			sums[key] += val_f
			coords[key] = val_f
		labels_as_coords.append(coords)
	median = {key: val / len(labels) for key, val in sums.items()}
	# Run the main loop of Weiszfeld's algorithm: iteratively re-weight
	# each point by the inverse of its distance to the current estimate.
	epsilon = 0.0001
	delta = 1.0
	num_itrs = 0
	all_equal = False
	while delta > epsilon and num_itrs < 100 and not all_equal:
		numerator = {key: 0 for key in sums}
		denominator = 0
		for coords in labels_as_coords:
			# Euclidean distance from the current median to this label.
			norm = 0
			for key, val in coords.items():
				norm += (val - median[key]) ** 2
			# Bug fix: was `norm += np.sqrt(norm)`, which yields
			# d**2 + d instead of the distance d required by Weiszfeld.
			norm = np.sqrt(norm)
			if norm > 0:
				for key, val in coords.items():
					numerator[key] += val / norm
				denominator += 1.0 / norm
		if denominator == 0:
			# Every label coincides with the current median; converged.
			all_equal = True
		else:
			new_median = {}
			delta = 0.0
			for key, val in numerator.items():
				this_median = val / denominator
				new_median[key] = this_median
				delta += np.abs(median[key] - this_median)
			median = new_median
		num_itrs += 1
	# Transform the solution back to strings and return it.
	return {key: str(val) for key, val in median.items()}
def __get_median_node_label_symbolic(self, node_labels):
	"""Compute the median of a list of symbolic node labels.

	Not implemented yet; returns ``None``. The dispatching method
	currently always takes the non-symbolic path, so this stub is
	never reached.
	"""
	pass

def __get_median_edge_label_symbolic(self, edge_labels):
	"""Compute the median of a list of symbolic edge labels.

	Not implemented yet; returns ``None``. The dispatching method
	currently always takes the non-symbolic path, so this stub is
	never reached.
	"""
	pass
# def __get_median_edge_label_nonsymbolic(self, edge_labels):
# if len(edge_labels) == 0:
# return {}
# else:
# # Transform the labels into coordinates and compute mean label as initial solution.
# edge_labels_as_coords = []
# sums = {}
# for key, val in edge_labels[0].items():
# sums[key] = 0
# for edge_label in edge_labels:
# coords = {}
# for key, val in edge_label.items():
# label = float(val)
# sums[key] += label
# coords[key] = label
# edge_labels_as_coords.append(coords)
# median = {}
# for key, val in sums.items():
# median[key] = val / len(edge_labels)
#
# # Run main loop of Weiszfeld's Algorithm.
# epsilon = 0.0001
# delta = 1.0
# num_itrs = 0
# all_equal = False
# while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)):
# numerator = {}
# for key, val in sums.items():
# numerator[key] = 0
# denominator = 0
# for edge_label_as_coord in edge_labels_as_coords:
# norm = 0
# for key, val in edge_label_as_coord.items():
# norm += (val - median[key]) ** 2
# norm += np.sqrt(norm)
# if norm > 0:
# for key, val in edge_label_as_coord.items():
# numerator[key] += val / norm
# denominator += 1.0 / norm
# if denominator == 0:
# all_equal = True
# else:
# new_median = {}
# delta = 0.0
# for key, val in numerator.items():
# this_median = val / denominator
# new_median[key] = this_median
# delta += np.abs(median[key] - this_median)
# median = new_median
#
# num_itrs += 1
#
# # Transform the solution to ged::GXLLabel and return it.
# median_label = {}
# for key, val in median.items():
# median_label[key] = str(val)
# return median_label

+ 34
- 29
gklearn/preimage/median_preimage_generator.py View File

@@ -96,7 +96,10 @@ class MedianPreimageGenerator(PreimageGenerator):
if self.__runtime_precompute_gm is None: if self.__runtime_precompute_gm is None:
raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
if self._kernel_options['normalize']:
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
else:
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
end_precompute_gm = time.time() end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm start -= self.__runtime_precompute_gm
@@ -447,31 +450,7 @@ class MedianPreimageGenerator(PreimageGenerator):
constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])], constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints) prob = cp.Problem(cp.Minimize(cost_fun), constraints)
try:
prob.solve(verbose=True)
except MemoryError as error0:
if self._verbose >= 2:
print('\nUsing solver "OSQP" caused a memory error.')
print('the original error message is\n', error0)
print('solver status: ', prob.status)
print('trying solver "CVXOPT" instead...\n')
try:
prob.solve(solver=cp.CVXOPT, verbose=True)
except Exception as error1:
if self._verbose >= 2:
print('\nAn error occured when using solver "CVXOPT".')
print('the original error message is\n', error1)
print('solver status: ', prob.status)
print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
prob.solve(solver=cp.MOSEK, verbose=True)
else:
if self._verbose >= 2:
print('solver status: ', prob.status)
else:
if self._verbose >= 2:
print('solver status: ', prob.status)
if self._verbose >= 2:
print()
self.__execute_cvx(prob)
edit_costs_new = x.value edit_costs_new = x.value
residual = np.sqrt(prob.value) residual = np.sqrt(prob.value)
elif rw_constraints == '2constraints': elif rw_constraints == '2constraints':
@@ -551,9 +530,7 @@ class MedianPreimageGenerator(PreimageGenerator):
constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])], constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints) prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
if self._verbose >= 2:
print(x.value)
self.execute_cvx(prob)
edit_costs_new = np.concatenate((x.value, np.array([0.0]))) edit_costs_new = np.concatenate((x.value, np.array([0.0])))
residual = np.sqrt(prob.value) residual = np.sqrt(prob.value)
elif not is_n_attr and is_e_attr: elif not is_n_attr and is_e_attr:
@@ -616,6 +593,34 @@ class MedianPreimageGenerator(PreimageGenerator):
return edit_costs_new, residual return edit_costs_new, residual
def __execute_cvx(self, prob):
	"""Solve a cvxpy problem, falling back through alternative solvers.

	Tries the default solver first; if that raises ``MemoryError``,
	retries with CVXOPT, and if CVXOPT also fails, retries with MOSEK.
	Diagnostic output is printed when ``self._verbose >= 2``.
	"""
	chatty = self._verbose >= 2

	def show_status():
		# Report the solver status of the most recent attempt.
		print('solver status: ', prob.status)

	try:
		prob.solve(verbose=chatty)
	except MemoryError as error0:
		if chatty:
			print('\nUsing solver "OSQP" caused a memory error.')
			print('the original error message is\n', error0)
			show_status()
			print('trying solver "CVXOPT" instead...\n')
		try:
			prob.solve(solver=cp.CVXOPT, verbose=chatty)
		except Exception as error1:
			if chatty:
				print('\nAn error occured when using solver "CVXOPT".')
				print('the original error message is\n', error1)
				show_status()
				print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
			prob.solve(solver=cp.MOSEK, verbose=chatty)
		else:
			if chatty:
				show_status()
	else:
		if chatty:
			show_status()
	if chatty:
		print()

def __generate_preimage_iam(self): def __generate_preimage_iam(self):
# Set up the ged environment. # Set up the ged environment.
ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private varible. ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private varible.


+ 6
- 4
gklearn/preimage/utils.py View File

@@ -67,8 +67,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
if gmfile_exist: if gmfile_exist:
gmfile = np.load(gm_fname)
gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list']
gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']]
time_precompute_gm_list = gmfile['run_time_list'].tolist() time_precompute_gm_list = gmfile['run_time_list'].tolist()
else: else:
gram_matrix_unnorm_list = [] gram_matrix_unnorm_list = []
@@ -87,6 +87,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
print('start generating preimage for each class of target...') print('start generating preimage for each class of target...')
idx_offset = 0
for idx, dataset in enumerate(datasets): for idx, dataset in enumerate(datasets):
target = dataset.targets[0] target = dataset.targets[0]
print('\ntarget =', target, '\n') print('\ntarget =', target, '\n')
@@ -96,14 +97,15 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
num_graphs = len(dataset.graphs) num_graphs = len(dataset.graphs)
if num_graphs < 2: if num_graphs < 2:
print('\nnumber of graphs = ', num_graphs, ', skip.\n') print('\nnumber of graphs = ', num_graphs, ', skip.\n')
idx_offset += 1
continue continue
# 2. set parameters. # 2. set parameters.
print('2. initializing mpg and setting parameters...') print('2. initializing mpg and setting parameters...')
if load_gm: if load_gm:
if gmfile_exist: if gmfile_exist:
mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx]
mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx]
mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx - idx_offset]
mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx - idx_offset]
mpg = MedianPreimageGenerator() mpg = MedianPreimageGenerator()
mpg.dataset = dataset mpg.dataset = dataset
mpg.set_options(**mpg_options.copy()) mpg.set_options(**mpg_options.copy())


+ 4
- 2
gklearn/utils/dataset.py View File

@@ -92,9 +92,11 @@ class Dataset(object):
elif ds_name == 'COIL-RAG': elif ds_name == 'COIL-RAG':
pass pass
elif ds_name == 'COLORS-3': elif ds_name == 'COLORS-3':
pass
ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'FRANKENSTEIN': elif ds_name == 'FRANKENSTEIN':
pass
ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self.__node_labels = label_names['node_labels'] self.__node_labels = label_names['node_labels']
self.__node_attrs = label_names['node_attrs'] self.__node_attrs = label_names['node_attrs']


+ 13
- 7
gklearn/utils/graph_files.py View File

@@ -541,10 +541,21 @@ def load_tud(filename):


content_gi = open(fgi).read().splitlines() # graph indicator content_gi = open(fgi).read().splitlines() # graph indicator
content_am = open(fam).read().splitlines() # adjacency matrix content_am = open(fam).read().splitlines() # adjacency matrix
content_gl = open(fgl).read().splitlines() # graph labels
# load targets.
if 'fgl' in locals():
content_targets = open(fgl).read().splitlines() # targets (classification)
targets = [float(i) for i in content_targets]
elif 'fga' in locals():
content_targets = open(fga).read().splitlines() # targets (regression)
targets = [int(i) for i in content_targets]
if 'class_label_map' in locals():
targets = [class_label_map[t] for t in targets]
else:
raise Exception('Can not find targets file. Please make sure there is a "', ds_name, '_graph_labels.txt" or "', ds_name, '_graph_attributes.txt"', 'file in your dataset folder.')


# create graphs and add nodes # create graphs and add nodes
data = [nx.Graph(name=str(i)) for i in range(0, len(content_gl))]
data = [nx.Graph(name=str(i)) for i in range(0, len(content_targets))]
if 'fnl' in locals(): if 'fnl' in locals():
content_nl = open(fnl).read().splitlines() # node labels content_nl = open(fnl).read().splitlines() # node labels
for idx, line in enumerate(content_gi): for idx, line in enumerate(content_gi):
@@ -619,11 +630,6 @@ def load_tud(filename):
for i, a_name in enumerate(label_names['edge_attrs']): for i, a_name in enumerate(label_names['edge_attrs']):
data[g].edges[n[0], n[1]][a_name] = attrs[i] data[g].edges[n[0], n[1]][a_name] = attrs[i]


# load targets.
targets = [int(i) for i in content_gl]
if 'class_label_map' in locals():
targets = [class_label_map[t] for t in targets]

return data, targets, label_names return data, targets, label_names






Loading…
Cancel
Save