@@ -29,6 +29,7 @@ gklearn/kernels/*_sym.py | |||
gklearn/preimage/* | |||
!gklearn/preimage/*.py | |||
!gklearn/preimage/experiments/*.py | |||
__pycache__ | |||
##*# | |||
@@ -70,6 +70,7 @@ class MedianGraphEstimator(object): | |||
self.__num_increase_order = 0 | |||
self.__num_converged_descents = 0 | |||
self.__state = AlgorithmState.TERMINATED | |||
self.__label_names = {} | |||
if ged_env is None: | |||
raise Exception('The GED environment pointer passed to the constructor of MedianGraphEstimator is null.') | |||
@@ -551,6 +552,7 @@ class MedianGraphEstimator(object): | |||
self.__init_type_increase_order = 'K-MEANS++' | |||
self.__max_itrs_increase_order = 10 | |||
self.__print_to_stdout = 2 | |||
self.__label_names = {} | |||
def __construct_initial_medians(self, graph_ids, timer, initial_medians): | |||
@@ -824,19 +826,49 @@ class MedianGraphEstimator(object): | |||
for node in g.nodes: | |||
cost += 0 | |||
def set_label_names(self, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None):
    """Register the names of the node/edge labels and attributes of the graphs.

    The names are stored in ``self.__label_names`` under the keys
    'node_labels', 'edge_labels', 'node_attrs' and 'edge_attrs'.

    Parameters
    ----------
    node_labels : iterable, optional
        Names of the node labels. Defaults to no names.
    edge_labels : iterable, optional
        Names of the edge labels. Defaults to no names.
    node_attrs : iterable, optional
        Names of the node attributes. Defaults to no names.
    edge_attrs : iterable, optional
        Names of the edge attributes. Defaults to no names.
    """
    # ``None`` defaults and per-call copies are used instead of ``[]``
    # default literals: a default list object is created once and would be
    # shared by every instance that stores it.
    self.__label_names = {
        'node_labels': [] if node_labels is None else list(node_labels),
        'edge_labels': [] if edge_labels is None else list(edge_labels),
        'node_attrs': [] if node_attrs is None else list(node_attrs),
        'edge_attrs': [] if edge_attrs is None else list(edge_attrs),
    }
def __get_median_node_label(self, node_labels):
    """Return the median of the given node labels.

    The computation is chosen from the names stored in
    ``self.__label_names``: the symbolic median is used when 'node_labels'
    is non-empty, otherwise the non-symbolic median is used when
    'node_attrs' is non-empty.

    Parameters
    ----------
    node_labels : list
        The node labels whose median is requested; passed through unchanged
        to the selected median routine.

    Raises
    ------
    Exception
        If neither node label names nor node attribute names are set.
    """
    # ``.get`` keeps the lookup safe while the label names are still the
    # empty dict, so the explicit error below is raised instead of KeyError.
    label_names = self.__label_names
    if label_names.get('node_labels'):
        return self.__get_median_label_symbolic(node_labels)
    if label_names.get('node_attrs'):
        return self.__get_median_label_nonsymbolic(node_labels)
    # Previously this path returned from a placeholder stub (None), which
    # made the error unreachable.
    raise Exception('Node label names are not given.')
def __get_median_edge_label(self, edge_labels):
    """Return the median of the given edge labels.

    The computation is chosen from the names stored in
    ``self.__label_names``: the symbolic median is used when 'edge_labels'
    is non-empty, otherwise the non-symbolic median is used when
    'edge_attrs' is non-empty.

    Parameters
    ----------
    edge_labels : list
        The edge labels whose median is requested; passed through unchanged
        to the selected median routine.

    Raises
    ------
    Exception
        If neither edge label names nor edge attribute names are set.
    """
    # ``.get`` keeps the lookup safe while the label names are still the
    # empty dict, so the explicit error below is raised instead of KeyError.
    label_names = self.__label_names
    if label_names.get('edge_labels'):
        return self.__get_median_label_symbolic(edge_labels)
    if label_names.get('edge_attrs'):
        return self.__get_median_label_nonsymbolic(edge_labels)
    # Previously this path returned from a placeholder stub (None), which
    # made the error unreachable.
    raise Exception('Edge label names are not given.')
def __get_median_label_symbolic(self, labels):
    """Return the label that occurs most frequently among ``labels``.

    Parameters
    ----------
    labels : iterable of dict
        Symbolic labels, each a dict mapping a label name to a hashable
        value.

    Returns
    -------
    dict
        A new dict equal to the most frequent label. Ties are resolved in
        favour of the label seen first. An empty dict is returned when
        ``labels`` is empty.
    """
    # Construct histogram. The key is a frozenset of the items so that two
    # equal labels whose keys were inserted in a different order are counted
    # together (a tuple of the items would be order-sensitive).
    counts = {}
    first_seen = {}
    for label in labels:
        key = frozenset(label.items())
        counts[key] = counts.get(key, 0) + 1
        first_seen.setdefault(key, label)

    if not counts:
        return {}

    # ``max`` returns the first maximal key in insertion order, which keeps
    # the first-seen tie-breaking rule.
    best_key = max(counts, key=counts.get)
    # Return a copy so callers cannot mutate one of the input labels.
    return dict(first_seen[best_key])
def __get_median_label_nonsymbolic(self, labels): | |||
@@ -896,14 +928,10 @@ class MedianGraphEstimator(object): | |||
for key, val in median.items(): | |||
median_label[key] = str(val) | |||
return median_label | |||
def __get_median_node_label_symbolic(self, node_labels):
    """Placeholder: performs no computation and returns None."""
    return None
def __get_median_edge_label_symbolic(self, edge_labels):
    """Placeholder: performs no computation and returns None."""
    return None
# def __get_median_edge_label_symbolic(self, edge_labels): | |||
# pass | |||
# def __get_median_edge_label_nonsymbolic(self, edge_labels): | |||
@@ -9,6 +9,10 @@ Created on Wed Apr 1 15:12:31 2020 | |||
def constant_node_costs(edit_cost_name):
    """Tell whether the named edit cost uses constant node costs.

    Parameters
    ----------
    edit_cost_name : str
        Name of the edit cost. Recognized names are "NON_SYMBOLIC",
        "LETTER", "LETTER2" and "CONSTANT".

    Returns
    -------
    bool
        True for "CONSTANT"; False for "NON_SYMBOLIC", "LETTER" and
        "LETTER2".

    Raises
    ------
    ValueError
        If ``edit_cost_name`` is not one of the recognized names.
        ``ValueError`` derives from ``Exception``, so handlers written for
        the previous bare ``Exception`` keep working.
    """
    if edit_cost_name in ('NON_SYMBOLIC', 'LETTER2', 'LETTER'):
        return False
    if edit_cost_name == 'CONSTANT':
        return True
    raise ValueError('Can not recognize the given edit cost. Possible edit costs include: "NON_SYMBOLIC", "LETTER", "LETTER2", "CONSTANT".')
@@ -58,7 +58,8 @@ def compute_geds(graphs, options={}, parallel=False): | |||
ged_env.init_method() | |||
# compute ged. | |||
neo_options = {'edit_cost': options['edit_cost'], | |||
neo_options = {'edit_cost': options['edit_cost'], | |||
'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'], | |||
'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']} | |||
ged_mat = np.zeros((len(graphs), len(graphs))) | |||
if parallel: | |||
@@ -147,12 +148,18 @@ def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, ** | |||
edge_attrs = kwargs.get('edge_attrs', []) | |||
return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map, | |||
node_attrs=node_attrs, edge_attrs=edge_attrs) | |||
elif edit_cost == 'CONSTANT': | |||
node_labels = kwargs.get('node_labels', []) | |||
edge_labels = kwargs.get('edge_labels', []) | |||
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map, | |||
node_labels=node_labels, edge_labels=edge_labels) | |||
else: | |||
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map) | |||
def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map): | |||
"""Compute the number of each edit operations. | |||
def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map, | |||
node_labels=[], edge_labels=[]): | |||
"""Compute the number of each edit operations for symbolic-labeled graphs. | |||
""" | |||
n_vi = 0 | |||
n_vr = 0 | |||
@@ -165,8 +172,13 @@ def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map): | |||
for i, map_i in enumerate(forward_map): | |||
if map_i == np.inf: | |||
n_vr += 1 | |||
elif g1.node[nodes1[i]]['atom'] != g2.node[map_i]['atom']: | |||
n_vs += 1 | |||
else: | |||
for nl in node_labels: | |||
label1 = g1.nodes[nodes1[i]][nl] | |||
label2 = g2.nodes[map_i][nl] | |||
if label1 != label2: | |||
n_vs += 1 | |||
break | |||
for map_i in backward_map: | |||
if map_i == np.inf: | |||
n_vi += 1 | |||
@@ -185,15 +197,21 @@ def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map): | |||
elif (forward_map[idx1], forward_map[idx2]) in g2.edges(): | |||
nb_edges2_cnted += 1 | |||
# edge labels are different. | |||
if g2.edges[((forward_map[idx1], forward_map[idx2]))]['bond_type'] \ | |||
!= g1.edges[(n1, n2)]['bond_type']: | |||
for el in edge_labels: | |||
label1 = g2.edges[((forward_map[idx1], forward_map[idx2]))][el] | |||
label2 = g1.edges[(n1, n2)][el] | |||
if label1 != label2: | |||
n_es += 1 | |||
break | |||
elif (forward_map[idx2], forward_map[idx1]) in g2.edges(): | |||
nb_edges2_cnted += 1 | |||
# edge labels are different. | |||
if g2.edges[((forward_map[idx2], forward_map[idx1]))]['bond_type'] \ | |||
!= g1.edges[(n1, n2)]['bond_type']: | |||
n_es += 1 | |||
for el in edge_labels: | |||
label1 = g2.edges[((forward_map[idx2], forward_map[idx1]))][el] | |||
label2 = g1.edges[(n1, n2)][el] | |||
if label1 != label2: | |||
n_es += 1 | |||
break | |||
# corresponding nodes are in g2, however the edge is removed. | |||
else: | |||
n_er += 1 | |||
@@ -262,6 +262,8 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
self.__edit_cost_constants = self.__init_ecc | |||
options = self.__ged_options.copy() | |||
options['edit_cost_constants'] = self.__edit_cost_constants # @todo | |||
options['node_labels'] = self._dataset.node_labels | |||
options['edge_labels'] = self._dataset.edge_labels | |||
options['node_attrs'] = self._dataset.node_attrs | |||
options['edge_attrs'] = self._dataset.edge_attrs | |||
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel) | |||
@@ -302,6 +304,8 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
# compute new GEDs and numbers of edit operations. | |||
options = self.__ged_options.copy() # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75]) | |||
options['edit_cost_constants'] = self.__edit_cost_constants # @todo | |||
options['node_labels'] = self._dataset.node_labels | |||
options['edge_labels'] = self._dataset.edge_labels | |||
options['node_attrs'] = self._dataset.node_attrs | |||
options['edge_attrs'] = self._dataset.edge_attrs | |||
ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel) | |||
@@ -451,7 +455,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||
constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])], | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
@@ -524,17 +528,17 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
prob.solve() | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif is_n_attr and not is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||
constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])], | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.execute_cvx(prob) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value, np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif not is_n_attr and is_e_attr: | |||
@@ -544,7 +548,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
prob.solve() | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) | |||
residual = np.sqrt(prob.value) | |||
else: | |||
@@ -553,10 +557,20 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
prob.solve() | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), | |||
x.value[2:], np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif self.__ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled. | |||
x = cp.Variable(nb_cost_mat.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])], | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
else: | |||
# # method 1: simple least square method. | |||
# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, | |||
@@ -588,7 +602,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
prob.solve() | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
@@ -647,6 +661,10 @@ class MedianPreimageGenerator(PreimageGenerator): | |||
# Select the GED algorithm. | |||
mge.set_options(mge_options_to_string(options)) | |||
mge.set_label_names(node_labels=self._dataset.node_labels, | |||
edge_labels=self._dataset.edge_labels, | |||
node_attrs=self._dataset.node_attrs, | |||
edge_attrs=self._dataset.edge_attrs) | |||
mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
@@ -37,7 +37,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
dataset_all.trim_dataset(edge_required=edge_required) | |||
if irrelevant_labels is not None: | |||
dataset_all.remove_labels(**irrelevant_labels) | |||
# dataset_all.cut_graphs(range(0, 100)) | |||
# dataset_all.cut_graphs(range(0, 10)) | |||
datasets = split_dataset_by_target(dataset_all) | |||
if save_results: | |||
@@ -67,24 +67,7 @@ class Dataset(object): | |||
def load_predefined_dataset(self, ds_name): | |||
current_path = os.path.dirname(os.path.realpath(__file__)) + '/' | |||
if ds_name == 'Letter-high': # node non-symb | |||
ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Letter-med': # node non-symb | |||
ds_file = current_path + '../../datasets/Letter-high/Letter-med_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Letter-low': # node non-symb | |||
ds_file = current_path + '../../datasets/Letter-high/Letter-low_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Fingerprint': | |||
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'SYNTHETIC': | |||
pass | |||
elif ds_name == 'SYNTHETICnew': | |||
ds_file = current_path + '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Synthie': | |||
if ds_name == 'acyclic': | |||
pass | |||
elif ds_name == 'COIL-DEL': | |||
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt' | |||
@@ -95,9 +78,31 @@ class Dataset(object): | |||
elif ds_name == 'COLORS-3': | |||
ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Fingerprint': | |||
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'FRANKENSTEIN': | |||
ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Letter-high': # node non-symb | |||
ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Letter-low': # node non-symb | |||
ds_file = current_path + '../../datasets/Letter-high/Letter-low_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Letter-med': # node non-symb | |||
ds_file = current_path + '../../datasets/Letter-high/Letter-med_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'MUTAG': | |||
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'SYNTHETIC': | |||
pass | |||
elif ds_name == 'SYNTHETICnew': | |||
ds_file = current_path + '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Synthie': | |||
pass | |||
self.__node_labels = label_names['node_labels'] | |||
self.__node_attrs = label_names['node_attrs'] | |||