@@ -29,6 +29,7 @@ gklearn/kernels/*_sym.py
 gklearn/preimage/*
 !gklearn/preimage/*.py
+!gklearn/preimage/experiments/*.py
 __pycache__
 ##*#
@@ -1,6 +1,10 @@
 language: python
 python:
-- '3.6.9'
+- '3.0'
+- '3.1'
+- '3.2'
+- '3.3'
+- '3.4'
 - '3.5'
 - '3.6'
 - '3.7'
@@ -70,6 +70,7 @@ class MedianGraphEstimator(object):
         self.__num_increase_order = 0
         self.__num_converged_descents = 0
         self.__state = AlgorithmState.TERMINATED
+        self.__label_names = {}
 
         if ged_env is None:
             raise Exception('The GED environment pointer passed to the constructor of MedianGraphEstimator is null.')
@@ -551,6 +552,7 @@ class MedianGraphEstimator(object):
         self.__init_type_increase_order = 'K-MEANS++'
         self.__max_itrs_increase_order = 10
         self.__print_to_stdout = 2
+        self.__label_names = {}
 
     def __construct_initial_medians(self, graph_ids, timer, initial_medians):
@@ -666,7 +668,8 @@ class MedianGraphEstimator(object):
             # Compute the median label and update the median.
             if len(node_labels) > 0:
-                median_label = self.__ged_env.get_median_node_label(node_labels)
+                # median_label = self.__ged_env.get_median_node_label(node_labels)
+                median_label = self.__get_median_node_label(node_labels)
                 if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon:
                     nx.set_node_attributes(median, {i: median_label})
@@ -701,7 +704,7 @@ class MedianGraphEstimator(object):
         if median.has_edge(i, j):
             median_label = median.edges[(i, j)]
             if self.__labeled_edges and len(edge_labels) > 0:
-                new_median_label = self.__ged_env.median_edge_label(edge_labels)
+                new_median_label = self.__get_median_edge_label(edge_labels)
                 if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon:
                     median_label = new_median_label
         for edge_label in edge_labels:
@@ -821,4 +824,170 @@ class MedianGraphEstimator(object):
     def compute_my_cost(g, h, node_map):
         cost = 0.0
         for node in g.nodes:
-            cost += 0
+            cost += 0
+
+    def set_label_names(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
+        self.__label_names = {'node_labels': node_labels, 'edge_labels': edge_labels,
+                              'node_attrs': node_attrs, 'edge_attrs': edge_attrs}
+
+    def __get_median_node_label(self, node_labels):
+        if len(self.__label_names['node_labels']) > 0:
+            return self.__get_median_label_symbolic(node_labels)
+        elif len(self.__label_names['node_attrs']) > 0:
+            return self.__get_median_label_nonsymbolic(node_labels)
+        else:
+            raise Exception('Node label names are not given.')
+
+    def __get_median_edge_label(self, edge_labels):
+        if len(self.__label_names['edge_labels']) > 0:
+            return self.__get_median_label_symbolic(edge_labels)
+        elif len(self.__label_names['edge_attrs']) > 0:
+            return self.__get_median_label_nonsymbolic(edge_labels)
+        else:
+            raise Exception('Edge label names are not given.')
+
+    def __get_median_label_symbolic(self, labels):
+        # Construct histogram.
+        hist = {}
+        for label in labels:
+            label = tuple(label.items())  # @todo: this may be slow.
+            if label not in hist:
+                hist[label] = 1
+            else:
+                hist[label] += 1
+
+        # Return the label that appears most frequently.
+        best_count = 0
+        median_label = {}
+        for label, count in hist.items():
+            if count > best_count:
+                best_count = count
+                median_label = {kv[0]: kv[1] for kv in label}
+        return median_label
+
+    def __get_median_label_nonsymbolic(self, labels):
+        if len(labels) == 0:
+            return {}  # @todo
+        else:
+            # Transform the labels into coordinates and compute mean label as initial solution.
+            labels_as_coords = []
+            sums = {}
+            for key, val in labels[0].items():
+                sums[key] = 0
+            for label in labels:
+                coords = {}
+                for key, val in label.items():
+                    val = float(val)
+                    sums[key] += val
+                    coords[key] = val
+                labels_as_coords.append(coords)
+            median = {}
+            for key, val in sums.items():
+                median[key] = val / len(labels)
+
+            # Run main loop of Weiszfeld's Algorithm.
+            epsilon = 0.0001
+            delta = 1.0
+            num_itrs = 0
+            all_equal = False
+            while (delta > epsilon) and (num_itrs < 100) and (not all_equal):
+                numerator = {}
+                for key, val in sums.items():
+                    numerator[key] = 0
+                denominator = 0
+                for label_as_coord in labels_as_coords:
+                    norm = 0
+                    for key, val in label_as_coord.items():
+                        norm += (val - median[key]) ** 2
+                    norm = np.sqrt(norm)
+                    if norm > 0:
+                        for key, val in label_as_coord.items():
+                            numerator[key] += val / norm
+                        denominator += 1.0 / norm
+                if denominator == 0:
+                    all_equal = True
+                else:
+                    new_median = {}
+                    delta = 0.0
+                    for key, val in numerator.items():
+                        this_median = val / denominator
+                        new_median[key] = this_median
+                        delta += np.abs(median[key] - this_median)
+                    median = new_median
+                num_itrs += 1
+
+            # Transform the solution to strings and return it.
+            median_label = {}
+            for key, val in median.items():
+                median_label[key] = str(val)
+            return median_label
+
+    # def __get_median_edge_label_symbolic(self, edge_labels):
+    #     pass
+
+    # def __get_median_edge_label_nonsymbolic(self, edge_labels):
+    #     # Commented out: a near-verbatim duplicate of
+    #     # __get_median_label_nonsymbolic above, specialized to edge labels.
+    #     pass
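For the non-symbolic case, `__get_median_label_nonsymbolic` computes the geometric median of the label vectors with Weiszfeld's algorithm: start from the mean, then repeatedly re-weight each point by the inverse of its distance to the current estimate. A minimal standalone sketch of the same iteration on toy 2-D labels (function name and data are illustrative, not part of the diff):

```python
import numpy as np

def weiszfeld(points, epsilon=1e-4, max_itrs=100):
    """Geometric median by Weiszfeld iteration (illustrative stand-in)."""
    median = np.mean(points, axis=0)      # mean label as initial solution
    for _ in range(max_itrs):
        dists = np.linalg.norm(points - median, axis=1)
        mask = dists > 0                  # skip points coinciding with the estimate
        if not mask.any():                # all labels equal: mean is the median
            break
        w = 1.0 / dists[mask]
        new_median = (points[mask] * w[:, None]).sum(axis=0) / w.sum()
        delta = np.abs(median - new_median).sum()
        median = new_median
        if delta <= epsilon:
            break
    return median

labels = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
print(weiszfeld(labels))  # approx. [0.211, 0.211], the geometric median
```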
@@ -9,6 +9,10 @@ Created on Wed Apr 1 15:12:31 2020
 def constant_node_costs(edit_cost_name):
     if edit_cost_name == 'NON_SYMBOLIC' or edit_cost_name == 'LETTER2' or edit_cost_name == 'LETTER':
         return False
+    elif edit_cost_name == 'CONSTANT':
+        return True
+    else:
+        raise Exception('Cannot recognize the given edit cost. Possible edit costs include: "NON_SYMBOLIC", "LETTER", "LETTER2", "CONSTANT".')
 #    elif edit_cost_name != '':
 #        # throw ged::Error("Invalid dataset " + dataset + ". Usage: ./median_tests <AIDS|Mutagenicity|Letter-high|Letter-med|Letter-low|monoterpenoides|SYNTHETICnew|Fingerprint|COIL-DEL>");
 #        return False
@@ -57,7 +57,10 @@ def compute_geds(graphs, options={}, parallel=False):
     ged_env.set_method(options['method'], ged_options_to_string(options))
     ged_env.init_method()
 
-    # compute ged.
+    # compute ged.
+    neo_options = {'edit_cost': options['edit_cost'],
+                   'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
+                   'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
     ged_mat = np.zeros((len(graphs), len(graphs)))
     if parallel:
         len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
@@ -74,7 +77,7 @@ def compute_geds(graphs, options={}, parallel=False):
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare
-        do_partial = partial(_wrapper_compute_ged_parallel, options)
+        do_partial = partial(_wrapper_compute_ged_parallel, neo_options)
         pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
         iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                         desc='computing GEDs', file=sys.stdout)
@@ -100,7 +103,7 @@ def compute_geds(graphs, options={}, parallel=False):
                 ged_vec.append(dis)
                 ged_mat[i][j] = dis
                 ged_mat[j][i] = dis
-                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, edit_cost=options['edit_cost'])
+                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
                 n_edit_operations.append(n_eo_tmp)
 
     return ged_vec, ged_mat, n_edit_operations
@@ -115,7 +118,7 @@ def _wrapper_compute_ged_parallel(options, itr):
 def _compute_ged_parallel(env, gid1, gid2, g1, g2, options):
     dis, pi_forward, pi_backward = _compute_ged(env, gid1, gid2, g1, g2)
-    n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, edit_cost=options['edit_cost'])  # [0,0,0,0,0,0]
+    n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, **options)  # [0,0,0,0,0,0]
     return dis, n_eo_tmp
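The `neo_options` dict bundles the edit cost name with the label and attribute names so everything can be forwarded to the counting functions via `**kwargs`. A hedged sketch of the flow; the label names `'atom'` and `'bond_type'` are examples, not requirements:

```python
# Example options dict; 'atom' and 'bond_type' are illustrative label names.
neo_options = {'edit_cost': 'CONSTANT',
               'node_labels': ['atom'], 'edge_labels': ['bond_type'],
               'node_attrs': [], 'edge_attrs': []}

# get_nb_edit_operations(g1, g2, pi_forward, pi_backward, **neo_options)
# dispatches to get_nb_edit_operations_symbolic(..., node_labels=['atom'],
# edge_labels=['bond_type']) because edit_cost == 'CONSTANT'.
```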
@@ -137,17 +140,26 @@ def _compute_ged(env, gid1, gid2, g1, g2):
     return dis, pi_forward, pi_backward
 
-def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None):
+def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, **kwargs):
     if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
         return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)
     elif edit_cost == 'NON_SYMBOLIC':
-        return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map)
+        node_attrs = kwargs.get('node_attrs', [])
+        edge_attrs = kwargs.get('edge_attrs', [])
+        return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
+                                                  node_attrs=node_attrs, edge_attrs=edge_attrs)
+    elif edit_cost == 'CONSTANT':
+        node_labels = kwargs.get('node_labels', [])
+        edge_labels = kwargs.get('edge_labels', [])
+        return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
+                                               node_labels=node_labels, edge_labels=edge_labels)
     else:
         return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
 
-def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map):
-    """Compute the number of each edit operations.
+def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
+                                    node_labels=[], edge_labels=[]):
+    """Compute the number of each edit operations for symbolic-labeled graphs.
     """
     n_vi = 0
     n_vr = 0
@@ -160,8 +172,13 @@ def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map):
     for i, map_i in enumerate(forward_map):
         if map_i == np.inf:
             n_vr += 1
-        elif g1.node[nodes1[i]]['atom'] != g2.node[map_i]['atom']:
-            n_vs += 1
+        else:
+            for nl in node_labels:
+                label1 = g1.nodes[nodes1[i]][nl]
+                label2 = g2.nodes[map_i][nl]
+                if label1 != label2:
+                    n_vs += 1
+                    break
     for map_i in backward_map:
         if map_i == np.inf:
             n_vi += 1
@@ -180,15 +197,21 @@ def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map):
             elif (forward_map[idx1], forward_map[idx2]) in g2.edges():
                 nb_edges2_cnted += 1
                 # edge labels are different.
-                if g2.edges[(forward_map[idx1], forward_map[idx2])]['bond_type'] \
-                    != g1.edges[(n1, n2)]['bond_type']:
-                    n_es += 1
+                for el in edge_labels:
+                    label1 = g2.edges[(forward_map[idx1], forward_map[idx2])][el]
+                    label2 = g1.edges[(n1, n2)][el]
+                    if label1 != label2:
+                        n_es += 1
+                        break
             elif (forward_map[idx2], forward_map[idx1]) in g2.edges():
                 nb_edges2_cnted += 1
                 # edge labels are different.
-                if g2.edges[(forward_map[idx2], forward_map[idx1])]['bond_type'] \
-                    != g1.edges[(n1, n2)]['bond_type']:
-                    n_es += 1
+                for el in edge_labels:
+                    label1 = g2.edges[(forward_map[idx2], forward_map[idx1])][el]
+                    label2 = g1.edges[(n1, n2)][el]
+                    if label1 != label2:
+                        n_es += 1
+                        break
             # corresponding nodes are in g2, however the edge is removed.
             else:
                 n_er += 1
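A small worked example of the generalized symbolic counting, assuming the module exposing `get_nb_edit_operations_symbolic` is importable (the import path below is a guess, and the exact ordering of the returned counts is not asserted):

```python
import networkx as nx
# Import path assumed; the function is the one modified in this diff.
from gklearn.preimage.utils import get_nb_edit_operations_symbolic

g1 = nx.Graph()
g1.add_node(0, atom='C')
g1.add_node(1, atom='O')
g1.add_edge(0, 1, bond_type='1')

g2 = nx.Graph()
g2.add_node(0, atom='C')
g2.add_node(1, atom='N')
g2.add_edge(0, 1, bond_type='2')

# Identity node map: node i of g1 corresponds to node i of g2.
counts = get_nb_edit_operations_symbolic(g1, g2, [0, 1], [0, 1],
                                         node_labels=['atom'],
                                         edge_labels=['bond_type'])
# Expect one node substitution ('O' vs 'N') and one edge substitution
# ('1' vs '2') among the returned counts; no insertions or removals.
print(counts)
```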
@@ -242,7 +265,8 @@ def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
     return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er
 
-def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
+def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
+                                       node_attrs=[], edge_attrs=[]):
     """Compute the number of each edit operations.
     """
     n_vi = 0
@@ -261,7 +285,7 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
         else:
             n_vs += 1
             sum_squares = 0
-            for a_name in g1.graph['node_attrs']:
+            for a_name in node_attrs:
                 diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name])
                 sum_squares += np.square(diff)
             sod_vs += np.sqrt(sum_squares)
@@ -284,15 +308,15 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
             elif (n1_g2, n2_g2) in g2.edges():
                 n_es += 1
                 sum_squares = 0
-                for a_name in g1.graph['edge_attrs']:
-                    diff = float(g1.edges[n1, n2][a_name]) - float(g2.nodes[n1_g2, n2_g2][a_name])
+                for a_name in edge_attrs:
+                    diff = float(g1.edges[n1, n2][a_name]) - float(g2.edges[n1_g2, n2_g2][a_name])
                     sum_squares += np.square(diff)
                 sod_es += np.sqrt(sum_squares)
             elif (n2_g2, n1_g2) in g2.edges():
                 n_es += 1
                 sum_squares = 0
-                for a_name in g1.graph['edge_attrs']:
-                    diff = float(g1.edges[n2, n1][a_name]) - float(g2.nodes[n2_g2, n1_g2][a_name])
+                for a_name in edge_attrs:
+                    diff = float(g1.edges[n2, n1][a_name]) - float(g2.edges[n2_g2, n1_g2][a_name])
                     sum_squares += np.square(diff)
                 sod_es += np.sqrt(sum_squares)
             # corresponding nodes are in g2, however the edge is removed.
@@ -96,7 +96,10 @@ class MedianPreimageGenerator(PreimageGenerator):
             if self.__runtime_precompute_gm is None:
                 raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
             self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
-            self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
+            if self._kernel_options['normalize']:
+                self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
+            else:
+                self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
             end_precompute_gm = time.time()
             start -= self.__runtime_precompute_gm
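For reference, `normalize_gm` presumably performs the standard kernel (cosine) normalization of the Gram matrix; the sketch below is written under that assumption and is not gklearn's actual implementation:

```python
import numpy as np

def normalize_gm_sketch(K):
    # Cosine-normalize a Gram matrix: K_ij / sqrt(K_ii * K_jj).
    d = np.sqrt(np.diag(K))
    return K / np.outer(d, d)
```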
@@ -259,6 +262,10 @@ class MedianPreimageGenerator(PreimageGenerator):
             self.__edit_cost_constants = self.__init_ecc
         options = self.__ged_options.copy()
         options['edit_cost_constants'] = self.__edit_cost_constants  # @todo
+        options['node_labels'] = self._dataset.node_labels
+        options['edge_labels'] = self._dataset.edge_labels
+        options['node_attrs'] = self._dataset.node_attrs
+        options['edge_attrs'] = self._dataset.edge_attrs
         ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel)
         residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
         time_list = [time.time() - time0]
@@ -297,6 +304,10 @@ class MedianPreimageGenerator(PreimageGenerator):
             # compute new GEDs and numbers of edit operations.
             options = self.__ged_options.copy()  # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75])
             options['edit_cost_constants'] = self.__edit_cost_constants  # @todo
+            options['node_labels'] = self._dataset.node_labels
+            options['edge_labels'] = self._dataset.edge_labels
+            options['node_attrs'] = self._dataset.node_attrs
+            options['edge_attrs'] = self._dataset.edge_attrs
             ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel)
             residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
             time_list.append(time.time() - time0)
@@ -444,34 +455,10 @@ class MedianPreimageGenerator(PreimageGenerator):
                 nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
                 x = cp.Variable(nb_cost_mat_new.shape[1])
                 cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-                constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
+                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                                np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
                 prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                try:
-                    prob.solve(verbose=True)
-                except MemoryError as error0:
-                    if self._verbose >= 2:
-                        print('\nUsing solver "OSQP" caused a memory error.')
-                        print('the original error message is\n', error0)
-                        print('solver status: ', prob.status)
-                        print('trying solver "CVXOPT" instead...\n')
-                    try:
-                        prob.solve(solver=cp.CVXOPT, verbose=True)
-                    except Exception as error1:
-                        if self._verbose >= 2:
-                            print('\nAn error occured when using solver "CVXOPT".')
-                            print('the original error message is\n', error1)
-                            print('solver status: ', prob.status)
-                            print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
-                        prob.solve(solver=cp.MOSEK, verbose=True)
-                    else:
-                        if self._verbose >= 2:
-                            print('solver status: ', prob.status)
-                else:
-                    if self._verbose >= 2:
-                        print('solver status: ', prob.status)
-                if self._verbose >= 2:
-                    print()
+                self.__execute_cvx(prob)
                 edit_costs_new = x.value
                 residual = np.sqrt(prob.value)
             elif rw_constraints == '2constraints':
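All branches of this fitting routine solve the same constrained least-squares program; with `nb_cost_mat` as $N$ and `dis_k_vec` as $d$, the weight vector $[1, 1, -1, 0, 0]$ reads as "insertion plus removal must cost at least as much as substitution":

```latex
\min_{x}\; \lVert N x - d \rVert_2^2
\quad \text{s.t.} \quad x \ge 0.01, \qquad
x_{\text{ins}} + x_{\text{rem}} - x_{\text{sub}} \ge 0
```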
@@ -541,19 +528,17 @@ class MedianPreimageGenerator(PreimageGenerator):
                                np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                                np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
                 prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                prob.solve()
+                self.__execute_cvx(prob)
                 edit_costs_new = x.value
                 residual = np.sqrt(prob.value)
             elif is_n_attr and not is_e_attr:
                 nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
                 x = cp.Variable(nb_cost_mat_new.shape[1])
                 cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-                constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
+                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                                np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
                 prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                prob.solve()
-                if self._verbose >= 2:
-                    print(x.value)
+                self.__execute_cvx(prob)
                 edit_costs_new = np.concatenate((x.value, np.array([0.0])))
                 residual = np.sqrt(prob.value)
             elif not is_n_attr and is_e_attr:
@@ -563,7 +548,7 @@ class MedianPreimageGenerator(PreimageGenerator):
                 constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                                np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
                 prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                prob.solve()
+                self.__execute_cvx(prob)
                 edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
                 residual = np.sqrt(prob.value)
             else:
@@ -572,10 +557,20 @@ class MedianPreimageGenerator(PreimageGenerator):
                 cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                 constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
                 prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                prob.solve()
+                self.__execute_cvx(prob)
                 edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
                                                  x.value[2:], np.array([0.0])))
                 residual = np.sqrt(prob.value)
+        elif self.__ged_options['edit_cost'] == 'CONSTANT':  # @todo: node/edge may not be labeled.
+            x = cp.Variable(nb_cost_mat.shape[1])
+            cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
+            constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])],
+                           np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
+                           np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
+            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
+            self.__execute_cvx(prob)
+            edit_costs_new = x.value
+            residual = np.sqrt(prob.value)
         else:
             # # method 1: simple least square method.
             # edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
@@ -607,7 +602,7 @@ class MedianPreimageGenerator(PreimageGenerator):
                            np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                            np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
             prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-            prob.solve()
+            self.__execute_cvx(prob)
             edit_costs_new = x.value
             residual = np.sqrt(prob.value)
@@ -616,6 +611,34 @@ class MedianPreimageGenerator(PreimageGenerator):
         return edit_costs_new, residual
 
+    def __execute_cvx(self, prob):
+        try:
+            prob.solve(verbose=(self._verbose >= 2))
+        except MemoryError as error0:
+            if self._verbose >= 2:
+                print('\nUsing solver "OSQP" caused a memory error.')
+                print('the original error message is\n', error0)
+                print('solver status: ', prob.status)
+                print('trying solver "CVXOPT" instead...\n')
+            try:
+                prob.solve(solver=cp.CVXOPT, verbose=(self._verbose >= 2))
+            except Exception as error1:
+                if self._verbose >= 2:
+                    print('\nAn error occurred when using solver "CVXOPT".')
+                    print('the original error message is\n', error1)
+                    print('solver status: ', prob.status)
+                    print('trying solver "MOSEK" instead. Note that this solver is commercial and a license is required.\n')
+                prob.solve(solver=cp.MOSEK, verbose=(self._verbose >= 2))
+            else:
+                if self._verbose >= 2:
+                    print('solver status: ', prob.status)
+        else:
+            if self._verbose >= 2:
+                print('solver status: ', prob.status)
+        if self._verbose >= 2:
+            print()
+
     def __generate_preimage_iam(self):
         # Set up the ged environment.
         ged_env = gedlibpy.GEDEnv()  # @todo: maybe create a ged_env as a private varible.
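The fallback chain factored out into `__execute_cvx` can be reproduced in isolation. A minimal sketch with made-up data (OSQP is CVXPY's default for quadratic programs; CVXOPT is the first fallback):

```python
import cvxpy as cp
import numpy as np

A = np.random.rand(20, 6)   # stand-in for nb_cost_mat
b = np.random.rand(20)      # stand-in for dis_k_vec
x = cp.Variable(A.shape[1])
prob = cp.Problem(cp.Minimize(cp.sum_squares(A @ x - b)), [x >= 0.01])

try:
    prob.solve()                  # default solver
except MemoryError:
    prob.solve(solver=cp.CVXOPT)  # fall back to another installed solver
print(prob.status, x.value)
```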
@@ -638,6 +661,10 @@ class MedianPreimageGenerator(PreimageGenerator):
         # Select the GED algorithm.
         mge.set_options(mge_options_to_string(options))
+        mge.set_label_names(node_labels=self._dataset.node_labels,
+                            edge_labels=self._dataset.edge_labels,
+                            node_attrs=self._dataset.node_attrs,
+                            edge_attrs=self._dataset.edge_attrs)
         mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
         mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
@@ -37,7 +37,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
     dataset_all.trim_dataset(edge_required=edge_required)
     if irrelevant_labels is not None:
         dataset_all.remove_labels(**irrelevant_labels)
-    # dataset_all.cut_graphs(range(0, 100))
+    # dataset_all.cut_graphs(range(0, 10))
     datasets = split_dataset_by_target(dataset_all)
 
     if save_results:
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | ||||
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | ||||
if gmfile_exist: | if gmfile_exist: | ||||
gmfile = np.load(gm_fname) | |||||
gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list'] | |||||
gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe. | |||||
gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']] | |||||
time_precompute_gm_list = gmfile['run_time_list'].tolist() | time_precompute_gm_list = gmfile['run_time_list'].tolist() | ||||
else: | else: | ||||
gram_matrix_unnorm_list = [] | gram_matrix_unnorm_list = [] | ||||
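The `allow_pickle=True` change is needed because the per-class Gram matrices can have different shapes, so `np.savez` stores them as an object array, which NumPy only loads when pickling is explicitly allowed. A self-contained round-trip illustrating this (the file name is arbitrary):

```python
import numpy as np

gms = [np.eye(2), np.eye(3)]  # ragged list of matrices, one per class
np.savez('gm_demo.npz',
         gram_matrix_unnorm_list=np.array(gms, dtype=object),
         run_time_list=np.array([0.1, 0.2]))

f = np.load('gm_demo.npz', allow_pickle=True)  # fails without allow_pickle
gram_matrix_unnorm_list = [item for item in f['gram_matrix_unnorm_list']]
```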
@@ -87,6 +87,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
     print('start generating preimage for each class of target...')
+    idx_offset = 0
     for idx, dataset in enumerate(datasets):
         target = dataset.targets[0]
         print('\ntarget =', target, '\n')
@@ -96,14 +97,15 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
         num_graphs = len(dataset.graphs)
         if num_graphs < 2:
             print('\nnumber of graphs = ', num_graphs, ', skip.\n')
+            idx_offset += 1
             continue
 
         # 2. set parameters.
         print('2. initializing mpg and setting parameters...')
         if load_gm:
             if gmfile_exist:
-                mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx]
-                mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx]
+                mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx - idx_offset]
+                mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx - idx_offset]
         mpg = MedianPreimageGenerator()
         mpg.dataset = dataset
         mpg.set_options(**mpg_options.copy())
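The new `idx_offset` keeps the loop index aligned with the saved per-class lists, which only contain entries for classes that were actually processed. A toy illustration of the bookkeeping (values made up):

```python
sizes = [5, 1, 4]                  # graphs per class; class 1 is skipped
saved = ['gm_class0', 'gm_class2'] # saved lists have no entry for class 1
idx_offset = 0
for idx, n in enumerate(sizes):
    if n < 2:
        idx_offset += 1
        continue
    print(idx, saved[idx - idx_offset])  # 0 -> gm_class0, 2 -> gm_class2
```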
@@ -67,18 +67,35 @@ class Dataset(object):
     def load_predefined_dataset(self, ds_name):
         current_path = os.path.dirname(os.path.realpath(__file__)) + '/'
-        if ds_name == 'Letter-high':  # node non-symb
-            ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt'
+        if ds_name == 'acyclic':
+            pass
+        elif ds_name == 'COIL-DEL':
+            ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
             self.__graphs, self.__targets, label_names = load_dataset(ds_file)
-        elif ds_name == 'Letter-med':  # node non-symb
-            ds_file = current_path + '../../datasets/Letter-high/Letter-med_A.txt'
+        elif ds_name == 'COIL-RAG':
+            ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt'
             self.__graphs, self.__targets, label_names = load_dataset(ds_file)
-        elif ds_name == 'Letter-low':  # node non-symb
-            ds_file = current_path + '../../datasets/Letter-high/Letter-low_A.txt'
+        elif ds_name == 'COLORS-3':
+            ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt'
             self.__graphs, self.__targets, label_names = load_dataset(ds_file)
         elif ds_name == 'Fingerprint':
             ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt'
             self.__graphs, self.__targets, label_names = load_dataset(ds_file)
+        elif ds_name == 'FRANKENSTEIN':
+            ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt'
+            self.__graphs, self.__targets, label_names = load_dataset(ds_file)
+        elif ds_name == 'Letter-high':  # node non-symb
+            ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt'
+            self.__graphs, self.__targets, label_names = load_dataset(ds_file)
+        elif ds_name == 'Letter-low':  # node non-symb
+            ds_file = current_path + '../../datasets/Letter-high/Letter-low_A.txt'
+            self.__graphs, self.__targets, label_names = load_dataset(ds_file)
+        elif ds_name == 'Letter-med':  # node non-symb
+            ds_file = current_path + '../../datasets/Letter-high/Letter-med_A.txt'
+            self.__graphs, self.__targets, label_names = load_dataset(ds_file)
+        elif ds_name == 'MUTAG':
+            ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt'
+            self.__graphs, self.__targets, label_names = load_dataset(ds_file)
         elif ds_name == 'SYNTHETIC':
             pass
         elif ds_name == 'SYNTHETICnew':
@@ -86,15 +103,6 @@ class Dataset(object):
             self.__graphs, self.__targets, label_names = load_dataset(ds_file)
         elif ds_name == 'Synthie':
             pass
-        elif ds_name == 'COIL-DEL':
-            ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
-            self.__graphs, self.__targets, label_names = load_dataset(ds_file)
-        elif ds_name == 'COIL-RAG':
-            pass
-        elif ds_name == 'COLORS-3':
-            pass
-        elif ds_name == 'FRANKENSTEIN':
-            pass
 
         self.__node_labels = label_names['node_labels']
         self.__node_attrs = label_names['node_attrs']
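With the branches reordered alphabetically and the new datasets wired in, loading one of them might look like this (the import path and no-argument constructor are assumptions, not shown in the diff; `dataset.graphs` is used elsewhere in this patch):

```python
from gklearn.utils.dataset import Dataset  # import path assumed

ds = Dataset()
ds.load_predefined_dataset('MUTAG')
print(len(ds.graphs), 'graphs loaded')
```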
@@ -474,6 +474,7 @@ def load_tud(filename):
         label_names = {'node_labels': [], 'node_attrs': [],
                        'edge_labels': [], 'edge_attrs': []}
+        class_label_map = None
         class_label_map_strings = []
         content_rm = open(frm).read().splitlines()
         i = 0
@@ -538,20 +539,32 @@ def load_tud(filename):
     else:
         label_names = {'node_labels': [], 'node_attrs': [],
                        'edge_labels': [], 'edge_attrs': []}
+        class_label_map = None
 
     content_gi = open(fgi).read().splitlines()  # graph indicator
     content_am = open(fam).read().splitlines()  # adjacency matrix
-    content_gl = open(fgl).read().splitlines()  # graph labels
+
+    # load targets.
+    if 'fgl' in locals():
+        content_targets = open(fgl).read().splitlines()  # targets (classification)
+        targets = [int(i) for i in content_targets]
+        if class_label_map is not None:
+            targets = [class_label_map[t] for t in targets]
+    elif 'fga' in locals():
+        content_targets = open(fga).read().splitlines()  # targets (regression)
+        targets = [float(i) for i in content_targets]
+    else:
+        raise Exception('Cannot find targets file. Please make sure there is a "' + ds_name + '_graph_labels.txt" or "' + ds_name + '_graph_attributes.txt" file in your dataset folder.')
 
     # create graphs and add nodes
-    data = [nx.Graph(name=str(i)) for i in range(0, len(content_gl))]
+    data = [nx.Graph(name=str(i)) for i in range(0, len(content_targets))]
     if 'fnl' in locals():
         content_nl = open(fnl).read().splitlines()  # node labels
     for idx, line in enumerate(content_gi):
         # transfer to int first in case of unexpected blanks
         data[int(line) - 1].add_node(idx)
         labels = [l.strip() for l in content_nl[idx].split(',')]
-        if label_names['node_labels'] == []:
+        if label_names['node_labels'] == []:  # @todo: need to fix bug.
             for i, label in enumerate(labels):
                 l_name = 'label_' + str(i)
                 data[int(line) - 1].nodes[idx][l_name] = label
@@ -619,11 +632,6 @@ def load_tud(filename):
             for i, a_name in enumerate(label_names['edge_attrs']):
                 data[g].edges[n[0], n[1]][a_name] = attrs[i]
 
-    # load targets.
-    targets = [int(i) for i in content_gl]
-    if 'class_label_map' in locals():
-        targets = [class_label_map[t] for t in targets]
-
     return data, targets, label_names