
1. Fix bugs in some graph kernels. 2. Update MedianGraphEstimator.

v0.2.x
jajupmochi 5 years ago
commit f8fc1d9336
11 changed files with 524 additions and 76 deletions
  1. gklearn/ged/env/node_map.py  +12 -0
  2. gklearn/ged/median/median_graph_estimator.py  +330 -2
  3. gklearn/ged/median/test_median_graph_estimator.py  +3 -3
  4. gklearn/kernels/path_up_to_h.py  +9 -8
  5. gklearn/kernels/treelet.py  +9 -8
  6. gklearn/kernels/weisfeiler_lehman.py  +5 -4
  7. gklearn/preimage/experiments/xp_median_preimage.py  +135 -49
  8. gklearn/preimage/utils.py  +2 -0
  9. gklearn/utils/__init__.py  +1 -0
  10. gklearn/utils/dataset.py  +5 -0
  11. gklearn/utils/utils.py  +13 -2

gklearn/ged/env/node_map.py  +12 -0

@@ -47,6 +47,18 @@ class NodeMap(object):
		return self.__backward_map

+	def as_relation(self, relation):
+		relation.clear()
+		for i in range(0, len(self.__forward_map)):
+			k = self.__forward_map[i]
+			if k != np.inf:
+				relation.append(tuple((i, k)))
+		for k in range(0, len(self.__backward_map)):
+			i = self.__backward_map[k]
+			if i == np.inf:
+				relation.append(tuple((i, k)))

	def add_assignment(self, i, k):
		if i != np.inf:
			if i < len(self.__forward_map):
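The new `as_relation` method exports a node map as a flat list of assignment pairs: substitutions come from the forward map, insertions from the backward map. A minimal usage sketch, assuming `NodeMap` is importable from `gklearn.ged.env.node_map` and that the constructor and `add_assignment` behave as shown above:

```python
import numpy as np
from gklearn.ged.env.node_map import NodeMap  # import path assumed

# A node map between a 2-node source graph and a 2-node target graph.
node_map = NodeMap(2, 2)
node_map.add_assignment(0, 1)        # source node 0 substituted by target node 1
node_map.add_assignment(1, np.inf)   # source node 1 deleted
node_map.add_assignment(np.inf, 0)   # target node 0 inserted

relation = []
node_map.as_relation(relation)       # fills `relation` in place
# Substitutions first, then insertions: [(0, 1), (inf, 0)]
print(relation)
```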


gklearn/ged/median/median_graph_estimator.py  +330 -2

@@ -491,7 +491,7 @@ class MedianGraphEstimator(object):
		# Refine the sum of distances and the node maps for the converged median.
		self.__converged_sum_of_distances = self.__sum_of_distances
		if self.__refine:
-			self.__improve_sum_of_distances(timer) # @todo
+			self.__improve_sum_of_distances(timer)

		# Record end time, set runtime and reset the number of initial medians.
		end = time.time()
@@ -526,8 +526,52 @@ class MedianGraphEstimator(object):
		print('Overall number of times the order decreased: ', self.__num_decrease_order)
		print('Overall number of times the order increased: ', self.__num_increase_order)
		print('===========================================================\n')
+	def __improve_sum_of_distances(self, timer): # @todo: go through and test
+		# Use method selected for refinement phase.
+		self.__ged_env.set_method(self.__refine_method, self.__refine_options)
+
+		# Print information about current iteration.
+		if self.__print_to_stdout == 2:
+			progress = tqdm(desc='Improving node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
+			print('\n===========================================================')
+			print('Improving node maps and SOD for converged median.')
+			print('-----------------------------------------------------------')
+			progress.update(1)
+
+		# Improving the node maps.
+		for graph_id, node_map in self.__node_maps_from_median.items():
+			if timer.expired():
+				if self.__state == AlgorithmState.TERMINATED:
+					self.__state = AlgorithmState.CONVERGED
+				break
+			self.__ged_env.run_method(self.__gen_median_id, graph_id)
+			if self.__ged_env.get_upper_bound(self.__gen_median_id, graph_id) < node_map.induced_cost():
+				self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__gen_median_id, graph_id)
+			self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost()
+			# Print information.
+			if self.__print_to_stdout == 2:
+				progress.update(1)
+
+		self.__sum_of_distances = 0.0
+		for key, val in self.__node_maps_from_median.items():
+			self.__sum_of_distances += val.induced_cost()
+
+		# Print information.
+		if self.__print_to_stdout == 2:
+			print('===========================================================\n')
+
+	def __median_available(self):
+		return self.__gen_median_id != np.inf
+
+	def get_state(self):
+		if not self.__median_available():
+			raise Exception('No median has been computed. Call run() before calling get_state().')
+		return self.__state
	def get_sum_of_distances(self, state=''):
		"""Returns the sum of distances.
@@ -852,7 +896,7 @@ class MedianGraphEstimator(object):
		increased_order = False

		# Increase the order as long as the best insertion delta is negative.
-		while self.__compute_best_insertion_delta(graphs, best_config, best_label) < - self.__epsilon:
+		while self.__compute_best_insertion_delta(graphs, best_config, best_label) > - self.__epsilon:
			increased_order = True
			self.__add_node_to_median(best_config, best_label, median)
@@ -862,7 +906,291 @@ class MedianGraphEstimator(object):
		# Return true iff the order was increased.
		return increased_order
+	def __compute_best_insertion_delta(self, graphs, best_config, best_label):
+		# Construct sets of inserted nodes.
+		no_inserted_node = True
+		inserted_nodes = {}
+		for graph_id, graph in graphs.items():
+			inserted_nodes[graph_id] = []
+			best_config[graph_id] = np.inf
+			for k in range(nx.number_of_nodes(graph)):
+				if self.__node_maps_from_median[graph_id].pre_image(k) == np.inf:
+					no_inserted_node = False
+					inserted_nodes[graph_id].append((k, tuple(item for item in graph.nodes[k].items()))) # @todo: can the order of label names be guaranteed?
+
+		# Return 0.0 if no node is inserted in any of the graphs.
+		if no_inserted_node:
+			return 0.0
+
+		# Compute insertion configuration, label, and delta.
+		best_delta = 0.0 # @todo
+		if len(self.__label_names['node_labels']) == 0 and len(self.__label_names['node_attrs']) == 0: # @todo
+			best_delta = self.__compute_insertion_delta_unlabeled(inserted_nodes, best_config, best_label)
+		elif self.__constant_node_costs:
+			best_delta = self.__compute_insertion_delta_constant(inserted_nodes, best_config, best_label)
+		else:
+			best_delta = self.__compute_insertion_delta_generic(inserted_nodes, best_config, best_label)
+
+		# Return the best delta.
+		return best_delta
+	def __compute_insertion_delta_unlabeled(self, inserted_nodes, best_config, best_label):
+		# Construct the best configuration and compute its insertion delta.
+		best_delta = 0.0
+		best_config.clear()
+		for graph_id, node_set in inserted_nodes.items():
+			if len(node_set) == 0:
+				best_config[graph_id] = np.inf
+				best_delta += self.__node_del_cost
+			else:
+				best_config[graph_id] = node_set[0][0]
+				best_delta -= self.__node_ins_cost
+
+		# Return the best insertion delta.
+		return best_delta
+	def __compute_insertion_delta_constant(self, inserted_nodes, best_config, best_label):
+		# Construct histogram and inverse label maps.
+		hist = {}
+		inverse_label_maps = {}
+		for graph_id, node_set in inserted_nodes.items():
+			inverse_label_maps[graph_id] = {}
+			for node in node_set:
+				k = node[0]
+				label = node[1]
+				if label not in inverse_label_maps[graph_id]:
+					inverse_label_maps[graph_id][label] = k
+					if label not in hist:
+						hist[label] = 1
+					else:
+						hist[label] += 1
+
+		# Determine the best label.
+		best_count = 0
+		for key, val in hist.items():
+			if val > best_count:
+				best_count = val
+				best_label_tuple = key
+
+		# Get the best label.
+		best_label.clear()
+		for key, val in best_label_tuple:
+			best_label[key] = val
+
+		# Construct the best configuration and compute its insertion delta.
+		best_config.clear()
+		best_delta = 0.0
+		node_rel_cost = self.__ged_env.get_node_rel_cost(self.__ged_env.get_node_label(1), self.__ged_env.get_node_label(2))
+		triangle_ineq_holds = (node_rel_cost <= self.__node_del_cost + self.__node_ins_cost)
+		for graph_id, _ in inserted_nodes.items():
+			if best_label_tuple in inverse_label_maps[graph_id]:
+				best_config[graph_id] = inverse_label_maps[graph_id][best_label_tuple]
+				best_delta -= self.__node_ins_cost
+			elif triangle_ineq_holds and not len(inserted_nodes[graph_id]) == 0:
+				best_config[graph_id] = inserted_nodes[graph_id][0][0]
+				best_delta += node_rel_cost - self.__node_ins_cost
+			else:
+				best_config[graph_id] = np.inf
+				best_delta += self.__node_del_cost
+
+		# Return the best insertion delta.
+		return best_delta
+	def __compute_insertion_delta_generic(self, inserted_nodes, best_config, best_label):
+		# Collect all node labels of inserted nodes.
+		node_labels = []
+		for _, node_set in inserted_nodes.items():
+			for node in node_set:
+				node_labels.append(node[1])
+
+		# Compute node label medians that serve as initial solutions for block gradient descent.
+		initial_node_labels = []
+		self.__compute_initial_node_labels(node_labels, initial_node_labels)
+
+		# Determine best insertion configuration, label, and delta via parallel block gradient descent from all initial node labels.
+		best_delta = 0.0
+		for node_label in initial_node_labels:
+			# Construct local configuration.
+			config = {}
+			for graph_id, _ in inserted_nodes.items():
+				config[graph_id] = tuple((np.inf, self.__ged_env.get_node_label(1)))
+
+			# Run block gradient descent.
+			converged = False
+			itr = 0
+			while not self.__insertion_termination_criterion_met(converged, itr):
+				converged = not self.__update_config_(node_label, inserted_nodes, config, node_labels)
+				converged = converged and (not self.__update_node_label(node_labels, node_label))
+				itr += 1
+
+			# Compute insertion delta of converged solution.
+			delta = 0.0
+			for _, node in config.items():
+				if node[0] == np.inf:
+					delta += self.__node_del_cost
+				else:
+					delta += self.__ged_env.node_rel_cost(node_label, node[1]) - self.__node_ins_cost
+
+			# Update best delta and global configuration if improvement has been found.
+			if delta < best_delta - self.__epsilon:
+				best_delta = delta
+				best_label = node_label # @todo: may be wrong.
+				best_config.clear()
+				for graph_id, k in config.items():
+					best_config[graph_id] = k
+
+		# Return the best delta.
+		return best_delta


+	def __compute_initial_node_labels(self, node_labels, median_labels):
+		median_labels.clear()
+		if self.__use_real_randomness: # @todo: may not work if parallelized.
+			rng = np.random.randint(0, high=2**32 - 1, size=1)
+			urng = np.random.RandomState(seed=rng[0])
+		else:
+			urng = np.random.RandomState(seed=self.__seed)
+
+		# Generate the initial node label medians.
+		if self.__init_type_increase_order == 'K-MEANS++':
+			# Use k-means++ heuristic to generate the initial node label medians.
+			already_selected = [False] * len(node_labels)
+			selected_label_id = urng.randint(low=0, high=len(node_labels), size=1)[0]
+			median_labels.append(node_labels[selected_label_id])
+			already_selected[selected_label_id] = True
+			while len(median_labels) < self.__num_inits_increase_order:
+				weights = [np.inf] * len(node_labels)
+				for label_id in range(0, len(node_labels)):
+					if already_selected[label_id]:
+						weights[label_id] = 0
+						continue
+					for label in median_labels:
+						weights[label_id] = min(weights[label_id], self.__ged_env.node_rel_cost(label, node_labels[label_id]))
+				selected_label_id = urng.choice(range(0, len(weights)), size=1, p=np.array(weights) / np.sum(weights))[0]
+				median_labels.append(node_labels[selected_label_id])
+				already_selected[selected_label_id] = True
+		else:
+			# Compute the initial node medians as the medians of randomly generated clusters of (roughly) equal size.
+			# @todo: go through and test.
+			shuffled_node_labels = [np.inf] * len(node_labels) #@todo: random?
+			# @todo: std::shuffle(shuffled_node_labels.begin(), shuffled_node_labels.end(), urng);?
+			cluster_size = len(node_labels) / self.__num_inits_increase_order
+			pos = 0
+			cluster = []
+			while len(median_labels) < self.__num_inits_increase_order - 1:
+				while pos < (len(median_labels) + 1) * cluster_size:
+					cluster.append(shuffled_node_labels[pos])
+					pos += 1
+				median_labels.append(self.__get_median_node_label(cluster))
+				cluster.clear()
+			while pos < len(shuffled_node_labels):
+				cluster.append(shuffled_node_labels[pos])
+				pos += 1
+			median_labels.append(self.__get_median_node_label(cluster))
+			cluster.clear()
+
+		# Run Lloyd's Algorithm.
+		converged = False
+		closest_median_ids = [np.inf] * len(node_labels)
+		clusters = [[] for _ in range(len(median_labels))]
+		itr = 1
+		while not self.__insertion_termination_criterion_met(converged, itr):
+			converged = not self.__update_clusters(node_labels, median_labels, closest_median_ids)
+			if not converged:
+				for cluster in clusters:
+					cluster.clear()
+				for label_id in range(0, len(node_labels)):
+					clusters[closest_median_ids[label_id]].append(node_labels[label_id])
+				for cluster_id in range(0, len(clusters)):
+					self.__update_node_label(clusters[cluster_id], median_labels[cluster_id])
+			itr += 1
+	def __insertion_termination_criterion_met(self, converged, itr):
+		return converged or (itr >= self.__max_itrs_increase_order if self.__max_itrs_increase_order > 0 else False)
+	def __update_config_(self, node_label, inserted_nodes, config, node_labels):
+		# Determine the best configuration.
+		config_modified = False
+		for graph_id, node_set in inserted_nodes.items():
+			best_assignment = config[graph_id]
+			best_cost = 0.0
+			if best_assignment[0] == np.inf:
+				best_cost = self.__node_del_cost
+			else:
+				best_cost = self.__ged_env.node_rel_cost(node_label, best_assignment[1]) - self.__node_ins_cost
+			for node in node_set:
+				cost = self.__ged_env.node_rel_cost(node_label, node[1]) - self.__node_ins_cost
+				if cost < best_cost - self.__epsilon:
+					best_cost = cost
+					best_assignment = node
+					config_modified = True
+			if self.__node_del_cost < best_cost - self.__epsilon:
+				best_cost = self.__node_del_cost
+				best_assignment[0] = np.inf # @todo: work?
+				config_modified = True
+			config[graph_id] = best_assignment
+
+		# Collect the node labels contained in the best configuration.
+		node_labels.clear()
+		for key, val in config.items():
+			if val[0] != np.inf:
+				node_labels.append(val[1])
+
+		# Return true if the configuration was modified.
+		return config_modified
+	def __update_node_label(self, node_labels, node_label):
+		new_node_label = self.__get_median_node_label(node_labels)
+		if self.__ged_env.node_rel_cost(new_node_label, node_label) > self.__epsilon:
+			node_label = new_node_label # @todo: may be wrong
+			return True
+		return False
+	def __update_clusters(self, node_labels, median_labels, closest_median_ids):
+		# Determine the closest median for each node label.
+		clusters_modified = False
+		for label_id in range(0, len(node_labels)):
+			closest_median_id = np.inf
+			dist_to_closest_median = np.inf
+			for median_id in range(0, len(median_labels)):
+				dist_to_median = self.__ged_env.node_rel_cost(median_labels[median_id], node_labels[label_id])
+				if dist_to_median < dist_to_closest_median - self.__epsilon:
+					dist_to_closest_median = dist_to_median
+					closest_median_id = median_id
+			if closest_median_id != closest_median_ids[label_id]:
+				closest_median_ids[label_id] = closest_median_id
+				clusters_modified = True
+
+		# Return true if the clusters were modified.
+		return clusters_modified
+	def __add_node_to_median(self, best_config, best_label, median):
+		# Update the median.
+		median.add_node(nx.number_of_nodes(median), **best_label)
+
+		# Update the node maps.
+		for graph_id, node_map in self.__node_maps_from_median.items():
+			node_map_as_rel = []
+			node_map.as_relation(node_map_as_rel)
+			new_node_map = NodeMap(nx.number_of_nodes(median), node_map.num_target_nodes())
+			for assignment in node_map_as_rel:
+				new_node_map.add_assignment(assignment[0], assignment[1])
+			new_node_map.add_assignment(nx.number_of_nodes(median) - 1, best_config[graph_id])
+			self.__node_maps_from_median[graph_id] = new_node_map
+
+		# Increase overall number of increases.
+		self.__num_increase_order += 1
	def __improve_sum_of_distances(self, timer):
		pass
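The k-means++ style initialization added above picks a first label at random and then favors labels that are expensive to relabel into any median chosen so far. A standalone sketch of that weighting scheme, with a plain numeric cost standing in for `ged_env.node_rel_cost` (all names here are illustrative, not the estimator's API):

```python
import numpy as np

def kmeanspp_init(labels, num_inits, rel_cost, seed=None):
	"""Pick `num_inits` seed labels, k-means++ style: each new seed is drawn
	with probability proportional to its cost to the closest seed so far."""
	urng = np.random.RandomState(seed=seed)
	selected = [int(urng.randint(len(labels)))]
	while len(selected) < num_inits:
		# Weight of each label = relabeling cost to its closest selected seed.
		weights = np.array([
			0.0 if i in selected
			else min(rel_cost(labels[i], labels[j]) for j in selected)
			for i in range(len(labels))
		])
		probs = weights / weights.sum()
		selected.append(int(urng.choice(len(labels), p=probs)))
	return [labels[i] for i in selected]

# Toy run with 1-D "labels" and absolute difference as the relabeling cost.
labels = [0.0, 0.1, 5.0, 5.1, 10.0]
print(kmeanspp_init(labels, 3, rel_cost=lambda a, b: abs(a - b), seed=1))
```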


gklearn/ged/median/test_median_graph_estimator.py  +3 -3

@@ -53,7 +53,7 @@ def test_median_graph_estimator():
	mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
	mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
-	mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE'# @todo: std::to_string(rng())
+	mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO '# @todo: std::to_string(rng())

	# Select the GED algorithm.
	algo_options = '--threads ' + str(threads) + algo_options_suffix
@@ -155,5 +155,5 @@ def test_median_graph_estimator_symb():




if __name__ == '__main__':
-	# set_median, gen_median = test_median_graph_estimator()
-	set_median, gen_median = test_median_graph_estimator_symb()
+	set_median, gen_median = test_median_graph_estimator()
+	# set_median, gen_median = test_median_graph_estimator_symb()

gklearn/kernels/path_up_to_h.py  +9 -8

@@ -18,6 +18,7 @@ import numpy as np
import networkx as nx
from collections import Counter
from functools import partial
+from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import GraphKernel
from gklearn.utils import Trie
@@ -582,11 +583,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
	def __add_dummy_labels(self, Gn):
		if self.__k_func is not None:
-			if len(self.__node_labels) == 0:
-				for G in Gn:
-					nx.set_node_attributes(G, '0', 'dummy')
-				self.__node_labels.append('dummy')
-			if len(self.__edge_labels) == 0:
-				for G in Gn:
-					nx.set_edge_attributes(G, '0', 'dummy')
-				self.__edge_labels.append('dummy')
+			if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+				for i in range(len(Gn)):
+					nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+				self.__node_labels = [SpecialLabel.DUMMY]
+			if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+				for i in range(len(Gn)):
+					nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+				self.__edge_labels = [SpecialLabel.DUMMY]
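The rewritten `__add_dummy_labels` tags unlabeled graphs with the shared `SpecialLabel.DUMMY` marker instead of the ad-hoc `'dummy'` string, so kernels can recognize the placeholder later (the same change lands in treelet.py and weisfeiler_lehman.py below). A small sketch of the pattern, assuming `SpecialLabel` is exported from `gklearn.utils` as the import added above implies:

```python
import networkx as nx
from gklearn.utils import SpecialLabel  # export added in this commit

G = nx.path_graph(3)  # an unlabeled graph

# Give every node and edge the constant label '0' under the DUMMY key,
# then record DUMMY as the only label name, mirroring __add_dummy_labels.
nx.set_node_attributes(G, '0', SpecialLabel.DUMMY)
nx.set_edge_attributes(G, '0', SpecialLabel.DUMMY)
node_labels = [SpecialLabel.DUMMY]

# A later pass can detect that the labels are placeholders:
print(node_labels == [SpecialLabel.DUMMY])  # True
print(G.nodes[0])                           # {<SpecialLabel.DUMMY: ...>: '0'}
```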

gklearn/kernels/treelet.py  +9 -8

@@ -18,6 +18,7 @@ import numpy as np
import networkx as nx
from collections import Counter
from itertools import chain
+from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs
from gklearn.kernels import GraphKernel
@@ -495,11 +496,11 @@ class Treelet(GraphKernel):
	def __add_dummy_labels(self, Gn):
-		if len(self.__node_labels) == 0:
-			for G in Gn:
-				nx.set_node_attributes(G, '0', 'dummy')
-			self.__node_labels.append('dummy')
-		if len(self.__edge_labels) == 0:
-			for G in Gn:
-				nx.set_edge_attributes(G, '0', 'dummy')
-			self.__edge_labels.append('dummy')
+		if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+			for i in range(len(Gn)):
+				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+			self.__node_labels = [SpecialLabel.DUMMY]
+		if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+			for i in range(len(Gn)):
+				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+			self.__edge_labels = [SpecialLabel.DUMMY]

gklearn/kernels/weisfeiler_lehman.py  +5 -4

@@ -16,6 +16,7 @@ import numpy as np
import networkx as nx
from collections import Counter
from functools import partial
+from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm
from gklearn.kernels import GraphKernel


@@ -469,10 +470,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
	def __add_dummy_node_labels(self, Gn):
-		if len(self.__node_labels) == 0:
-			for G in Gn:
-				nx.set_node_attributes(G, '0', 'dummy')
-			self.__node_labels.append('dummy')
+		if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+			for i in range(len(Gn)):
+				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+			self.__node_labels = [SpecialLabel.DUMMY]


class WLSubtree(WeisfeilerLehman):


gklearn/preimage/experiments/xp_median_preimage.py  +135 -49

@@ -8,11 +8,83 @@ Created on Tue Jan 14 15:39:29 2020
import multiprocessing
import functools
import sys
+import os
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from gklearn.preimage.utils import generate_median_preimages_by_class
from gklearn.utils import compute_gram_matrices_by_class




+def xp_median_preimage_14_1():
+	"""xp 14_1: DD, PathUpToH, using CONSTANT.
+	"""
+	# set parameters.
+	ds_name = 'DD' #
+	mpg_options = {'fit_method': 'k-graphs',
+				   'init_ecc': [4, 4, 2, 1, 1, 1], #
+				   'ds_name': ds_name,
+				   'parallel': True, # False
+				   'time_limit_in_sec': 0,
+				   'max_itrs': 100, #
+				   'max_itrs_without_update': 3,
+				   'epsilon_residual': 0.01,
+				   'epsilon_ec': 0.1,
+				   'verbose': 2}
+	kernel_options = {'name': 'PathUpToH',
+					  'depth': 2, #
+					  'k_func': 'MinMax', #
+					  'compute_method': 'trie',
+					  'parallel': 'imap_unordered',
+					  # 'parallel': None,
+					  'n_jobs': multiprocessing.cpu_count(),
+					  'normalize': True,
+					  'verbose': 2}
+	ged_options = {'method': 'IPFP',
+				   'initialization_method': 'RANDOM', # 'NODE'
+				   'initial_solutions': 10, # 1
+				   'edit_cost': 'CONSTANT', #
+				   'attr_distance': 'euclidean',
+				   'ratio_runs_from_initial_solutions': 1,
+				   'threads': multiprocessing.cpu_count(),
+				   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
+	mge_options = {'init_type': 'MEDOID',
+				   'random_inits': 10,
+				   'time_limit': 0,
+				   'verbose': 2,
+				   'update_order': False,
+				   'refine': False}
+	save_results = True
+	dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
+	irrelevant_labels = None #
+	edge_required = False #
+
+	if not os.path.exists(dir_save):
+		os.makedirs(dir_save)
+	file_output = open(dir_save + 'output.txt', 'a')
+	sys.stdout = file_output
+
+	# # compute gram matrices for each class a priori.
+	# print('Compute gram matrices for each class a priori.')
+	# compute_gram_matrices_by_class(ds_name, kernel_options, save_results=save_results, dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
+
+	# print settings.
+	print('parameters:')
+	print('dataset name:', ds_name)
+	print('mpg_options:', mpg_options)
+	print('kernel_options:', kernel_options)
+	print('ged_options:', ged_options)
+	print('mge_options:', mge_options)
+	print('save_results:', save_results)
+	print('irrelevant_labels:', irrelevant_labels)
+	print()
+
+	# generate preimages.
+	for fit_method in ['k-graphs'] + ['random'] * 5:
+		print('\n-------------------------------------')
+		print('fit method:', fit_method, '\n')
+		mpg_options['fit_method'] = fit_method
+		generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
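xp_median_preimage_14_1 also introduces the logging pattern that the rest of this commit retrofits into the older experiments: create the results directory on demand and open output.txt in append mode ('a' instead of 'w'), so repeated runs no longer overwrite earlier logs. The pattern in isolation (the path is illustrative):

```python
import os
import sys

dir_save = '../results/xp_median_preimage/DD.PathUpToH/'  # illustrative path

# Create the output directory on first use; skipped if it already exists.
if not os.path.exists(dir_save):
	os.makedirs(dir_save)

# Append instead of overwrite, so each run's log is kept.
file_output = open(dir_save + 'output.txt', 'a')
sys.stdout = file_output  # redirect prints into the log file
print('parameters: ...')
```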


def xp_median_preimage_13_1():
	"""xp 13_1: PAH, StructuralSP, using NON_SYMBOLIC.
	"""
@@ -60,7 +132,7 @@ def xp_median_preimage_13_1():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -125,7 +197,7 @@ def xp_median_preimage_13_2():
	edge_required = True #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -192,7 +264,7 @@ def xp_median_preimage_12_1():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -256,7 +328,7 @@ def xp_median_preimage_12_2():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -320,7 +392,9 @@ def xp_median_preimage_12_3():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	if not os.path.exists(dir_save):
+		os.makedirs(dir_save)
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -383,7 +457,7 @@ def xp_median_preimage_12_4():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -452,7 +526,7 @@ def xp_median_preimage_12_5():
	edge_required = True #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -519,7 +593,7 @@ def xp_median_preimage_9_1():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -583,7 +657,7 @@ def xp_median_preimage_9_2():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -647,7 +721,7 @@ def xp_median_preimage_9_3():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -710,7 +784,7 @@ def xp_median_preimage_9_4():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -781,7 +855,7 @@ def xp_median_preimage_8_1():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -845,7 +919,7 @@ def xp_median_preimage_8_2():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -909,7 +983,7 @@ def xp_median_preimage_8_3():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -972,7 +1046,7 @@ def xp_median_preimage_8_4():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1039,7 +1113,7 @@ def xp_median_preimage_7_1():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1103,7 +1177,7 @@ def xp_median_preimage_7_2():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1167,7 +1241,7 @@ def xp_median_preimage_7_3():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1230,7 +1304,7 @@ def xp_median_preimage_7_4():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1297,7 +1371,7 @@ def xp_median_preimage_6_1():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1362,7 +1436,9 @@ def xp_median_preimage_6_2():
	edge_required = True #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	if not os.path.exists(dir_save):
+		os.makedirs(dir_save)
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1429,7 +1505,7 @@ def xp_median_preimage_5_1():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1496,7 +1572,7 @@ def xp_median_preimage_4_1():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1522,7 +1598,7 @@ def xp_median_preimage_3_2():
	# set parameters.
	ds_name = 'Fingerprint' #
	mpg_options = {'fit_method': 'k-graphs',
-				   'init_ecc': [0.525, 0.525, 0.001, 0.125, 0.125], #
+				   'init_ecc': [0.525, 0.525, 0.01, 0.125, 0.125], #
				   'ds_name': ds_name,
				   'parallel': True, # False
				   'time_limit_in_sec': 0,
@@ -1561,7 +1637,9 @@ def xp_median_preimage_3_2():
	edge_required = True #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	if not os.path.exists(dir_save):
+		os.makedirs(dir_save)
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1587,7 +1665,7 @@ def xp_median_preimage_3_1():
	# set parameters.
	ds_name = 'Fingerprint' #
	mpg_options = {'fit_method': 'k-graphs',
-				   'init_ecc': [0.525, 0.525, 0.001, 0.125, 0.125], #
+				   'init_ecc': [0.525, 0.525, 0.01, 0.125, 0.125], #
				   'ds_name': ds_name,
				   'parallel': True, # False
				   'time_limit_in_sec': 0,
@@ -1628,7 +1706,9 @@ def xp_median_preimage_3_1():
	edge_required = False #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	if not os.path.exists(dir_save):
+		os.makedirs(dir_save)
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1685,7 +1765,7 @@ def xp_median_preimage_2_1():
				   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
	mge_options = {'init_type': 'MEDOID',
				   'random_inits': 10,
-				   'time_limit': 600,
+				   'time_limit': 0,
				   'verbose': 2,
				   'update_order': False,
				   'refine': False}
@@ -1694,7 +1774,9 @@ def xp_median_preimage_2_1():
	irrelevant_labels = {'edge_labels': ['valence']}

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	if not os.path.exists(dir_save):
+		os.makedirs(dir_save)
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1763,7 +1845,7 @@ def xp_median_preimage_1_1():
	dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1826,7 +1908,7 @@ def xp_median_preimage_1_2():
	edge_required = True #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1891,7 +1973,7 @@ def xp_median_preimage_10_1():
	dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -1954,7 +2036,7 @@ def xp_median_preimage_10_2():
	edge_required = True #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -2019,7 +2101,7 @@ def xp_median_preimage_11_1():
	dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -2082,7 +2164,7 @@ def xp_median_preimage_11_2():
	edge_required = True #

	# print settings.
-	file_output = open(dir_save + 'output.txt', 'w')
+	file_output = open(dir_save + 'output.txt', 'a')
	sys.stdout = file_output
	print('parameters:')
	print('dataset name:', ds_name)
@@ -2147,7 +2229,7 @@ if __name__ == "__main__":
	# # xp_median_preimage_7_1()

	# #### xp 7_2: MUTAG, PathUpToH, using CONSTANT.
-	# # xp_median_preimage_7_2()
+	# xp_median_preimage_7_2()

	# #### xp 7_3: MUTAG, Treelet, using CONSTANT.
	# # xp_median_preimage_7_3()
@@ -2200,6 +2282,10 @@ if __name__ == "__main__":
	#### xp 13_2: PAH, ShortestPath, using NON_SYMBOLIC.
	# xp_median_preimage_13_2()

+	#### xp 14_1: DD, PathUpToH, using CONSTANT.
+	xp_median_preimage_14_1()


	# #### xp 1_1: Letter-high, StructuralSP.
@@ -2221,10 +2307,10 @@ if __name__ == "__main__":
	# xp_median_preimage_11_2()
	#
	# #### xp 2_1: COIL-DEL, StructuralSP, using LETTER2, only node attrs.
-	# # xp_median_preimage_2_1()
+	# xp_median_preimage_2_1()
	#
	# #### xp 3_1: Fingerprint, StructuralSP, using LETTER2, only node attrs.
-	# # xp_median_preimage_3_1()
+	# xp_median_preimage_3_1()

	# #### xp 3_2: Fingerprint, ShortestPath, using LETTER2, only node attrs.
	# xp_median_preimage_3_2()
@@ -2266,35 +2352,35 @@ if __name__ == "__main__":
	# xp_median_preimage_8_4()

	# #### xp 9_1: MAO, StructuralSP, using CONSTANT, symbolic only.
-	xp_median_preimage_9_1()
+	# xp_median_preimage_9_1()

	# #### xp 9_2: MAO, PathUpToH, using CONSTANT, symbolic only.
-	xp_median_preimage_9_2()
+	# xp_median_preimage_9_2()

	# #### xp 9_3: MAO, Treelet, using CONSTANT, symbolic only.
-	xp_median_preimage_9_3()
+	# xp_median_preimage_9_3()

	# #### xp 9_4: MAO, WeisfeilerLehman, using CONSTANT, symbolic only.
-	xp_median_preimage_9_4()
+	# xp_median_preimage_9_4()

	#### xp 12_1: PAH, StructuralSP, using NON_SYMBOLIC, unlabeled.
-	xp_median_preimage_12_1()
+	# xp_median_preimage_12_1()

	#### xp 12_2: PAH, PathUpToH, using CONSTANT, unlabeled.
-	xp_median_preimage_12_2()
+	# xp_median_preimage_12_2()

	#### xp 12_3: PAH, Treelet, using CONSTANT, unlabeled.
-	xp_median_preimage_12_3()
+	# xp_median_preimage_12_3()

	#### xp 12_4: PAH, WeisfeilerLehman, using CONSTANT, unlabeled.
-	xp_median_preimage_12_4()
+	# xp_median_preimage_12_4()

	#### xp 12_5: PAH, ShortestPath, using NON_SYMBOLIC, unlabeled.
-	xp_median_preimage_12_5()
+	# xp_median_preimage_12_5()

	#### xp 13_1: PAH, StructuralSP, using NON_SYMBOLIC.
-	xp_median_preimage_13_1()
+	# xp_median_preimage_13_1()

	#### xp 13_2: PAH, ShortestPath, using NON_SYMBOLIC.
-	xp_median_preimage_13_2()
+	# xp_median_preimage_13_2()



gklearn/preimage/utils.py  +2 -0

@@ -419,6 +419,8 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel='
		Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label,
											height=4, base_kernel='subtree', parallel=None,
											n_jobs=multiprocessing.cpu_count(), verbose=verbose)
+	else:
+		raise Exception('The graph kernel "', graph_kernel, '" is not defined.')

	# normalization
	Kmatrix_diag = Kmatrix.diagonal().copy()


gklearn/utils/__init__.py  +1 -0

@@ -20,4 +20,5 @@ from gklearn.utils.graph_files import load_dataset, save_dataset
from gklearn.utils.timer import Timer
from gklearn.utils.utils import get_graph_kernel_by_name
from gklearn.utils.utils import compute_gram_matrices_by_class
+from gklearn.utils.utils import SpecialLabel
from gklearn.utils.trie import Trie

gklearn/utils/dataset.py  +5 -0

@@ -90,6 +90,9 @@ class Dataset(object):
		elif ds_name == 'Cuneiform':
			ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt'
			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
+		elif ds_name == 'DD':
+			ds_file = current_path + '../../datasets/DD/DD_A.txt'
+			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
		elif ds_name == 'Fingerprint':
			ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt'
			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
@@ -124,6 +127,8 @@ class Dataset(object):
			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
		elif ds_name == 'Synthie':
			pass
+		else:
+			raise Exception('The dataset name "', ds_name, '" is not pre-defined.')
		self.__node_labels = label_names['node_labels']
		self.__node_attrs = label_names['node_attrs']
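With the new branch, 'DD' loads like any other pre-defined dataset, provided the TUDataset-style files sit under datasets/DD/ in the repository layout the path above expects. A hedged usage sketch, assuming `Dataset` is exported from `gklearn.utils` as the diff in utils.py uses it:

```python
from gklearn.utils import Dataset

# Requires datasets/DD/DD_A.txt to exist relative to the package;
# otherwise load_dataset() will fail to find the file.
dataset = Dataset()
dataset.load_predefined_dataset('DD')

# An unknown name now raises instead of silently leaving the dataset empty.
try:
	Dataset().load_predefined_dataset('no-such-dataset')
except Exception as e:
	print(e)
```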


gklearn/utils/utils.py  +13 -2

@@ -1,6 +1,7 @@
import networkx as nx
import numpy as np
from copy import deepcopy
+from enum import Enum, auto
#from itertools import product


# from tqdm import tqdm
@@ -343,13 +344,15 @@ def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attr
	return graph_kernel




-def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None):
+def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None, edge_required=False):
+	import os
	from gklearn.utils import Dataset, split_dataset_by_target

	# 1. get dataset.
	print('1. getting dataset...')
	dataset_all = Dataset()
	dataset_all.load_predefined_dataset(ds_name)
+	dataset_all.trim_dataset(edge_required=edge_required)
	if not irrelevant_labels is None:
		dataset_all.remove_labels(**irrelevant_labels)
	# dataset_all.cut_graphs(range(0, 10))
@@ -385,6 +388,8 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d
	print()
	print('4. saving results...')
	if save_results:
+		if not os.path.exists(dir_save):
+			os.makedirs(dir_save)
		np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)

	print('\ncomplete.')
@@ -460,4 +465,10 @@ def get_mlti_dim_edge_attrs(G, attr_names):
	attributes = []
	for ed, attrs in G.edges(data=True):
		attributes.append(tuple(attrs[aname] for aname in attr_names))
-	return attributes
+	return attributes


+class SpecialLabel(Enum):
+	"""Can be used to define special labels.
+	"""
+	DUMMY = auto() # The dummy label.
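Taken together, the changes to compute_gram_matrices_by_class mean callers can now request edge-trimmed graphs and rely on the save directory being created on demand. A call sketch mirroring how xp_median_preimage_14_1 invokes it above (the kernel options are abbreviated and the save path is illustrative):

```python
import multiprocessing
from gklearn.utils import compute_gram_matrices_by_class

kernel_options = {'name': 'PathUpToH',
				  'depth': 2,
				  'k_func': 'MinMax',
				  'compute_method': 'trie',
				  'parallel': 'imap_unordered',
				  'n_jobs': multiprocessing.cpu_count(),
				  'normalize': True,
				  'verbose': 2}

# `edge_required` is the parameter added in this commit; the target
# directory is now created automatically before results are saved.
compute_gram_matrices_by_class('DD', kernel_options,
							   save_results=True,
							   dir_save='../results/gram_matrices/',
							   irrelevant_labels=None,
							   edge_required=False)
```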
