@@ -32,6 +32,7 @@ gklearn/kernels/*_sym.py
 gklearn/preimage/*
 !gklearn/preimage/*.py
 !gklearn/preimage/experiments/*.py
+!gklearn/preimage/experiments/tools/*.py
 __pycache__
 ##*#
@@ -10,7 +10,7 @@ A Python package for graph kernels, graph edit distances and graph pre-image problems.
 ## Requirements
 * python>=3.5
-* numpy>=1.15.2
+* numpy>=1.16.2
 * scipy>=1.1.0
 * matplotlib>=3.0.0
 * networkx>=2.2
@@ -18,4 +18,4 @@ __date__ = "November 2017"
 # import sub modules
 # from gklearn import c_ext
 # from gklearn import ged
-from gklearn import utils
+# from gklearn import utils
@@ -6,12 +6,13 @@ Created on Thu Mar 19 18:17:38 2020
 @author: ljia
 """
-from enum import Enum, auto
+from enum import Enum, unique
 
 
+@unique
 class AlgorithmState(Enum):
     """can be used to specify the state of an algorithm.
     """
-    CALLED = auto # The algorithm has been called.
-    INITIALIZED = auto # The algorithm has been initialized.
-    CONVERGED = auto # The algorithm has converged.
-    TERMINATED = auto # The algorithm has terminated.
+    CALLED = 1 # The algorithm has been called.
+    INITIALIZED = 2 # The algorithm has been initialized.
+    CONVERGED = 3 # The algorithm has converged.
+    TERMINATED = 4 # The algorithm has terminated.
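Note on the hunk above: the real fix is the removal of the bare `auto` references. Without parentheses, every member is assigned the same object (the `enum.auto` marker itself), so all four names collapse into aliases of `CALLED`. A minimal illustration of the failure mode, not part of the diff:

from enum import Enum, auto, unique

class Broken(Enum):
    CALLED = auto        # missing (): the value is the auto marker object itself
    INITIALIZED = auto   # identical value, so this name becomes an alias of CALLED

print(Broken.CALLED is Broken.INITIALIZED)  # True: only one distinct member exists

The replacement uses explicit integer values, and `@unique` makes any future accidental aliasing raise a ValueError at class-definition time.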
@@ -39,14 +39,6 @@ class NodeMap(object):
         return np.inf
 
-    def get_forward_map(self):
-        return self.__forward_map
-
-    def get_backward_map(self):
-        return self.__backward_map
-
     def as_relation(self, relation):
         relation.clear()
         for i in range(0, len(self.__forward_map)):
@@ -77,4 +69,22 @@ class NodeMap(object):
     def induced_cost(self):
-        return self.__induced_cost
+        return self.__induced_cost
+
+    @property
+    def forward_map(self):
+        return self.__forward_map
+
+    @forward_map.setter
+    def forward_map(self, value):
+        self.__forward_map = value
+
+    @property
+    def backward_map(self):
+        return self.__backward_map
+
+    @backward_map.setter
+    def backward_map(self, value):
+        self.__backward_map = value
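Note: replacing the `get_forward_map()`/`get_backward_map()` getters with read/write properties is what enables the tuple-swap idiom used throughout the estimator changes below:

node_map.forward_map, node_map.backward_map = node_map.backward_map, node_map.forward_map

When a GED is computed in the reverse direction (larger graph as source), this one-liner turns the resulting map around so callers still see a source-to-target map; getter methods alone would not allow the in-place assignment.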
@@ -13,6 +13,9 @@ import time
 from tqdm import tqdm
 import sys
 import networkx as nx
+import multiprocessing
+from multiprocessing import Pool
+from functools import partial
 
 
 class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
@@ -47,7 +50,9 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no | |||||
self.__desired_num_random_inits = 10 | self.__desired_num_random_inits = 10 | ||||
self.__use_real_randomness = True | self.__use_real_randomness = True | ||||
self.__seed = 0 | self.__seed = 0 | ||||
self.__parallel = True | |||||
self.__update_order = True | self.__update_order = True | ||||
self.__sort_graphs = True # sort graphs by size when computing GEDs. | |||||
self.__refine = True | self.__refine = True | ||||
self.__time_limit_in_sec = 0 | self.__time_limit_in_sec = 0 | ||||
self.__epsilon = 0.0001 | self.__epsilon = 0.0001 | ||||
@@ -125,6 +130,16 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
             else:
                 raise Exception('Invalid argument "' + opt_val + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"')
 
+        elif opt_name == 'parallel':
+            if opt_val == 'TRUE':
+                self.__parallel = True
+            elif opt_val == 'FALSE':
+                self.__parallel = False
+            else:
+                raise Exception('Invalid argument "' + opt_val + '" for option parallel. Usage: options = "[--parallel TRUE|FALSE] [...]"')
+
         elif opt_name == 'update-order':
             if opt_val == 'TRUE':
@@ -136,6 +151,16 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
             else:
                 raise Exception('Invalid argument "' + opt_val + '" for option update-order. Usage: options = "[--update-order TRUE|FALSE] [...]"')
 
+        elif opt_name == 'sort-graphs':
+            if opt_val == 'TRUE':
+                self.__sort_graphs = True
+            elif opt_val == 'FALSE':
+                self.__sort_graphs = False
+            else:
+                raise Exception('Invalid argument "' + opt_val + '" for option sort-graphs. Usage: options = "[--sort-graphs TRUE|FALSE] [...]"')
+
         elif opt_name == 'refine':
             if opt_val == 'TRUE':
                 self.__refine = True
@@ -302,7 +327,7 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
         self.__median_id = gen_median_id
         self.__state = AlgorithmState.TERMINATED
 
-        # Get ExchangeGraph representations of the input graphs.
+        # Get NetworkX graph representations of the input graphs.
         graphs = {}
         for graph_id in graph_ids:
             # @todo: get_nx_graph() function may need to be modified according to the coming code.
@@ -312,7 +337,6 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
         # print(graphs[0].nodes(data=True))
         # print(graphs[0].edges(data=True))
         # print(nx.adjacency_matrix(graphs[0]))
-
         # Construct initial medians.
         medians = []
@@ -356,30 +380,14 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no | |||||
self.__ged_env.load_nx_graph(median, gen_median_id) | self.__ged_env.load_nx_graph(median, gen_median_id) | ||||
self.__ged_env.init(self.__ged_env.get_init_type()) | self.__ged_env.init(self.__ged_env.get_init_type()) | ||||
# Print information about current iteration. | |||||
if self.__print_to_stdout == 2: | |||||
progress = tqdm(desc='Computing initial node maps', total=len(graph_ids), file=sys.stdout) | |||||
# Compute node maps and sum of distances for initial median. | # Compute node maps and sum of distances for initial median. | ||||
self.__sum_of_distances = 0 | |||||
self.__node_maps_from_median.clear() | |||||
for graph_id in graph_ids: | |||||
self.__ged_env.run_method(gen_median_id, graph_id) | |||||
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id) | |||||
# print(self.__node_maps_from_median[graph_id]) | |||||
self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost() | |||||
# print(self.__sum_of_distances) | |||||
# Print information about current iteration. | |||||
if self.__print_to_stdout == 2: | |||||
progress.update(1) | |||||
# xxx = self.__node_maps_from_median | |||||
self.__compute_init_node_maps(graph_ids, gen_median_id) | |||||
# yyy = self.__node_maps_from_median | |||||
self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances) | self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances) | ||||
self.__ged_env.load_nx_graph(median, set_median_id) | self.__ged_env.load_nx_graph(median, set_median_id) | ||||
# print(self.__best_init_sum_of_distances) | # print(self.__best_init_sum_of_distances) | ||||
# Print information about current iteration. | |||||
if self.__print_to_stdout == 2: | |||||
print('\n') | |||||
# Run block gradient descent from initial median. | # Run block gradient descent from initial median. | ||||
converged = False | converged = False | ||||
@@ -434,7 +442,7 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no | |||||
# print(self.__node_maps_from_median[graph_id].induced_cost()) | # print(self.__node_maps_from_median[graph_id].induced_cost()) | ||||
# xxx = self.__node_maps_from_median[graph_id] | # xxx = self.__node_maps_from_median[graph_id] | ||||
self.__ged_env.compute_induced_cost(gen_median_id, graph_id, self.__node_maps_from_median[graph_id]) | self.__ged_env.compute_induced_cost(gen_median_id, graph_id, self.__node_maps_from_median[graph_id]) | ||||
# print('---------------------------------------') | |||||
# print('---------------------------------------') | |||||
# print(self.__node_maps_from_median[graph_id].induced_cost()) | # print(self.__node_maps_from_median[graph_id].induced_cost()) | ||||
# @todo:!!!!!!!!!!!!!!!!!!!!!!!!!!!!This value is a slight different from the c++ program, which might be a bug! Use it very carefully! | # @todo:!!!!!!!!!!!!!!!!!!!!!!!!!!!!This value is a slight different from the c++ program, which might be a bug! Use it very carefully! | ||||
@@ -540,18 +548,31 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
                     progress.update(1)
 
             # Improving the node maps.
+            nb_nodes_median = self.__ged_env.get_graph_num_nodes(self.__gen_median_id)
             for graph_id, node_map in self.__node_maps_from_median.items():
                 if time.expired():
                     if self.__state == AlgorithmState.TERMINATED:
                         self.__state = AlgorithmState.CONVERGED
                     break
-                self.__ged_env.run_method(self.__gen_median_id, graph_id)
-                if self.__ged_env.get_upper_bound(self.__gen_median_id, graph_id) < node_map.induced_cost():
-                    self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__gen_median_id, graph_id)
-                self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost()
+
+                nb_nodes_g = self.__ged_env.get_graph_num_nodes(graph_id)
+                if nb_nodes_median <= nb_nodes_g or not self.__sort_graphs:
+                    self.__ged_env.run_method(self.__gen_median_id, graph_id)
+                    if self.__ged_env.get_upper_bound(self.__gen_median_id, graph_id) < node_map.induced_cost():
+                        self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__gen_median_id, graph_id)
+                else:
+                    self.__ged_env.run_method(graph_id, self.__gen_median_id)
+                    if self.__ged_env.get_upper_bound(graph_id, self.__gen_median_id) < node_map.induced_cost():
+                        node_map_tmp = self.__ged_env.get_node_map(graph_id, self.__gen_median_id)
+                        node_map_tmp.forward_map, node_map_tmp.backward_map = node_map_tmp.backward_map, node_map_tmp.forward_map
+                        self.__node_maps_from_median[graph_id] = node_map_tmp
+
+                self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost()
 
                 # Print information.
                 if self.__print_to_stdout == 2:
                     progress.update(1)
 
             self.__sum_of_distances = 0.0
             for key, val in self.__node_maps_from_median.items():
                 self.__sum_of_distances += val.induced_cost()
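Note: this hunk introduces the `--sort-graphs` pattern that recurs at every GED call site of this commit: when enabled, the GED method is always run from the smaller graph to the larger one, and the resulting node map is flipped back so callers still see a median-to-graph map. A condensed sketch of the pattern (`env` stands in for the GEDEnv wrapper; the helper name is hypothetical):

def _run_ged_sorted(env, source_id, target_id, sort_graphs=True):
    # Run from the smaller graph: the GED heuristics used here are cheaper in that direction.
    if env.get_graph_num_nodes(source_id) <= env.get_graph_num_nodes(target_id) or not sort_graphs:
        env.run_method(source_id, target_id)
        return env.get_node_map(source_id, target_id)
    env.run_method(target_id, source_id)
    node_map = env.get_node_map(target_id, source_id)
    # Flip the reversed map so the caller still sees source -> target.
    node_map.forward_map, node_map.backward_map = node_map.backward_map, node_map.forward_map
    return node_map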
@@ -562,7 +583,7 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
 
     def __median_available(self):
-        return self.__gen_median_id != np.inf
+        return self.__median_id != np.inf
 
     def get_state(self):
@@ -637,7 +658,9 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no | |||||
self.__desired_num_random_inits = 10 | self.__desired_num_random_inits = 10 | ||||
self.__use_real_randomness = True | self.__use_real_randomness = True | ||||
self.__seed = 0 | self.__seed = 0 | ||||
self.__parallel = True | |||||
self.__update_order = True | self.__update_order = True | ||||
self.__sort_graphs = True | |||||
self.__refine = True | self.__refine = True | ||||
self.__time_limit_in_sec = 0 | self.__time_limit_in_sec = 0 | ||||
self.__epsilon = 0.0001 | self.__epsilon = 0.0001 | ||||
@@ -682,35 +705,138 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
     def __compute_medoid(self, graph_ids, timer, initial_medians):
         # Use method selected for initialization phase.
         self.__ged_env.set_method(self.__init_method, self.__init_options)
 
-        # Print information about current iteration.
-        if self.__print_to_stdout == 2:
-            progress = tqdm(desc='Computing medoid', total=len(graph_ids), file=sys.stdout)
-
         # Compute the medoid.
-        medoid_id = graph_ids[0]
-        best_sum_of_distances = np.inf
-        for g_id in graph_ids:
-            if timer.expired():
-                self.__state = AlgorithmState.CALLED
-                break
-            sum_of_distances = 0
-            for h_id in graph_ids:
-                self.__ged_env.run_method(g_id, h_id)
-                sum_of_distances += self.__ged_env.get_upper_bound(g_id, h_id)
-            if sum_of_distances < best_sum_of_distances:
-                best_sum_of_distances = sum_of_distances
-                medoid_id = g_id
+        if self.__parallel:
+            # @todo: notice when parallel self.__ged_env is not modified.
+            sum_of_distances_list = [np.inf] * len(graph_ids)
+            len_itr = len(graph_ids)
+            itr = zip(graph_ids, range(0, len(graph_ids)))
+            n_jobs = multiprocessing.cpu_count()
+            if len_itr < 100 * n_jobs:
+                chunksize = int(len_itr / n_jobs) + 1
+            else:
+                chunksize = 100
+            def init_worker(ged_env_toshare):
+                global G_ged_env
+                G_ged_env = ged_env_toshare
+            do_fun = partial(_compute_medoid_parallel, graph_ids, self.__sort_graphs)
+            pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(self.__ged_env,))
+            if self.__print_to_stdout == 2:
+                iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize),
+                                desc='Computing medoid', file=sys.stdout)
+            else:
+                iterator = pool.imap_unordered(do_fun, itr, chunksize)
+            for i, dis in iterator:
+                sum_of_distances_list[i] = dis
+            pool.close()
+            pool.join()
+            medoid_id = np.argmin(sum_of_distances_list)
+            best_sum_of_distances = sum_of_distances_list[medoid_id]
+            initial_medians.append(self.__ged_env.get_nx_graph(medoid_id, True, True, False)) # @todo
+
+        else:
             # Print information about current iteration.
             if self.__print_to_stdout == 2:
-                progress.update(1)
-        initial_medians.append(self.__ged_env.get_nx_graph(medoid_id, True, True, False)) # @todo
-
-        # Print information about current iteration.
-        if self.__print_to_stdout == 2:
-            print('\n')
+                progress = tqdm(desc='Computing medoid', total=len(graph_ids), file=sys.stdout)
+
+            medoid_id = graph_ids[0]
+            best_sum_of_distances = np.inf
+            for g_id in graph_ids:
+                if timer.expired():
+                    self.__state = AlgorithmState.CALLED
+                    break
+                nb_nodes_g = self.__ged_env.get_graph_num_nodes(g_id)
+                sum_of_distances = 0
+                for h_id in graph_ids:
+                    nb_nodes_h = self.__ged_env.get_graph_num_nodes(h_id)
+                    if nb_nodes_g <= nb_nodes_h or not self.__sort_graphs:
+                        self.__ged_env.run_method(g_id, h_id)
+                        sum_of_distances += self.__ged_env.get_upper_bound(g_id, h_id)
+                    else:
+                        self.__ged_env.run_method(h_id, g_id)
+                        sum_of_distances += self.__ged_env.get_upper_bound(h_id, g_id)
+                if sum_of_distances < best_sum_of_distances:
+                    best_sum_of_distances = sum_of_distances
+                    medoid_id = g_id
+
+                # Print information about current iteration.
+                if self.__print_to_stdout == 2:
+                    progress.update(1)
+
+            initial_medians.append(self.__ged_env.get_nx_graph(medoid_id, True, True, False)) # @todo
+
+            # Print information about current iteration.
+            if self.__print_to_stdout == 2:
+                print('\n')
+
+
+    def __compute_init_node_maps(self, graph_ids, gen_median_id):
+        # Compute node maps and sum of distances for initial median.
+        if self.__parallel:
+            # @todo: notice when parallel self.__ged_env is not modified.
+            self.__sum_of_distances = 0
+            self.__node_maps_from_median.clear()
+            sum_of_distances_list = [0] * len(graph_ids)
+            len_itr = len(graph_ids)
+            itr = graph_ids
+            n_jobs = multiprocessing.cpu_count()
+            if len_itr < 100 * n_jobs:
+                chunksize = int(len_itr / n_jobs) + 1
+            else:
+                chunksize = 100
+            def init_worker(ged_env_toshare):
+                global G_ged_env
+                G_ged_env = ged_env_toshare
+            nb_nodes_median = self.__ged_env.get_graph_num_nodes(gen_median_id)
+            do_fun = partial(_compute_init_node_maps_parallel, gen_median_id, self.__sort_graphs, nb_nodes_median)
+            pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(self.__ged_env,))
+            if self.__print_to_stdout == 2:
+                iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize),
+                                desc='Computing initial node maps', file=sys.stdout)
+            else:
+                iterator = pool.imap_unordered(do_fun, itr, chunksize)
+            for g_id, sod, node_maps in iterator:
+                sum_of_distances_list[g_id] = sod
+                self.__node_maps_from_median[g_id] = node_maps
+            pool.close()
+            pool.join()
+            self.__sum_of_distances = np.sum(sum_of_distances_list)
+            # xxx = self.__node_maps_from_median
+
+        else:
+            # Print information about current iteration.
+            if self.__print_to_stdout == 2:
+                progress = tqdm(desc='Computing initial node maps', total=len(graph_ids), file=sys.stdout)
+
+            self.__sum_of_distances = 0
+            self.__node_maps_from_median.clear()
+            nb_nodes_median = self.__ged_env.get_graph_num_nodes(gen_median_id)
+            for graph_id in graph_ids:
+                nb_nodes_g = self.__ged_env.get_graph_num_nodes(graph_id)
+                if nb_nodes_median <= nb_nodes_g or not self.__sort_graphs:
+                    self.__ged_env.run_method(gen_median_id, graph_id)
+                    self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id)
+                else:
+                    self.__ged_env.run_method(graph_id, gen_median_id)
+                    node_map_tmp = self.__ged_env.get_node_map(graph_id, gen_median_id)
+                    node_map_tmp.forward_map, node_map_tmp.backward_map = node_map_tmp.backward_map, node_map_tmp.forward_map
+                    self.__node_maps_from_median[graph_id] = node_map_tmp
+                # print(self.__node_maps_from_median[graph_id])
+                self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost()
+                # print(self.__sum_of_distances)
+
+                # Print information about current iteration.
+                if self.__print_to_stdout == 2:
+                    progress.update(1)
+
+            # Print information about current iteration.
+            if self.__print_to_stdout == 2:
+                print('\n')
+
 
     def __termination_criterion_met(self, converged, timer, itr, itrs_without_update):
         if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
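Note: all three parallel branches in this file share one multiprocessing idiom: the GED environment is handed to the pool once via initializer/initargs and stashed in a worker-process global (G_ged_env), the tasks are module-level functions bound with functools.partial, and results stream back through imap_unordered with a chunk size of len/n_jobs + 1 for small workloads and 100 otherwise. A self-contained toy version of the idiom (the dict environment and distance function are placeholders, not the gklearn API):

import multiprocessing
from functools import partial
from multiprocessing import Pool

def _init_worker(env_toshare):
    global G_env
    G_env = env_toshare  # set once per worker process instead of pickling per task

def _task(ref_id, item_id):
    # Toy stand-in for G_ged_env.run_method(...) / get_upper_bound(...).
    return item_id, abs(G_env[item_id] - G_env[ref_id])

if __name__ == '__main__':
    env = {i: float(i) ** 2 for i in range(20)}  # placeholder "environment"
    ids = list(env)
    n_jobs = multiprocessing.cpu_count()
    chunksize = int(len(ids) / n_jobs) + 1 if len(ids) < 100 * n_jobs else 100
    pool = Pool(processes=n_jobs, initializer=_init_worker, initargs=(env,))
    for item_id, dis in pool.imap_unordered(partial(_task, 0), ids, chunksize):
        print(item_id, dis)
    pool.close()
    pool.join()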
@@ -743,6 +869,7 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
     def __update_node_labels(self, graphs, median):
+        # print('----------------------------')
 
         # Print information about current iteration.
         if self.__print_to_stdout == 2:
@@ -750,14 +877,15 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
         # Iterate through all nodes of the median.
         for i in range(0, nx.number_of_nodes(median)):
-            # print('i: ', i)
+            # print('i: ', i)
 
             # Collect the labels of the substituted nodes.
             node_labels = []
             for graph_id, graph in graphs.items():
-                # print('graph_id: ', graph_id)
-                # print(self.__node_maps_from_median[graph_id])
+                # print('graph_id: ', graph_id)
+                # print(self.__node_maps_from_median[graph_id])
+                # print(self.__node_maps_from_median[graph_id].forward_map, self.__node_maps_from_median[graph_id].backward_map)
                 k = self.__node_maps_from_median[graph_id].image(i)
-                # print('k: ', k)
+                # print('k: ', k)
                 if k != np.inf:
                     node_labels.append(graph.nodes[k])
@@ -816,26 +944,70 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
     def __update_node_maps(self):
-        # Print information about current iteration.
-        if self.__print_to_stdout == 2:
-            progress = tqdm(desc='Updating node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
-
         # Update the node maps.
-        node_maps_were_modified = False
-        for graph_id, node_map in self.__node_maps_from_median.items():
-            self.__ged_env.run_method(self.__median_id, graph_id)
-            if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < node_map.induced_cost():
-                # xxx = self.__node_maps_from_median[graph_id]
-                self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id)
-                # yyy = self.__node_maps_from_median[graph_id]
-                node_maps_were_modified = True
+        if self.__parallel:
+            # @todo: notice when parallel self.__ged_env is not modified.
+            node_maps_were_modified = False
+            # xxx = self.__node_maps_from_median.copy()
+            len_itr = len(self.__node_maps_from_median)
+            itr = [item for item in self.__node_maps_from_median.items()]
+            n_jobs = multiprocessing.cpu_count()
+            if len_itr < 100 * n_jobs:
+                chunksize = int(len_itr / n_jobs) + 1
+            else:
+                chunksize = 100
+            def init_worker(ged_env_toshare):
+                global G_ged_env
+                G_ged_env = ged_env_toshare
+            nb_nodes_median = self.__ged_env.get_graph_num_nodes(self.__median_id)
+            do_fun = partial(_update_node_maps_parallel, self.__median_id, self.__epsilon, self.__sort_graphs, nb_nodes_median)
+            pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(self.__ged_env,))
+            if self.__print_to_stdout == 2:
+                iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize),
+                                desc='Updating node maps', file=sys.stdout)
+            else:
+                iterator = pool.imap_unordered(do_fun, itr, chunksize)
+            for g_id, node_map, nm_modified in iterator:
+                self.__node_maps_from_median[g_id] = node_map
+                if nm_modified:
+                    node_maps_were_modified = True
+            pool.close()
+            pool.join()
+            # yyy = self.__node_maps_from_median.copy()
+
+        else:
             # Print information about current iteration.
             if self.__print_to_stdout == 2:
-                progress.update(1)
-
-        # Print information about current iteration.
-        if self.__print_to_stdout == 2:
-            print('\n')
+                progress = tqdm(desc='Updating node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
+
+            node_maps_were_modified = False
+            nb_nodes_median = self.__ged_env.get_graph_num_nodes(self.__median_id)
+            for graph_id, node_map in self.__node_maps_from_median.items():
+                nb_nodes_g = self.__ged_env.get_graph_num_nodes(graph_id)
+                if nb_nodes_median <= nb_nodes_g or not self.__sort_graphs:
+                    self.__ged_env.run_method(self.__median_id, graph_id)
+                    if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < node_map.induced_cost() - self.__epsilon:
+                        # xxx = self.__node_maps_from_median[graph_id]
+                        self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id)
+                        node_maps_were_modified = True
+                else:
+                    self.__ged_env.run_method(graph_id, self.__median_id)
+                    if self.__ged_env.get_upper_bound(graph_id, self.__median_id) < node_map.induced_cost() - self.__epsilon:
+                        node_map_tmp = self.__ged_env.get_node_map(graph_id, self.__median_id)
+                        node_map_tmp.forward_map, node_map_tmp.backward_map = node_map_tmp.backward_map, node_map_tmp.forward_map
+                        self.__node_maps_from_median[graph_id] = node_map_tmp
+                        node_maps_were_modified = True
+
+                # Print information about current iteration.
+                if self.__print_to_stdout == 2:
+                    progress.update(1)
+
+            # Print information about current iteration.
+            if self.__print_to_stdout == 2:
+                print('\n')
 
         # Return true if the node maps were modified.
         return node_maps_were_modified
@@ -846,6 +1018,11 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
         if self.__print_to_stdout == 2:
             print('Trying to decrease order: ... ', end='')
 
+        if nx.number_of_nodes(median) <= 1:
+            if self.__print_to_stdout == 2:
+                print('median graph has only 1 node, skip decrease.')
+            return False
+
         # Initialize ID of the node that is to be deleted.
         id_deleted_node = [None] # @todo: or np.inf
         decreased_order = False
@@ -853,7 +1030,11 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
         # Decrease the order as long as the best deletion delta is negative.
         while self.__compute_best_deletion_delta(graphs, median, id_deleted_node) < -self.__epsilon:
             decreased_order = True
-            median = self.__delete_node_from_median(id_deleted_node[0], median)
+            self.__delete_node_from_median(id_deleted_node[0], median)
+            if nx.number_of_nodes(median) <= 1:
+                if self.__print_to_stdout == 2:
+                    print('decrease stopped because median graph remains only 1 node. ', end='')
+                break
 
         # Print information about current iteration.
         if self.__print_to_stdout == 2:
@@ -896,16 +1077,22 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
     def __delete_node_from_median(self, id_deleted_node, median):
         # Update the median.
+        mapping = {}
+        for i in range(0, nx.number_of_nodes(median)):
+            if i != id_deleted_node:
+                new_i = (i if i < id_deleted_node else (i - 1))
+                mapping[i] = new_i
         median.remove_node(id_deleted_node)
-        median = nx.convert_node_labels_to_integers(median, first_label=0, ordering='default', label_attribute=None) # @todo: This doesn't guarantee that the order is the same as in G.
+        nx.relabel_nodes(median, mapping, copy=False)
 
         # Update the node maps.
+        # xxx = self.__node_maps_from_median
         for key, node_map in self.__node_maps_from_median.items():
             new_node_map = NodeMap(nx.number_of_nodes(median), node_map.num_target_nodes())
             is_unassigned_target_node = [True] * node_map.num_target_nodes()
             for i in range(0, nx.number_of_nodes(median) + 1):
                 if i != id_deleted_node:
-                    new_i = (i if i < id_deleted_node else i - 1)
+                    new_i = (i if i < id_deleted_node else (i - 1))
                     k = node_map.image(i)
                     new_node_map.add_assignment(new_i, k)
                     if k != np.inf:
@@ -913,13 +1100,12 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
             for k in range(0, node_map.num_target_nodes()):
                 if is_unassigned_target_node[k]:
                     new_node_map.add_assignment(np.inf, k)
-            # print(new_node_map.get_forward_map(), new_node_map.get_backward_map())
+            # print(self.__node_maps_from_median[key].forward_map, self.__node_maps_from_median[key].backward_map)
+            # print(new_node_map.forward_map, new_node_map.backward_map)
             self.__node_maps_from_median[key] = new_node_map
 
         # Increase overall number of decreases.
         self.__num_decrease_order += 1
-        return median
 
     def __increase_order(self, graphs, median):
@@ -1115,10 +1301,22 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
                     continue
                 for label in median_labels:
                     weights[label_id] = min(weights[label_id], self.__ged_env.get_node_rel_cost(dict(label), dict(node_labels[label_id])))
-            selected_label_id = urng.choice(range(0, len(weights)), size=1, p=np.array(weights) / np.sum(weights))[0] # for c++ test: xxx[iii]
+
+            # get non-zero weights.
+            weights_p, idx_p = [], []
+            for i, w in enumerate(weights):
+                if w != 0:
+                    weights_p.append(w)
+                    idx_p.append(i)
+            if len(weights_p) > 0:
+                p = np.array(weights_p) / np.sum(weights_p)
+                selected_label_id = urng.choice(range(0, len(weights_p)), size=1, p=p)[0] # for c++ test: xxx[iii]
+                selected_label_id = idx_p[selected_label_id]
             # iii += 1 for c++ test
-            median_labels.append(node_labels[selected_label_id])
-            already_selected[selected_label_id] = True
+                median_labels.append(node_labels[selected_label_id])
+                already_selected[selected_label_id] = True
+            else: # skip the loop when all node_labels are selected. This happens when len(node_labels) <= self.__num_inits_increase_order.
+                break
         else:
             # Compute the initial node medians as the medians of randomly generated clusters of (roughly) equal size.
             # @todo: go through and test.
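Note: the replaced sampling line could crash once every label had been selected: with all-zero weights, np.array(weights) / np.sum(weights) is a 0/0 division producing NaNs, which choice() rejects as an invalid probability vector. The new code samples only over the strictly positive weights and maps the drawn index back to its original position. In miniature (values are placeholders):

import numpy as np

urng = np.random.RandomState(0)
weights = [0.0, 2.0, 0.0, 1.0]
idx_p = [i for i, w in enumerate(weights) if w != 0]
weights_p = [weights[i] for i in idx_p]
if len(weights_p) > 0:
    p = np.array(weights_p) / np.sum(weights_p)
    selected = idx_p[urng.choice(range(len(weights_p)), size=1, p=p)[0]]
    print(selected)  # 1 or 3; zero-weight indices can never be drawn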
@@ -1195,6 +1393,8 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
     def __update_node_label(self, node_labels, node_label):
+        if len(node_labels) == 0: # @todo: check if this is the correct solution. Especially after calling __update_config().
+            return False
         new_node_label = self.__get_median_node_label(node_labels)
         if self.__ged_env.get_node_rel_cost(new_node_label, node_label) > self.__epsilon:
             node_label.clear()
@@ -1225,7 +1425,8 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
     def __add_node_to_median(self, best_config, best_label, median):
         # Update the median.
-        median.add_node(nx.number_of_nodes(median), **best_label)
+        nb_nodes_median = nx.number_of_nodes(median)
+        median.add_node(nb_nodes_median, **best_label)
 
         # Update the node maps.
         for graph_id, node_map in self.__node_maps_from_median.items():
@@ -1239,47 +1440,6 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
         # Increase overall number of increases.
         self.__num_increase_order += 1
 
-    def __improve_sum_of_distances(self, timer):
-        pass
-
-    def __median_available(self):
-        return self.__median_id != np.inf
-
-    # def __get_node_image_from_map(self, node_map, node):
-    #     """
-    #     Return ID of the node mapping of `node` in `node_map`.
-    #
-    #     Parameters
-    #     ----------
-    #     node_map : list[tuple(int, int)]
-    #         List of node maps where the mapping node is found.
-    #
-    #     node : int
-    #         The mapping node of this node is returned
-    #
-    #     Raises
-    #     ------
-    #     Exception
-    #         If the node with ID `node` is not contained in the source nodes of the node map.
-    #
-    #     Returns
-    #     -------
-    #     int
-    #         ID of the mapping of `node`.
-    #
-    #     Notes
-    #     -----
-    #     This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function.
-    #     """
-    #     if node < len(node_map):
-    #         return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf
-    #     else:
-    #         raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.')
-    #     return np.inf
-
     def __are_graphs_equal(self, g1, g2):
@@ -1489,4 +1649,61 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
 #         median_label = {}
 #         for key, val in median.items():
 #             median_label[key] = str(val)
-#         return median_label
+#         return median_label
+
+
+def _compute_medoid_parallel(graph_ids, sort, itr):
+    g_id = itr[0]
+    i = itr[1]
+    # @todo: timer not considered here.
+    # if timer.expired():
+    #     self.__state = AlgorithmState.CALLED
+    #     break
+    nb_nodes_g = G_ged_env.get_graph_num_nodes(g_id)
+    sum_of_distances = 0
+    for h_id in graph_ids:
+        nb_nodes_h = G_ged_env.get_graph_num_nodes(h_id)
+        if nb_nodes_g <= nb_nodes_h or not sort:
+            G_ged_env.run_method(g_id, h_id)
+            sum_of_distances += G_ged_env.get_upper_bound(g_id, h_id)
+        else:
+            G_ged_env.run_method(h_id, g_id)
+            sum_of_distances += G_ged_env.get_upper_bound(h_id, g_id)
+    return i, sum_of_distances
+
+
+def _compute_init_node_maps_parallel(gen_median_id, sort, nb_nodes_median, itr):
+    graph_id = itr
+    nb_nodes_g = G_ged_env.get_graph_num_nodes(graph_id)
+    if nb_nodes_median <= nb_nodes_g or not sort:
+        G_ged_env.run_method(gen_median_id, graph_id)
+        node_map = G_ged_env.get_node_map(gen_median_id, graph_id)
+        # print(self.__node_maps_from_median[graph_id])
+    else:
+        G_ged_env.run_method(graph_id, gen_median_id)
+        node_map = G_ged_env.get_node_map(graph_id, gen_median_id)
+        node_map.forward_map, node_map.backward_map = node_map.backward_map, node_map.forward_map
+    sum_of_distance = node_map.induced_cost()
+    # print(self.__sum_of_distances)
+    return graph_id, sum_of_distance, node_map
+
+
+def _update_node_maps_parallel(median_id, epsilon, sort, nb_nodes_median, itr):
+    graph_id = itr[0]
+    node_map = itr[1]
+    node_maps_were_modified = False
+    nb_nodes_g = G_ged_env.get_graph_num_nodes(graph_id)
+    if nb_nodes_median <= nb_nodes_g or not sort:
+        G_ged_env.run_method(median_id, graph_id)
+        if G_ged_env.get_upper_bound(median_id, graph_id) < node_map.induced_cost() - epsilon:
+            node_map = G_ged_env.get_node_map(median_id, graph_id)
+            node_maps_were_modified = True
+    else:
+        G_ged_env.run_method(graph_id, median_id)
+        if G_ged_env.get_upper_bound(graph_id, median_id) < node_map.induced_cost() - epsilon:
+            node_map = G_ged_env.get_node_map(graph_id, median_id)
+            node_map.forward_map, node_map.backward_map = node_map.backward_map, node_map.forward_map
+            node_maps_were_modified = True
+    return graph_id, node_map, node_maps_were_modified
@@ -53,7 +53,7 @@ def test_median_graph_estimator():
     mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
 
     mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
-    mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO ' # @todo: std::to_string(rng())
+    mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO --parallel TRUE ' # @todo: std::to_string(rng())
 
     # Select the GED algorithm.
     algo_options = '--threads ' + str(threads) + algo_options_suffix
@@ -127,7 +127,7 @@ def test_median_graph_estimator_symb():
     mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
 
     mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
-    mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE' # @todo: std::to_string(rng())
+    mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --update-order TRUE --refine FALSE --randomness PSEUDO --parallel TRUE ' # @todo: std::to_string(rng())
 
     # Select the GED algorithm.
     algo_options = '--threads ' + str(threads) + algo_options_suffix
@@ -155,5 +155,5 @@
 
 if __name__ == '__main__':
-    set_median, gen_median = test_median_graph_estimator()
-    # set_median, gen_median = test_median_graph_estimator_symb()
+    # set_median, gen_median = test_median_graph_estimator()
+    set_median, gen_median = test_median_graph_estimator_symb()
@@ -30,8 +30,12 @@ def mge_options_to_string(options):
             opt_str += '--randomness ' + str(val) + ' '
         elif key == 'verbose':
             opt_str += '--stdout ' + str(val) + ' '
+        elif key == 'parallel':
+            opt_str += '--parallel ' + ('TRUE' if val else 'FALSE') + ' '
         elif key == 'update_order':
             opt_str += '--update-order ' + ('TRUE' if val else 'FALSE') + ' '
+        elif key == 'sort_graphs':
+            opt_str += '--sort-graphs ' + ('TRUE' if val else 'FALSE') + ' '
        elif key == 'refine':
             opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' '
         elif key == 'time_limit':
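Note: the two new keys follow the function's existing dict-to-CLI convention; booleans are rendered as the TRUE/FALSE tokens that MedianGraphEstimator.set_options parses above. For example (assuming opt_str starts empty and relying on dict insertion order):

options = {'verbose': 2, 'parallel': True, 'sort_graphs': False}
mge_options_to_string(options)
# -> '--stdout 2 --parallel TRUE --sort-graphs FALSE '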
@@ -46,7 +46,7 @@ def compute_ged(g1, g2, options):
     return dis, pi_forward, pi_backward
 
-def compute_geds(graphs, options={}, parallel=False):
+def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True):
     # initialize ged env.
     ged_env = gedlibpy.GEDEnv()
     ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
@@ -54,6 +54,8 @@ def compute_geds(graphs, options={}, parallel=False):
         ged_env.add_nx_graph(g, '')
     listID = ged_env.get_all_graph_ids()
     ged_env.init()
+    if parallel:
+        options['threads'] = 1
     ged_env.set_method(options['method'], ged_options_to_string(options))
     ged_env.init_method()
@@ -77,10 +79,13 @@ def compute_geds(graphs, options={}, parallel=False):
         G_graphs = graphs_toshare
         G_ged_env = ged_env_toshare
         G_listID = listID_toshare
-    do_partial = partial(_wrapper_compute_ged_parallel, neo_options)
+    do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
     pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
-    iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
+    if verbose:
+        iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                         desc='computing GEDs', file=sys.stdout)
+    else:
+        iterator = pool.imap_unordered(do_partial, itr, chunksize)
     # iterator = pool.imap_unordered(do_partial, itr, chunksize)
     for i, j, dis, n_eo_tmp in iterator:
         idx_itr = int(len(graphs) * i + j - (i + 1) * (i + 2) / 2)
@@ -96,28 +101,38 @@ def compute_geds(graphs, options={}, parallel=False):
     else:
         ged_vec = []
         n_edit_operations = []
-        for i in tqdm(range(len(graphs)), desc='computing GEDs', file=sys.stdout):
+        if verbose:
+            iterator = tqdm(range(len(graphs)), desc='computing GEDs', file=sys.stdout)
+        else:
+            iterator = range(len(graphs))
+        for i in iterator:
             # for i in range(len(graphs)):
             for j in range(i + 1, len(graphs)):
-                dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
+                if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
+                    dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
+                else:
+                    dis, pi_backward, pi_forward = _compute_ged(ged_env, listID[j], listID[i], graphs[j], graphs[i])
                 ged_vec.append(dis)
                 ged_mat[i][j] = dis
                 ged_mat[j][i] = dis
-                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
+                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
                 n_edit_operations.append(n_eo_tmp)
 
     return ged_vec, ged_mat, n_edit_operations
 
-def _wrapper_compute_ged_parallel(options, itr):
+def _wrapper_compute_ged_parallel(options, sort, itr):
     i = itr[0]
     j = itr[1]
-    dis, n_eo_tmp = _compute_ged_parallel(G_ged_env, G_listID[i], G_listID[j], G_graphs[i], G_graphs[j], options)
+    dis, n_eo_tmp = _compute_ged_parallel(G_ged_env, G_listID[i], G_listID[j], G_graphs[i], G_graphs[j], options, sort)
     return i, j, dis, n_eo_tmp
 
-def _compute_ged_parallel(env, gid1, gid2, g1, g2, options):
-    dis, pi_forward, pi_backward = _compute_ged(env, gid1, gid2, g1, g2)
+def _compute_ged_parallel(env, gid1, gid2, g1, g2, options, sort):
+    if nx.number_of_nodes(g1) <= nx.number_of_nodes(g2) or not sort:
+        dis, pi_forward, pi_backward = _compute_ged(env, gid1, gid2, g1, g2)
+    else:
+        dis, pi_backward, pi_forward = _compute_ged(env, gid2, gid1, g2, g1)
     n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, **options) # [0,0,0,0,0,0]
     return dis, n_eo_tmp
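Note on the extended compute_geds signature: sort=True applies the smaller-graph-first trick per pair (swapping pi_forward/pi_backward when the direction is reversed), verbose gates the tqdm bar, and parallel=True forces the per-GED threads option to 1 so the worker processes do not oversubscribe the CPU. A hedged usage sketch; the import path, edit-cost name, and method name are assumptions, not prescribed by this diff:

import networkx as nx
from gklearn.preimage.utils import compute_geds  # assumed import path

g1 = nx.path_graph(3)   # toy graphs; real callers pass labeled datasets
g2 = nx.cycle_graph(4)
options = {'edit_cost': 'CONSTANT',                  # assumed GEDLIB edit cost
           'edit_cost_constants': [4, 4, 2, 1, 1, 1],  # placeholder constants
           'method': 'IPFP',                         # assumed GED heuristic
           'threads': 8}                             # reset to 1 when parallel=True
ged_vec, ged_mat, n_edit_ops = compute_geds([g1, g2], options=options,
                                            sort=True, parallel=False, verbose=False)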
@@ -112,6 +112,7 @@ cdef extern from "src/GedLibBind.hpp" namespace "pyged":
 ##CYTHON WRAPPER INTERFACES##
 #############################
 
+# import cython
 import numpy as np
 import networkx as nx
 from gklearn.ged.env import NodeMap
@@ -177,14 +178,16 @@ def get_dummy_node() :
     return getDummyNode()
 
+# @cython.auto_pickle(True)
 cdef class GEDEnv:
     """Cython wrapper class for C++ class PyGEDEnv
     """
-    # cdef PyGEDEnv c_env # Hold a C++ instance which we're wrapping
+    # cdef PyGEDEnv c_env # Hold a C++ instance which we're wrapping
     cdef PyGEDEnv* c_env # hold a pointer to the C++ instance which we're wrapping
 
     def __cinit__(self):
+        # self.c_env = PyGEDEnv()
         self.c_env = new PyGEDEnv()
@@ -192,6 +195,11 @@ cdef class GEDEnv:
         del self.c_env
 
+    # def __reduce__(self):
+    #     # return GEDEnv, (self.c_env,)
+    #     return GEDEnv, tuple()
+
     def is_initialized(self) :
         """
         Checks and returns if the computation environment is initialized or not.
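Note: the commented auto_pickle/__reduce__ lines document an open problem behind the --parallel code paths above: Pool(..., initargs=(self.__ged_env,)) must get the environment into the workers, and a cdef class wrapping a raw C++ pointer is not picklable by default. Under the fork start method (the Linux default) child processes inherit the object without pickling, so the pattern works; under spawn (Windows, and macOS since Python 3.8) initargs are pickled and this would need a real __reduce__. A quick check, standard library only:

import multiprocessing
print(multiprocessing.get_start_method())  # 'fork' is needed for sharing the unpicklable env via initargs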
@@ -67,6 +67,9 @@ class GraphKernel(object):
     def normalize_gm(self, gram_matrix):
+        import warnings
+        warnings.warn('gklearn.kernels.graph_kernel.normalize_gm will be deprecated, use gklearn.utils.normalize_gram_matrix instead', DeprecationWarning)
+
         diag = gram_matrix.diagonal().copy()
         for i in range(len(gram_matrix)):
             for j in range(i, len(gram_matrix)):
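Note: DeprecationWarning is ignored by Python's default warning filters outside of __main__ code and test runners, so callers who want to see the notice added above need an explicit filter:

import warnings
warnings.simplefilter('always', DeprecationWarning)  # surface deprecation notices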
@@ -12,3 +12,4 @@ __date__ = "March 2020"
 from gklearn.preimage.preimage_generator import PreimageGenerator
 from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator
+from gklearn.preimage.kernel_knn_cv import kernel_knn_cv
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Apr 14 16:57:18 2020
+
+@author: ljia
+"""
+import pandas as pd
+import numpy as np
+import os
+import math
+
+
+def summarize_results_of_random_edit_costs(data_dir, ds_name, gkernel):
+    sod_sm_list = []
+    sod_gm_list = []
+    dis_k_sm_list = []
+    dis_k_gm_list = []
+    dis_k_min_gi = []
+    time_total_list = []
+    mge_dec_order_list = []
+    mge_inc_order_list = []
+
+    # get results from .csv.
+    file_name = data_dir + 'results_summary.' + ds_name + '.' + gkernel + '.csv'
+    try:
+        df = pd.read_csv(file_name)
+    except FileNotFoundError:
+        return
+    for index, row in df.iterrows():
+        if row['target'] == 'all' and row['fit method'] == 'random':
+            if not math.isnan(float(row['SOD SM'])):
+                sod_sm_list.append(float(row['SOD SM']))
+            if not math.isnan(float(row['SOD GM'])):
+                sod_gm_list.append(float(row['SOD GM']))
+            if not math.isnan(float(row['dis_k SM'])):
+                dis_k_sm_list.append(float(row['dis_k SM']))
+            if not math.isnan(float(row['dis_k GM'])):
+                dis_k_gm_list.append(float(row['dis_k GM']))
+            if not math.isnan(float(row['min dis_k gi'])):
+                dis_k_min_gi.append(float(row['min dis_k gi']))
+            if not math.isnan(float(row['time total'])):
+                time_total_list.append(float(row['time total']))
+            if 'mge num decrease order' in row:
+                mge_dec_order_list.append(int(row['mge num decrease order']))
+            if 'mge num increase order' in row:
+                mge_inc_order_list.append(int(row['mge num increase order']))
+
+    # return if no results.
+    if len(sod_sm_list) == 0:
+        return
+
+    # construct output results.
+    op = {}
+    op['measure'] = ['max', 'min', 'mean']
+    op['SOD SM'] = [np.max(sod_sm_list), np.min(sod_sm_list), np.mean(sod_sm_list)]
+    op['SOD GM'] = [np.max(sod_gm_list), np.min(sod_gm_list), np.mean(sod_gm_list)]
+    op['dis_k SM'] = [np.max(dis_k_sm_list), np.min(dis_k_sm_list), np.mean(dis_k_sm_list)]
+    op['dis_k GM'] = [np.max(dis_k_gm_list), np.min(dis_k_gm_list), np.mean(dis_k_gm_list)]
+    op['min dis_k gi'] = [np.max(dis_k_min_gi), np.min(dis_k_min_gi), np.mean(dis_k_min_gi)]
+    op['time total'] = [np.max(time_total_list), np.min(time_total_list), np.mean(time_total_list)]
+    if len(mge_dec_order_list) > 0:
+        op['mge num decrease order'] = [np.max(mge_dec_order_list), np.min(mge_dec_order_list), np.mean(mge_dec_order_list)]
+    if len(mge_inc_order_list) > 0:
+        op['mge num increase order'] = [np.max(mge_inc_order_list), np.min(mge_inc_order_list), np.mean(mge_inc_order_list)]
+    df = pd.DataFrame(data=op)
+
+    # write results to .csv
+    df.to_csv(data_dir + 'summary_for_random_edit_costs.csv', index=False, header=True)
+
+
+def compute_for_all_experiments(data_dir):
+    dir_list = [i for i in os.listdir(data_dir) if os.path.isdir(data_dir + i)]
+    for dir_name in dir_list:
+        sp_tmp = dir_name.split('.')
+        ds_name = sp_tmp[0].strip('[error]')
+        gkernel = sp_tmp[1]
+        summarize_results_of_random_edit_costs(data_dir + dir_name + '/',
+                                               ds_name, gkernel)
+        if os.path.exists(data_dir + dir_name + '/update_order/'):
+            summarize_results_of_random_edit_costs(data_dir + dir_name + '/update_order/',
+                                                   ds_name, gkernel)
+
+
+if __name__ == '__main__':
+    # data_dir = '../results/xp_median_preimage.update_order/'
+    root_dir_tnz = '../../results/CRIANN/xp_median_preimage.init10/'
+    root_dir_ntnz = '../../results/CRIANN/xp_median_preimage.init10.no_triangle_rule/'
+    root_dir_tz = '../../results/CRIANN/xp_median_preimage.init10.triangle_rule.allow_zeros/'
+    root_dir_ntz = '../../results/CRIANN/xp_median_preimage.init10.no_triangle_rule.allow_zeros/'
+    data_dirs = [root_dir_tnz, root_dir_ntnz, root_dir_tz, root_dir_ntz]
+    for data_dir in data_dirs:
+        compute_for_all_experiments(data_dir)
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Apr 30 10:16:33 2020
+
+@author: ljia
+"""
+import pandas as pd
+import numpy as np
+import os
+
+DS_SYMB = ['MUTAG', 'Monoterpenoides', 'MAO_symb']
+DS_NON_SYMB = ['Letter-high', 'Letter-med', 'Letter-low', 'COIL-RAG', 'PAH']
+DS_UNLABELED = ['PAH_unlabeled']
+
+
+def rounder(x, decimals):
+    x_strs = str(x).split('.')
+    if len(x_strs) == 2:
+        before = x_strs[0]
+        after = x_strs[1]
+        if len(after) > decimals:
+            if int(after[decimals]) >= 5:
+                after0s = ''
+                for c in after:
+                    if c == '0':
+                        after0s += '0'
+                    elif c != '0':
+                        break
+                after = after0s + str(int(after[0:decimals]) + 1)[-decimals:]
+            else:
+                after = after[0:decimals]
+        elif len(after) < decimals:
+            after += '0' * (decimals - len(after))
+        return before + '.' + after
+    elif len(x_strs) == 1:
+        return x_strs[0]
+
+
+def replace_nth(string, sub, wanted, n):
+    import re
+    where = [m.start() for m in re.finditer(sub, string)][n - 1]
+    before = string[:where]
+    after = string[where:]
+    after = after.replace(sub, wanted, 1)
+    newString = before + after
+    return newString
+
+
+def df_to_latex_table(df):
+    ltx = df.to_latex(index=True, escape=False, multirow=True)
+
+    # modify middle lines.
+    ltx = ltx.replace('\\cline{1-9}\n\\cline{2-9}', '\\toprule')
+    ltx = ltx.replace('\\cline{2-9}', '\\cmidrule(l){2-9}')
+
+    # modify first row.
+    i_start = ltx.find('\n\\toprule\n')
+    i_end = ltx.find('\\\\\n\\midrule\n')
+    ltx = ltx.replace(ltx[i_start:i_end+12], '\n\\toprule\nDatasets & Graph Kernels & Algorithms & $d_\\mathcal{F}$ SM & $d_\\mathcal{F}$ SM (UO) & $d_\\mathcal{F}$ GM & $d_\\mathcal{F}$ GM (UO) & Runtime & Runtime (UO) \\\\\n\\midrule\n', 1)
+
+    # add row numbers.
+    ltx = ltx.replace('lllllllll', 'lllllllll|@{\\makebox[2em][r]{\\textit{\\rownumber\\space}}}', 1)
+    ltx = replace_nth(ltx, '\\\\\n', '\\gdef\\rownumber{\\stepcounter{magicrownumbers}\\arabic{magicrownumbers}} \\\\\n', 1)
+
+    return ltx
+
+
+def beautify_df(df):
+    df = df.sort_values(by=['Datasets', 'Graph Kernels'])
+    df = df.set_index(['Datasets', 'Graph Kernels', 'Algorithms'])
+    # index = pd.MultiIndex.from_frame(df[['Datasets', 'Graph Kernels', 'Algorithms']])
+
+    # bold the best results.
+    for ds in df.index.get_level_values('Datasets').unique():
+        for gk in df.loc[ds].index.get_level_values('Graph Kernels').unique():
+            min_val = np.inf
+            min_indices = []
+            min_labels = []
+            for index, row in df.loc[(ds, gk)].iterrows():
+                for label in ['$d_\mathcal{F}$ SM', '$d_\mathcal{F}$ GM', '$d_\mathcal{F}$ GM (UO)']:
+                    value = row[label]
+                    if value != '-':
+                        value = float(value.strip('/same'))
+                        if value < min_val:
+                            min_val = value
+                            min_indices = [index]
+                            min_labels = [label]
+                        elif value == min_val:
+                            min_indices.append(index)
+                            min_labels.append(label)
+            for idx, index in enumerate(min_indices):
+                df.loc[(ds, gk, index), min_labels[idx]] = '\\textbf{' + df.loc[(ds, gk, index), min_labels[idx]] + '}'
+
+    return df
+
+
+def get_results(data_dir, ds_name, gkernel):
+    # get results from .csv.
+    file_name = data_dir + 'results_summary.' + ds_name + '.' + gkernel + '.csv'
+    try:
+        df_summary = pd.read_csv(file_name)
+    except FileNotFoundError:
+        return None
+
+    df_results = pd.DataFrame(index=None, columns=['d_F SM', 'd_F GM', 'runtime'])
+    for index, row in df_summary.iterrows():
+        if row['target'] == 'all' and row['fit method'] == 'k-graphs':
+            df_results.loc['From median set'] = ['-', rounder(row['min dis_k gi'], 3), '-']
+            if_uo = (int(row['mge num decrease order']) > 0 or int(row['mge num increase order']) > 0)
+            df_results.loc['Optimized'] = [rounder(row['dis_k SM'], 3),
+                                           rounder(row['dis_k GM'], 3) if if_uo else (rounder(row['dis_k GM'], 3) + '/same'),
+                                           rounder(row['time total'], 2)]
+        if row['target'] == 'all' and row['fit method'] == 'expert':
+            if_uo = (int(row['mge num decrease order']) > 0 or int(row['mge num increase order']) > 0)
+            df_results.loc['IAM: expert costs'] = [rounder(row['dis_k SM'], 3),
+                                                   rounder(row['dis_k GM'], 3) if if_uo else (rounder(row['dis_k GM'], 3) + '/same'),
+                                                   rounder(row['time total'], 2)]
+
+    # get results from random summary .csv.
+    random_fini = True
+    file_name = data_dir + 'summary_for_random_edit_costs.csv'
+    try:
+        df_random = pd.read_csv(file_name)
+    except FileNotFoundError:
+        random_fini = False
+    if random_fini:
+        for index, row in df_random.iterrows():
+            if row['measure'] == 'mean':
+                if_uo = (float(row['mge num decrease order']) > 0 or float(row['mge num increase order']) > 0)
+                df_results.loc['IAM: random costs'] = [rounder(row['dis_k SM'], 3),
+                                                       rounder(row['dis_k GM'], 3) if if_uo else (rounder(row['dis_k GM'], 3) + '/same'),
+                                                       rounder(row['time total'], 2)]
+
+    # sort index.
+    df_results = df_results.reindex([item for item in ['From median set', 'IAM: random costs', 'IAM: expert costs', 'Optimized'] if item in df_results.index])
+
+    return df_results
+
+
+def get_results_of_one_xp(data_dir, ds_name, gkernel):
+    df_results = pd.DataFrame()
+
+    df_tmp_uo = None
+    if not os.path.isfile(data_dir + 'update_order/error.txt'):
+        df_tmp_uo = get_results(data_dir + 'update_order/', ds_name, gkernel)
+    df_tmp = None
+    if not os.path.isfile(data_dir + 'error.txt'):
+        df_tmp = get_results(data_dir, ds_name, gkernel)
+
+    if (df_tmp_uo is not None and not df_tmp_uo.empty) or (df_tmp is not None and not df_tmp.empty):
df_results = pd.DataFrame(index=['From median set', 'IAM: random costs', 'IAM: expert costs', 'Optimized'], columns=['$d_\mathcal{F}$ SM', '$d_\mathcal{F}$ SM (UO)', '$d_\mathcal{F}$ GM', '$d_\mathcal{F}$ GM (UO)', 'Runtime', 'Runtime (UO)']) | |||||
if df_tmp_uo is not None and not df_tmp_uo.empty: | |||||
for index, row in df_tmp_uo.iterrows(): | |||||
for algo in df_results.index: | |||||
if index == algo: | |||||
df_results.at[algo, '$d_\mathcal{F}$ SM (UO)'] = row['d_F SM'] | |||||
df_results.at[algo, '$d_\mathcal{F}$ GM (UO)'] = row['d_F GM'] | |||||
df_results.at[algo, 'Runtime (UO)'] = row['runtime'] | |||||
if df_tmp is not None and not df_tmp.empty: | |||||
for index, row in df_tmp.iterrows(): | |||||
for algo in df_results.index: | |||||
if index == algo: | |||||
df_results.at[algo, '$d_\mathcal{F}$ SM'] = row['d_F SM'] | |||||
df_results.at[algo, '$d_\mathcal{F}$ GM'] = row['d_F GM'].strip('/same') | |||||
df_results.at[algo, 'Runtime'] = row['runtime'] | |||||
df_results = df_results.dropna(axis=0, how='all') | |||||
df_results = df_results.fillna(value='-') | |||||
df_results = df_results.reset_index().rename(columns={'index': 'Algorithms'}) | |||||
return df_results | |||||
def get_results_for_all_experiments(root_dir): | |||||
columns=['Datasets', 'Graph Kernels', 'Algorithms', '$d_\mathcal{F}$ SM', '$d_\mathcal{F}$ SM (UO)', '$d_\mathcal{F}$ GM', '$d_\mathcal{F}$ GM (UO)', 'Runtime', 'Runtime (UO)'] | |||||
df_symb = pd.DataFrame(columns=columns) | |||||
df_nonsymb = pd.DataFrame(columns=columns) | |||||
df_unlabeled = pd.DataFrame(columns=columns) | |||||
dir_list = [i for i in os.listdir(root_dir) if os.path.isdir(root_dir + i)] | |||||
for dir_name in dir_list: | |||||
sp_tmp = dir_name.split('.') | |||||
gkernel = sp_tmp[1] | |||||
ds_name = sp_tmp[0].strip('[error]') | |||||
suffix = '' | |||||
if sp_tmp[-1] == 'unlabeled': | |||||
suffix = '_unlabeled' | |||||
elif sp_tmp[-1] == 'symb': | |||||
suffix = '_symb' | |||||
df_results = get_results_of_one_xp(root_dir + dir_name + '/', ds_name, gkernel) | |||||
if not df_results.empty: | |||||
ds_name += suffix | |||||
if ds_name in DS_SYMB: | |||||
for index, row in df_results.iterrows(): | |||||
df_symb.loc[len(df_symb)] = [ds_name.replace('_', '\_'), gkernel] + row.tolist() | |||||
elif ds_name in DS_NON_SYMB: | |||||
for index, row in df_results.iterrows(): | |||||
df_nonsymb.loc[len(df_nonsymb)] = [ds_name.replace('_', '\_'), gkernel] + row.tolist() | |||||
elif ds_name in DS_UNLABELED: | |||||
for index, row in df_results.iterrows(): | |||||
df_unlabeled.loc[len(df_unlabeled)] = [ds_name.replace('_', '\_'), gkernel] + row.tolist() | |||||
else: | |||||
raise Exception('dataset ' + ds_name + ' is not pre-defined.')
# sort. | |||||
df_symb = beautify_df(df_symb) | |||||
df_nonsymb = beautify_df(df_nonsymb) | |||||
df_unlabeled = beautify_df(df_unlabeled) | |||||
# convert dfs to latex strings. | |||||
ltx_symb = df_to_latex_table(df_symb) | |||||
ltx_nonsymb = df_to_latex_table(df_nonsymb) | |||||
ltx_unlabeled = df_to_latex_table(df_unlabeled) | |||||
return ltx_symb, ltx_nonsymb, ltx_unlabeled | |||||
if __name__ == '__main__': | |||||
# root_dir = '../results/xp_median_preimage.init20/' | |||||
root_dir = '../../results/CRIANN/xp_median_preimage.init10/' | |||||
ltx_symb, ltx_nonsymb, ltx_unlabeled = get_results_for_all_experiments(root_dir) |
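# Note: the LaTeX strings are only returned here, not written to disk; print
# them or save them as needed, e.g. open('tables_symb.tex', 'w').write(ltx_symb)
# (file name illustrative).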
@@ -0,0 +1,418 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Tue May 12 12:52:15 2020 | |||||
@author: ljia | |||||
""" | |||||
import numpy as np | |||||
import csv | |||||
import os | |||||
import os.path | |||||
from gklearn.utils import Dataset | |||||
from sklearn.model_selection import ShuffleSplit | |||||
from gklearn.preimage import MedianPreimageGenerator | |||||
from gklearn.utils import normalize_gram_matrix, compute_distance_matrix | |||||
from gklearn.preimage.utils import get_same_item_indices | |||||
from gklearn.utils.knn import knn_classification | |||||
from gklearn.preimage.utils import compute_k_dis | |||||
def kernel_knn_cv(ds_name, train_examples, knn_options, mpg_options, kernel_options, ged_options, mge_options, save_results=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None): | |||||
# 1. get dataset. | |||||
print('1. getting dataset...') | |||||
dataset_all = Dataset() | |||||
dataset_all.load_predefined_dataset(ds_name) | |||||
dataset_all.trim_dataset(edge_required=edge_required) | |||||
if irrelevant_labels is not None: | |||||
dataset_all.remove_labels(**irrelevant_labels) | |||||
if cut_range is not None: | |||||
dataset_all.cut_graphs(cut_range) | |||||
if save_results: | |||||
# create result files. | |||||
print('creating output files...') | |||||
fn_output_detail, fn_output_summary = __init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||||
else: | |||||
fn_output_detail, fn_output_summary = None, None | |||||
# 2. compute/load Gram matrix a priori. | |||||
print('2. computing/loading Gram matrix...') | |||||
gram_matrix_unnorm, time_precompute_gm = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all) | |||||
# 3. perform k-nn CV. | |||||
print('3. performing k-nn CV...') | |||||
if train_examples in ('k-graphs', 'expert', 'random'):
__kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) | |||||
elif train_examples == 'best-dataset': | |||||
__kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) | |||||
elif train_examples == 'trainset': | |||||
__kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) | |||||
print('\ncomplete.\n') | |||||
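# Illustrative call (all option values below are assumptions for demonstration,
# not prescribed by this module):
# kernel_knn_cv('MUTAG', 'trainset',
#               knn_options={'n_neighbors': 1, 'n_splits': 30, 'test_size': 0.1},
#               mpg_options={'fit_method': 'k-graphs'},
#               kernel_options={'name': 'ShortestPath'},
#               ged_options={}, mge_options={},
#               save_results=True, dir_save='outputs/')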
def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): | |||||
Gn = dataset_all.graphs | |||||
y_all = dataset_all.targets | |||||
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] | |||||
# get shuffles. | |||||
train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size) | |||||
accuracies = [[], [], []] | |||||
for trial in range(len(train_indices)): | |||||
print('\ntrial =', trial) | |||||
train_index = train_indices[trial] | |||||
test_index = test_indices[trial] | |||||
G_app = [Gn[i] for i in train_index] | |||||
G_test = [Gn[i] for i in test_index] | |||||
y_test = [y_all[i] for i in test_index] | |||||
gm_unnorm_trial = gram_matrix_unnorm[train_index,:][:,train_index].copy() | |||||
# compute pre-images for each class. | |||||
medians = [[], [], []] | |||||
train_nums_tmp = [0] + train_nums | |||||
print('\ncomputing pre-image for each class...\n') | |||||
for i_class in range(len(train_nums_tmp) - 1): | |||||
print(i_class + 1, 'of', len(train_nums_tmp) - 1, 'classes:') | |||||
i_start = int(np.sum(train_nums_tmp[0:i_class + 1])) | |||||
i_end = i_start + train_nums_tmp[i_class + 1] | |||||
median_set = G_app[i_start:i_end] | |||||
dataset = dataset_all.copy() | |||||
dataset.load_graphs([g.copy() for g in median_set], targets=None) | |||||
mge_options['update_order'] = True | |||||
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy() | |||||
mpg_options['runtime_precompute_gm'] = 0 | |||||
set_median, gen_median_uo = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options) | |||||
mge_options['update_order'] = False | |||||
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy() | |||||
mpg_options['runtime_precompute_gm'] = 0 | |||||
_, gen_median = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options) | |||||
medians[0].append(set_median) | |||||
medians[1].append(gen_median) | |||||
medians[2].append(gen_median_uo) | |||||
# for each set of medians. | |||||
print('\nperforming k-nn...') | |||||
for i_app, G_meds in enumerate(medians): # do not shadow G_app (the training graphs).
# compute dis_mat between medians.
dataset = dataset_all.copy()
dataset.load_graphs([g.copy() for g in G_meds], targets=None)
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
# compute the entire Gram matrix.
graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy())
kernels_to_medians = []
for g in G_meds:
kernels_to_median, _ = graph_kernel.compute(g, G_test, **kernel_options.copy())
kernels_to_medians.append(kernels_to_median)
kernels_to_medians = np.array(kernels_to_medians)
gm_all = np.concatenate((gm_app_unnorm, kernels_to_medians), axis=1)
gm_all = np.concatenate((gm_all, np.concatenate((kernels_to_medians.T, gram_matrix_unnorm[test_index,:][:,test_index].copy()), axis=1)), axis=0)
gm_all = normalize_gram_matrix(gm_all.copy())
dis_mat, _, _, _ = compute_distance_matrix(gm_all)
N = len(G_meds)
d_app = dis_mat[range(N),:][:,range(N)].copy()
d_test = np.zeros((N, len(test_index)))
for i in range(N):
for j in range(len(test_index)):
d_test[i, j] = dis_mat[i, N + j] # the test columns start at offset N in gm_all.
accuracies[i_app].append(knn_classification(d_app, d_test, y_app, y_test, n_neighbors, verbose=True, text=train_examples)) | |||||
# write result detail. | |||||
if save_results: | |||||
f_detail = open(dir_save + fn_output_detail, 'a') | |||||
print('writing results to files...') | |||||
for i, median_type in enumerate(['set-median', 'gen median', 'gen median uo']): | |||||
csv.writer(f_detail).writerow([ds_name, kernel_options['name'], | |||||
train_examples + ': ' + median_type, trial, | |||||
knn_options['n_neighbors'], | |||||
len(gm_all), knn_options['test_size'], | |||||
accuracies[i][-1][0], accuracies[i][-1][1]]) | |||||
f_detail.close() | |||||
results = {} | |||||
results['ave_perf_train'] = [np.mean([i[0] for i in j], axis=0) for j in accuracies] | |||||
results['std_perf_train'] = [np.std([i[0] for i in j], axis=0, ddof=1) for j in accuracies] | |||||
results['ave_perf_test'] = [np.mean([i[1] for i in j], axis=0) for j in accuracies] | |||||
results['std_perf_test'] = [np.std([i[1] for i in j], axis=0, ddof=1) for j in accuracies] | |||||
# write result summary over all trials.
if save_results: | |||||
f_summary = open(dir_save + fn_output_summary, 'a') | |||||
for i, median_type in enumerate(['set-median', 'gen median', 'gen median uo']): | |||||
csv.writer(f_summary).writerow([ds_name, kernel_options['name'], | |||||
train_examples + ': ' + median_type, | |||||
knn_options['n_neighbors'], | |||||
knn_options['test_size'], results['ave_perf_train'][i], | |||||
results['ave_perf_test'][i], results['std_perf_train'][i], | |||||
results['std_perf_test'][i], time_precompute_gm]) | |||||
f_summary.close() | |||||
def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): | |||||
Gn = dataset_all.graphs | |||||
y_all = dataset_all.targets | |||||
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] | |||||
# get shuffles. | |||||
train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size) | |||||
accuracies = [] | |||||
for trial in range(len(train_indices)): | |||||
print('\ntrial =', trial) | |||||
train_index = train_indices[trial] | |||||
test_index = test_indices[trial] | |||||
G_app = [Gn[i] for i in train_index] | |||||
G_test = [Gn[i] for i in test_index] | |||||
y_test = [y_all[i] for i in test_index] | |||||
gm_unnorm_trial = gram_matrix_unnorm[train_index,:][:,train_index].copy() | |||||
# for each class, pick the train graph closest to the class mean in kernel space.
best_graphs = [] | |||||
train_nums_tmp = [0] + train_nums | |||||
print('\ngetting best graph from trainset for each class...') | |||||
for i_class in range(len(train_nums_tmp) - 1): | |||||
print(i_class + 1, 'of', len(train_nums_tmp) - 1, 'classes.') | |||||
i_start = int(np.sum(train_nums_tmp[0:i_class + 1])) | |||||
i_end = i_start + train_nums_tmp[i_class + 1] | |||||
G_class = G_app[i_start:i_end] | |||||
gm_unnorm_class = gm_unnorm_trial[i_start:i_end,i_start:i_end] | |||||
gm_class = normalize_gram_matrix(gm_unnorm_class.copy()) | |||||
k_dis_list = [] | |||||
for idx in range(len(G_class)): | |||||
k_dis_list.append(compute_k_dis(idx, range(0, len(G_class)), [1 / len(G_class)] * len(G_class), gm_class, withterm3=False)) | |||||
idx_k_dis_min = np.argmin(k_dis_list) | |||||
best_graphs.append(G_class[idx_k_dis_min].copy()) | |||||
# perform k-nn. | |||||
print('\nperforming k-nn...') | |||||
# compute dis_mat between best graphs.
dataset = dataset_all.copy() | |||||
dataset.load_graphs([g.copy() for g in best_graphs], targets=None) | |||||
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy()) | |||||
# compute the entire Gram matrix. | |||||
graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy()) | |||||
kernels_to_best_graphs = [] | |||||
for g in best_graphs: | |||||
kernels_to_best_graph, _ = graph_kernel.compute(g, G_test, **kernel_options.copy()) | |||||
kernels_to_best_graphs.append(kernels_to_best_graph) | |||||
kernels_to_best_graphs = np.array(kernels_to_best_graphs) | |||||
gm_all = np.concatenate((gm_app_unnorm, kernels_to_best_graphs), axis=1) | |||||
gm_all = np.concatenate((gm_all, np.concatenate((kernels_to_best_graphs.T, gram_matrix_unnorm[test_index,:][:,test_index].copy()), axis=1)), axis=0) | |||||
gm_all = normalize_gram_matrix(gm_all.copy()) | |||||
dis_mat, _, _, _ = compute_distance_matrix(gm_all) | |||||
N = len(best_graphs) | |||||
d_app = dis_mat[range(N),:][:,range(N)].copy() | |||||
d_test = np.zeros((N, len(test_index))) | |||||
for i in range(N): | |||||
for j in range(len(test_index)): | |||||
d_test[i, j] = dis_mat[i, N + j] # the test columns start at offset N in gm_all.
accuracies.append(knn_classification(d_app, d_test, y_app, y_test, n_neighbors, verbose=True, text=train_examples)) | |||||
# write result detail. | |||||
if save_results: | |||||
f_detail = open(dir_save + fn_output_detail, 'a') | |||||
print('writing results to files...') | |||||
csv.writer(f_detail).writerow([ds_name, kernel_options['name'], | |||||
train_examples, trial, | |||||
knn_options['n_neighbors'], | |||||
len(gm_all), knn_options['test_size'], | |||||
accuracies[-1][0], accuracies[-1][1]]) | |||||
f_detail.close() | |||||
results = {} | |||||
results['ave_perf_train'] = np.mean([i[0] for i in accuracies], axis=0) | |||||
results['std_perf_train'] = np.std([i[0] for i in accuracies], axis=0, ddof=1) | |||||
results['ave_perf_test'] = np.mean([i[1] for i in accuracies], axis=0) | |||||
results['std_perf_test'] = np.std([i[1] for i in accuracies], axis=0, ddof=1) | |||||
# write result summary over all trials.
if save_results: | |||||
f_summary = open(dir_save + fn_output_summary, 'a') | |||||
csv.writer(f_summary).writerow([ds_name, kernel_options['name'], | |||||
train_examples, | |||||
knn_options['n_neighbors'], | |||||
knn_options['test_size'], results['ave_perf_train'], | |||||
results['ave_perf_test'], results['std_perf_train'], | |||||
results['std_perf_test'], time_precompute_gm]) | |||||
f_summary.close() | |||||
def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): | |||||
y_all = dataset_all.targets | |||||
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] | |||||
# compute distance matrix. | |||||
gram_matrix = normalize_gram_matrix(gram_matrix_unnorm.copy()) | |||||
dis_mat, _, _, _ = compute_distance_matrix(gram_matrix) | |||||
# get shuffles. | |||||
train_indices, test_indices, _, _ = __get_shuffles(y_all, n_splits, test_size) | |||||
accuracies = [] | |||||
for trial in range(len(train_indices)): | |||||
print('\ntrial =', trial) | |||||
train_index = train_indices[trial] | |||||
test_index = test_indices[trial] | |||||
y_app = [y_all[i] for i in train_index] | |||||
y_test = [y_all[i] for i in test_index] | |||||
N = len(train_index) | |||||
d_app = dis_mat[train_index,:][:,train_index].copy() | |||||
d_test = np.zeros((N, len(test_index))) | |||||
for i in range(N): | |||||
for j in range(len(test_index)): | |||||
d_test[i, j] = dis_mat[train_index[i], test_index[j]] | |||||
accuracies.append(knn_classification(d_app, d_test, y_app, y_test, n_neighbors, verbose=True, text=train_examples)) | |||||
# write result detail. | |||||
if save_results: | |||||
print('writing results to files...') | |||||
f_detail = open(dir_save + fn_output_detail, 'a') | |||||
csv.writer(f_detail).writerow([ds_name, kernel_options['name'], | |||||
train_examples, trial, knn_options['n_neighbors'], | |||||
len(gram_matrix), knn_options['test_size'], | |||||
accuracies[-1][0], accuracies[-1][1]]) | |||||
f_detail.close() | |||||
results = {} | |||||
results['ave_perf_train'] = np.mean([i[0] for i in accuracies], axis=0) | |||||
results['std_perf_train'] = np.std([i[0] for i in accuracies], axis=0, ddof=1) | |||||
results['ave_perf_test'] = np.mean([i[1] for i in accuracies], axis=0) | |||||
results['std_perf_test'] = np.std([i[1] for i in accuracies], axis=0, ddof=1) | |||||
# write result summary over all trials.
if save_results: | |||||
f_summary = open(dir_save + fn_output_summary, 'a') | |||||
csv.writer(f_summary).writerow([ds_name, kernel_options['name'], | |||||
train_examples, knn_options['n_neighbors'], | |||||
knn_options['test_size'], results['ave_perf_train'], | |||||
results['ave_perf_test'], results['std_perf_train'], | |||||
results['std_perf_test'], time_precompute_gm]) | |||||
f_summary.close() | |||||
def __get_shuffles(y_all, n_splits, test_size): | |||||
rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0) | |||||
train_indices = [[] for _ in range(n_splits)] | |||||
test_indices = [[] for _ in range(n_splits)] | |||||
idx_targets = get_same_item_indices(y_all) | |||||
train_nums = [] | |||||
keys = [] | |||||
for key, item in idx_targets.items(): | |||||
i = 0 | |||||
for train_i, test_i in rs.split(item): # @todo: careful when parallel. | |||||
train_indices[i] += [item[idx] for idx in train_i] | |||||
test_indices[i] += [item[idx] for idx in test_i] | |||||
i += 1 | |||||
train_nums.append(len(train_i)) | |||||
keys.append(key) | |||||
return train_indices, test_indices, train_nums, keys | |||||
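# __get_shuffles performs a stratified shuffle split: each class is split
# separately (with fixed random_state=0), so every trial's train set is grouped
# class by class; 'train_nums' holds the per-class train counts and 'keys' the
# class labels, which is what the cumulative offsets in the callers rely on.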
def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options): | |||||
mpg = MedianPreimageGenerator() | |||||
mpg.dataset = dataset.copy() | |||||
mpg.set_options(**mpg_options.copy()) | |||||
mpg.kernel_options = kernel_options.copy() | |||||
mpg.ged_options = ged_options.copy() | |||||
mpg.mge_options = mge_options.copy() | |||||
mpg.run() | |||||
return mpg.set_median, mpg.gen_median | |||||
def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all): | |||||
if load_gm == 'auto': | |||||
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | |||||
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | |||||
if gmfile_exist: | |||||
gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe. | |||||
gram_matrix_unnorm = gmfile['gram_matrix_unnorm'] | |||||
time_precompute_gm = float(gmfile['run_time']) | |||||
else: | |||||
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options) | |||||
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm) | |||||
elif not load_gm: | |||||
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options) | |||||
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm) | |||||
else: | |||||
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | |||||
gmfile = np.load(gm_fname, allow_pickle=True) | |||||
gram_matrix_unnorm = gmfile['gram_matrix_unnorm'] | |||||
time_precompute_gm = float(gmfile['run_time']) | |||||
return gram_matrix_unnorm, time_precompute_gm | |||||
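# The cached .npz holds two arrays, 'gram_matrix_unnorm' and 'run_time'; it can
# be inspected independently, e.g. (file name illustrative):
# gmfile = np.load('gram_matrix_unnorm.MUTAG.ShortestPath.gm.npz', allow_pickle=True)
# gm, t = gmfile['gram_matrix_unnorm'], float(gmfile['run_time'])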
def __get_graph_kernel(dataset, kernel_options): | |||||
from gklearn.utils.utils import get_graph_kernel_by_name | |||||
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | |||||
node_labels=dataset.node_labels, | |||||
edge_labels=dataset.edge_labels, | |||||
node_attrs=dataset.node_attrs, | |||||
edge_attrs=dataset.edge_attrs, | |||||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||||
kernel_options=kernel_options) | |||||
return graph_kernel | |||||
def __compute_gram_matrix_unnorm(dataset, kernel_options): | |||||
from gklearn.utils.utils import get_graph_kernel_by_name | |||||
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | |||||
node_labels=dataset.node_labels, | |||||
edge_labels=dataset.edge_labels, | |||||
node_attrs=dataset.node_attrs, | |||||
edge_attrs=dataset.edge_attrs, | |||||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||||
kernel_options=kernel_options) | |||||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options) | |||||
gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm | |||||
return gram_matrix_unnorm, run_time | |||||
def __init_output_file_knn(ds_name, gkernel, fit_method, dir_output): | |||||
if not os.path.exists(dir_output): | |||||
os.makedirs(dir_output) | |||||
fn_output_detail = 'results_detail_knn.' + ds_name + '.' + gkernel + '.csv' | |||||
f_detail = open(dir_output + fn_output_detail, 'a') | |||||
csv.writer(f_detail).writerow(['dataset', 'graph kernel', | |||||
'train examples', 'trial', 'num neighbors', 'num graphs', 'test size', | |||||
'perf train', 'perf test']) | |||||
f_detail.close() | |||||
fn_output_summary = 'results_summary_knn.' + ds_name + '.' + gkernel + '.csv' | |||||
f_summary = open(dir_output + fn_output_summary, 'a') | |||||
csv.writer(f_summary).writerow(['dataset', 'graph kernel', | |||||
'train examples', 'num neighbors', 'test size', | |||||
'ave perf train', 'ave perf test', | |||||
'std perf train', 'std perf test', 'time precompute gm']) | |||||
f_summary.close() | |||||
return fn_output_detail, fn_output_summary |
@@ -39,6 +39,8 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
self.__max_itrs_without_update = 3 | self.__max_itrs_without_update = 3 | ||||
self.__epsilon_residual = 0.01 | self.__epsilon_residual = 0.01 | ||||
self.__epsilon_ec = 0.1 | self.__epsilon_ec = 0.1 | ||||
self.__allow_zeros = False | |||||
self.__triangle_rule = True | |||||
# values to compute. | # values to compute. | ||||
self.__runtime_optimize_ec = None | self.__runtime_optimize_ec = None | ||||
self.__runtime_generate_preimage = None | self.__runtime_generate_preimage = None | ||||
@@ -79,6 +81,8 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1) | self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1) | ||||
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | ||||
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | ||||
self.__allow_zeros = kwargs.get('allow_zeros', False) | |||||
self.__triangle_rule = kwargs.get('triangle_rule', True) | |||||
def run(self): | def run(self): | ||||
@@ -277,7 +281,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
options['edge_labels'] = self._dataset.edge_labels | options['edge_labels'] = self._dataset.edge_labels | ||||
options['node_attrs'] = self._dataset.node_attrs | options['node_attrs'] = self._dataset.node_attrs | ||||
options['edge_attrs'] = self._dataset.edge_attrs | options['edge_attrs'] = self._dataset.edge_attrs | ||||
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel) | |||||
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) | |||||
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] | residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] | ||||
time_list = [time.time() - time0] | time_list = [time.time() - time0] | ||||
edit_cost_list = [self.__init_ecc] | edit_cost_list = [self.__init_ecc] | ||||
@@ -319,7 +323,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
options['edge_labels'] = self._dataset.edge_labels | options['edge_labels'] = self._dataset.edge_labels | ||||
options['node_attrs'] = self._dataset.node_attrs | options['node_attrs'] = self._dataset.node_attrs | ||||
options['edge_attrs'] = self._dataset.edge_attrs | options['edge_attrs'] = self._dataset.edge_attrs | ||||
ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel) | |||||
ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) | |||||
residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec)))) | residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec)))) | ||||
time_list.append(time.time() - time0) | time_list.append(time.time() - time0) | ||||
edit_cost_list.append(self.__edit_cost_constants) | edit_cost_list.append(self.__edit_cost_constants) | ||||
@@ -382,7 +386,8 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
def __update_ecc(self, nb_cost_mat, dis_k_vec, rw_constraints='inequality'): | def __update_ecc(self, nb_cost_mat, dis_k_vec, rw_constraints='inequality'): | ||||
# if self.__ds_name == 'Letter-high': | # if self.__ds_name == 'Letter-high': | ||||
if self.__ged_options['edit_cost'] == 'LETTER': | |||||
if self.__ged_options['edit_cost'] == 'LETTER': | |||||
raise Exception('Cannot compute for cost "LETTER".') | |||||
pass | pass | ||||
# # method 1: set alpha automatically, just tune c_vir and c_eir by | # # method 1: set alpha automatically, just tune c_vir and c_eir by | ||||
# # LMS using cvxpy. | # # LMS using cvxpy. | ||||
@@ -438,7 +443,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
# # 1. if c_vi != c_vr, c_ei != c_er. | # # 1. if c_vi != c_vr, c_ei != c_er. | ||||
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | # nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | ||||
# x = cp.Variable(nb_cost_mat_new.shape[1]) | # x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
# cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
## # 1.1 no constraints. | ## # 1.1 no constraints. | ||||
## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] | ## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] | ||||
# # 1.2 c_vs <= c_vi + c_vr. | # # 1.2 c_vs <= c_vi + c_vr. | ||||
@@ -449,7 +454,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
## nb_cost_mat_new[:,0] += nb_cost_mat[:,1] | ## nb_cost_mat_new[:,0] += nb_cost_mat[:,1] | ||||
## nb_cost_mat_new[:,2] += nb_cost_mat[:,5] | ## nb_cost_mat_new[:,2] += nb_cost_mat[:,5] | ||||
## x = cp.Variable(nb_cost_mat_new.shape[1]) | ## x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
## cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
## cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
## # 2.1 no constraints. | ## # 2.1 no constraints. | ||||
## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] | ## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] | ||||
### # 2.2 c_vs <= c_vi + c_vr. | ### # 2.2 c_vs <= c_vi + c_vr. | ||||
@@ -461,35 +466,37 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] | # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] | ||||
# edit_costs_new = np.array(edit_costs_new) | # edit_costs_new = np.array(edit_costs_new) | ||||
# residual = np.sqrt(prob.value) | # residual = np.sqrt(prob.value) | ||||
if rw_constraints == 'inequality': | |||||
# c_vs <= c_vi + c_vr. | |||||
if not self.__triangle_rule and self.__allow_zeros: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | ||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | prob = cp.Problem(cp.Minimize(cost_fun), constraints) | ||||
self.__execute_cvx(prob) | self.__execute_cvx(prob) | ||||
edit_costs_new = x.value | edit_costs_new = x.value | ||||
residual = np.sqrt(prob.value) | residual = np.sqrt(prob.value) | ||||
elif rw_constraints == '2constraints': | |||||
# c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er. | |||||
elif self.__triangle_rule and self.__allow_zeros: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | ||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0, | |||||
np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0, | |||||
np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0] | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01, | |||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | prob = cp.Problem(cp.Minimize(cost_fun), constraints) | ||||
prob.solve() | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = x.value | edit_costs_new = x.value | ||||
residual = np.sqrt(prob.value) | residual = np.sqrt(prob.value) | ||||
elif rw_constraints == 'no-constraint': | |||||
# no constraint. | |||||
elif not self.__triangle_rule and not self.__allow_zeros: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | ||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | ||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | prob = cp.Problem(cp.Minimize(cost_fun), constraints) | ||||
prob.solve() | prob.solve() | ||||
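# The hunks above all instantiate one pattern: fitting edit-cost constants by
# constrained least squares in cvxpy. A minimal self-contained sketch of that
# pattern (data and constraint values are illustrative only):
import cvxpy as cp
import numpy as np
nb_cost_mat = np.random.rand(10, 5) # rows: edit-operation counts per graph pair.
dis_k_vec = np.random.rand(10) # target distances in kernel space.
x = cp.Variable(nb_cost_mat.shape[1]) # edit-cost constants to fit.
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
constraints = [x >= 0.01, # keep all costs strictly positive.
np.array([1.0, 1.0, -1.0, 0.0, 0.0]) @ x >= 0.0] # triangle rule: c_vs <= c_vi + c_vr.
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
print(x.value, np.sqrt(prob.value))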
@@ -499,7 +506,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
# # c_vs <= c_vi + c_vr. | # # c_vs <= c_vi + c_vr. | ||||
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | # nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | ||||
# x = cp.Variable(nb_cost_mat_new.shape[1]) | # x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
# cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | ||||
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | ||||
# prob = cp.Problem(cp.Minimize(cost_fun), constraints) | # prob = cp.Problem(cp.Minimize(cost_fun), constraints) | ||||
@@ -508,15 +515,40 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] | # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] | ||||
# edit_costs_new = np.array(edit_costs_new) | # edit_costs_new = np.array(edit_costs_new) | ||||
# residual = np.sqrt(prob.value) | # residual = np.sqrt(prob.value) | ||||
elif self.__triangle_rule and not self.__allow_zeros: | |||||
# c_vs <= c_vi + c_vr. | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = x.value | |||||
residual = np.sqrt(prob.value) | |||||
elif rw_constraints == '2constraints': # @todo: rearrange it later. | |||||
# c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er. | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0, | |||||
np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0, | |||||
np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
prob.solve() | |||||
edit_costs_new = x.value | |||||
residual = np.sqrt(prob.value) | |||||
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': | elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': | ||||
is_n_attr = np.count_nonzero(nb_cost_mat[:,2]) | is_n_attr = np.count_nonzero(nb_cost_mat[:,2]) | ||||
is_e_attr = np.count_nonzero(nb_cost_mat[:,5]) | is_e_attr = np.count_nonzero(nb_cost_mat[:,5]) | ||||
if self.__ds_name == 'SYNTHETICnew': | |||||
if self.__ds_name == 'SYNTHETICnew': # @todo: rearrange this later.
# nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | # nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | ||||
nb_cost_mat_new = nb_cost_mat[:,[2,3,4]] | nb_cost_mat_new = nb_cost_mat[:,[2,3,4]] | ||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | ||||
# np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0] | # np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0] | ||||
# constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]] | # constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]] | ||||
@@ -529,12 +561,154 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
np.array([0.0]))) | np.array([0.0]))) | ||||
residual = np.sqrt(prob.value) | residual = np.sqrt(prob.value) | ||||
elif rw_constraints == 'inequality': | |||||
elif not self.__triangle_rule and self.__allow_zeros: | |||||
if is_n_attr and is_e_attr: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = x.value | |||||
residual = np.sqrt(prob.value) | |||||
elif is_n_attr and not is_e_attr: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = np.concatenate((x.value, np.array([0.0]))) | |||||
residual = np.sqrt(prob.value) | |||||
elif not is_n_attr and is_e_attr: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) | |||||
residual = np.sqrt(prob.value) | |||||
else: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), | |||||
x.value[2:], np.array([0.0]))) | |||||
residual = np.sqrt(prob.value) | |||||
elif self.__triangle_rule and self.__allow_zeros: | |||||
if is_n_attr and is_e_attr: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = x.value | |||||
residual = np.sqrt(prob.value) | |||||
elif is_n_attr and not is_e_attr: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01, | |||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = np.concatenate((x.value, np.array([0.0]))) | |||||
residual = np.sqrt(prob.value) | |||||
elif not is_n_attr and is_e_attr: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) | |||||
residual = np.sqrt(prob.value) | |||||
else: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), | |||||
x.value[2:], np.array([0.0]))) | |||||
residual = np.sqrt(prob.value) | |||||
elif not self.__triangle_rule and not self.__allow_zeros: | |||||
if is_n_attr and is_e_attr: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = x.value | |||||
residual = np.sqrt(prob.value) | |||||
elif is_n_attr and not is_e_attr: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = np.concatenate((x.value, np.array([0.0]))) | |||||
residual = np.sqrt(prob.value) | |||||
elif not is_n_attr and is_e_attr: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) | |||||
residual = np.sqrt(prob.value) | |||||
else: | |||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]] | |||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), | |||||
x.value[2:], np.array([0.0]))) | |||||
residual = np.sqrt(prob.value) | |||||
elif self.__triangle_rule and not self.__allow_zeros: | |||||
# c_vs <= c_vi + c_vr. | # c_vs <= c_vi + c_vr. | ||||
if is_n_attr and is_e_attr: | if is_n_attr and is_e_attr: | ||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] | nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] | ||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | ||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | ||||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | ||||
@@ -545,7 +719,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
elif is_n_attr and not is_e_attr: | elif is_n_attr and not is_e_attr: | ||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | ||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | ||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | ||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | prob = cp.Problem(cp.Minimize(cost_fun), constraints) | ||||
@@ -555,7 +729,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
elif not is_n_attr and is_e_attr: | elif not is_n_attr and is_e_attr: | ||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | ||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | ||||
np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | ||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | prob = cp.Problem(cp.Minimize(cost_fun), constraints) | ||||
@@ -565,24 +739,61 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
else: | else: | ||||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]] | nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]] | ||||
x = cp.Variable(nb_cost_mat_new.shape[1]) | x = cp.Variable(nb_cost_mat_new.shape[1]) | ||||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | ||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | prob = cp.Problem(cp.Minimize(cost_fun), constraints) | ||||
self.__execute_cvx(prob) | self.__execute_cvx(prob) | ||||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), | edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), | ||||
x.value[2:], np.array([0.0]))) | x.value[2:], np.array([0.0]))) | ||||
residual = np.sqrt(prob.value) | residual = np.sqrt(prob.value) | ||||
elif self.__ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled. | elif self.__ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled. | ||||
x = cp.Variable(nb_cost_mat.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])], | |||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = x.value | |||||
residual = np.sqrt(prob.value) | |||||
if not self.__triangle_rule and self.__allow_zeros: | |||||
x = cp.Variable(nb_cost_mat.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], | |||||
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = x.value | |||||
residual = np.sqrt(prob.value) | |||||
elif self.__triangle_rule and self.__allow_zeros: | |||||
x = cp.Variable(nb_cost_mat.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], | |||||
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||||
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = x.value | |||||
residual = np.sqrt(prob.value) | |||||
elif not self.__triangle_rule and not self.__allow_zeros: | |||||
x = cp.Variable(nb_cost_mat.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])]] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = x.value | |||||
residual = np.sqrt(prob.value) | |||||
elif self.__triangle_rule and not self.__allow_zeros: | |||||
x = cp.Variable(nb_cost_mat.shape[1]) | |||||
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) | |||||
constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])], | |||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||||
self.__execute_cvx(prob) | |||||
edit_costs_new = x.value | |||||
residual = np.sqrt(prob.value) | |||||
else: | else: | ||||
raise Exception('The edit cost "', self.__ged_options['edit_cost'], '" is not supported for update progress.') | |||||
# # method 1: simple least square method. | # # method 1: simple least square method. | ||||
# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, | # edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, | ||||
# rcond=None) | # rcond=None) | ||||
@@ -607,7 +818,7 @@ class MedianPreimageGenerator(PreimageGenerator): | |||||
# G = -1 * np.identity(nb_cost_mat.shape[1]) | # G = -1 * np.identity(nb_cost_mat.shape[1]) | ||||
# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) | # h = np.array([0 for i in range(nb_cost_mat.shape[1])]) | ||||
x = cp.Variable(nb_cost_mat.shape[1]) | x = cp.Variable(nb_cost_mat.shape[1]) | ||||
cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec) | |||||
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) | |||||
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], | constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], | ||||
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | ||||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | ||||
@@ -669,6 +880,7 @@ class MedianPreimageGenerator(PreimageGenerator):
		options = self.__mge_options.copy()
		if not 'seed' in options:
			options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
		options['parallel'] = self.__parallel

		# Select the GED algorithm.
		self.__mge.set_options(mge_options_to_string(options))
@@ -676,8 +888,11 @@ class MedianPreimageGenerator(PreimageGenerator):
							 edge_labels=self._dataset.edge_labels,
							 node_attrs=self._dataset.node_attrs,
							 edge_attrs=self._dataset.edge_attrs)
		self.__mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
		self.__mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
		ged_options = self.__ged_options.copy()
		if self.__parallel:
			ged_options['threads'] = 1
		self.__mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options))
		self.__mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options))

		# Run the estimator.
		self.__mge.run(graph_ids, set_median_id, gen_median_id)
@@ -0,0 +1,423 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed May 27 14:27:15 2020

@author: ljia
"""
import numpy as np
import csv
import os
import os.path
from gklearn.utils import Dataset
from gklearn.preimage import MedianPreimageGenerator
from gklearn.utils import normalize_gram_matrix
from gklearn.utils import split_dataset_by_target
from gklearn.preimage.utils import compute_k_dis
from gklearn.utils.graphfiles import saveGXL
import networkx as nx
def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None):
	"""Remove the best graph (w.r.t. distance in kernel space) from the median set, then check whether the removed graph can be regenerated from the graphs left in the median set.
	"""
	# 1. get dataset.
	print('1. getting dataset...')
	dataset_all = Dataset()
	dataset_all.load_predefined_dataset(ds_name)
	dataset_all.trim_dataset(edge_required=edge_required)
	if irrelevant_labels is not None:
		dataset_all.remove_labels(**irrelevant_labels)
	if cut_range is not None:
		dataset_all.cut_graphs(cut_range)
	datasets = split_dataset_by_target(dataset_all)

	if save_results:
		# create result files.
		print('creating output files...')
		fn_output_detail, fn_output_summary = __init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
	else:
		fn_output_detail, fn_output_summary = None, None

	# 2. compute/load Gram matrix a priori.
	print('2. computing/loading Gram matrix...')
	gram_matrix_unnorm_list, time_precompute_gm_list = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets)

	sod_sm_list = []
	sod_gm_list = []
	dis_k_sm_list = []
	dis_k_gm_list = []
	dis_k_gi_min_list = []
	time_optimize_ec_list = []
	time_generate_list = []
	time_total_list = []
	itrs_list = []
	converged_list = []
	num_updates_ecc_list = []
	mge_decrease_order_list = []
	mge_increase_order_list = []
	mge_converged_order_list = []
	nb_sod_sm2gm = [0, 0, 0]
	nb_dis_k_sm2gm = [0, 0, 0]
	nb_dis_k_gi2sm = [0, 0, 0]
	nb_dis_k_gi2gm = [0, 0, 0]
	dis_k_max_list = []
	dis_k_min_list = []
	dis_k_mean_list = []
	best_dis_list = []

	print('starting experiment for each class of target...')
	idx_offset = 0
	for idx, dataset in enumerate(datasets):
		target = dataset.targets[0]
		print('\ntarget =', target, '\n')
		# if target != 1:
		# 	continue

		num_graphs = len(dataset.graphs)
		if num_graphs < 2:
			print('\nnumber of graphs = ', num_graphs, ', skip.\n')
			idx_offset += 1
			continue
		# 3. get the best graph and remove it from median set.
		print('3. getting and removing the best graph...')
		gram_matrix_unnorm = gram_matrix_unnorm_list[idx - idx_offset]
		best_index, best_dis, best_graph = __get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy()))
		median_set_new = [dataset.graphs[i] for i in range(len(dataset.graphs)) if i != best_index]
		num_graphs -= 1
		if num_graphs == 1:
			continue
		best_dis_list.append(best_dis)
		dataset.load_graphs(median_set_new, targets=None)
		gram_matrix_unnorm_new = np.delete(gram_matrix_unnorm, best_index, axis=0)
		gram_matrix_unnorm_new = np.delete(gram_matrix_unnorm_new, best_index, axis=1)

		# 4. set parameters.
		print('4. initializing mpg and setting parameters...')
		mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_new
		mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx - idx_offset]
		mpg = MedianPreimageGenerator()
		mpg.dataset = dataset
		mpg.set_options(**mpg_options.copy())
		mpg.kernel_options = kernel_options.copy()
		mpg.ged_options = ged_options.copy()
		mpg.mge_options = mge_options.copy()

		# 5. compute median preimage.
		print('5. computing median preimage...')
		mpg.run()
		results = mpg.get_results()

		# 6. compute pairwise kernel distances.
		print('6. computing pairwise kernel distances...')
		_, dis_k_max, dis_k_min, dis_k_mean = mpg.graph_kernel.compute_distance_matrix()
		dis_k_max_list.append(dis_k_max)
		dis_k_min_list.append(dis_k_min)
		dis_k_mean_list.append(dis_k_mean)
		# 7. save results (and median graphs).
		print('7. saving results (and median graphs)...')
		# write result detail.
		if save_results:
			print('writing results to files...')
			sod_sm2gm = get_relations(np.sign(results['sod_gen_median'] - results['sod_set_median']))
			dis_k_sm2gm = get_relations(np.sign(results['k_dis_gen_median'] - results['k_dis_set_median']))
			dis_k_gi2sm = get_relations(np.sign(results['k_dis_set_median'] - results['k_dis_dataset']))
			dis_k_gi2gm = get_relations(np.sign(results['k_dis_gen_median'] - results['k_dis_dataset']))

			f_detail = open(dir_save + fn_output_detail, 'a')
			csv.writer(f_detail).writerow([ds_name, kernel_options['name'],
					  ged_options['edit_cost'], ged_options['method'],
					  ged_options['attr_distance'], mpg_options['fit_method'],
					  num_graphs, target, 1,
					  results['sod_set_median'], results['sod_gen_median'],
					  results['k_dis_set_median'], results['k_dis_gen_median'],
					  results['k_dis_dataset'], best_dis, best_index,
					  sod_sm2gm, dis_k_sm2gm,
					  dis_k_gi2sm, dis_k_gi2gm, results['edit_cost_constants'],
					  results['runtime_precompute_gm'], results['runtime_optimize_ec'],
					  results['runtime_generate_preimage'], results['runtime_total'],
					  results['itrs'], results['converged'],
					  results['num_updates_ecc'],
					  results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge
					  results['mge']['num_increase_order'] > 0,
					  results['mge']['num_converged_descents'] > 0])
			f_detail.close()
			# compute result summary.
			sod_sm_list.append(results['sod_set_median'])
			sod_gm_list.append(results['sod_gen_median'])
			dis_k_sm_list.append(results['k_dis_set_median'])
			dis_k_gm_list.append(results['k_dis_gen_median'])
			dis_k_gi_min_list.append(results['k_dis_dataset'])
			time_precompute_gm_list.append(results['runtime_precompute_gm'])
			time_optimize_ec_list.append(results['runtime_optimize_ec'])
			time_generate_list.append(results['runtime_generate_preimage'])
			time_total_list.append(results['runtime_total'])
			itrs_list.append(results['itrs'])
			converged_list.append(results['converged'])
			num_updates_ecc_list.append(results['num_updates_ecc'])
			mge_decrease_order_list.append(results['mge']['num_decrease_order'] > 0)
			mge_increase_order_list.append(results['mge']['num_increase_order'] > 0)
			mge_converged_order_list.append(results['mge']['num_converged_descents'] > 0)

			# # SOD SM -> GM
			if results['sod_set_median'] > results['sod_gen_median']:
				nb_sod_sm2gm[0] += 1
				# repeats_better_sod_sm2gm.append(1)
			elif results['sod_set_median'] == results['sod_gen_median']:
				nb_sod_sm2gm[1] += 1
			elif results['sod_set_median'] < results['sod_gen_median']:
				nb_sod_sm2gm[2] += 1
			# # dis_k SM -> GM
			if results['k_dis_set_median'] > results['k_dis_gen_median']:
				nb_dis_k_sm2gm[0] += 1
				# repeats_better_dis_k_sm2gm.append(1)
			elif results['k_dis_set_median'] == results['k_dis_gen_median']:
				nb_dis_k_sm2gm[1] += 1
			elif results['k_dis_set_median'] < results['k_dis_gen_median']:
				nb_dis_k_sm2gm[2] += 1
			# # dis_k gi -> SM
			if results['k_dis_dataset'] > results['k_dis_set_median']:
				nb_dis_k_gi2sm[0] += 1
				# repeats_better_dis_k_gi2sm.append(1)
			elif results['k_dis_dataset'] == results['k_dis_set_median']:
				nb_dis_k_gi2sm[1] += 1
			elif results['k_dis_dataset'] < results['k_dis_set_median']:
				nb_dis_k_gi2sm[2] += 1
			# # dis_k gi -> GM
			if results['k_dis_dataset'] > results['k_dis_gen_median']:
				nb_dis_k_gi2gm[0] += 1
				# repeats_better_dis_k_gi2gm.append(1)
			elif results['k_dis_dataset'] == results['k_dis_gen_median']:
				nb_dis_k_gi2gm[1] += 1
			elif results['k_dis_dataset'] < results['k_dis_gen_median']:
				nb_dis_k_gi2gm[2] += 1
			# write result summary for each class.
			f_summary = open(dir_save + fn_output_summary, 'a')
			csv.writer(f_summary).writerow([ds_name, kernel_options['name'],
					  ged_options['edit_cost'], ged_options['method'],
					  ged_options['attr_distance'], mpg_options['fit_method'],
					  num_graphs, target,
					  results['sod_set_median'], results['sod_gen_median'],
					  results['k_dis_set_median'], results['k_dis_gen_median'],
					  results['k_dis_dataset'], best_dis, best_index,
					  sod_sm2gm, dis_k_sm2gm,
					  dis_k_gi2sm, dis_k_gi2gm,
					  results['runtime_precompute_gm'], results['runtime_optimize_ec'],
					  results['runtime_generate_preimage'], results['runtime_total'],
					  results['itrs'], results['converged'],
					  results['num_updates_ecc'],
					  results['mge']['num_decrease_order'] > 0, # @todo: not suitable for multi-start mge
					  results['mge']['num_increase_order'] > 0,
					  results['mge']['num_converged_descents'] > 0,
					  nb_sod_sm2gm,
					  nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm])
			f_summary.close()
		# save median graphs.
		if save_medians:
			if not os.path.exists(dir_save + 'medians/'):
				os.makedirs(dir_save + 'medians/')
			print('Saving median graphs to files...')
			fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
			saveGXL(mpg.set_median, fn_pre_sm + '.gxl', method='default',
					node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
					node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
			fn_pre_gm = dir_save + 'medians/gen_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
			saveGXL(mpg.gen_median, fn_pre_gm + '.gxl', method='default',
					node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
					node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
			fn_best_dataset = dir_save + 'medians/g_best_dataset.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
			saveGXL(best_graph, fn_best_dataset + '.gxl', method='default',
					node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
					node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
			fn_best_median_set = dir_save + 'medians/g_best_median_set.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
			saveGXL(mpg.best_from_dataset, fn_best_median_set + '.gxl', method='default',
					node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
					node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)

		# plot median graphs.
		if plot_medians and save_medians:
			if ged_options['edit_cost'] in ('LETTER', 'LETTER2') or ds_name in ('Letter-high', 'Letter-med', 'Letter-low'):
				draw_Letter_graph(mpg.set_median, fn_pre_sm)
				draw_Letter_graph(mpg.gen_median, fn_pre_gm)
				draw_Letter_graph(mpg.best_from_dataset, fn_best_dataset)
	# write result summary over all classes.
	if save_results:
		sod_sm_mean = np.mean(sod_sm_list)
		sod_gm_mean = np.mean(sod_gm_list)
		dis_k_sm_mean = np.mean(dis_k_sm_list)
		dis_k_gm_mean = np.mean(dis_k_gm_list)
		dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
		best_dis_mean = np.mean(best_dis_list)
		time_precompute_gm_mean = np.mean(time_precompute_gm_list)
		time_optimize_ec_mean = np.mean(time_optimize_ec_list)
		time_generate_mean = np.mean(time_generate_list)
		time_total_mean = np.mean(time_total_list)
		itrs_mean = np.mean(itrs_list)
		num_converged = np.sum(converged_list)
		num_updates_ecc_mean = np.mean(num_updates_ecc_list)
		num_mge_decrease_order = np.sum(mge_decrease_order_list)
		num_mge_increase_order = np.sum(mge_increase_order_list)
		num_mge_converged = np.sum(mge_converged_order_list)
		sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean))
		dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
		dis_k_gi2sm_mean = get_relations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
		dis_k_gi2gm_mean = get_relations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))

		f_summary = open(dir_save + fn_output_summary, 'a')
		csv.writer(f_summary).writerow([ds_name, kernel_options['name'],
				  ged_options['edit_cost'], ged_options['method'],
				  ged_options['attr_distance'], mpg_options['fit_method'],
				  num_graphs, 'all',
				  sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
				  dis_k_gi_min_mean, best_dis_mean, '-',
				  sod_sm2gm_mean, dis_k_sm2gm_mean,
				  dis_k_gi2sm_mean, dis_k_gi2gm_mean,
				  time_precompute_gm_mean, time_optimize_ec_mean,
				  time_generate_mean, time_total_mean, itrs_mean,
				  num_converged, num_updates_ecc_mean,
				  num_mge_decrease_order, num_mge_increase_order,
				  num_mge_converged])
		f_summary.close()

	# print overall pairwise kernel distances.
	dis_k_max = np.max(dis_k_max_list)
	dis_k_min = np.min(dis_k_min_list)
	dis_k_mean = np.mean(dis_k_mean_list)
	print('The maximum pairwise distance in kernel space:', dis_k_max)
	print('The minimum pairwise distance in kernel space:', dis_k_min)
	print('The average pairwise distance in kernel space:', dis_k_mean)

	print('\ncomplete.\n')
def __get_best_graph(Gn, gram_matrix):
	k_dis_list = []
	for idx in range(len(Gn)):
		k_dis_list.append(compute_k_dis(idx, range(0, len(Gn)), [1 / len(Gn)] * len(Gn), gram_matrix, withterm3=False))
	best_index = np.argmin(k_dis_list)
	best_dis = k_dis_list[best_index]
	best_graph = Gn[best_index].copy()
	return best_index, best_dis, best_graph
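As I read compute_k_dis, __get_best_graph picks the graph whose image in the kernel space is closest to the uniform mean of the whole set. A NumPy sketch of that distance under this assumption (the helper name and the PSD stand-in matrix are mine):

import numpy as np

def k_dis_to_mean(i, gram):
	# d(g_i, mean)^2 = K_ii - (2/n) * sum_j K_ij + (1/n^2) * sum_{j,l} K_jl.
	# The third term is identical for every i, which is presumably why
	# compute_k_dis can drop it (withterm3=False) without changing the argmin.
	n = len(gram)
	dis2 = gram[i, i] - 2.0 / n * np.sum(gram[i, :]) + np.sum(gram) / n ** 2
	return np.sqrt(max(dis2, 0.0))

gram = np.random.rand(5, 5)
gram = gram @ gram.T  # any PSD stand-in for a normalized Gram matrix
best_index = int(np.argmin([k_dis_to_mean(i, gram) for i in range(len(gram))]))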
def get_relations(sign):
	if sign == -1:
		return 'better'
	elif sign == 0:
		return 'same'
	elif sign == 1:
		return 'worse'
def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
	if load_gm == 'auto':
		gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
		gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
		if gmfile_exist:
			gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
			gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']]
			time_precompute_gm_list = gmfile['run_time_list'].tolist()
		else:
			gram_matrix_unnorm_list = []
			time_precompute_gm_list = []
			for dataset in datasets:
				gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options)
				gram_matrix_unnorm_list.append(gram_matrix_unnorm)
				time_precompute_gm_list.append(time_precompute_gm)
			np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)
	elif not load_gm:
		gram_matrix_unnorm_list = []
		time_precompute_gm_list = []
		for dataset in datasets:
			gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options)
			gram_matrix_unnorm_list.append(gram_matrix_unnorm)
			time_precompute_gm_list.append(time_precompute_gm)
		np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)
	else:
		gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
		gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
		gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']]
		time_precompute_gm_list = gmfile['run_time_list'].tolist()

	return gram_matrix_unnorm_list, time_precompute_gm_list
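A round-trip sketch of the .npz cache format this helper relies on: one unnormalized Gram matrix per target class plus matching run times, under the fixed key names gram_matrix_unnorm_list and run_time_list. The file name and toy matrices are made up; lists of differently sized matrices end up stored as object arrays, hence allow_pickle=True on load.

import numpy as np

gram_matrix_unnorm_list = [np.eye(3), 2.0 * np.eye(3)]  # toy stand-ins
run_time_list = [0.12, 0.34]
np.savez('gram_matrix_unnorm.EXAMPLE.Treelet.gm',
		 gram_matrix_unnorm_list=gram_matrix_unnorm_list,
		 run_time_list=run_time_list)
gmfile = np.load('gram_matrix_unnorm.EXAMPLE.Treelet.gm.npz', allow_pickle=True)
matrices = [item for item in gmfile['gram_matrix_unnorm_list']]
times = gmfile['run_time_list'].tolist()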
def __get_graph_kernel(dataset, kernel_options):
	from gklearn.utils.utils import get_graph_kernel_by_name
	graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
							node_labels=dataset.node_labels,
							edge_labels=dataset.edge_labels,
							node_attrs=dataset.node_attrs,
							edge_attrs=dataset.edge_attrs,
							ds_infos=dataset.get_dataset_infos(keys=['directed']),
							kernel_options=kernel_options)
	return graph_kernel


def __compute_gram_matrix_unnorm(dataset, kernel_options):
	from gklearn.utils.utils import get_graph_kernel_by_name
	graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
							node_labels=dataset.node_labels,
							edge_labels=dataset.edge_labels,
							node_attrs=dataset.node_attrs,
							edge_attrs=dataset.edge_attrs,
							ds_infos=dataset.get_dataset_infos(keys=['directed']),
							kernel_options=kernel_options)
	gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
	gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm
	return gram_matrix_unnorm, run_time
def __init_output_file(ds_name, gkernel, fit_method, dir_output):
	if not os.path.exists(dir_output):
		os.makedirs(dir_output)
	fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
	f_detail = open(dir_output + fn_output_detail, 'a')
	csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'edit cost',
			 'GED method', 'attr distance', 'fit method', 'num graphs',
			 'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
			 'min dis_k gi', 'best kernel dis', 'best graph index',
			 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
			 'dis_k gi -> GM', 'edit cost constants', 'time precompute gm',
			 'time optimize ec', 'time generate preimage', 'time total',
			 'itrs', 'converged', 'num updates ecc', 'mge decrease order',
			 'mge increase order', 'mge converged'])
	f_detail.close()

	fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'
	f_summary = open(dir_output + fn_output_summary, 'a')
	csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'edit cost',
			 'GED method', 'attr distance', 'fit method', 'num graphs',
			 'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
			 'min dis_k gi', 'best kernel dis', 'best graph index',
			 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
			 'dis_k gi -> GM', 'time precompute gm', 'time optimize ec',
			 'time generate preimage', 'time total', 'itrs', 'num converged',
			 'num updates ecc', 'mge num decrease order', 'mge num increase order',
			 'mge num converged', '# SOD SM -> GM', '# dis_k SM -> GM',
			 '# dis_k gi -> SM', '# dis_k gi -> GM'])
	f_summary.close()

	return fn_output_detail, fn_output_summary
# Draw the current median graph (for the Letter datasets).
def draw_Letter_graph(graph, file_prefix):
	import matplotlib
	matplotlib.use('agg')
	import matplotlib.pyplot as plt
	plt.figure()
	pos = {}
	for n in graph.nodes:
		pos[n] = np.array([float(graph.nodes[n]['x']), float(graph.nodes[n]['y'])])
	nx.draw_networkx(graph, pos)
	plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
	# plt.show()
	plt.clf()
	plt.close()
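Usage sketch for draw_Letter_graph: Letter graphs carry 'x'/'y' node attributes (strings, as typically loaded from GXL), which are parsed into plot coordinates. The toy graph and output path below are made up.

import networkx as nx

g = nx.Graph()
g.add_node(0, x='0.0', y='0.0')
g.add_node(1, x='1.0', y='2.0')
g.add_edge(0, 1)
draw_Letter_graph(g, '/tmp/letter_example')  # writes /tmp/letter_example.eps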
@@ -45,7 +45,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
	if save_results:
		# create result files.
		print('creating output files...')
		fn_output_detail, fn_output_summary = __init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
		fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)

	sod_sm_list = []
	sod_gm_list = []
@@ -82,22 +82,22 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
			gram_matrix_unnorm_list = []
			time_precompute_gm_list = []
		else:
			gmfile = np.load()
			gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list']
			time_precompute_gm_list = gmfile['run_time_list']
			gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
			gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
			gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']]
			time_precompute_gm_list = gmfile['run_time_list'].tolist()
	# repeats_better_sod_sm2gm = []
	# repeats_better_dis_k_sm2gm = []
	# repeats_better_dis_k_gi2sm = []
	# repeats_better_dis_k_gi2gm = []

	print('start generating preimage for each class of target...')
	print('starting to generate preimages for each class of target...')
	idx_offset = 0
	for idx, dataset in enumerate(datasets):
		target = dataset.targets[0]
		print('\ntarget =', target, '\n')
		# if target != 1:
		# 	continue

		num_graphs = len(dataset.graphs)
		if num_graphs < 2:
@@ -148,7 +148,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
					  results['sod_set_median'], results['sod_gen_median'],
					  results['k_dis_set_median'], results['k_dis_gen_median'],
					  results['k_dis_dataset'], sod_sm2gm, dis_k_sm2gm,
					  dis_k_gi2sm, dis_k_gi2gm, results['edit_cost_constants'],
					  results['runtime_precompute_gm'], results['runtime_optimize_ec'],
					  results['runtime_generate_preimage'], results['runtime_total'],
					  results['itrs'], results['converged'],
@@ -177,7 +177,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
			# # SOD SM -> GM
			if results['sod_set_median'] > results['sod_gen_median']:
				nb_sod_sm2gm[0] += 1
				# repeats_better_sod_sm2gm.append(1)
			elif results['sod_set_median'] == results['sod_gen_median']:
				nb_sod_sm2gm[1] += 1
			elif results['sod_set_median'] < results['sod_gen_median']:
@@ -185,7 +185,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
			# # dis_k SM -> GM
			if results['k_dis_set_median'] > results['k_dis_gen_median']:
				nb_dis_k_sm2gm[0] += 1
				# repeats_better_dis_k_sm2gm.append(1)
			elif results['k_dis_set_median'] == results['k_dis_gen_median']:
				nb_dis_k_sm2gm[1] += 1
			elif results['k_dis_set_median'] < results['k_dis_gen_median']:
@@ -193,7 +193,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
			# # dis_k gi -> SM
			if results['k_dis_dataset'] > results['k_dis_set_median']:
				nb_dis_k_gi2sm[0] += 1
				# repeats_better_dis_k_gi2sm.append(1)
			elif results['k_dis_dataset'] == results['k_dis_set_median']:
				nb_dis_k_gi2sm[1] += 1
			elif results['k_dis_dataset'] < results['k_dis_set_median']:
@@ -201,7 +201,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
			# # dis_k gi -> GM
			if results['k_dis_dataset'] > results['k_dis_gen_median']:
				nb_dis_k_gi2gm[0] += 1
				# repeats_better_dis_k_gi2gm.append(1)
			elif results['k_dis_dataset'] == results['k_dis_gen_median']:
				nb_dis_k_gi2gm[1] += 1
			elif results['k_dis_dataset'] < results['k_dis_gen_median']:
@@ -225,7 +225,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
					  results['mge']['num_increase_order'] > 0,
					  results['mge']['num_converged_descents'] > 0,
					  nb_sod_sm2gm,
					  nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm])
			f_summary.close()

		# save median graphs.
@@ -235,15 +235,15 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
			print('Saving median graphs to files...')
			fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
			saveGXL(mpg.set_median, fn_pre_sm + '.gxl', method='default',
					node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
					node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
			fn_pre_gm = dir_save + 'medians/gen_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
			saveGXL(mpg.gen_median, fn_pre_gm + '.gxl', method='default',
					node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
					node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
			fn_best_dataset = dir_save + 'medians/g_best_dataset.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
			saveGXL(mpg.best_from_dataset, fn_best_dataset + '.gxl', method='default',
					node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
					node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)

		# plot median graphs.
@@ -304,10 +304,10 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
	if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
		np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)

	print('\ncomplete.')
	print('\ncomplete.\n')

def __init_output_file(ds_name, gkernel, fit_method, dir_output):
def __init_output_file_preimage(ds_name, gkernel, fit_method, dir_output):
	if not os.path.exists(dir_output):
		os.makedirs(dir_output)
	# fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
@@ -335,9 +335,9 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output):
			 'num updates ecc', 'mge num decrease order', 'mge num increase order',
			 'mge num converged', '# SOD SM -> GM', '# dis_k SM -> GM',
			 '# dis_k gi -> SM', '# dis_k gi -> GM'])
			 # 'repeats better SOD SM -> GM',
			 # 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
			 # 'repeats better dis_k gi -> GM'])
	f_summary.close()

	return fn_output_detail, fn_output_summary
@@ -462,6 +462,8 @@ def gram2distances(Kmatrix):
def kernel_distance_matrix(Gn, node_label, edge_label, Kmatrix=None,
						   gkernel=None, verbose=True):
	import warnings
	warnings.warn('gklearn.preimage.utils.kernel_distance_matrix is deprecated, use gklearn.kernels.graph_kernel.compute_distance_matrix or gklearn.utils.compute_distance_matrix instead', DeprecationWarning)
	dis_mat = np.empty((len(Gn), len(Gn)))
	if Kmatrix is None:
		Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, verbose)
@@ -0,0 +1,71 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 14 15:39:29 2020

@author: ljia
"""
import multiprocessing
import functools
from gklearn.preimage.utils import generate_median_preimages_by_class


def test_median_preimage_generator():
	"""MAO, Treelet, using CONSTANT, symbolic only.
	"""
	from gklearn.utils.kernels import polynomialkernel
	# set parameters.
	ds_name = 'MAO' #
	mpg_options = {'fit_method': 'k-graphs',
				   'init_ecc': [4, 4, 2, 1, 1, 1], #
				   'ds_name': ds_name,
				   'parallel': True, # False
				   'time_limit_in_sec': 0,
				   'max_itrs': 3, #
				   'max_itrs_without_update': 3,
				   'epsilon_residual': 0.01,
				   'epsilon_ec': 0.1,
				   'verbose': 2}
	pkernel = functools.partial(polynomialkernel, d=4, c=1e+7)
	kernel_options = {'name': 'Treelet', #
					  'sub_kernel': pkernel,
					  'parallel': 'imap_unordered',
					  # 'parallel': None,
					  'n_jobs': multiprocessing.cpu_count(),
					  'normalize': True,
					  'verbose': 2}
	ged_options = {'method': 'IPFP',
				   'initialization_method': 'RANDOM', # 'NODE'
				   'initial_solutions': 1, # 1
				   'edit_cost': 'CONSTANT', #
				   'attr_distance': 'euclidean',
				   'ratio_runs_from_initial_solutions': 1,
				   'threads': multiprocessing.cpu_count(),
				   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
	mge_options = {'init_type': 'MEDOID',
				   'random_inits': 10,
				   'time_limit': 600,
				   'verbose': 2,
				   'refine': False}
	save_results = True
	dir_save = ds_name + '.' + kernel_options['name'] + '.symb.pytest/'
	irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} #
	edge_required = False #

	# print settings.
	print('parameters:')
	print('dataset name:', ds_name)
	print('mpg_options:', mpg_options)
	print('kernel_options:', kernel_options)
	print('ged_options:', ged_options)
	print('mge_options:', mge_options)
	print('save_results:', save_results)
	print('irrelevant_labels:', irrelevant_labels)
	print()

	# generate preimages.
	for fit_method in ['k-graphs', 'expert', 'random']:
		print('\n-------------------------------------')
		print('fit method:', fit_method, '\n')
		mpg_options['fit_method'] = fit_method
		generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4))
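A note on running this test: it performs three full preimage generations and writes results under MAO.Treelet.symb.pytest/. A plain __main__ guard of the kind below (not shown in this patch, so an assumption here) would let the file double as a script; under pytest, the -s flag keeps the progress printing visible.

if __name__ == '__main__':
	test_median_preimage_generator()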
@@ -21,4 +21,6 @@ from gklearn.utils.timer import Timer
from gklearn.utils.utils import get_graph_kernel_by_name
from gklearn.utils.utils import compute_gram_matrices_by_class
from gklearn.utils.utils import SpecialLabel
from gklearn.utils.utils import normalize_gram_matrix, compute_distance_matrix
from gklearn.utils.trie import Trie
from gklearn.utils.knn import knn_cv, knn_classification
@@ -522,6 +522,20 @@ class Dataset(object):
		self.__targets = [self.__targets[i] for i in idx]
		self.clean_labels()

	def copy(self):
		dataset = Dataset()
		graphs = [g.copy() for g in self.__graphs] if self.__graphs is not None else None
		target = self.__targets.copy() if self.__targets is not None else None
		node_labels = self.__node_labels.copy() if self.__node_labels is not None else None
		node_attrs = self.__node_attrs.copy() if self.__node_attrs is not None else None
		edge_labels = self.__edge_labels.copy() if self.__edge_labels is not None else None
		edge_attrs = self.__edge_attrs.copy() if self.__edge_attrs is not None else None
		dataset.load_graphs(graphs, target)
		dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs)
		# @todo: clean_labels and add other class members?
		return dataset
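A quick sketch of the copy() semantics, assuming `ds` is a loaded Dataset with node labels: graphs are copied one by one, so mutating a graph in the copy leaves the original intact, while the label-name lists are shallow-copied (equal content, distinct list objects).

ds_copy = ds.copy()
ds_copy.graphs[0].add_node('extra_node')          # ds.graphs[0] is unchanged
assert ds.node_labels == ds_copy.node_labels      # same names ...
assert ds.node_labels is not ds_copy.node_labels  # ... held in different lists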
	def __get_dataset_size(self):
		return len(self.__graphs)

@@ -721,7 +735,11 @@ def split_dataset_by_target(dataset):
		sub_graphs = [graphs[i] for i in val]
		sub_dataset = Dataset()
		sub_dataset.load_graphs(sub_graphs, [key] * len(val))
		sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs)
		node_labels = dataset.node_labels.copy() if dataset.node_labels is not None else None
		node_attrs = dataset.node_attrs.copy() if dataset.node_attrs is not None else None
		edge_labels = dataset.edge_labels.copy() if dataset.edge_labels is not None else None
		edge_attrs = dataset.edge_attrs.copy() if dataset.edge_attrs is not None else None
		sub_dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs)
		datasets.append(sub_dataset)
		# @todo: clean_labels?
	return datasets
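Usage sketch for the patched split_dataset_by_target, assuming `dataset` is a loaded multi-class Dataset: because each sub-dataset now owns copies of the label-name lists, calling remove_labels() on one class can no longer leak into its siblings.

datasets = split_dataset_by_target(dataset)
for sub_dataset in datasets:
	print(sub_dataset.targets[0], len(sub_dataset.graphs))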
@@ -494,7 +494,8 @@ def load_tud(filename):
				   'edge_labels': [], 'edge_attrs': []}
	class_label_map = None
	class_label_map_strings = []
	content_rm = open(frm).read().splitlines()
	with open(frm) as rm:
		content_rm = rm.read().splitlines()
	i = 0
	while i < len(content_rm):
		line = content_rm[i].strip()
@@ -558,16 +559,20 @@ def load_tud(filename):
	label_names = {'node_labels': [], 'node_attrs': [],
				   'edge_labels': [], 'edge_attrs': []}
	class_label_map = None
	content_gi = open(fgi).read().splitlines() # graph indicator
	content_am = open(fam).read().splitlines() # adjacency matrix
	with open(fgi) as gi:
		content_gi = gi.read().splitlines() # graph indicator
	with open(fam) as am:
		content_am = am.read().splitlines() # adjacency matrix

	# load targets.
	if 'fgl' in locals():
		content_targets = open(fgl).read().splitlines() # targets (classification)
		with open(fgl) as gl:
			content_targets = gl.read().splitlines() # targets (classification)
		targets = [float(i) for i in content_targets]
	elif 'fga' in locals():
		content_targets = open(fga).read().splitlines() # targets (regression)
		with open(fga) as ga:
			content_targets = ga.read().splitlines() # targets (regression)
		targets = [int(i) for i in content_targets]
	else:
		raise Exception('Can not find targets file. Please make sure there is a "', ds_name, '_graph_labels.txt" or "', ds_name, '_graph_attributes.txt"', 'file in your dataset folder.')
@@ -577,7 +582,8 @@ def load_tud(filename):
	# create graphs and add nodes
	data = [nx.Graph(name=str(i)) for i in range(0, len(content_targets))]
	if 'fnl' in locals():
		content_nl = open(fnl).read().splitlines() # node labels
		with open(fnl) as nl:
			content_nl = nl.read().splitlines() # node labels
	for idx, line in enumerate(content_gi):
		# transfer to int first in case of unexpected blanks
		data[int(line) - 1].add_node(idx)
@@ -605,7 +611,8 @@ def load_tud(filename):
	# add edge labels
	if 'fel' in locals():
		content_el = open(fel).read().splitlines()
		with open(fel) as el:
			content_el = el.read().splitlines()
		for idx, line in enumerate(content_el):
			labels = [l.strip() for l in line.split(',')]
			n = [int(i) - 1 for i in content_am[idx].split(',')]
@@ -621,7 +628,8 @@ def load_tud(filename):
	# add node attributes
	if 'fna' in locals():
		content_na = open(fna).read().splitlines()
		with open(fna) as na:
			content_na = na.read().splitlines()
		for idx, line in enumerate(content_na):
			attrs = [a.strip() for a in line.split(',')]
			g = int(content_gi[idx]) - 1
@@ -636,7 +644,8 @@ def load_tud(filename):
	# add edge attributes
	if 'fea' in locals():
		content_ea = open(fea).read().splitlines()
		with open(fea) as ea:
			content_ea = ea.read().splitlines()
		for idx, line in enumerate(content_ea):
			attrs = [a.strip() for a in line.split(',')]
			n = [int(i) - 1 for i in content_am[idx].split(',')]
@@ -669,7 +678,8 @@ def load_from_ds(filename, filename_targets):
	data = []
	y = []
	label_names = {'node_labels': [], 'edge_labels': [], 'node_attrs': [], 'edge_attrs': []}
	content = open(filename).read().splitlines()
	with open(filename) as fn:
		content = fn.read().splitlines()
	extension = splitext(content[0].split(' ')[0])[1][1:]
	if extension == 'ct':
		load_file_fun = load_ct
@@ -691,8 +701,9 @@ def load_from_ds(filename, filename_targets):
			g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1))
			data.append(g)
			__append_label_names(label_names, l_names)

	content_y = open(filename_targets).read().splitlines()
	with open(filename_targets) as fnt:
		content_y = fnt.read().splitlines()
	# assume entries in filename and filename_targets have the same order.
	for item in content_y:
		tmp = item.split(' ')
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon May 11 11:03:01 2020

@author: ljia
"""
import numpy as np
from sklearn.model_selection import ShuffleSplit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from gklearn.utils.utils import get_graph_kernel_by_name
# from gklearn.preimage.utils import get_same_item_indices


def sum_squares(a, b):
	"""
	Return the sum of squared differences between a and b (the SSE; divide by len(a) for the MSE).
	"""
	return np.sum([(a[i] - b[i])**2 for i in range(len(a))])


def euclid_d(x, y):
	"""
	1D Euclidean distance.
	"""
	return np.sqrt((x-y)**2)


def man_d(x, y):
	"""
	1D Manhattan distance.
	"""
	return np.abs((x-y))


def knn_regression(D_app, D_test, y_app, y_test, n_neighbors, verbose=True, text=None):
	from sklearn.neighbors import KNeighborsRegressor
	knn = KNeighborsRegressor(n_neighbors=n_neighbors, metric='precomputed')
	knn.fit(D_app, y_app)
	y_pred = knn.predict(D_app)
	y_pred_test = knn.predict(D_test.T)
	perf_app = np.sqrt(sum_squares(y_pred, y_app)/len(y_app))
	perf_test = np.sqrt(sum_squares(y_pred_test, y_test)/len(y_test))
	if (verbose):
		print("Learning error with {} train examples: {}".format(text, perf_app))
		print("Test error with {} train examples: {}".format(text, perf_test))
	return perf_app, perf_test


def knn_classification(d_app, d_test, y_app, y_test, n_neighbors, verbose=True, text=None):
	knn = KNeighborsClassifier(n_neighbors=n_neighbors, metric='precomputed')
	knn.fit(d_app, y_app)
	y_pred = knn.predict(d_app)
	y_pred_test = knn.predict(d_test.T)
	perf_app = accuracy_score(y_app, y_pred)
	perf_test = accuracy_score(y_test, y_pred_test)
	if (verbose):
		print("Learning accuracy with {} costs: {}".format(text, perf_app))
		print("Test accuracy with {} costs: {}".format(text, perf_test))
	return perf_app, perf_test
def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, test_size=0.9, verbose=True):
	'''
	Perform a k-NN classification cross-validation on the given dataset.
	'''
	# Gn = dataset.graphs
	y_all = dataset.targets

	# compute kernel distances.
	dis_mat = __compute_kernel_distances(dataset, kernel_options, trainset=trainset)

	rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
	# train_indices = [[] for _ in range(n_splits)]
	# test_indices = [[] for _ in range(n_splits)]
	# idx_targets = get_same_item_indices(y_all)
	# for key, item in idx_targets.items():
	# 	i = 0
	# 	for train_i, test_i in rs.split(item): # @todo: careful when parallel.
	# 		train_indices[i] += [item[idx] for idx in train_i]
	# 		test_indices[i] += [item[idx] for idx in test_i]
	# 		i += 1

	accuracies = []
	# for trial in range(len(train_indices)):
	# 	train_index = train_indices[trial]
	# 	test_index = test_indices[trial]
	for train_index, test_index in rs.split(y_all):
		# print(train_index, test_index)
		# G_app = [Gn[i] for i in train_index]
		# G_test = [Gn[i] for i in test_index]
		y_app = [y_all[i] for i in train_index]
		y_test = [y_all[i] for i in test_index]

		N = len(train_index)
		d_app = dis_mat.copy()
		d_app = d_app[train_index,:]
		d_app = d_app[:,train_index]
		d_test = np.zeros((N, len(test_index)))
		for i in range(N):
			for j in range(len(test_index)):
				d_test[i, j] = dis_mat[train_index[i], test_index[j]]

		accuracies.append(knn_classification(d_app, d_test, y_app, y_test, n_neighbors, verbose=verbose, text=''))

	results = {}
	results['ave_perf_train'] = np.mean([i[0] for i in accuracies], axis=0)
	results['std_perf_train'] = np.std([i[0] for i in accuracies], axis=0, ddof=1)
	results['ave_perf_test'] = np.mean([i[1] for i in accuracies], axis=0)
	results['std_perf_test'] = np.std([i[1] for i in accuracies], axis=0, ddof=1)

	return results


def __compute_kernel_distances(dataset, kernel_options, trainset=None):
	graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
							node_labels=dataset.node_labels,
							edge_labels=dataset.edge_labels,
							node_attrs=dataset.node_attrs,
							edge_attrs=dataset.edge_attrs,
							ds_infos=dataset.get_dataset_infos(keys=['directed']),
							kernel_options=kernel_options)
	gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
	dis_mat, _, _, _ = graph_kernel.compute_distance_matrix()
	if trainset is not None:
		gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm # @todo: this branch looks unfinished; the unnormalized Gram matrix is fetched but never used.
	return dis_mat
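Usage sketch for knn_cv, reusing the Treelet configuration from the test above; `dataset` is assumed to be a loaded Dataset, and the split numbers are illustrative.

import functools
import multiprocessing
from gklearn.utils.kernels import polynomialkernel

pkernel = functools.partial(polynomialkernel, d=4, c=1e+7)
kernel_options = {'name': 'Treelet',
				  'sub_kernel': pkernel,
				  'parallel': 'imap_unordered',
				  'n_jobs': multiprocessing.cpu_count(),
				  'normalize': True,
				  'verbose': 0}
results = knn_cv(dataset, kernel_options, n_neighbors=1, n_splits=10, test_size=0.9, verbose=False)
print(results['ave_perf_test'], '+/-', results['std_perf_test'])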
@@ -1,7 +1,7 @@
import networkx as nx
import numpy as np
from copy import deepcopy
from enum import Enum, auto
from enum import Enum, unique
#from itertools import product
# from tqdm import tqdm
@@ -468,7 +468,36 @@ def get_mlti_dim_edge_attrs(G, attr_names):
	return attributes

@unique
class SpecialLabel(Enum):
	"""can be used to define special labels.
	"""
	DUMMY = auto # The dummy label.
	DUMMY = 1 # The dummy label.
	# DUMMY = auto # enum.auto does not exist in Python 3.5.
def normalize_gram_matrix(gram_matrix):
	diag = gram_matrix.diagonal().copy()
	for i in range(len(gram_matrix)):
		for j in range(i, len(gram_matrix)):
			gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
			gram_matrix[j][i] = gram_matrix[i][j]
	return gram_matrix


def compute_distance_matrix(gram_matrix):
	dis_mat = np.empty((len(gram_matrix), len(gram_matrix)))
	for i in range(len(gram_matrix)):
		for j in range(i, len(gram_matrix)):
			dis = gram_matrix[i, i] + gram_matrix[j, j] - 2 * gram_matrix[i, j]
			if dis < 0:
				if dis > -1e-10:
					dis = 0
				else:
					raise ValueError('The distance is negative.')
			dis_mat[i, j] = np.sqrt(dis)
			dis_mat[j, i] = dis_mat[i, j]
	dis_max = np.max(np.max(dis_mat))
	dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
	dis_mean = np.mean(np.mean(dis_mat))
	return dis_mat, dis_max, dis_min, dis_mean
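A tiny numeric check of the two helpers above: normalization divides K_ij by sqrt(K_ii * K_jj), so the diagonal becomes all ones, and the induced kernel distance is d(i, j) = sqrt(K_ii + K_jj - 2 K_ij). Note that normalize_gram_matrix modifies its argument in place, hence the copy.

import numpy as np

K = np.array([[4.0, 2.0],
			  [2.0, 9.0]])
Kn = normalize_gram_matrix(K.copy())  # Kn[0, 1] == 2 / sqrt(4 * 9) == 1/3
dis_mat, dis_max, dis_min, dis_mean = compute_distance_matrix(Kn)
assert np.allclose(np.diag(Kn), 1.0)
assert np.isclose(dis_mat[0, 1], np.sqrt(2.0 - 2.0 * Kn[0, 1]))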
@@ -1,10 +1,11 @@
numpy>=1.15.2
numpy>=1.16.2
scipy>=1.1.0
matplotlib>=3.0.0
networkx>=2.2
scikit-learn>=0.20.0
tabulate>=0.8.2
tqdm>=4.26.0
# cvxpy # for preimage.
# cvxopt # for preimage.
# mosek # for preimage.
cvxpy>=1.0.31 # for preimage. Does not work for "pip install graphkit-learn".
# -e https://files.pythonhosted.org/packages/11/d0/d900870dc2d02ea74961b90c353666c6528a33ea61a10aa59a0d5574ae59/cvxpy-1.0.31.tar.gz # for preimage.
cvxopt>=1.2.5 # for preimage.
mosek>=9.2.5; python_version >= '3.6' # for preimage.
@@ -0,0 +1,11 @@
numpy>=1.16.2
scipy>=1.1.0
matplotlib>=3.0.0
networkx>=2.2
scikit-learn>=0.20.0
tabulate>=0.8.2
tqdm>=4.26.0
# cvxpy>=1.0.31 # for preimage. Does not work for "pip install graphkit-learn".
# -e https://files.pythonhosted.org/packages/11/d0/d900870dc2d02ea74961b90c353666c6528a33ea61a10aa59a0d5574ae59/cvxpy-1.0.31.tar.gz # for preimage.
cvxopt>=1.2.5 # for preimage.
mosek>=9.2.5; python_version >= '3.6' # for preimage.
@@ -3,15 +3,15 @@ import setuptools
with open("README.md", "r") as fh:
	long_description = fh.read()

with open('requirements.txt') as fp:
with open('requirements_pypi.txt') as fp:
	install_requires = fp.read()

setuptools.setup(
	name="graphkit-learn",
	version="0.2b1",
	version="0.2b2",
	author="Linlin Jia",
	author_email="linlin.jia@insa-rouen.fr",
	description="A Python library for graph kernels based on linear patterns",
	description="A Python library for graph kernels, graph edit distances, and graph pre-images",
	long_description=long_description,
	long_description_content_type="text/markdown",
	url="https://github.com/jajupmochi/graphkit-learn",