|
|
@@ -0,0 +1,389 @@ |
|
|
|
#!/usr/bin/env python3 |
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
""" |
|
|
|
Created on Fri May 29 14:29:52 2020 |
|
|
|
|
|
|
|
@author: ljia |
|
|
|
""" |
|
|
|
|
|
|
|
import numpy as np |
|
|
|
import time |
|
|
|
import sys |
|
|
|
from tqdm import tqdm |
|
|
|
import multiprocessing |
|
|
|
import networkx as nx |
|
|
|
from multiprocessing import Pool |
|
|
|
from functools import partial |
|
|
|
from gklearn.preimage import PreimageGenerator |
|
|
|
from gklearn.preimage.utils import compute_k_dis |
|
|
|
from gklearn.utils import Timer |
|
|
|
from gklearn.utils.utils import get_graph_kernel_by_name |
|
|
|
# from gklearn.utils.dataset import Dataset |
|
|
|
|
|
|
|
|
|
|
|
class RandomPreimageGenerator(PreimageGenerator):
    """Random-search pre-image generator for graph kernels.

    Searches for a graph whose embedding in kernel space is as close as
    possible to a linear combination (weights ``alphas``) of the dataset
    graphs, by randomly inserting/deleting edges in copies of the nearest
    dataset graphs and keeping modifications that reduce the kernel-space
    distance (see run()).
    """

    def __init__(self, dataset=None):
        """Initialize the generator with its default options.

        Parameters
        ----------
        dataset : optional
            Dataset forwarded to the PreimageGenerator base class.
        """
        PreimageGenerator.__init__(self, dataset=dataset)
        # arguments to set.
        self.__k = 5  # number of nearest neighbors of phi in D_N.
        self.__r_max = 10  # maximum number of consecutive non-improving iterations (r is reset on each update in run()).
        self.__l = 500  # number of graphs generated for each graph in D_k U {g_i_hat}.
        self.__alphas = None  # weights of linear combinations of points in kernel space.
        self.__parallel = True  # whether to run the l trials in a process pool.
        self.__n_jobs = multiprocessing.cpu_count()  # worker count for the parallel mode.
        self.__time_limit_in_sec = 0  # passed to gklearn.utils.Timer; presumably 0 disables the limit — confirm against Timer.
        self.__max_itrs = 20  # maximum total iterations of the main loop; negative disables the criterion.
        # values to compute.
        self.__runtime_generate_preimage = None
        self.__runtime_total = None
        self.__preimage = None
        self.__best_from_dataset = None
        self.__k_dis_preimage = None
        self.__k_dis_dataset = None
        self.__itrs = 0
        self.__converged = False  # @todo
        self.__num_updates = 0
        # values that can be set or to be computed.
        self.__gram_matrix_unnorm = None
        self.__runtime_precompute_gm = None
|
|
|
|
|
|
|
|
|
|
|
def set_options(self, **kwargs): |
|
|
|
self._kernel_options = kwargs.get('kernel_options', {}) |
|
|
|
self._graph_kernel = kwargs.get('graph_kernel', None) |
|
|
|
self._verbose = kwargs.get('verbose', 2) |
|
|
|
self.__k = kwargs.get('k', 5) |
|
|
|
self.__r_max = kwargs.get('r_max', 10) |
|
|
|
self.__l = kwargs.get('l', 500) |
|
|
|
self.__alphas = kwargs.get('alphas', None) |
|
|
|
self.__parallel = kwargs.get('parallel', True) |
|
|
|
self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) |
|
|
|
self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) |
|
|
|
self.__max_itrs = kwargs.get('max_itrs', 20) |
|
|
|
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) |
|
|
|
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) |
|
|
|
|
|
|
|
|
|
|
|
def run(self): |
|
|
|
self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'], |
|
|
|
node_labels=self._dataset.node_labels, |
|
|
|
edge_labels=self._dataset.edge_labels, |
|
|
|
node_attrs=self._dataset.node_attrs, |
|
|
|
edge_attrs=self._dataset.edge_attrs, |
|
|
|
ds_infos=self._dataset.get_dataset_infos(keys=['directed']), |
|
|
|
kernel_options=self._kernel_options) |
|
|
|
|
|
|
|
# record start time. |
|
|
|
start = time.time() |
|
|
|
|
|
|
|
# 1. precompute gram matrix. |
|
|
|
if self.__gram_matrix_unnorm is None: |
|
|
|
gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) |
|
|
|
self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm |
|
|
|
end_precompute_gm = time.time() |
|
|
|
self.__runtime_precompute_gm = end_precompute_gm - start |
|
|
|
else: |
|
|
|
if self.__runtime_precompute_gm is None: |
|
|
|
raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') |
|
|
|
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm |
|
|
|
if self._kernel_options['normalize']: |
|
|
|
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) |
|
|
|
else: |
|
|
|
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm) |
|
|
|
end_precompute_gm = time.time() |
|
|
|
start -= self.__runtime_precompute_gm |
|
|
|
|
|
|
|
# 2. compute k nearest neighbors of phi in D_N. |
|
|
|
if self._verbose >= 2: |
|
|
|
print('\nstart computing k nearest neighbors of phi in D_N...\n') |
|
|
|
D_N = self._dataset.graphs |
|
|
|
if self.__alphas is None: |
|
|
|
self.__alphas = [1 / len(D_N)] * len(D_N) |
|
|
|
k_dis_list = [] # distance between g_star and each graph. |
|
|
|
term3 = 0 |
|
|
|
for i1, a1 in enumerate(self.__alphas): |
|
|
|
for i2, a2 in enumerate(self.__alphas): |
|
|
|
term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2] |
|
|
|
for idx in range(len(D_N)): |
|
|
|
k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True)) |
|
|
|
|
|
|
|
# sort. |
|
|
|
sort_idx = np.argsort(k_dis_list) |
|
|
|
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]] # the k shortest distances. |
|
|
|
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) |
|
|
|
g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N |
|
|
|
self.__best_from_dataset = g0hat_list[0] # get the first best graph if there are muitlple. |
|
|
|
self.__k_dis_dataset = dis_gs[0] |
|
|
|
|
|
|
|
if self.__k_dis_dataset == 0: # get the exact pre-image. |
|
|
|
end_generate_preimage = time.time() |
|
|
|
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm |
|
|
|
self.__runtime_total = end_generate_preimage - start |
|
|
|
self.__preimage = self.__best_from_dataset.copy() |
|
|
|
self.__k_dis_preimage = self.__k_dis_dataset |
|
|
|
if self._verbose: |
|
|
|
print() |
|
|
|
print('=============================================================================') |
|
|
|
print('The exact pre-image is found from the input dataset.') |
|
|
|
print('-----------------------------------------------------------------------------') |
|
|
|
print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset) |
|
|
|
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) |
|
|
|
print('Time to generate pre-images:', self.__runtime_generate_preimage) |
|
|
|
print('Total time:', self.__runtime_total) |
|
|
|
print('=============================================================================') |
|
|
|
print() |
|
|
|
return |
|
|
|
|
|
|
|
dhat = dis_gs[0] # the nearest distance |
|
|
|
Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]] # the k nearest neighbors |
|
|
|
Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk] |
|
|
|
|
|
|
|
# 3. start iterations. |
|
|
|
if self._verbose >= 2: |
|
|
|
print('starting iterations...') |
|
|
|
gihat_list = [] |
|
|
|
dihat_list = [] |
|
|
|
r = 0 |
|
|
|
dis_of_each_itr = [dhat] |
|
|
|
if self.__parallel: |
|
|
|
self._kernel_options['parallel'] = None |
|
|
|
self.__itrs = 0 |
|
|
|
self.__num_updates = 0 |
|
|
|
timer = Timer(self.__time_limit_in_sec) |
|
|
|
while not self.__termination_criterion_met(timer, self.__itrs, r): |
|
|
|
print('\n- r =', r) |
|
|
|
found = False |
|
|
|
dis_bests = dis_gs + dihat_list |
|
|
|
|
|
|
|
# compute numbers of edges to be inserted/deleted. |
|
|
|
# @todo what if the log is negetive? how to choose alpha (scalar)? |
|
|
|
fdgs_list = np.array(dis_bests) |
|
|
|
if np.min(fdgs_list) < 1: # in case the log is negetive. |
|
|
|
fdgs_list /= np.min(fdgs_list) |
|
|
|
fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))] |
|
|
|
if np.min(fdgs_list) < 1: # in case the log is smaller than 1. |
|
|
|
fdgs_list = np.array(fdgs_list) + 1 |
|
|
|
# expand the number of modifications to increase the possiblity. |
|
|
|
nb_vpairs_list = [nx.number_of_nodes(g) * (nx.number_of_nodes(g) - 1) for g in (Gs_nearest + gihat_list)] |
|
|
|
nb_vpairs_min = np.min(nb_vpairs_list) |
|
|
|
idx_fdgs_max = np.argmax(fdgs_list) |
|
|
|
fdgs_max_old = fdgs_list[idx_fdgs_max] |
|
|
|
fdgs_max = fdgs_max_old |
|
|
|
nb_modif = 1 |
|
|
|
for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)): |
|
|
|
nb_modif *= nb / (fdgs_max - idx) |
|
|
|
while fdgs_max < nb_vpairs_min and nb_modif < self.__l: |
|
|
|
fdgs_max += 1 |
|
|
|
nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max |
|
|
|
nb_increase = int(fdgs_max - fdgs_max_old) |
|
|
|
if nb_increase > 0: |
|
|
|
fdgs_list += 1 |
|
|
|
|
|
|
|
|
|
|
|
for ig, gs in enumerate(Gs_nearest + gihat_list): |
|
|
|
if self._verbose >= 2: |
|
|
|
print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) |
|
|
|
gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) |
|
|
|
|
|
|
|
if found: |
|
|
|
r = 0 |
|
|
|
gihat_list = [gnew] |
|
|
|
dihat_list = [dhat] |
|
|
|
else: |
|
|
|
r += 1 |
|
|
|
|
|
|
|
dis_of_each_itr.append(dhat) |
|
|
|
self.__itrs += 1 |
|
|
|
if self._verbose >= 2: |
|
|
|
print('Total number of iterations is', self.__itrs, '.') |
|
|
|
print('The preimage is updated', self.__num_updates, 'times.') |
|
|
|
print('The shortest distances for previous iterations are', dis_of_each_itr, '.') |
|
|
|
|
|
|
|
|
|
|
|
# get results and print. |
|
|
|
end_generate_preimage = time.time() |
|
|
|
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm |
|
|
|
self.__runtime_total = end_generate_preimage - start |
|
|
|
self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) |
|
|
|
self.__k_dis_preimage = dhat |
|
|
|
if self._verbose: |
|
|
|
print() |
|
|
|
print('=============================================================================') |
|
|
|
print('Finished generation of preimages.') |
|
|
|
print('-----------------------------------------------------------------------------') |
|
|
|
print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset) |
|
|
|
print('Distance in kernel space for the preimage:', self.__k_dis_preimage) |
|
|
|
print('Total number of iterations for optimizing:', self.__itrs) |
|
|
|
print('Total number of updating preimage:', self.__num_updates) |
|
|
|
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) |
|
|
|
print('Time to generate pre-images:', self.__runtime_generate_preimage) |
|
|
|
print('Total time:', self.__runtime_total) |
|
|
|
print('=============================================================================') |
|
|
|
print() |
|
|
|
|
|
|
|
|
|
|
|
def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3): |
|
|
|
if self.__parallel: |
|
|
|
gnew, dhat, found = self.__generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3) |
|
|
|
else: |
|
|
|
gnew, dhat, found = self.__generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3) |
|
|
|
return gnew, dhat, found |
|
|
|
|
|
|
|
|
|
|
|
def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3): |
|
|
|
gnew = None |
|
|
|
updated = False |
|
|
|
for trial in range(0, self.__l): |
|
|
|
if self._verbose >= 2: |
|
|
|
print('---', trial + 1, 'trial out of', self.__l) |
|
|
|
|
|
|
|
gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) |
|
|
|
|
|
|
|
# get the better graph preimage. |
|
|
|
if dnew <= dhat: # @todo: the new distance is smaller or also equal? |
|
|
|
if dhat - dnew > 1e-6: |
|
|
|
if self._verbose >= 2: |
|
|
|
print('trial =', str(trial)) |
|
|
|
print('\nI am smaller!') |
|
|
|
print('index (as in D_k U {gihat} =', str(ig)) |
|
|
|
print('distance:', dhat, '->', dnew) |
|
|
|
updated = True |
|
|
|
else: |
|
|
|
if self._verbose >= 2: |
|
|
|
print('I am equal!') |
|
|
|
dhat = dnew |
|
|
|
gnew = gtemp.copy() |
|
|
|
found = True # found better or equally good graph. |
|
|
|
|
|
|
|
if updated: |
|
|
|
self.__num_updates += 1 |
|
|
|
|
|
|
|
return gnew, dhat, found |
|
|
|
|
|
|
|
|
|
|
|
def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3): |
|
|
|
gnew = None |
|
|
|
len_itr = self.__l |
|
|
|
gnew_list = [None] * len_itr |
|
|
|
dnew_list = [None] * len_itr |
|
|
|
itr = range(0, len_itr) |
|
|
|
n_jobs = multiprocessing.cpu_count() |
|
|
|
if len_itr < 100 * n_jobs: |
|
|
|
chunksize = int(len_itr / n_jobs) + 1 |
|
|
|
else: |
|
|
|
chunksize = 100 |
|
|
|
do_fun = partial(self._generate_graph_parallel, g_init, fdgs, term3) |
|
|
|
pool = Pool(processes=n_jobs) |
|
|
|
if self._verbose >= 2: |
|
|
|
iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize), |
|
|
|
desc='Generating l graphs', file=sys.stdout) |
|
|
|
else: |
|
|
|
iterator = pool.imap_unordered(do_fun, itr, chunksize) |
|
|
|
for idx, gnew, dnew in iterator: |
|
|
|
gnew_list[idx] = gnew |
|
|
|
dnew_list[idx] = dnew |
|
|
|
pool.close() |
|
|
|
pool.join() |
|
|
|
|
|
|
|
# check if get the better graph preimage. |
|
|
|
idx_min = np.argmin(dnew_list) |
|
|
|
dnew = dnew_list[idx_min] |
|
|
|
if dnew <= dhat: # @todo: the new distance is smaller or also equal? |
|
|
|
if dhat - dnew > 1e-6: # @todo: use a proportion and watch out for 0. |
|
|
|
if self._verbose >= 2: |
|
|
|
print('I am smaller!') |
|
|
|
print('index (as in D_k U {gihat}) =', str(ig)) |
|
|
|
print('distance:', dhat, '->', dnew, '\n') |
|
|
|
self.__num_updates += 1 |
|
|
|
else: |
|
|
|
if self._verbose >= 2: |
|
|
|
print('I am equal!') |
|
|
|
dhat = dnew |
|
|
|
gnew = gnew_list[idx_min] |
|
|
|
found = True # found better graph. |
|
|
|
|
|
|
|
return gnew, dhat, found |
|
|
|
|
|
|
|
|
|
|
|
def _generate_graph_parallel(self, g_init, fdgs, term3, itr): |
|
|
|
trial = itr |
|
|
|
gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) |
|
|
|
return trial, gtemp, dnew |
|
|
|
|
|
|
|
|
|
|
|
    def __do_trial(self, g_init, fdgs, term3, trial):
        """Perform one random-modification trial.

        Copies g_init, randomly toggles (adds/removes) up to ``fdgs``
        edges, and computes the kernel-space distance of the modified
        graph to the linear combination defined by self.__alphas.

        Parameters
        ----------
        g_init : graph to modify (assumed to have integer node labels
            0..n-1 — run() relabels neighbors before trials; confirm for
            other callers).
        fdgs : int, number of vertex pairs to toggle.
        term3 : float, the constant combination self-term of the distance.
        trial : int, trial index; also perturbs the RNG seed.

        Returns
        -------
        (gtemp, dnew) : the modified graph and its distance.
        """
        # add and delete edges.
        gtemp = g_init.copy()
        # per-trial seed so concurrent/successive trials differ; kept in
        # RandomState's legal 32-bit range.
        seed = (trial + int(time.time())) % (2 ** 32 - 1)
        rdm_state = np.random.RandomState(seed=seed)
        # which edges to change.
        # @todo: should we use just half of the adjacency matrix for undirected graphs?
        # number of ordered vertex pairs excluding self-pairs.
        nb_vpairs = nx.number_of_nodes(g_init) * (nx.number_of_nodes(g_init) - 1)
        # @todo: what if fdgs is bigger than nb_vpairs?
        idx_change = rdm_state.randint(0, high=nb_vpairs, size=(fdgs if
            fdgs < nb_vpairs else nb_vpairs))
        # print(idx_change)
        for item in idx_change:
            # decode the flat pair index into (node1, node2).
            node1 = int(item / (nx.number_of_nodes(g_init) - 1))
            node2 = (item - node1 * (nx.number_of_nodes(g_init) - 1))
            if node2 >= node1:  # skip the self pair.
                node2 += 1
            # @todo: is the randomness correct?
            # toggle the edge: add if absent, remove if present.
            if not gtemp.has_edge(node1, node2):
                gtemp.add_edge(node1, node2)
            else:
                gtemp.remove_edge(node1, node2)

        # compute new distances: kernels between gtemp and every dataset
        # graph, plus the self-kernel of gtemp.
        kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options)
        kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
        if self._kernel_options['normalize']:
            kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))]  # normalize
            kernel_gtmp = 1
        # @todo: not correct kernel value
        # build an extended Gram matrix with gtemp prepended as row/column 0.
        gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
        gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
        dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)

        return gtemp, dnew
|
|
|
|
|
|
|
|
|
|
|
def get_results(self): |
|
|
|
results = {} |
|
|
|
results['runtime_precompute_gm'] = self.__runtime_precompute_gm |
|
|
|
results['runtime_generate_preimage'] = self.__runtime_generate_preimage |
|
|
|
results['runtime_total'] = self.__runtime_total |
|
|
|
results['k_dis_dataset'] = self.__k_dis_dataset |
|
|
|
results['k_dis_preimage'] = self.__k_dis_preimage |
|
|
|
results['itrs'] = self.__itrs |
|
|
|
results['num_updates'] = self.__num_updates |
|
|
|
return results |
|
|
|
|
|
|
|
|
|
|
|
def __termination_criterion_met(self, timer, itr, r): |
|
|
|
if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): |
|
|
|
# if self.__state == AlgorithmState.TERMINATED: |
|
|
|
# self.__state = AlgorithmState.INITIALIZED |
|
|
|
return True |
|
|
|
return (r >= self.__r_max if self.__r_max >= 0 else False) |
|
|
|
# return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) |
|
|
|
|
|
|
|
|
|
|
|
    @property
    def preimage(self):
        """The generated pre-image graph (None until run() has finished)."""
        return self.__preimage
|
|
|
|
|
|
|
|
|
|
|
    @property
    def best_from_dataset(self):
        """The dataset graph nearest to the combination point (set by run())."""
        return self.__best_from_dataset
|
|
|
|
|
|
|
|
|
|
|
    @property
    def gram_matrix_unnorm(self):
        """The unnormalized Gram matrix of the dataset graphs.

        Settable, so a pre-computed matrix can be supplied to skip the
        Gram computation in run() (runtime_precompute_gm must then be
        given too).
        """
        return self.__gram_matrix_unnorm

    @gram_matrix_unnorm.setter
    def gram_matrix_unnorm(self, value):
        # accept an externally computed unnormalized Gram matrix.
        self.__gram_matrix_unnorm = value