#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri May 29 14:29:52 2020

@author: ljia
"""
import numpy as np
import time
import random
import sys
import tqdm
import multiprocessing
import networkx as nx
from gklearn.preimage import PreimageGenerator
from gklearn.preimage.utils import compute_k_dis
from gklearn.utils import Timer
from gklearn.utils.utils import get_graph_kernel_by_name
# from gklearn.utils.dataset import Dataset


class RandomPreimageGenerator(PreimageGenerator):
	"""Generate a graph pre-image by random edge perturbation.

	Starting from the ``k`` graphs of the dataset that are closest (in kernel
	space) to the weighted combination phi defined by ``alphas``, the
	generator repeatedly flips randomly chosen (non-)edges of the current
	candidates and keeps any variant whose kernel distance to phi does not
	increase, stopping after ``r_max`` consecutive iterations without
	improvement.
	"""

	def __init__(self, dataset=None):
		PreimageGenerator.__init__(self, dataset=dataset)
		# arguments to set.
		self.__k = 5  # number of nearest neighbors of phi in D_N.
		self.__r_max = 10  # maximum number of consecutive non-improving iterations.
		self.__l = 500  # number of trial graphs generated for each graph in D_k U {g_i_hat}.
		self.__alphas = None  # weights of the linear combination of points in kernel space.
		self.__parallel = True
		self.__n_jobs = multiprocessing.cpu_count()
		self.__time_limit_in_sec = 0  # @todo
		self.__max_itrs = 100  # @todo
		# Fix: this attribute was read by __termination_criterion_met() but
		# never initialized, causing an AttributeError whenever that method
		# was called. 0 or a negative value disables the criterion branch.
		self.__max_itrs_without_update = 3
		# values to compute.
		self.__runtime_generate_preimage = None
		self.__runtime_total = None
		self.__preimage = None
		self.__best_from_dataset = None
		self.__k_dis_preimage = None
		self.__k_dis_dataset = None
		self.__itrs = 0
		self.__converged = False  # @todo
		self.__num_updates = 0
		# values that can be set or to be computed.
		self.__gram_matrix_unnorm = None
		self.__runtime_precompute_gm = None

	def set_options(self, **kwargs):
		"""Set algorithm options from keyword arguments.

		Every option falls back to the same default used in ``__init__``;
		unknown keywords are ignored.
		"""
		self._kernel_options = kwargs.get('kernel_options', {})
		self._graph_kernel = kwargs.get('graph_kernel', None)
		self._verbose = kwargs.get('verbose', 2)
		self.__k = kwargs.get('k', 5)
		self.__r_max = kwargs.get('r_max', 10)
		self.__l = kwargs.get('l', 500)
		self.__alphas = kwargs.get('alphas', None)
		self.__parallel = kwargs.get('parallel', True)
		self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
		self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
		self.__max_itrs = kwargs.get('max_itrs', 100)
		# new, backward-compatible option backing __termination_criterion_met().
		self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
		self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
		self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)

	def run(self):
		"""Run the random pre-image generation.

		Results (pre-image, distances, runtimes, counters) are stored on the
		instance and can be retrieved via :meth:`get_results` and the
		properties. Raises ``Exception`` if a pre-computed Gram matrix is
		given without its ``runtime_precompute_gm``.
		"""
		self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'],
					  node_labels=self._dataset.node_labels,
					  edge_labels=self._dataset.edge_labels,
					  node_attrs=self._dataset.node_attrs,
					  edge_attrs=self._dataset.edge_attrs,
					  ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
					  kernel_options=self._kernel_options)

		# record start time.
		start = time.time()

		# 1. precompute gram matrix.
		if self.__gram_matrix_unnorm is None:
			gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
			self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
			end_precompute_gm = time.time()
			self.__runtime_precompute_gm = end_precompute_gm - start
		else:
			if self.__runtime_precompute_gm is None:
				raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
			self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
			if self._kernel_options['normalize']:
				self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
			else:
				self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
			end_precompute_gm = time.time()
			# shift the clock back so the pre-computation time is counted in the total.
			start -= self.__runtime_precompute_gm

		# 2. compute k nearest neighbors of phi in D_N.
		if self._verbose >= 2:
			print('\nstart computing k nearest neighbors of phi in D_N...\n')
		D_N = self._dataset.graphs
		if self.__alphas is None:
			# default: uniform weights, i.e. phi is the mean of the dataset in kernel space.
			self.__alphas = [1 / len(D_N)] * len(D_N)
		k_dis_list = []  # distance between g_star and each graph.
		# term3 = sum_{i,j} alpha_i * alpha_j * K(g_i, g_j), shared by all distances below.
		term3 = 0
		for i1, a1 in enumerate(self.__alphas):
			for i2, a2 in enumerate(self.__alphas):
				term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2]
		for idx in range(len(D_N)):
			k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True))

		# sort.
		sort_idx = np.argsort(k_dis_list)
		dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]]  # the k shortest distances.
		nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
		g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]]  # the nearest neighbors of phi in D_N.
		self.__best_from_dataset = g0hat_list[0]  # get the first best graph if there are multiple ties.
		self.__k_dis_dataset = dis_gs[0]

		if self.__k_dis_dataset == 0:  # get the exact pre-image.
			end_generate_preimage = time.time()
			self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
			self.__runtime_total = end_generate_preimage - start
			self.__preimage = self.__best_from_dataset.copy()
			self.__k_dis_preimage = self.__k_dis_dataset
			if self._verbose:
				print()
				print('=============================================================================')
				print('The exact pre-image is found from the input dataset.')
				print('-----------------------------------------------------------------------------')
				print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset)
				print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
				print('Time to generate pre-images:', self.__runtime_generate_preimage)
				print('Total time:', self.__runtime_total)
				print('=============================================================================')
				print()
			return

		dhat = dis_gs[0]  # the nearest distance.
		Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]]  # the k nearest neighbors.
		# relabel nodes to consecutive integers so the vertex-pair indexing below is valid.
		Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk]  # [g.copy() for g in Gk]

		# 3. start iterations.
		if self._verbose >= 2:
			print('starting iterations...')
		gihat_list = []
		dihat_list = []
		r = 0
		dis_of_each_itr = [dhat]
		while r < self.__r_max:
			# fix: guard progress output by verbosity, consistent with all other prints.
			if self._verbose >= 2:
				print('\n- r =', r)
			found = False
			dis_bests = dis_gs + dihat_list

			# compute numbers of edges to be changed per candidate (roughly log of its distance).
			# @todo what if the log is negetive? how to choose alpha (scalar)?
			fdgs_list = np.array(dis_bests)
			if np.min(fdgs_list) < 1:
				# rescale so the smallest distance maps to at least 1 before taking logs.
				fdgs_list /= np.min(dis_bests)
			fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
			if np.min(fdgs_list) < 1:
				fdgs_list = np.array(fdgs_list) + 1

			for ig, gs in enumerate(Gs_nearest + gihat_list):
				if self._verbose >= 2:
					print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list))
				for trail in range(0, self.__l):
					if self._verbose >= 2:
						print('---', trail + 1, 'trail out of', self.__l)

					# add and delete edges.
					gtemp = gs.copy()
					np.random.seed()  # @todo: may not work for possible parallel.
					# which edges to change: sample ordered vertex pairs (no self-loops).
					# @todo: should we use just half of the adjacency matrix for undirected graphs?
					nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
					# @todo: what if fdgs is bigger than nb_vpairs?
					idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if fdgs_list[ig] < nb_vpairs else nb_vpairs)
					for item in idx_change:
						# decode the flat pair index into (node1, node2).
						node1 = int(item / (nx.number_of_nodes(gs) - 1))
						node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
						if node2 >= node1:  # skip the self pair.
							node2 += 1
						# @todo: is the randomness correct?
						# toggle the edge: add it if absent, remove it if present.
						if not gtemp.has_edge(node1, node2):
							gtemp.add_edge(node1, node2)
						else:
							gtemp.remove_edge(node1, node2)

					# compute new distances.
					kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, D_N, **self._kernel_options)
					kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
					kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))]  # normalize
					# @todo: not correct kernel value
					# build the (1 + |D_N|) x (1 + |D_N|) Gram matrix with gtemp prepended at index 0.
					gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
					gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
					dnew = compute_k_dis(0, range(1, 1 + len(D_N)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)

					# get the better graph preimage.
					if dnew <= dhat:  # @todo: the new distance is smaller or also equal?
						if dnew < dhat:
							if self._verbose >= 2:
								print('trail =', str(trail))
								print('\nI am smaller!')
								print('index (as in D_k U {gihat} =', str(ig))
								print('distance:', dhat, '->', dnew)
							self.__num_updates += 1
						elif dnew == dhat:
							if self._verbose >= 2:
								print('I am equal!')
						dhat = dnew
						gnew = gtemp.copy()
						found = True  # found better graph.

			if found:
				# restart the patience counter and continue from the improved graph only.
				r = 0
				gihat_list = [gnew]
				dihat_list = [dhat]
			else:
				r += 1

			dis_of_each_itr.append(dhat)
			self.__itrs += 1
			if self._verbose >= 2:
				print('Total number of iterations is', self.__itrs)
				print('The preimage is updated', self.__num_updates, 'times.')
				print('The shortest distances for previous iterations are', dis_of_each_itr)

		# get results and print.
		end_generate_preimage = time.time()
		self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
		self.__runtime_total = end_generate_preimage - start
		self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
		self.__k_dis_preimage = dhat
		if self._verbose:
			print()
			print('=============================================================================')
			print('Finished generalization of preimages.')
			print('-----------------------------------------------------------------------------')
			print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset)
			print('Distance in kernel space for the preimage:', self.__k_dis_preimage)
			print('Total number of iterations for optimizing:', self.__itrs)
			print('Total number of updating preimage:', self.__num_updates)
			print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
			print('Time to generate pre-images:', self.__runtime_generate_preimage)
			print('Total time:', self.__runtime_total)
			print('=============================================================================')
			print()

	def get_results(self):
		"""Return a dict of runtimes, distances and counters from the last run."""
		results = {}
		results['runtime_precompute_gm'] = self.__runtime_precompute_gm
		results['runtime_generate_preimage'] = self.__runtime_generate_preimage
		results['runtime_total'] = self.__runtime_total
		results['k_dis_dataset'] = self.__k_dis_dataset
		results['k_dis_preimage'] = self.__k_dis_preimage
		results['itrs'] = self.__itrs
		results['num_updates'] = self.__num_updates
		return results

	def __termination_criterion_met(self, converged, timer, itr, itrs_without_update):
		"""Return True when optimization should stop.

		Stops on an expired timer, on reaching ``max_itrs`` (when
		non-negative), on convergence, or on too many iterations without an
		update (when ``max_itrs_without_update`` is non-negative).
		"""
		if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
			return True
		return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)

	@property
	def preimage(self):
		# the generated pre-image graph (None before run()).
		return self.__preimage

	@property
	def best_from_dataset(self):
		# the dataset graph nearest to phi in kernel space.
		return self.__best_from_dataset

	@property
	def gram_matrix_unnorm(self):
		# the unnormalized Gram matrix of the dataset.
		return self.__gram_matrix_unnorm

	@gram_matrix_unnorm.setter
	def gram_matrix_unnorm(self, value):
		self.__gram_matrix_unnorm = value