|
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Wed Mar 6 16:03:11 2019
-
- pre-image
- @author: ljia
- """
-
- import sys
- import numpy as np
- import random
- from tqdm import tqdm
- import networkx as nx
- import matplotlib.pyplot as plt
-
- from gklearn.preimage.utils import compute_kernel, dis_gstar
-
-
- def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):
- Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
-
- # compute k nearest neighbors of phi in DN.
- dis_list = [] # distance between g_star and each graph.
- term3 = 0
- for i1, a1 in enumerate(alpha):
- for i2, a2 in enumerate(alpha):
- term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
- for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
- dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
- dis_list.append(dtemp)
- # print(np.max(dis_list))
- # print(np.min(dis_list))
- # print(np.min([item for item in dis_list if item != 0]))
- # print(np.mean(dis_list))
-
- # sort
- sort_idx = np.argsort(dis_list)
- dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
- nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
- g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
- if dis_gs[0] == 0: # the exact pre-image.
- print('The exact pre-image is found from the input dataset.')
- return 0, g0hat_list[0], 0
- dhat = dis_gs[0] # the nearest distance
- # ghat_list = [g.copy() for g in g0hat_list]
- # for g in ghat_list:
- # draw_Letter_graph(g)
- # nx.draw_networkx(g)
- # plt.show()
- # print(g.nodes(data=True))
- # print(g.edges(data=True))
- Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
- # for gi in Gk:
- ## nx.draw_networkx(gi)
- ## plt.show()
- # draw_Letter_graph(g)
- # print(gi.nodes(data=True))
- # print(gi.edges(data=True))
- Gs_nearest = [g.copy() for g in Gk]
- gihat_list = []
- dihat_list = []
-
- # i = 1
- r = 0
- # sod_list = [dhat]
- # found = False
- dis_of_each_itr = [dhat]
- nb_updated = 0
- g_best = []
- while r < r_max:
- print('\nr =', r)
- print('itr for gk =', nb_updated, '\n')
- found = False
- dis_bests = dis_gs + dihat_list
- # @todo what if the log is negetive? how to choose alpha (scalar)?
- fdgs_list = np.array(dis_bests)
- if np.min(fdgs_list) < 1:
- fdgs_list /= np.min(dis_bests)
- fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
- if np.min(fdgs_list) < 1:
- fdgs_list = np.array(fdgs_list) + 1
-
- for ig, gs in enumerate(Gs_nearest + gihat_list):
- # nx.draw_networkx(gs)
- # plt.show()
- for trail in range(0, l):
- # for trail in tqdm(range(0, l), desc='l loops', file=sys.stdout):
- # add and delete edges.
- gtemp = gs.copy()
- np.random.seed()
- # which edges to change.
- # @todo: should we use just half of the adjacency matrix for undirected graphs?
- nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
- # @todo: what if fdgs is bigger than nb_vpairs?
- idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if
- fdgs_list[ig] < nb_vpairs else nb_vpairs)
- # idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
- # (nx.number_of_nodes(gs) - 1), fdgs)
- for item in idx_change:
- node1 = int(item / (nx.number_of_nodes(gs) - 1))
- node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
- if node2 >= node1: # skip the self pair.
- node2 += 1
- # @todo: is the randomness correct?
- if not gtemp.has_edge(node1, node2):
- gtemp.add_edge(node1, node2)
- # nx.draw_networkx(gs)
- # plt.show()
- # nx.draw_networkx(gtemp)
- # plt.show()
- else:
- gtemp.remove_edge(node1, node2)
- # nx.draw_networkx(gs)
- # plt.show()
- # nx.draw_networkx(gtemp)
- # plt.show()
- # nx.draw_networkx(gtemp)
- # plt.show()
-
- # compute distance between \psi and the new generated graph.
- # knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
- # p_quit=lmbda, n_iteration=20, remove_totters=False,
- # n_jobs=multiprocessing.cpu_count(), verbose=False)
- knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
- dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew,
- withterm3=False)
- if dnew <= dhat: # @todo: the new distance is smaller or also equal?
- if dnew < dhat:
- print('\nI am smaller!')
- print('ig =', str(ig), ', l =', str(trail))
- print(dhat, '->', dnew)
- nb_updated += 1
- elif dnew == dhat:
- print('I am equal!')
- # nx.draw_networkx(gtemp)
- # plt.show()
- # print(gtemp.nodes(data=True))
- # print(gtemp.edges(data=True))
- dhat = dnew
- gnew = gtemp.copy()
- found = True # found better graph.
- if found:
- r = 0
- gihat_list = [gnew]
- dihat_list = [dhat]
- else:
- r += 1
-
- dis_of_each_itr.append(dhat)
- print('the shortest distances for previous iterations are', dis_of_each_itr)
- # dis_best.append(dhat)
- g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
- print('distances in kernel space:', dis_of_each_itr, '\n')
-
- return dhat, g_best, nb_updated
- # return 0, 0, 0
-
-
- if __name__ == '__main__':
- from gklearn.utils.graphfiles import loadDataset
-
- # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
- # 'extra_params': {}} # node/edge symb
- ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
- 'extra_params': {}} # node nsymb
- # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
- # 'extra_params': {}}
- # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
- # 'extra_params': {}} # node symb
-
- DN, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
- #DN = DN[0:10]
-
- lmbda = 0.03 # termination probalility
- r_max = 3 # 10 # iteration limit.
- l = 500
- alpha_range = np.linspace(0.5, 0.5, 1)
- #alpha_range = np.linspace(0.1, 0.9, 9)
- k = 10 # 5 # k nearest neighbors
-
- # randomly select two molecules
- #np.random.seed(1)
- #idx1, idx2 = np.random.randint(0, len(DN), 2)
- #g1 = DN[idx1]
- #g2 = DN[idx2]
- idx1 = 0
- idx2 = 6
- g1 = DN[idx1]
- g2 = DN[idx2]
-
- # compute
- k_list = [] # kernel between each graph and itself.
- k_g1_list = [] # kernel between each graph and g1
- k_g2_list = [] # kernel between each graph and g2
- for ig, g in tqdm(enumerate(DN), desc='computing self kernels', file=sys.stdout):
- # ktemp = marginalizedkernel([g, g1, g2], node_label='atom', edge_label=None,
- # p_quit=lmbda, n_iteration=20, remove_totters=False,
- # n_jobs=multiprocessing.cpu_count(), verbose=False)
- ktemp = compute_kernel([g, g1, g2], 'untilhpathkernel', verbose=False)
- k_list.append(ktemp[0, 0])
- k_g1_list.append(ktemp[0, 1])
- k_g2_list.append(ktemp[0, 2])
-
- g_best = []
- dis_best = []
- # for each alpha
- for alpha in alpha_range:
- print('alpha =', alpha)
- # compute k nearest neighbors of phi in DN.
- dis_list = [] # distance between g_star and each graph.
- for ig, g in tqdm(enumerate(DN), desc='computing distances', file=sys.stdout):
- dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
- k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
- (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
- k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
- dis_list.append(np.sqrt(dtemp))
-
- # sort
- sort_idx = np.argsort(dis_list)
- dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
- g0hat = DN[sort_idx[0]] # the nearest neighbor of phi in DN
- if dis_gs[0] == 0: # the exact pre-image.
- print('The exact pre-image is found from the input dataset.')
- g_pimg = g0hat
- break
- dhat = dis_gs[0] # the nearest distance
- Dk = [DN[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
- gihat_list = []
-
- i = 1
- r = 1
- while r < r_max:
- print('r =', r)
- found = False
- for ig, gs in enumerate(Dk + gihat_list):
- # nx.draw_networkx(gs)
- # plt.show()
- # @todo what if the log is negetive?
- fdgs = int(np.abs(np.ceil(np.log(alpha * dis_gs[ig]))))
- for trail in tqdm(range(0, l), desc='l loop', file=sys.stdout):
- # add and delete edges.
- gtemp = gs.copy()
- np.random.seed()
- # which edges to change.
- # @todo: should we use just half of the adjacency matrix for undirected graphs?
- nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
- # @todo: what if fdgs is bigger than nb_vpairs?
- idx_change = random.sample(range(nb_vpairs), fdgs if fdgs < nb_vpairs else nb_vpairs)
- # idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
- # (nx.number_of_nodes(gs) - 1), fdgs)
- for item in idx_change:
- node1 = int(item / (nx.number_of_nodes(gs) - 1))
- node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
- if node2 >= node1: # skip the self pair.
- node2 += 1
- # @todo: is the randomness correct?
- if not gtemp.has_edge(node1, node2):
- # @todo: how to update the bond_type? 0 or 1?
- gtemp.add_edges_from([(node1, node2, {'bond_type': 1})])
- # nx.draw_networkx(gs)
- # plt.show()
- # nx.draw_networkx(gtemp)
- # plt.show()
- else:
- gtemp.remove_edge(node1, node2)
- # nx.draw_networkx(gs)
- # plt.show()
- # nx.draw_networkx(gtemp)
- # plt.show()
- # nx.draw_networkx(gtemp)
- # plt.show()
-
- # compute distance between phi and the new generated graph.
- # knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
- # p_quit=lmbda, n_iteration=20, remove_totters=False,
- # n_jobs=multiprocessing.cpu_count(), verbose=False)
- knew = compute_kernel([gtemp, g1, g2], 'untilhpathkernel', verbose=False)
- dnew = np.sqrt(knew[0, 0] - 2 * (alpha * knew[0, 1] + (1 - alpha) *
- knew[0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
- (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
- k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]))
- if dnew < dhat: # @todo: the new distance is smaller or also equal?
- print('I am smaller!')
- print(dhat, '->', dnew)
- nx.draw_networkx(gtemp)
- plt.show()
- print(gtemp.nodes(data=True))
- print(gtemp.edges(data=True))
- dhat = dnew
- gnew = gtemp.copy()
- found = True # found better graph.
- r = 0
- elif dnew == dhat:
- print('I am equal!')
- if found:
- gihat_list = [gnew]
- dis_gs.append(dhat)
- else:
- r += 1
- dis_best.append(dhat)
- g_best += ([g0hat] if len(gihat_list) == 0 else gihat_list)
-
-
- for idx, item in enumerate(alpha_range):
- print('when alpha is', item, 'the shortest distance is', dis_best[idx])
- print('the corresponding pre-image is')
- nx.draw_networkx(g_best[idx])
- plt.show()
|