#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Tue Apr 30 17:07:43 2019 A graph pre-image method combining iterative pre-image method in reference [1] and the iterative alternate minimizations (IAM) in reference [2]. @author: ljia @references: [1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to and graph pre-images. In Joint Pattern Re ognition Symposium , pages 253-261. Springer, 2004. [2] Generalized median graph via iterative alternate minimization. """ import sys import numpy as np import multiprocessing from tqdm import tqdm import networkx as nx import matplotlib.pyplot as plt from iam import iam, test_iam_with_more_graphs_as_init, test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations sys.path.insert(0, "../") from pygraph.kernels.marginalizedKernel import marginalizedkernel from pygraph.kernels.untilHPathKernel import untilhpathkernel def gk_iam(Gn, alpha): """This function constructs graph pre-image by the iterative pre-image framework in reference [1], algorithm 1, where the step of generating new graphs randomly is replaced by the IAM algorithm in reference [2]. notes ----- Every time a better graph is acquired, the older one is replaced by it. """ pass # # compute k nearest neighbors of phi in DN. # dis_list = [] # distance between g_star and each graph. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout): # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * # k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha * # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]) # dis_list.append(dtemp) # # # sort # sort_idx = np.argsort(dis_list) # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN # if dis_gs[0] == 0: # the exact pre-image. # print('The exact pre-image is found from the input dataset.') # return 0, g0hat # dhat = dis_gs[0] # the nearest distance # Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors # gihat_list = [] # ## i = 1 # r = 1 # while r < r_max: # print('r =', r) ## found = False # Gs_nearest = Gk + gihat_list # g_tmp = iam(Gs_nearest) # # # compute distance between phi and the new generated graph. # knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None, # p_quit=lmbda, n_iteration=20, remove_totters=False, # n_jobs=multiprocessing.cpu_count(), verbose=False) # dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) * # knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha * # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]) # if dnew <= dhat: # the new distance is smaller # print('I am smaller!') # dhat = dnew # g_new = g_tmp.copy() # found better graph. # gihat_list = [g_new] # dis_gs.append(dhat) # r = 0 # else: # r += 1 # # ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list) # # return dhat, ghat def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max): """This function constructs graph pre-image by the iterative pre-image framework in reference [1], algorithm 1, where the step of generating new graphs randomly is replaced by the IAM algorithm in reference [2]. notes ----- Every time a better graph is acquired, its distance in kernel space is compared with the k nearest ones, and the k nearest distances from the k+1 distances will be used as the new ones. """ # compute k nearest neighbors of phi in DN. dis_list = [] # distance between g_star and each graph. for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout): dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix) # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * # k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * # (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * # k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6]) dis_list.append(dtemp) # sort sort_idx = np.argsort(dis_list) dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN if dis_gs[0] == 0: # the exact pre-image. print('The exact pre-image is found from the input dataset.') return 0, g0hat dhat = dis_gs[0] # the nearest distance ghat = g0hat.copy() Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors for gi in Gk: nx.draw_networkx(gi) plt.show() Gs_nearest = Gk.copy() # gihat_list = [] # i = 1 r = 1 while r < r_max: print('r =', r) # found = False # Gs_nearest = Gk + gihat_list # g_tmp = iam(Gs_nearest) g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1) nx.draw_networkx(g_tmp) plt.show() # compute distance between phi and the new generated graph. gi_list = [Gn[i] for i in idx_gi] knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False) dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew) # dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * # knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * # alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * # k_g1_list[1] + alpha[1] * alpha[1] * k_list[1]) if dnew <= dhat and g_tmp != ghat: # the new distance is smaller print('I am smaller!') print(str(dhat) + '->' + str(dnew)) # nx.draw_networkx(ghat) # plt.show() # print('->') # nx.draw_networkx(g_tmp) # plt.show() dhat = dnew g_new = g_tmp.copy() # found better graph. ghat = g_tmp.copy() dis_gs.append(dhat) # add the new nearest distance. Gs_nearest.append(g_new) # add the corresponding graph. sort_idx = np.argsort(dis_gs) dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances. Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]] r = 0 else: r += 1 return dhat, ghat def dis_gstar(idx_g, idx_gi, alpha, Kmatrix): term1 = Kmatrix[idx_g, idx_g] term2 = 0 for i, a in enumerate(alpha): term2 += a * Kmatrix[idx_g, idx_gi[i]] term2 *= 2 term3 = 0 for i1, a1 in enumerate(alpha): for i2, a2 in enumerate(alpha): term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] return np.sqrt(term1 - term2 + term3) def compute_kernel(Gn, graph_kernel, verbose): if graph_kernel == 'marginalizedkernel': Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None, p_quit=0.3, n_iteration=19, remove_totters=False, n_jobs=multiprocessing.cpu_count(), verbose=verbose) elif graph_kernel == 'untilhpathkernel': Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label='bond_type', depth=2, k_func='MinMax', compute_method='trie', n_jobs=multiprocessing.cpu_count(), verbose=verbose) # normalization Kmatrix_diag = Kmatrix.diagonal().copy() for i in range(len(Kmatrix)): for j in range(i, len(Kmatrix)): Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) Kmatrix[j][i] = Kmatrix[i][j] return Kmatrix def gram2distances(Kmatrix): dmatrix = np.zeros((len(Kmatrix), len(Kmatrix))) for i1 in range(len(Kmatrix)): for i2 in range(len(Kmatrix)): dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2] dmatrix = np.sqrt(dmatrix) return dmatrix # --------------------------- These are tests --------------------------------# def test_who_is_the_closest_in_kernel_space(Gn): idx_gi = [0, 6] g1 = Gn[idx_gi[0]] g2 = Gn[idx_gi[1]] # create the "median" graph. gnew = g2.copy() gnew.remove_node(0) nx.draw_networkx(gnew) plt.show() print(gnew.nodes(data=True)) Gn = [gnew] + Gn # compute gram matrix Kmatrix = compute_kernel(Gn, 'untilhpathkernel', True) # the distance matrix dmatrix = gram2distances(Kmatrix) print(np.sort(dmatrix[idx_gi[0] + 1])) print(np.argsort(dmatrix[idx_gi[0] + 1])) print(np.sort(dmatrix[idx_gi[1] + 1])) print(np.argsort(dmatrix[idx_gi[1] + 1])) # for all g in Gn, compute (d(g1, g) + d(g2, g)) / 2 dis_median = [(dmatrix[i, idx_gi[0] + 1] + dmatrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))] print(np.sort(dis_median)) print(np.argsort(dis_median)) return def test_who_is_the_closest_in_GED_space(Gn): from iam import GED idx_gi = [0, 6] g1 = Gn[idx_gi[0]] g2 = Gn[idx_gi[1]] # create the "median" graph. gnew = g2.copy() gnew.remove_node(0) nx.draw_networkx(gnew) plt.show() print(gnew.nodes(data=True)) Gn = [gnew] + Gn # compute GEDs ged_matrix = np.zeros((len(Gn), len(Gn))) for i1 in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout): for i2 in range(len(Gn)): dis, _, _ = GED(Gn[i1], Gn[i2], lib='gedlib') ged_matrix[i1, i2] = dis print(np.sort(ged_matrix[idx_gi[0] + 1])) print(np.argsort(ged_matrix[idx_gi[0] + 1])) print(np.sort(ged_matrix[idx_gi[1] + 1])) print(np.argsort(ged_matrix[idx_gi[1] + 1])) # for all g in Gn, compute (GED(g1, g) + GED(g2, g)) / 2 dis_median = [(ged_matrix[i, idx_gi[0] + 1] + ged_matrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))] print(np.sort(dis_median)) print(np.argsort(dis_median)) return def test_will_IAM_give_the_median_graph_we_wanted(Gn): idx_gi = [0, 6] g1 = Gn[idx_gi[0]].copy() g2 = Gn[idx_gi[1]].copy() # del Gn[idx_gi[0]] # del Gn[idx_gi[1] - 1] g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1) # g_median = test_iam_with_more_graphs_as_init(Gn, Gn, c_ei=1, c_er=1, c_es=1) nx.draw_networkx(g_median) plt.show() print(g_median.nodes(data=True)) print(g_median.edges(data=True)) def test_new_IAM_allGraph_deleteNodes(Gn): idx_gi = [0, 6] # g1 = Gn[idx_gi[0]].copy() # g2 = Gn[idx_gi[1]].copy() g1 = nx.Graph(name='haha') g1.add_nodes_from([(2, {'atom': 'C'}), (3, {'atom': 'O'}), (4, {'atom': 'C'})]) g1.add_edges_from([(2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})]) g2 = nx.Graph(name='hahaha') g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}), (3, {'atom': 'O'}), (4, {'atom': 'C'})]) g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}), (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})]) # g2 = g1.copy() # g2.add_nodes_from([(3, {'atom': 'O'})]) # g2.add_nodes_from([(4, {'atom': 'C'})]) # g2.add_edges_from([(1, 3, {'bond_type': '1'})]) # g2.add_edges_from([(3, 4, {'bond_type': '1'})]) # del Gn[idx_gi[0]] # del Gn[idx_gi[1] - 1] g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1) # g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(Gn, Gn, c_ei=1, c_er=1, c_es=1) nx.draw_networkx(g_median) plt.show() print(g_median.nodes(data=True)) print(g_median.edges(data=True)) if __name__ == '__main__': from pygraph.utils.graphfiles import loadDataset # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', # 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', # 'extra_params': {}} # node nsymb # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds', # 'extra_params': {}} ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', 'extra_params': {}} # node symb Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) # Gn = Gn[0:20] test_new_IAM_allGraph_deleteNodes(Gn) test_will_IAM_give_the_median_graph_we_wanted(Gn) test_who_is_the_closest_in_GED_space(Gn) test_who_is_the_closest_in_kernel_space(Gn) lmbda = 0.03 # termination probalility r_max = 10 # recursions l = 500 alpha_range = np.linspace(0.5, 0.5, 1) k = 20 # k nearest neighbors # randomly select two molecules np.random.seed(1) idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2) g1 = Gn[idx_gi[0]] g2 = Gn[idx_gi[1]] # g_tmp = iam([g1, g2]) # nx.draw_networkx(g_tmp) # plt.show() # compute # k_list = [] # kernel between each graph and itself. # k_g1_list = [] # kernel between each graph and g1 # k_g2_list = [] # kernel between each graph and g2 # for ig, g in tqdm(enumerate(Gn), desc='computing self kernels', file=sys.stdout): # ktemp = compute_kernel([g, g1, g2], 'marginalizedkernel', False) # k_list.append(ktemp[0][0, 0]) # k_g1_list.append(ktemp[0][0, 1]) # k_g2_list.append(ktemp[0][0, 2]) km = compute_kernel(Gn, 'untilhpathkernel', True) # k_list = np.diag(km) # kernel between each graph and itself. # k_g1_list = km[idx_gi[0]] # kernel between each graph and g1 # k_g2_list = km[idx_gi[1]] # kernel between each graph and g2 g_best = [] dis_best = [] # for each alpha for alpha in alpha_range: print('alpha =', alpha) dhat, ghat = gk_iam_nearest(Gn, [alpha, 1 - alpha], idx_gi, km, k, r_max) dis_best.append(dhat) g_best.append(ghat) for idx, item in enumerate(alpha_range): print('when alpha is', item, 'the shortest distance is', dis_best[idx]) print('the corresponding pre-image is') nx.draw_networkx(g_best[idx]) plt.show()