|
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Thu Oct 17 19:05:07 2019
-
- Useful functions.
- @author: ljia
- """
- #import networkx as nx
-
- import multiprocessing
- import numpy as np
-
- import sys
- sys.path.insert(0, "../")
- from pygraph.kernels.marginalizedKernel import marginalizedkernel
- from pygraph.kernels.untilHPathKernel import untilhpathkernel
- from pygraph.kernels.spKernel import spkernel
- import functools
- from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct, polynomialkernel
- from pygraph.kernels.structuralspKernel import structuralspkernel
- from pygraph.kernels.treeletKernel import treeletkernel
- from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
-
-
- def remove_edges(Gn):
- for G in Gn:
- for _, _, attrs in G.edges(data=True):
- attrs.clear()
-
- def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
- term1 = Kmatrix[idx_g, idx_g]
- term2 = 0
- for i, a in enumerate(alpha):
- term2 += a * Kmatrix[idx_g, idx_gi[i]]
- term2 *= 2
- if withterm3 == False:
- for i1, a1 in enumerate(alpha):
- for i2, a2 in enumerate(alpha):
- term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
- return np.sqrt(term1 - term2 + term3)
-
-
- def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
- if graph_kernel == 'marginalizedkernel':
- Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label,
- p_quit=0.03, n_iteration=10, remove_totters=False,
- n_jobs=multiprocessing.cpu_count(), verbose=verbose)
- elif graph_kernel == 'untilhpathkernel':
- Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label,
- depth=7, k_func='MinMax', compute_method='trie',
- n_jobs=multiprocessing.cpu_count(), verbose=verbose)
- elif graph_kernel == 'spkernel':
- mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
- Kmatrix, _, _ = spkernel(Gn, node_label=node_label, node_kernels=
- {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
- n_jobs=multiprocessing.cpu_count(), verbose=verbose)
- elif graph_kernel == 'structuralspkernel':
- mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
- Kmatrix, _ = structuralspkernel(Gn, node_label=node_label, node_kernels=
- {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
- n_jobs=multiprocessing.cpu_count(), verbose=verbose)
- elif graph_kernel == 'treeletkernel':
- # pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
- pkernel = functools.partial(gaussiankernel, gamma=1e-6)
- mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
- Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label,
- sub_kernel=pkernel,
- n_jobs=multiprocessing.cpu_count(), verbose=verbose)
- elif graph_kernel == 'weisfeilerlehmankernel':
- Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label,
- height=4, base_kernel='subtree',
- n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-
- # normalization
- Kmatrix_diag = Kmatrix.diagonal().copy()
- for i in range(len(Kmatrix)):
- for j in range(i, len(Kmatrix)):
- Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
- Kmatrix[j][i] = Kmatrix[i][j]
- return Kmatrix
-
-
- def gram2distances(Kmatrix):
- dmatrix = np.zeros((len(Kmatrix), len(Kmatrix)))
- for i1 in range(len(Kmatrix)):
- for i2 in range(len(Kmatrix)):
- dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2]
- dmatrix = np.sqrt(dmatrix)
- return dmatrix
-
-
- def kernel_distance_matrix(Gn, node_label, edge_label, Kmatrix=None, gkernel=None):
- dis_mat = np.empty((len(Gn), len(Gn)))
- if Kmatrix is None:
- Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, True)
- for i in range(len(Gn)):
- for j in range(i, len(Gn)):
- dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
- if dis < 0:
- if dis > -1e-10:
- dis = 0
- else:
- raise ValueError('The distance is negative.')
- dis_mat[i, j] = np.sqrt(dis)
- dis_mat[j, i] = dis_mat[i, j]
- dis_max = np.max(np.max(dis_mat))
- dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
- dis_mean = np.mean(np.mean(dis_mat))
- return dis_mat, dis_max, dis_min, dis_mean
-
-
- def get_same_item_indices(ls):
- """Get the indices of the same items in a list. Return a dict keyed by items.
- """
- idx_dict = {}
- for idx, item in enumerate(ls):
- if item in idx_dict:
- idx_dict[item].append(idx)
- else:
- idx_dict[item] = [idx]
- return idx_dict
-
-
- def k_nearest_neighbors_to_median_in_kernel_space(Gn, Kmatrix=None, gkernel=None,
- node_label=None, edge_label=None):
- dis_k_all = [] # distance between g_star and each graph.
- alpha = [1 / len(Gn)] * len(Gn)
- if Kmatrix is None:
- Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, True)
- term3 = 0
- for i1, a1 in enumerate(alpha):
- for i2, a2 in enumerate(alpha):
- term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
- for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
- dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
- dis_all.append(dtemp)
-
-
- def normalize_distance_matrix(D):
- max_value = np.amax(D)
- min_value = np.amin(D)
- return (D - min_value) / (max_value - min_value)
|