You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

utils.py 6.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Oct 17 19:05:07 2019
  5. Useful functions.
  6. @author: ljia
  7. """
  8. #import networkx as nx
  9. import multiprocessing
  10. import numpy as np
  11. from gklearn.kernels.marginalizedKernel import marginalizedkernel
  12. from gklearn.kernels.untilHPathKernel import untilhpathkernel
  13. from gklearn.kernels.spKernel import spkernel
  14. import functools
  15. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct, polynomialkernel
  16. from gklearn.kernels.structuralspKernel import structuralspkernel
  17. from gklearn.kernels.treeletKernel import treeletkernel
  18. from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
  19. def remove_edges(Gn):
  20. for G in Gn:
  21. for _, _, attrs in G.edges(data=True):
  22. attrs.clear()
  23. def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
  24. term1 = Kmatrix[idx_g, idx_g]
  25. term2 = 0
  26. for i, a in enumerate(alpha):
  27. term2 += a * Kmatrix[idx_g, idx_gi[i]]
  28. term2 *= 2
  29. if withterm3 == False:
  30. for i1, a1 in enumerate(alpha):
  31. for i2, a2 in enumerate(alpha):
  32. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  33. return np.sqrt(term1 - term2 + term3)
  34. def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel='imap_unordered'):
  35. if graph_kernel == 'marginalizedkernel':
  36. Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label,
  37. p_quit=0.03, n_iteration=10, remove_totters=False,
  38. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  39. elif graph_kernel == 'untilhpathkernel':
  40. Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label,
  41. depth=7, k_func='MinMax', compute_method='trie',
  42. parallel=parallel,
  43. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  44. elif graph_kernel == 'spkernel':
  45. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  46. Kmatrix = np.empty((len(Gn), len(Gn)))
  47. # Kmatrix[:] = np.nan
  48. Kmatrix, _, idx = spkernel(Gn, node_label=node_label, node_kernels=
  49. {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
  50. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  51. # for i, row in enumerate(idx):
  52. # for j, col in enumerate(idx):
  53. # Kmatrix[row, col] = Kmatrix_tmp[i, j]
  54. elif graph_kernel == 'structuralspkernel':
  55. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  56. sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  57. Kmatrix, _ = structuralspkernel(Gn, node_label=node_label,
  58. edge_label=edge_label, node_kernels=sub_kernels,
  59. edge_kernels=sub_kernels,
  60. parallel=parallel, n_jobs=multiprocessing.cpu_count(),
  61. verbose=verbose)
  62. elif graph_kernel == 'treeletkernel':
  63. pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
  64. # pkernel = functools.partial(gaussiankernel, gamma=1e-6)
  65. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  66. Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label,
  67. sub_kernel=pkernel, parallel=parallel,
  68. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  69. elif graph_kernel == 'weisfeilerlehmankernel':
  70. Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label,
  71. height=4, base_kernel='subtree', parallel=None,
  72. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  73. # normalization
  74. Kmatrix_diag = Kmatrix.diagonal().copy()
  75. for i in range(len(Kmatrix)):
  76. for j in range(i, len(Kmatrix)):
  77. Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
  78. Kmatrix[j][i] = Kmatrix[i][j]
  79. return Kmatrix
  80. def gram2distances(Kmatrix):
  81. dmatrix = np.zeros((len(Kmatrix), len(Kmatrix)))
  82. for i1 in range(len(Kmatrix)):
  83. for i2 in range(len(Kmatrix)):
  84. dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2]
  85. dmatrix = np.sqrt(dmatrix)
  86. return dmatrix
  87. def kernel_distance_matrix(Gn, node_label, edge_label, Kmatrix=None,
  88. gkernel=None, verbose=True):
  89. dis_mat = np.empty((len(Gn), len(Gn)))
  90. if Kmatrix is None:
  91. Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, verbose)
  92. for i in range(len(Gn)):
  93. for j in range(i, len(Gn)):
  94. dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
  95. if dis < 0:
  96. if dis > -1e-10:
  97. dis = 0
  98. else:
  99. raise ValueError('The distance is negative.')
  100. dis_mat[i, j] = np.sqrt(dis)
  101. dis_mat[j, i] = dis_mat[i, j]
  102. dis_max = np.max(np.max(dis_mat))
  103. dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
  104. dis_mean = np.mean(np.mean(dis_mat))
  105. return dis_mat, dis_max, dis_min, dis_mean
  106. def get_same_item_indices(ls):
  107. """Get the indices of the same items in a list. Return a dict keyed by items.
  108. """
  109. idx_dict = {}
  110. for idx, item in enumerate(ls):
  111. if item in idx_dict:
  112. idx_dict[item].append(idx)
  113. else:
  114. idx_dict[item] = [idx]
  115. return idx_dict
  116. def k_nearest_neighbors_to_median_in_kernel_space(Gn, Kmatrix=None, gkernel=None,
  117. node_label=None, edge_label=None):
  118. dis_k_all = [] # distance between g_star and each graph.
  119. alpha = [1 / len(Gn)] * len(Gn)
  120. if Kmatrix is None:
  121. Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, True)
  122. term3 = 0
  123. for i1, a1 in enumerate(alpha):
  124. for i2, a2 in enumerate(alpha):
  125. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  126. for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
  127. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
  128. dis_all.append(dtemp)
  129. def normalize_distance_matrix(D):
  130. max_value = np.amax(D)
  131. min_value = np.amin(D)
  132. return (D - min_value) / (max_value - min_value)

A Python package for graph kernels, graph edit distances and graph pre-image problem.