You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 6.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Oct 17 19:05:07 2019
  5. Useful functions.
  6. @author: ljia
  7. """
  8. #import networkx as nx
  9. import multiprocessing
  10. import numpy as np
  11. import sys
  12. sys.path.insert(0, "../")
  13. from pygraph.kernels.marginalizedKernel import marginalizedkernel
  14. from pygraph.kernels.untilHPathKernel import untilhpathkernel
  15. from pygraph.kernels.spKernel import spkernel
  16. import functools
  17. from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct, polynomialkernel
  18. from pygraph.kernels.structuralspKernel import structuralspkernel
  19. from pygraph.kernels.treeletKernel import treeletkernel
  20. from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
  21. def remove_edges(Gn):
  22. for G in Gn:
  23. for _, _, attrs in G.edges(data=True):
  24. attrs.clear()
  25. def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
  26. term1 = Kmatrix[idx_g, idx_g]
  27. term2 = 0
  28. for i, a in enumerate(alpha):
  29. term2 += a * Kmatrix[idx_g, idx_gi[i]]
  30. term2 *= 2
  31. if withterm3 == False:
  32. for i1, a1 in enumerate(alpha):
  33. for i2, a2 in enumerate(alpha):
  34. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  35. return np.sqrt(term1 - term2 + term3)
  36. def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
  37. if graph_kernel == 'marginalizedkernel':
  38. Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label,
  39. p_quit=0.03, n_iteration=10, remove_totters=False,
  40. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  41. elif graph_kernel == 'untilhpathkernel':
  42. Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label,
  43. depth=7, k_func='MinMax', compute_method='trie',
  44. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  45. elif graph_kernel == 'spkernel':
  46. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  47. Kmatrix, _, _ = spkernel(Gn, node_label=node_label, node_kernels=
  48. {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
  49. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  50. elif graph_kernel == 'structuralspkernel':
  51. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  52. Kmatrix, _ = structuralspkernel(Gn, node_label=node_label, node_kernels=
  53. {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
  54. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  55. elif graph_kernel == 'treeletkernel':
  56. pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
  57. # pkernel = functools.partial(gaussiankernel, gamma=1e-6)
  58. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  59. Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label,
  60. sub_kernel=pkernel,
  61. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  62. elif graph_kernel == 'weisfeilerlehmankernel':
  63. Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label,
  64. height=4, base_kernel='subtree',
  65. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  66. # normalization
  67. Kmatrix_diag = Kmatrix.diagonal().copy()
  68. for i in range(len(Kmatrix)):
  69. for j in range(i, len(Kmatrix)):
  70. Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
  71. Kmatrix[j][i] = Kmatrix[i][j]
  72. return Kmatrix
  73. def gram2distances(Kmatrix):
  74. dmatrix = np.zeros((len(Kmatrix), len(Kmatrix)))
  75. for i1 in range(len(Kmatrix)):
  76. for i2 in range(len(Kmatrix)):
  77. dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2]
  78. dmatrix = np.sqrt(dmatrix)
  79. return dmatrix
  80. def kernel_distance_matrix(Gn, node_label, edge_label, Kmatrix=None, gkernel=None):
  81. dis_mat = np.empty((len(Gn), len(Gn)))
  82. if Kmatrix is None:
  83. Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, True)
  84. for i in range(len(Gn)):
  85. for j in range(i, len(Gn)):
  86. dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
  87. if dis < 0:
  88. if dis > -1e-10:
  89. dis = 0
  90. else:
  91. raise ValueError('The distance is negative.')
  92. dis_mat[i, j] = np.sqrt(dis)
  93. dis_mat[j, i] = dis_mat[i, j]
  94. dis_max = np.max(np.max(dis_mat))
  95. dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
  96. dis_mean = np.mean(np.mean(dis_mat))
  97. return dis_mat, dis_max, dis_min, dis_mean
  98. def get_same_item_indices(ls):
  99. """Get the indices of the same items in a list. Return a dict keyed by items.
  100. """
  101. idx_dict = {}
  102. for idx, item in enumerate(ls):
  103. if item in idx_dict:
  104. idx_dict[item].append(idx)
  105. else:
  106. idx_dict[item] = [idx]
  107. return idx_dict
  108. def k_nearest_neighbors_to_median_in_kernel_space(Gn, Kmatrix=None, gkernel=None,
  109. node_label=None, edge_label=None):
  110. dis_k_all = [] # distance between g_star and each graph.
  111. alpha = [1 / len(Gn)] * len(Gn)
  112. if Kmatrix is None:
  113. Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, True)
  114. term3 = 0
  115. for i1, a1 in enumerate(alpha):
  116. for i2, a2 in enumerate(alpha):
  117. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  118. for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
  119. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
  120. dis_all.append(dtemp)
  121. def normalize_distance_matrix(D):
  122. max_value = np.amax(D)
  123. min_value = np.amin(D)
  124. return (D - min_value) / (max_value - min_value)

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.