You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

check_gm_gstsp.py 4.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. compute gm with load_data.py and test them.
  5. Created on Wed Sep 19 16:12:13 2018
  6. @author: ljia
  7. """
  8. """Shortest-Path graph kernel.
  9. Python implementation based on: "Shortest-path kernels on graphs", by
  10. Borgwardt, K.M.; Kriegel, H.-P., in Data Mining, Fifth IEEE
  11. International Conference on (ICDM 2005), 8 pp., 27-30 Nov. 2005
  12. doi: 10.1109/ICDM.2005.132
  13. Author : Sandro Vega-Pons, Emanuele Olivetti
  14. """
  15. import sys
  16. sys.path.insert(0, "../../")
  17. import numpy as np
  18. import networkx as nx
  19. from pygraph.utils.graphfiles import loadDataset
  20. import matplotlib.pyplot as plt
  21. from numpy.linalg import eig
  22. class GK_SP:
  23. """
  24. Shorthest path graph kernel.
  25. """
  26. def compare(self, g_1, g_2, verbose=False):
  27. """Compute the kernel value (similarity) between two graphs.
  28. Parameters
  29. ----------
  30. g1 : networkx.Graph
  31. First graph.
  32. g2 : networkx.Graph
  33. Second graph.
  34. Returns
  35. -------
  36. k : The similarity value between g1 and g2.
  37. """
  38. # Diagonal superior matrix of the floyd warshall shortest
  39. # paths:
  40. fwm1 = np.array(nx.floyd_warshall_numpy(g_1))
  41. fwm1 = np.where(fwm1 == np.inf, 0, fwm1)
  42. fwm1 = np.where(fwm1 == np.nan, 0, fwm1)
  43. fwm1 = np.triu(fwm1, k=1)
  44. bc1 = np.bincount(fwm1.reshape(-1).astype(int))
  45. fwm2 = np.array(nx.floyd_warshall_numpy(g_2))
  46. fwm2 = np.where(fwm2 == np.inf, 0, fwm2)
  47. fwm2 = np.where(fwm2 == np.nan, 0, fwm2)
  48. fwm2 = np.triu(fwm2, k=1)
  49. bc2 = np.bincount(fwm2.reshape(-1).astype(int))
  50. # Copy into arrays with the same length the non-zero shortests
  51. # paths:
  52. v1 = np.zeros(max(len(bc1), len(bc2)) - 1)
  53. v1[range(0, len(bc1)-1)] = bc1[1:]
  54. v2 = np.zeros(max(len(bc1), len(bc2)) - 1)
  55. v2[range(0, len(bc2)-1)] = bc2[1:]
  56. return np.sum(v1 * v2)
  57. def compare_normalized(self, g_1, g_2, verbose=False):
  58. """Compute the normalized kernel value between two graphs.
  59. A normalized version of the kernel is given by the equation:
  60. k_norm(g1, g2) = k(g1, g2) / sqrt(k(g1,g1) * k(g2,g2))
  61. Parameters
  62. ----------
  63. g1 : networkx.Graph
  64. First graph.
  65. g2 : networkx.Graph
  66. Second graph.
  67. Returns
  68. -------
  69. k : The similarity value between g1 and g2.
  70. """
  71. return self.compare(g_1, g_2) / (np.sqrt(self.compare(g_1, g_1) *
  72. self.compare(g_2, g_2)))
  73. def compare_list(self, graph_list, verbose=False):
  74. """Compute the all-pairs kernel values for a list of graphs.
  75. This function can be used to directly compute the kernel
  76. matrix for a list of graphs. The direct computation of the
  77. kernel matrix is faster than the computation of all individual
  78. pairwise kernel values.
  79. Parameters
  80. ----------
  81. graph_list: list
  82. A list of graphs (list of networkx graphs)
  83. Return
  84. ------
  85. K: numpy.array, shape = (len(graph_list), len(graph_list))
  86. The similarity matrix of all graphs in graph_list.
  87. """
  88. n = len(graph_list)
  89. k = np.zeros((n, n))
  90. for i in range(n):
  91. for j in range(i, n):
  92. k[i, j] = self.compare(graph_list[i], graph_list[j])
  93. k[j, i] = k[i, j]
  94. k_norm = np.zeros(k.shape)
  95. for i in range(k.shape[0]):
  96. for j in range(k.shape[1]):
  97. k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j])
  98. return k_norm
  99. ds_name = 'PAH'
  100. datafile = '../../datasets/PAH/dataset.ds'
  101. dataset, y = loadDataset(datafile, filename_y=None, extra_params=None)
  102. gk_sp = GK_SP()
  103. x = gk_sp.compare_list(dataset)
  104. np.savez('../check_gm/' + ds_name + '.gm.jstsp', gms=x)
  105. plt.imshow(x)
  106. plt.colorbar()
  107. plt.savefig('../check_gm/' + ds_name + '.gm.jstsp.eps', format='eps', dpi=300)
  108. # print(np.transpose(x))
  109. print('if symmetric: ', np.array_equal(x, np.transpose(x)))
  110. print('diag: ', np.diag(x))
  111. print('sum diag < 0.1: ', np.sum(np.diag(x) < 0.1))
  112. print('min, max diag: ', min(np.diag(x)), max(np.diag(x)))
  113. print('mean x: ', np.mean(np.mean(x)))
  114. [lamnda, v] = eig(x)
  115. print('min, max lambda: ', min(lamnda), max(lamnda))

A Python package for graph kernels, graph edit distances and the graph pre-image problem.