You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

spKernel.py 3.6 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. """
  2. @author: linlin
  3. @references: Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data Mining, Fifth IEEE International Conference on, 2005 Nov 27 (pp. 8-pp). IEEE.
  4. """
  5. import sys
  6. import pathlib
  7. sys.path.insert(0, "../")
  8. from tqdm import tqdm
  9. import time
  10. import networkx as nx
  11. import numpy as np
  12. from pygraph.utils.utils import getSPGraph
  13. from pygraph.utils.graphdataset import get_dataset_attributes
  14. def spkernel(*args, node_label='atom', edge_weight=None):
  15. """Calculate shortest-path kernels between graphs.
  16. Parameters
  17. ----------
  18. Gn : List of NetworkX graph
  19. List of graphs between which the kernels are calculated.
  20. /
  21. G1, G2 : NetworkX graphs
  22. 2 graphs between which the kernel is calculated.
  23. edge_weight : string
  24. Edge attribute corresponding to the edge weight.
  25. Return
  26. ------
  27. Kmatrix : Numpy matrix
  28. Kernel matrix, each element of which is the sp kernel between 2 praphs.
  29. """
  30. Gn = args[0] if len(args) == 1 else [args[0], args[1]]
  31. Kmatrix = np.zeros((len(Gn), len(Gn)))
  32. try:
  33. some_weight = list(
  34. nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
  35. weight = edge_label if isinstance(some_weight, float) or isinstance(
  36. some_weight, int) else None
  37. except:
  38. weight = None
  39. ds_attrs = get_dataset_attributes(
  40. Gn, attr_names=['node_labeled'], node_label=node_label)
  41. start_time = time.time()
  42. # get shortest path graphs of Gn
  43. Gn = [
  44. getSPGraph(G, edge_weight=edge_weight)
  45. for G in tqdm(Gn, desc='getting sp graphs', file=sys.stdout)
  46. ]
  47. pbar = tqdm(
  48. total=((len(Gn) + 1) * len(Gn) / 2),
  49. desc='calculating kernels',
  50. file=sys.stdout)
  51. if ds_attrs['node_labeled']:
  52. for i in range(0, len(Gn)):
  53. for j in range(i, len(Gn)):
  54. for e1 in Gn[i].edges(data=True):
  55. for e2 in Gn[j].edges(data=True):
  56. # cost of a node to itself equals to 0, cost between two disconnected nodes is Inf.
  57. if e1[2]['cost'] != 0 and e1[2] != np.Inf and e1[2]['cost'] == e2[2]['cost'] and {
  58. Gn[i].nodes[e1[0]][node_label],
  59. Gn[i].nodes[e1[1]][node_label]
  60. } == {
  61. Gn[j].nodes[e2[0]][node_label],
  62. Gn[j].nodes[e2[1]][node_label]
  63. }:
  64. Kmatrix[i][j] += 1
  65. Kmatrix[j][i] = Kmatrix[i][j]
  66. pbar.update(1)
  67. else:
  68. for i in range(0, len(Gn)):
  69. for j in range(i, len(Gn)):
  70. # kernel_t = [ e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])) \
  71. # for e1 in Sn[i].edges(data = True) for e2 in Sn[j].edges(data = True) ]
  72. # Kmatrix[i][j] = np.sum(kernel_t)
  73. # Kmatrix[j][i] = Kmatrix[i][j]
  74. for e1 in Gn[i].edges(data=True):
  75. for e2 in Gn[j].edges(data=True):
  76. if e1[2]['cost'] != 0 and e1[2] != np.Inf and e1[2]['cost'] == e2[2]['cost']:
  77. Kmatrix[i][j] += 1
  78. Kmatrix[j][i] = Kmatrix[i][j]
  79. pbar.update(1)
  80. run_time = time.time() - start_time
  81. print(
  82. "--- shortest path kernel matrix of size %d built in %s seconds ---" %
  83. (len(Gn), run_time))
  84. return Kmatrix, run_time

A Python package for graph kernels, graph edit distances and graph pre-image problem.