You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

randomwalkKernel.py 3.5 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. """
  2. @author: linlin
  3. @references: S. V. N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
  4. """
  5. import sys
  6. import pathlib
  7. sys.path.insert(0, "../")
  8. import time
  9. # from collections import Counter
  10. import networkx as nx
  11. import numpy as np
  12. def randomwalkkernel(*args, node_label='atom', edge_label='bond_type', labeled=True, n=10, method=''):
  13. """Calculate random walk graph kernels.
  14. Parameters
  15. ----------
  16. Gn : List of NetworkX graph
  17. List of graphs between which the kernels are calculated.
  18. /
  19. G1, G2 : NetworkX graphs
  20. 2 graphs between which the kernel is calculated.
  21. node_label : string
  22. node attribute used as label. The default node label is atom.
  23. edge_label : string
  24. edge attribute used as label. The default edge label is bond_type.
  25. labeled : boolean
  26. Whether the graphs are labeled. The default is True.
  27. n : integer
  28. Longest length of walks.
  29. method : string
  30. Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'.
  31. Return
  32. ------
  33. Kmatrix : Numpy matrix
  34. Kernel matrix, each element of which is the path kernel up to d between 2 praphs.
  35. """
  36. method = method.lower()
  37. Gn = args[0] if len(args) == 1 else [args[0], args[1]] # arrange all graphs in a list
  38. Kmatrix = np.zeros((len(Gn), len(Gn)))
  39. n = int(n)
  40. start_time = time.time()
  41. # get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset.
  42. all_walks = [ find_all_walks_until_length(Gn[i], n, node_label = node_label, edge_label = edge_label, labeled = labeled) for i in range(0, len(Gn)) ]
  43. for i in range(0, len(Gn)):
  44. for j in range(i, len(Gn)):
  45. Kmatrix[i][j] = _randomwalkkernel_do(all_walks[i], all_walks[j], node_label = node_label, edge_label = edge_label, labeled = labeled)
  46. Kmatrix[j][i] = Kmatrix[i][j]
  47. run_time = time.time() - start_time
  48. print("\n --- kernel matrix of walk kernel up to %d of size %d built in %s seconds ---" % (n, len(Gn), run_time))
  49. return Kmatrix, run_time
  50. def _randomwalkkernel_do(walks1, walks2, node_label = 'atom', edge_label = 'bond_type', labeled = True, method=''):
  51. """Calculate walk graph kernels up to n between 2 graphs.
  52. Parameters
  53. ----------
  54. walks1, walks2 : list
  55. List of walks in 2 graphs, where for unlabeled graphs, each walk is represented by a list of nodes; while for labeled graphs, each walk is represented by a string consists of labels of nodes and edges on that walk.
  56. node_label : string
  57. node attribute used as label. The default node label is atom.
  58. edge_label : string
  59. edge attribute used as label. The default edge label is bond_type.
  60. labeled : boolean
  61. Whether the graphs are labeled. The default is True.
  62. Return
  63. ------
  64. kernel : float
  65. Treelet Kernel between 2 graphs.
  66. """
  67. if method == 'sylvester':
  68. import warnings
  69. warnings.warn('The Sylvester equation (rather than generalized Sylvester equation) is used; only walks of length 1 is considered.')
  70. from control import dlyap
  71. dpg = nx.tensor_product(G1, G2) # direct product graph
  72. X = dlyap(A, Q, C)
  73. pass
  74. else:
  75. raise Exception('No computation method specified.')
  76. return kernel

A Python package for graph kernels, graph edit distances and the graph pre-image problem.