You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

run_vertex_differs_rw.py 5.0 kB

5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Jan 8 15:22:35 2019
  5. @author: ljia
  6. """
  7. import sys
  8. import numpy as np
  9. import networkx as nx
  10. sys.path.insert(0, "../../")
  11. from gklearn.utils.graphfiles import loadDataset
  12. from gklearn.utils.model_selection_precomputed import compute_gram_matrices
  13. from sklearn.model_selection import ParameterGrid
  14. from libs import *
  15. import multiprocessing
  16. import functools
  17. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  18. dslist = [
  19. # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
  20. # 'task': 'regression'}, # node symb
  21. # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
  22. # 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
  23. # # contains single node graph, node symb
  24. # {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
  25. {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds', }, # unlabeled
  26. {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat',
  27. 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
  28. # {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
  29. # # node nsymb
  30. {'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
  31. # node symb/nsymb
  32. ]
  33. def run_ms(dataset, y, ds):
  34. from gklearn.kernels.randomWalkKernel import randomwalkkernel
  35. estimator = randomwalkkernel
  36. param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
  37. {'alpha': np.logspace(-10, 10, num=41, base=10)}]
  38. ave_time = {}
  39. std_time = {}
  40. for compute_method in ['sylvester', 'conjugate', 'fp', 'spectral']:
  41. if compute_method == 'sylvester':
  42. param_grid_precomputed = {'compute_method': ['sylvester'],
  43. # 'weight': np.linspace(0.01, 0.10, 10)}
  44. 'weight': np.logspace(-1, -10, num=10, base=10)}
  45. elif compute_method == 'conjugate':
  46. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  47. param_grid_precomputed = {'compute_method': ['conjugate'],
  48. 'node_kernels':
  49. [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
  50. 'edge_kernels':
  51. [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
  52. 'weight': np.logspace(-1, -10, num=10, base=10)}
  53. elif compute_method == 'fp':
  54. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  55. param_grid_precomputed = {'compute_method': ['fp'],
  56. 'node_kernels':
  57. [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
  58. 'edge_kernels':
  59. [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}],
  60. 'weight': np.logspace(-3, -10, num=8, base=10)}
  61. elif compute_method == 'spectral':
  62. param_grid_precomputed = {'compute_method': ['spectral'],
  63. 'weight': np.logspace(-1, -10, num=10, base=10),
  64. 'sub_kernel': ['geo', 'exp']}
  65. _, gram_matrix_time, _, _, _ = compute_gram_matrices(
  66. dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
  67. '../../notebooks/results/' + estimator.__name__, ds['name'],
  68. n_jobs=multiprocessing.cpu_count(), verbose=False)
  69. average_gram_matrix_time = np.mean(gram_matrix_time)
  70. std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
  71. print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
  72. .format(average_gram_matrix_time, std_gram_matrix_time))
  73. ave_time[compute_method] = average_gram_matrix_time
  74. std_time[compute_method] = std_gram_matrix_time
  75. print()
  76. return ave_time, std_time
  77. for ds in dslist:
  78. print()
  79. print(ds['name'])
  80. Gn, y_all = loadDataset(
  81. ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
  82. extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
  83. vn_list = [nx.number_of_nodes(g) for g in Gn]
  84. idx_sorted = np.argsort(vn_list)
  85. vn_list.sort()
  86. Gn = [Gn[idx] for idx in idx_sorted]
  87. y_all = [y_all[idx] for idx in idx_sorted]
  88. len_1piece = int(len(Gn) / 5)
  89. ave_time = []
  90. std_time = []
  91. for piece in range(0, 5):
  92. print('piece', str(piece), ':')
  93. Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
  94. y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
  95. avet, stdt = run_ms(Gn_p, y_all_p, ds)
  96. ave_time.append(avet)
  97. std_time.append(stdt)
  98. print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
  99. '\nthe time std is \n', std_time)
  100. print()

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.