You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

run_degree_differs_ma.py 3.0 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Jan 8 17:43:38 2019
  5. @author: ljia
  6. """
  7. import sys
  8. import numpy as np
  9. import networkx as nx
  10. sys.path.insert(0, "../../")
  11. from gklearn.utils.graphfiles import loadDataset
  12. from gklearn.utils.model_selection_precomputed import compute_gram_matrices
  13. from sklearn.model_selection import ParameterGrid
  14. from libs import *
  15. import multiprocessing
  16. dslist = [
  17. {'name': 'ENZYMES', 'dataset': '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
  18. # node symb/nsymb
  19. ]
  20. def run_ms(dataset, y, ds):
  21. from gklearn.kernels.marginalizedKernel import marginalizedkernel
  22. estimator = marginalizedkernel
  23. #param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.3, 3),
  24. # 'n_iteration': np.linspace(1, 1, 1),
  25. param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.9, 9),
  26. 'n_iteration': np.linspace(1, 19, 7),
  27. 'remove_totters': [False]}
  28. param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
  29. {'alpha': np.logspace(-10, 10, num=41, base=10)}]
  30. _, gram_matrix_time, _, _, _ = compute_gram_matrices(
  31. dataset, y, estimator, list(ParameterGrid(param_grid_precomputed)),
  32. '../../notebooks/results/' + estimator.__name__, ds['name'],
  33. n_jobs=multiprocessing.cpu_count(), verbose=False)
  34. average_gram_matrix_time = np.mean(gram_matrix_time)
  35. std_gram_matrix_time = np.std(gram_matrix_time, ddof=1)
  36. print('\n***** time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
  37. .format(average_gram_matrix_time, std_gram_matrix_time))
  38. print()
  39. return average_gram_matrix_time, std_gram_matrix_time
  40. for ds in dslist:
  41. print()
  42. print(ds['name'])
  43. Gn, y_all = loadDataset(
  44. ds['dataset'], filename_y=(ds['dataset_y'] if 'dataset_y' in ds else None),
  45. extra_params=(ds['extra_params'] if 'extra_params' in ds else None))
  46. degree_list = [np.mean(list(dict(g.degree()).values())) for g in Gn]
  47. idx_sorted = np.argsort(degree_list)
  48. degree_list.sort()
  49. Gn = [Gn[idx] for idx in idx_sorted]
  50. y_all = [y_all[idx] for idx in idx_sorted]
  51. len_1piece = int(len(Gn) / 5)
  52. ave_time = []
  53. std_time = []
  54. ave_degree = []
  55. for piece in range(0, 5):
  56. print('piece', str(piece), ':')
  57. Gn_p = Gn[len_1piece * piece:len_1piece * (piece + 1)]
  58. y_all_p = y_all[len_1piece * piece:len_1piece * (piece + 1)]
  59. aved = np.mean(degree_list[len_1piece * piece:len_1piece * (piece + 1)])
  60. ave_degree.append(aved)
  61. # print(np.mean([nx.number_of_nodes(g) for g in Gn_p]))
  62. avet, stdt = run_ms(Gn_p, y_all_p, ds)
  63. ave_time.append(avet)
  64. std_time.append(stdt)
  65. print('\n****** for dataset', ds['name'], ', the average time is \n', ave_time,
  66. '\nthe time std is \n', std_time)
  67. print('corresponding average vertex degrees are', ave_degree)
  68. print()

A Python package for graph kernels, graph edit distances and graph pre-image problem.