You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

learning.py 3.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. from distances import euclid_d
  def split_data(D, y, train_index, test_index):
      """Split items and their targets into training and test subsets.

      Args:
          D: Indexable collection of items.
          y: Indexable collection of targets, indexed with the same
              positions as ``D``.
          train_index: Iterable of positions selected for training.
          test_index: Iterable of positions selected for testing.

      Returns:
          A tuple ``(D_app, D_test, y_app, y_test)`` of four lists, each
          following the order of the corresponding index iterable.
      """
      # Each index collection is walked twice (once for D, once for y).
      # Materialise it first so single-pass iterators/generators do not
      # silently produce empty target lists on the second pass.
      train_index = list(train_index)
      test_index = list(test_index)

      D_app = [D[i] for i in train_index]
      D_test = [D[i] for i in test_index]
      y_app = [y[i] for i in train_index]
      y_test = [y[i] for i in test_index]
      return D_app, D_test, y_app, y_test
  def evaluate_D(D_app, y_app, D_test, y_test, mode='reg'):
      """Grid-search a k-NN model on precomputed distances and score it.

      A ``GridSearchCV`` over ``n_neighbors`` is fitted on
      ``(D_app, y_app)`` and refitted with the best setting; the refitted
      model then predicts on both ``D_app`` and ``D_test``.

      Args:
          D_app: Precomputed distances passed to ``fit`` and to ``predict``
              for the training predictions.
              NOTE(review): presumably a square train-vs-train matrix, as
              ``metric='precomputed'`` expects -- confirm against callers.
          y_app: Targets associated with ``D_app``.
          D_test: Precomputed distances passed to ``predict`` for the test
              predictions.
              NOTE(review): presumably test-vs-train distances -- confirm.
          y_test: Targets associated with ``D_test``.
          mode: ``'reg'`` selects a ``KNeighborsRegressor`` scored with
              ``'neg_root_mean_squared_error'`` and evaluated with
              ``distances.rmse``; any other value selects a
              ``KNeighborsClassifier`` scored with ``'accuracy'`` and
              evaluated with ``distances.accuracy``.

      Returns:
          A tuple ``(perf_app, perf_test, clf)``: the evaluation function
          applied to the training predictions, the same applied to the test
          predictions, and the fitted ``GridSearchCV`` object.
      """
      from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
      from distances import rmse, accuracy
      from sklearn.model_selection import GridSearchCV

      # Estimator, model-selection scoring and reported metric go together.
      if (mode == 'reg'):
          knn = KNeighborsRegressor(metric='precomputed')
          scoring = 'neg_root_mean_squared_error'
          perf_eval = rmse
      else:
          knn = KNeighborsClassifier(metric='precomputed')
          scoring = 'accuracy'
          perf_eval = accuracy

      grid_params = {
          'n_neighbors': [3, 5, 7, 9, 11]
      }

      clf = GridSearchCV(knn, param_grid=grid_params,
                         scoring=scoring,
                         cv=5, return_train_score=True, refit=True)
      clf.fit(D_app, y_app)

      # refit=True: predictions come from the best estimator refitted on
      # the whole training set.
      y_pred_app = clf.predict(D_app)
      y_pred_test = clf.predict(D_test)
      return perf_eval(y_pred_app, y_app), perf_eval(y_pred_test, y_test), clf
  def xp_knn(Gn, y_all, y_distance=euclid_d,
             mode='reg', unlabeled=False, ed_method='BIPARTITE', **kwargs):
      """Compare edit-cost setups with a k-NN model over random splits.

      The dataset is split 10 times (10% held out each time). For every
      split, distances are computed with three setups -- ``'random'``,
      ``'expert'`` and ``'fitted'`` -- and each setup is evaluated with
      ``evaluate_D``.

      Args:
          Gn: Collection of items to split; forwarded to the ``ged``
              distance functions.
              NOTE(review): presumably a list of graphs -- confirm.
          y_all: Targets, indexed with the same positions as ``Gn``.
          y_distance: Forwarded to ``compute_D_fitted``.
          mode: ``'classif'`` uses a ``StratifiedShuffleSplit`` on
              ``y_all``; any other value uses a plain ``ShuffleSplit``.
              Also forwarded to ``compute_D_fitted`` and ``evaluate_D``.
          unlabeled: Forwarded to ``compute_D_fitted``.
          ed_method: Forwarded to the three ``compute_D_*`` functions.
          **kwargs: Forwarded to the three ``compute_D_*`` functions.

      Returns:
          A list with one dict per split. Each dict holds ``'y_app'`` and
          ``'y_test'`` plus one entry per setup name, itself a dict with
          keys ``'D_app'``, ``'D_test'``, ``'edit_costs'``, ``'perf_app'``,
          ``'perf_test'`` and ``'clf'``.
      """
      from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
      from ged import compute_D_random, compute_D_expert
      from ged import compute_D_fitted

      # Single source of truth for the split count: used by the splitter
      # and by the progress message below.
      n_splits = 10

      if mode == 'classif':
          # Stratification needs the labels to keep class proportions.
          rs = StratifiedShuffleSplit(n_splits=n_splits, test_size=.1)
          split_scheme = rs.split(Gn, y_all)
      else:
          rs = ShuffleSplit(n_splits=n_splits, test_size=.1)
          split_scheme = rs.split(Gn)

      results = []
      for i, (train_index, test_index) in enumerate(split_scheme, start=1):
          print()
          print("Split {0}/{1}".format(i, n_splits))
          cur_results = {}

          # Get the split data
          G_app, G_test, y_app, y_test = split_data(Gn, y_all,
                                                    train_index, test_index)
          cur_results['y_app'] = y_app
          cur_results['y_test'] = y_test

          # Compute distances with every setup to compare
          distance_setups = {}
          distance_setups['random'] = compute_D_random(
              G_app, G_test, ed_method, **kwargs)
          distance_setups['expert'] = compute_D_expert(
              G_app, G_test, ed_method, **kwargs)
          distance_setups['fitted'] = compute_D_fitted(
              G_app, y_app, G_test,
              y_distance=y_distance,
              mode=mode, unlabeled=unlabeled, ed_method=ed_method,
              **kwargs)

          for setup, computed in distance_setups.items():
              print("{0} Mode".format(setup))
              setup_results = {}
              # Each compute_D_* result unpacks into three values.
              D_app, D_test, edit_costs = computed
              setup_results['D_app'] = D_app
              setup_results['D_test'] = D_test
              setup_results['edit_costs'] = edit_costs
              print(edit_costs)

              perf_app, perf_test, clf = evaluate_D(
                  D_app, y_app, D_test, y_test, mode)

              setup_results['perf_app'] = perf_app
              setup_results['perf_test'] = perf_test
              setup_results['clf'] = clf

              print(
                  "Learning performance with {1} costs : {0:.2f}".format(
                      perf_app, setup))
              print(
                  "Test performance with {1} costs : {0:.2f}".format(
                      perf_test, setup))
              cur_results[setup] = setup_results
          results.append(cur_results)
      return results

A Python package for graph kernels, graph edit distances and the graph pre-image problem.