from distances import euclid_d


def split_data(D, y, train_index, test_index):
    """Split graphs and targets into train and test subsets by index."""
    D_app = [D[i] for i in train_index]
    D_test = [D[i] for i in test_index]
    y_app = [y[i] for i in train_index]
    y_test = [y[i] for i in test_index]
    return D_app, D_test, y_app, y_test
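
# Minimal illustration of split_data (toy values, not from the experiments):
# >>> split_data(['g0', 'g1', 'g2'], [0.1, 0.2, 0.3], [0, 2], [1])
# (['g0', 'g2'], ['g1'], [0.1, 0.3], [0.2])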


def evaluate_D(D_app, y_app, D_test, y_test, mode='reg'):
    """Grid-search a kNN model on precomputed distances and report
    train/test performance (RMSE for regression, accuracy otherwise)."""
    from sklearn.model_selection import GridSearchCV
    from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

    from distances import accuracy, rmse

    if mode == 'reg':
        knn = KNeighborsRegressor(metric='precomputed')
        scoring = 'neg_root_mean_squared_error'
        perf_eval = rmse
    else:
        knn = KNeighborsClassifier(metric='precomputed')
        scoring = 'accuracy'
        perf_eval = accuracy

    # Tune the number of neighbors by 5-fold cross-validation, then refit
    # the best model on the whole training set.
    grid_params = {'n_neighbors': [3, 5, 7, 9, 11]}
    clf = GridSearchCV(knn, param_grid=grid_params, scoring=scoring,
                       cv=5, return_train_score=True, refit=True)
    clf.fit(D_app, y_app)
    y_pred_app = clf.predict(D_app)
    y_pred_test = clf.predict(D_test)
    return perf_eval(y_pred_app, y_app), perf_eval(y_pred_test, y_test), clf
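
# Usage sketch for evaluate_D (illustration only; the random matrices below
# stand in for real graph edit distances). With metric='precomputed', D_app
# must be the (n_app, n_app) train/train distance matrix and D_test the
# (n_test, n_app) test/train matrix:
#
#     import numpy as np
#     rng = np.random.default_rng(0)
#     n_app, n_test = 40, 10
#     D_app = rng.random((n_app, n_app))
#     D_app = (D_app + D_app.T) / 2    # symmetrise
#     np.fill_diagonal(D_app, 0.)      # zero self-distances
#     D_test = rng.random((n_test, n_app))
#     y_app, y_test = rng.random(n_app), rng.random(n_test)
#     perf_app, perf_test, clf = evaluate_D(D_app, y_app, D_test, y_test,
#                                           mode='reg')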


def xp_knn(Gn, y_all, y_distance=euclid_d,
           mode='reg', unlabeled=False, ed_method='BIPARTITE', **kwargs):
    """Run a kNN regressor (or classifier) over 10 random train/test
    splits, comparing random, expert, and fitted edit costs."""
    from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit

    from ged import compute_D_expert, compute_D_fitted, compute_D_random

    # Classification targets need stratified splits to keep the class
    # proportions of each fold close to those of the full dataset.
    stratified = (mode == 'classif')
    if stratified:
        rs = StratifiedShuffleSplit(n_splits=10, test_size=.1)
        split_scheme = rs.split(Gn, y_all)
    else:
        rs = ShuffleSplit(n_splits=10, test_size=.1)
        split_scheme = rs.split(Gn)

    results = []
    for i, (train_index, test_index) in enumerate(split_scheme, start=1):
        print()
        print("Split {0}/{1}".format(i, rs.get_n_splits()))
        cur_results = {}
        # Get the split data.
        G_app, G_test, y_app, y_test = split_data(Gn, y_all,
                                                  train_index, test_index)
        cur_results['y_app'] = y_app
        cur_results['y_test'] = y_test

        # Compute distances with all methods to compare.
        distances = {}
        distances['random'] = compute_D_random(G_app, G_test, ed_method,
                                               **kwargs)
        distances['expert'] = compute_D_expert(G_app, G_test, ed_method,
                                               **kwargs)
        distances['fitted'] = compute_D_fitted(
            G_app, y_app, G_test,
            y_distance=y_distance,
            mode=mode, unlabeled=unlabeled, ed_method=ed_method,
            **kwargs)

        for setup in distances:
            print("{0} mode".format(setup))
            setup_results = {}
            D_app, D_test, edit_costs = distances[setup]
            setup_results['D_app'] = D_app
            setup_results['D_test'] = D_test
            setup_results['edit_costs'] = edit_costs
            print(edit_costs)
            perf_app, perf_test, clf = evaluate_D(
                D_app, y_app, D_test, y_test, mode)

            setup_results['perf_app'] = perf_app
            setup_results['perf_test'] = perf_test
            setup_results['clf'] = clf

            print("Learning performance with {1} costs: {0:.2f}".format(
                perf_app, setup))
            print("Test performance with {1} costs: {0:.2f}".format(
                perf_test, setup))
            cur_results[setup] = setup_results
        results.append(cur_results)
    return results
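
# Hypothetical entry point (a sketch, not part of the original module): the
# dataset loader name below is an assumption; substitute whatever yields a
# list of graphs Gn and their targets y_all.
#
#     if __name__ == '__main__':
#         from dataset import load_dataset  # hypothetical helper
#         Gn, y_all = load_dataset('Acyclic')
#         results = xp_knn(Gn, y_all, y_distance=euclid_d, mode='reg',
#                          unlabeled=False, ed_method='BIPARTITE')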