|
-
- import sys
-
-
- def run_xp(ds_name, output_file, unlabeled, mode, y_distance, ed_method):
- from gklearn.dataset import Dataset
- from gklearn.experiments import DATASET_ROOT
- from learning import xp_knn
-
- ds = Dataset(ds_name, root=DATASET_ROOT, verbose=True)
- ds.remove_labels(node_attrs=ds.node_attrs, edge_attrs=ds.edge_attrs) # @todo: ged can not deal with sym and unsym labels.
- Gn = ds.graphs
- y_all = ds.targets
-
- resu = {}
- resu['y_distance'] = y_distance
- resu['dataset'] = ds_name
- unlabeled = (len(ds.node_labels) == 0 and len(ds.edge_labels) == 0)
- results = xp_knn(Gn, y_all, y_distance=y_distances[y_distance],
- mode=mode,
- unlabeled=unlabeled, ed_method=ed_method,
- node_labels=ds.node_labels, edge_labels=ds.edge_labels)
- resu['results'] = results
- resu['unlabeled'] = unlabeled
- resu['mode'] = mode
- resu['ed_method'] = ed_method
- pickle.dump(resu, open(output_result, 'wb'))
- return output_result
-
-
- def run_from_args():
- import argparse
- parser = argparse.ArgumentParser()
- parser.add_argument("dataset", help="path to / name of the dataset to predict")
- parser.add_argument(
- "output_file", help="path to file which will contains the results")
- parser.add_argument("-u", "--unlabeled", help="Specify that the dataset is unlabeled graphs",
- action="store_true")
- parser.add_argument("-m", "--mode", type=str, choices=['reg', 'classif'],
- help="Specify if the dataset a classification or regression problem")
- parser.add_argument("-y", "--y_distance", type=str, choices=['euclidean', 'manhattan', 'classif'],
- default='euclid',
- help="Specify the distance on y to fit the costs")
-
- args = parser.parse_args()
-
- dataset = args.dataset
- output_result = args.output_file
- unlabeled = args.unlabeled
- mode = args.mode
-
- print(args)
- y_distances = {
- 'euclidean': euclid_d,
- 'manhattan': man_d,
- 'classif': classif_d
- }
- y_distance = y_distances['euclid']
-
- run_xp(dataset, output_result, unlabeled, mode, y_distance)
- print("Fini")
-
-
- if __name__ == "__main__":
-
- import pickle
- import os
-
- from distances import euclid_d, man_d, classif_d
- y_distances = {
- 'euclidean': euclid_d,
- 'manhattan': man_d,
- 'classif': classif_d
- }
-
- # Read arguments.
- if len(sys.argv) > 1:
- run_from_args()
- else:
- from sklearn.model_selection import ParameterGrid
-
- # Get task grid.
- Edit_Cost_List = ['BIPARTITE', 'IPFP']
- Dataset_list = ['Alkane_unlabeled', 'Acyclic', 'Chiral', 'Vitamin_D',
- 'Steroid']
- Dis_List = ['euclidean', 'manhattan']
- task_grid = ParameterGrid({'edit_cost': Edit_Cost_List[0:1],
- 'dataset': Dataset_list[1:2],
- 'distance': Dis_List[:]})
-
- unlabeled = False # @todo: Not actually used.
- mode = 'reg'
- # Run.
- for task in list(task_grid):
- print()
- print(task)
-
- output_result = 'outputs/results.' + '.'.join([task['dataset'], task['edit_cost'], task['distance']]) + '.pkl'
- if not os.path.isfile(output_result):
- run_xp(task['dataset'], output_result, unlabeled, mode, task['distance'], task['edit_cost'])
|