You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

run_xps.py 3.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. import sys
  def run_xp(ds_name, output_file, unlabeled, mode, y_distance, ed_method):
      """Run one k-NN experiment on a dataset and pickle the outcome.

      Parameters
      ----------
      ds_name : str
          Name of the dataset, handed to ``gklearn.dataset.Dataset``.
      output_file : str
          Path of the pickle file the result dictionary is written to.
      unlabeled : bool
          Ignored: the value is recomputed from the loaded dataset (see below).
          Kept in the signature so existing callers keep working.
      mode : str
          Forwarded unchanged to ``xp_knn`` (the CLI offers 'reg' / 'classif').
      y_distance : str
          Key into the module-level ``y_distances`` mapping; the mapped
          callable is what ``xp_knn`` receives.
      ed_method : str
          Forwarded unchanged to ``xp_knn`` as ``ed_method``.

      Returns
      -------
      str
          ``output_file``, i.e. the path that was written.
      """
      # Imported locally so the function does not depend on the import that
      # only happens inside the ``__main__`` guard.
      import pickle

      from gklearn.dataset import Dataset
      from gklearn.experiments import DATASET_ROOT
      from learning import xp_knn

      ds = Dataset(ds_name, root=DATASET_ROOT, verbose=True)
      # @todo: ged can not deal with sym and unsym labels.
      ds.remove_labels(node_attrs=ds.node_attrs, edge_attrs=ds.edge_attrs)
      Gn = ds.graphs
      y_all = ds.targets

      # The caller-supplied flag is overridden: a dataset counts as unlabeled
      # exactly when it has neither node labels nor edge labels left.
      unlabeled = (len(ds.node_labels) == 0 and len(ds.edge_labels) == 0)

      # ``y_distances`` is the name -> callable mapping defined at module level.
      results = xp_knn(Gn, y_all, y_distance=y_distances[y_distance],
                       mode=mode,
                       unlabeled=unlabeled, ed_method=ed_method,
                       node_labels=ds.node_labels, edge_labels=ds.edge_labels)

      resu = {
          'y_distance': y_distance,
          'dataset': ds_name,
          'results': results,
          'unlabeled': unlabeled,
          'mode': mode,
          'ed_method': ed_method,
      }

      # Write to the path that was passed in (previously an unrelated global
      # name was used) and make sure the handle is closed.
      with open(output_file, 'wb') as out:
          pickle.dump(resu, out)
      return output_file
  def run_from_args():
      """Parse the command line and run a single experiment via ``run_xp``.

      Positional arguments are the dataset and the output file. Options select
      the unlabeled flag, the problem mode, the distance on targets and the
      edit distance method. The distance is passed to ``run_xp`` by *name*,
      because ``run_xp`` itself resolves the name to a callable.
      """
      import argparse

      parser = argparse.ArgumentParser()
      parser.add_argument("dataset", help="path to / name of the dataset to predict")
      parser.add_argument(
          "output_file", help="path to file which will contains the results")
      parser.add_argument("-u", "--unlabeled", help="Specify that the dataset is unlabeled graphs",
                          action="store_true")
      parser.add_argument("-m", "--mode", type=str, choices=['reg', 'classif'],
                          help="Specify if the dataset a classification or regression problem")
      # The default must be one of the declared choices; argparse does not
      # validate defaults, so the former 'euclid' slipped through and failed
      # later on lookup.
      parser.add_argument("-y", "--y_distance", type=str, choices=['euclidean', 'manhattan', 'classif'],
                          default='euclidean',
                          help="Specify the distance on y to fit the costs")
      # ``run_xp`` requires an edit distance method; 'BIPARTITE' is the one
      # the built-in task grid of this script runs.
      parser.add_argument("-e", "--ed_method", type=str, default='BIPARTITE',
                          help="Specify the edit distance method (e.g. BIPARTITE, IPFP)")
      args = parser.parse_args()

      print(args)

      run_xp(args.dataset, args.output_file, args.unlabeled, args.mode,
             args.y_distance, args.ed_method)
      print("Fini")
  if __name__ == "__main__":
      import pickle
      import os
      from distances import euclid_d, man_d, classif_d

      # Name -> callable mapping for the distance on targets. ``run_xp`` looks
      # the distance up in this module-level dictionary.
      y_distances = {
          'euclidean': euclid_d,
          'manhattan': man_d,
          'classif': classif_d
      }

      # Read arguments.
      if len(sys.argv) > 1:
          run_from_args()
      else:
          from sklearn.model_selection import ParameterGrid

          # Get task grid. The slices deliberately restrict the grid to a
          # subset of the listed edit costs and datasets.
          Edit_Cost_List = ['BIPARTITE', 'IPFP']
          Dataset_list = ['Alkane_unlabeled', 'Acyclic', 'Chiral', 'Vitamin_D',
                          'Steroid']
          Dis_List = ['euclidean', 'manhattan']
          task_grid = ParameterGrid({'edit_cost': Edit_Cost_List[0:1],
                                     'dataset': Dataset_list[1:2],
                                     'distance': Dis_List[:]})

          unlabeled = False  # @todo: Not actually used.
          mode = 'reg'

          # Run every task whose result file does not exist yet.
          for task in task_grid:
              print()
              print(task)

              output_result = 'outputs/results.' + '.'.join([task['dataset'], task['edit_cost'], task['distance']]) + '.pkl'
              if not os.path.isfile(output_result):
                  # Create the output directory up front so the experiment
                  # does not fail at the final write.
                  os.makedirs(os.path.dirname(output_result), exist_ok=True)
                  run_xp(task['dataset'], output_result, unlabeled, mode, task['distance'], task['edit_cost'])

A Python package for graph kernels, graph edit distances and graph pre-image problem.