# OpenI / graphkit-learn


import sys


def run_xp(ds_name, output_file, unlabeled, mode, y_distance, ed_method):
	"""Run one KNN experiment on a dataset and pickle its results.

	The dataset is loaded from ``DATASET_ROOT``, its node and edge attributes
	are removed, and ``xp_knn`` is run on the resulting graphs and targets.
	A dictionary with the keys ``'y_distance'``, ``'dataset'``, ``'results'``,
	``'unlabeled'``, ``'mode'`` and ``'ed_method'`` is then pickled to
	``output_file``.

	Parameters
	----------
	ds_name : str
		Name of the dataset, forwarded to ``Dataset``.
	output_file : str
		Path of the pickle file to write. Its parent directory is created
		if it does not exist.
	unlabeled : bool
		Ignored: the value is recomputed from the loaded dataset (see below).
	mode : str
		Forwarded unchanged to ``xp_knn``.
	y_distance : str
		Name of the distance on targets: ``'euclidean'``, ``'manhattan'`` or
		``'classif'``.
	ed_method : str
		Forwarded unchanged to ``xp_knn``.

	Returns
	-------
	str
		``output_file``, the path the results were written to.

	Raises
	------
	KeyError
		If ``y_distance`` is not one of the known distance names.
	"""
	# Imported locally so the function does not rely on names that are only
	# defined when this file is executed as a script.
	import os
	import pickle

	from distances import euclid_d, man_d, classif_d
	from gklearn.dataset import Dataset
	from gklearn.experiments import DATASET_ROOT
	from learning import xp_knn

	y_distances = {
		'euclidean': euclid_d,
		'manhattan': man_d,
		'classif': classif_d
	}
	# Fail on an unknown distance name before the dataset is loaded.
	y_distance_fn = y_distances[y_distance]

	ds = Dataset(ds_name, root=DATASET_ROOT, verbose=True)
	ds.remove_labels(node_attrs=ds.node_attrs, edge_attrs=ds.edge_attrs) # @todo: ged can not deal with sym and unsym labels.
	Gn = ds.graphs
	y_all = ds.targets

	# The caller-supplied flag is overridden by what the dataset contains.
	unlabeled = (len(ds.node_labels) == 0 and len(ds.edge_labels) == 0)
	results = xp_knn(Gn, y_all, y_distance=y_distance_fn,
					 mode=mode,
					 unlabeled=unlabeled, ed_method=ed_method,
					 node_labels=ds.node_labels, edge_labels=ds.edge_labels)

	resu = {}
	resu['y_distance'] = y_distance
	resu['dataset'] = ds_name
	resu['results'] = results
	resu['unlabeled'] = unlabeled
	resu['mode'] = mode
	resu['ed_method'] = ed_method

	# Create the target directory so a finished experiment is not lost to a
	# missing folder.
	out_dir = os.path.dirname(output_file)
	if out_dir:
		os.makedirs(out_dir, exist_ok=True)
	# Write to the path that was passed in, and close the file afterwards.
	with open(output_file, 'wb') as f:
		pickle.dump(resu, f)
	return output_file


def run_from_args():
	"""Parse the command line and run a single experiment with ``run_xp``.

	Positional arguments are the dataset and the output file. Options are
	``-u/--unlabeled``, ``-m/--mode``, ``-y/--y_distance`` and
	``-e/--ed_method``. The parsed namespace is printed before the run and
	``"Fini"`` is printed once ``run_xp`` returns.
	"""
	import argparse
	parser = argparse.ArgumentParser()
	parser.add_argument("dataset", help="path to / name of the dataset to predict")
	parser.add_argument(
		"output_file", help="path to file which will contains the results")
	parser.add_argument("-u", "--unlabeled", help="Specify that the dataset is unlabeled graphs",
						action="store_true")
	parser.add_argument("-m", "--mode", type=str, choices=['reg', 'classif'],
						help="Specify if the dataset a classification or regression problem")
	# The default must be one of the declared choices; 'euclid' was not.
	parser.add_argument("-y", "--y_distance", type=str, choices=['euclidean', 'manhattan', 'classif'],
						default='euclidean',
						help="Specify the distance on y  to fit the costs")
	# run_xp requires an edit distance method; the default is the first
	# method listed in this script's experiment grid.
	parser.add_argument("-e", "--ed_method", type=str, default='BIPARTITE',
						help="Specify the edit distance method, e.g. 'BIPARTITE' or 'IPFP'")

	args = parser.parse_args()

	print(args)

	# run_xp looks the distance function up itself, so it receives the
	# distance *name* chosen on the command line, not a function.
	run_xp(args.dataset, args.output_file, args.unlabeled, args.mode,
		   args.y_distance, args.ed_method)
	print("Fini")


if __name__ == "__main__":

	import os
	import pickle

	from distances import classif_d, euclid_d, man_d

	# Lookup table from a distance name to the distance function on targets.
	y_distances = dict(euclidean=euclid_d, manhattan=man_d, classif=classif_d)

	if len(sys.argv) > 1:
		# Command-line arguments were given: run the experiment they describe.
		run_from_args()
	else:
		# No arguments: sweep a hard-coded grid of experiments.
		from sklearn.model_selection import ParameterGrid

		ed_methods = ['BIPARTITE', 'IPFP']
		ds_names = ['Alkane_unlabeled', 'Acyclic', 'Chiral', 'Vitamin_D', 'Steroid']
		distance_names = ['euclidean', 'manhattan']

		# Only a slice of each candidate list is actually swept.
		task_grid = ParameterGrid({
			'edit_cost': ed_methods[:1],
			'dataset': ds_names[1:2],
			'distance': list(distance_names),
		})

		unlabeled = False # @todo: Not actually used.
		mode = 'reg'

		for task in task_grid:
			print()
			print(task)

			output_result = '.'.join(['outputs/results', task['dataset'],
									  task['edit_cost'], task['distance'], 'pkl'])
			# Skip tasks whose result file already exists.
			if os.path.isfile(output_result):
				continue
			run_xp(task['dataset'], output_result, unlabeled, mode,
				   task['distance'], task['edit_cost'])