#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 27 19:33:51 2020

@author: ljia
"""
import os
import pickle

import numpy as np

from gklearn.dataset import Dataset
from gklearn.kernels import GRAPH_KERNELS
from gklearn.model_learning import NestedCV


class Workflow(object):
    """Drive a list of (kernel, dataset) tasks until all of them are complete.

    Each task is a mapping that contains at least the keys ``'kernel'`` and
    ``'dataset'``. The state of a task is read from a ``results_summary.pkl``
    file stored in a per-task directory under the root directory.
    """

    def __init__(self, **kwargs):
        """Store the workflow settings.

        Keyword Args:
            job_prefix: Prefix used to recognise this workflow's jobs in the
                queue listing. Defaults to ``'gktask'``.
            max_num_running_tasks: Upper bound on simultaneously running
                tasks. Defaults to ``np.inf`` (no limit).
            root_dir: Directory that holds one sub-directory per task.
                Defaults to ``'outputs/'``.
        """
        self._job_prefix = kwargs.get('job_prefix', 'gktask')
        self._max_num_running_tasks = kwargs.get('max_num_running_tasks', np.inf)
        self._root_dir = kwargs.get('root_dir', 'outputs/')

    def run(self, tasks):
        """Validate ``tasks`` and keep launching them until all are complete.

        Args:
            tasks: List of task mappings, each with ``'kernel'`` and
                ``'dataset'`` keys.

        Raises:
            ValueError: If ``tasks`` does not pass ``_check_inputs``.
        """
        ### Check inputs.
        if not self._check_inputs(tasks):
            raise ValueError('The input "tasks" is not correct.')
        self._tasks = tasks

        ### Sort tasks.
        self.sort_tasks_by_complexity()

        ### The main process.
        complete = False
        while not complete:

            self.get_running_tasks()

            if self._num_running_tasks < self._max_num_running_tasks:
                ### Load results from table.
                self.load_results_from_table()

                for task in self._tasks:
                    state = self.get_task_state(task)
                    # Only launch tasks that are neither finished nor
                    # already in progress. (The original compared against
                    # the misspelled literal 'runnning', which would never
                    # match a 'running' state.)
                    if state not in ('complete', 'running'):
                        self.run_task(task)

                    # NOTE(review): the running-task count is only refreshed
                    # by get_running_tasks() at the top of the outer loop, so
                    # this check sees the same value for every task in this
                    # pass -- confirm whether it should be refreshed here.
                    if self._num_running_tasks >= self._max_num_running_tasks:
                        break

            ### Save results.
            # NOTE(review): save_results() and check_completeness() are not
            # defined in the visible part of this file -- confirm they exist.
            self.save_results()

            complete = self.check_completeness()

            # sleep()

    def _check_inputs(self, tasks):
        """Return True if ``tasks`` is a list whose items all contain the
        keys ``'kernel'`` and ``'dataset'``; otherwise return False.

        An empty list is accepted.
        """
        if not isinstance(tasks, list):
            return False
        return all('kernel' in task and 'dataset' in task for task in tasks)

    def sort_tasks_by_complexity(self):
        """Placeholder: currently leaves the task order unchanged."""
        return

    def get_running_tasks(self):
        """Count the current user's queued jobs whose name starts with the
        job prefix and store the count in ``self._num_running_tasks``.

        The job names are obtained by running ``squeue`` through the shell.
        """
        command = 'squeue --user $USER --format "%.50j" --noheader'
        # Use the pipe as a context manager so it is always closed.
        with os.popen(command) as stream:
            output = stream.readlines()
        # Lines are stripped before matching because the format string may
        # pad the job name with whitespace.
        running_tasks = [
            line for line in output
            if line.strip().startswith(self._job_prefix)
        ]
        self._num_running_tasks = len(running_tasks)

    def load_results_from_table(self):
        """Placeholder: not implemented yet."""
        pass

    def get_task_state(self, task):
        """Return the state recorded for ``task``.

        The state is the ``'state'`` entry of the object pickled in
        ``<root_dir>/<kernel>.<dataset>/results_summary.pkl``.

        Args:
            task: Mapping with ``'kernel'`` and ``'dataset'`` string entries.

        Returns:
            The stored state, or ``'unstarted'`` if no summary file exists.
        """
        task_dir = os.path.join(
            self._root_dir, task['kernel'] + '.' + task['dataset'] + '/')
        fn_summary = os.path.join(task_dir, 'results_summary.pkl')
        if not os.path.isfile(fn_summary):
            return 'unstarted'

        # The original called pickle.loads() on the file *name*, which raises
        # TypeError; the file content has to be read instead.
        # NOTE: unpickling executes arbitrary code -- only use this on
        # summary files produced by trusted runs.
        with open(fn_summary, 'rb') as f:
            output = pickle.load(f)
        return output['state']

    def run_task(self, task):
        """Run nested cross-validation for one task.

        Args:
            task: Mapping with ``'kernel'`` (a key of ``GRAPH_KERNELS``) and
                ``'dataset'`` (a name accepted by ``Dataset``).
        """
        ds_name = task['dataset']
        k_name = task['kernel']

        # Get dataset.
        ds = Dataset(ds_name)
        graph_kernel = GRAPH_KERNELS[k_name]

        # Start CV.
        # NOTE(review): the result is not used or stored in the visible
        # code -- confirm whether it should be saved or returned.
        results = NestedCV(ds, graph_kernel)