diff --git a/examples/run_certain_pipeline.py b/examples/run_certain_pipeline.py deleted file mode 100644 index 4f02ea5..0000000 --- a/examples/run_certain_pipeline.py +++ /dev/null @@ -1,30 +0,0 @@ - -import os - -results_dir = './' -pipeline_run_yml_dir = './' - -pipeline_yml_name = './pipeline.yml' # './pipeline_yml/pipeline_10.yml' -pipline_yml_index = pipeline_yml_name[11:-4] - -python_command = 'python3 -m d3m runtime fit-produce -p ' + pipeline_yml_name \ - + ' -r ./datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json' \ - + ' -i ./datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json' \ - + ' -t ./datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json -o ' \ - + results_dir + 'result.csv' \ - + ' -O ' \ - + pipeline_run_yml_dir + 'pipeline_run' + '.yml' - -print(python_command) -os.system(python_command) -# 'python3 -m d3m runtime fit-produce -p pipeline.yml -# -r ../datasets/anomaly/kpi/TRAIN/problem_TRAIN/problemDoc.json -# -i ../datasets/anomaly/kpi/TRAIN/dataset_TRAIN/datasetDoc.json -# -t ../datasets/anomaly/kpi/TEST/dataset_TEST/datasetDoc.json -# -o results.csv -O pipeline_run.yml' - -# python3 -m d3m runtime fit-produce -p pipeline.yml -# -r ../datasets/anomaly/yahoo_sub_5/TRAIN/problem_TRAIN/problemDoc.json -# -i ../datasets/anomaly/yahoo_sub_5/TRAIN/dataset_TRAIN/datasetDoc.json -# -t ../datasets/anomaly/yahoo_sub_5/TEST/dataset_TEST/datasetDoc.json -# -o result.csv -O pipeline_run.yml \ No newline at end of file diff --git a/examples/run_pipeline.py b/examples/run_pipeline.py new file mode 100644 index 0000000..ae9d8ec --- /dev/null +++ b/examples/run_pipeline.py @@ -0,0 +1,42 @@ +import pandas as pd +import sys +import argparse + +from searcher import schemas as schemas_utils +from searcher.utils import generate_dataset_problem, evaluate_pipeline +from axolotl.utils import pipeline as pipeline_utils +import os + +this_path = os.path.dirname(os.path.abspath(__file__)) +#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset + +parser = argparse.ArgumentParser(description='Arguments for running predefined pipelin.') +parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../datasets/yahoo_sub_5.csv'), + help='Input the path of the input data table') +parser.add_argument('--target_index', type=int, default=6, + help='Index of the ground truth (for evaluation)') +parser.add_argument('--metric',type=str, default='F1_MACRO', + help='Evaluation Metric (F1, F1_MACRO)') +parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/searcher/resources/default_pipeline.json'), + help='Input the path of the pre-built pipeline description') + +args = parser.parse_args() + +table_path = args.table_path +target_index = args.target_index # what column is the target +pipeline_path = args.pipeline_path +metric = args.metric # F1 on both label 0 and 1 + +time_limit = 30 # How many seconds you wanna search + +# Read data and generate dataset and problem +df = pd.read_csv(table_path) +dataset, problem_description = generate_dataset_problem(df, target_index=target_index, metric=metric) + +# Load the default pipeline +pipeline = pipeline_utils.load_pipeline(pipeline_path) + +# Run the pipeline +pipeline_result = evaluate_pipeline(problem_description, dataset, pipeline) +print(pipeline_result) +