diff --git a/README.md b/README.md index c3b6b46..3a6b038 100644 --- a/README.md +++ b/README.md @@ -24,24 +24,22 @@ Examples are available in [/examples](examples/). For basic usage, you can evalu import pandas as pd from tods import schemas as schemas_utils -from tods.utils import generate_dataset_problem, evaluate_pipeline +from tods import generate_dataset, evaluate_pipeline table_path = 'datasets/yahoo_sub_5.csv' target_index = 6 # what column is the target -#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset -time_limit = 30 # How many seconds you wanna search -#metric = 'F1' # F1 on label 1 metric = 'F1_MACRO' # F1 on both label 0 and 1 -# Read data and generate dataset and problem +# Read data and generate dataset df = pd.read_csv(table_path) -dataset, problem_description = generate_dataset_problem(df, target_index=target_index, metric=metric) +dataset = generate_dataset(df, target_index) # Load the default pipeline pipeline = schemas_utils.load_default_pipeline() # Run the pipeline -pipeline_result = evaluate_pipeline(problem_description, dataset, pipeline) +pipeline_result = evaluate_pipeline(dataset, pipeline, metric) +print(pipeline_result) ``` We also provide AutoML support to help you automatically find a good pipeline for your data. 
```python @@ -49,29 +47,26 @@ import pandas as pd from axolotl.backend.simple import SimpleRunner -from tods.utils import generate_dataset_problem -from tods.search import BruteForceSearch +from tods import generate_dataset, generate_problem +from tods.searcher import BruteForceSearch # Some information -#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_GOOG.csv' # The path of the dataset -#target_index = 2 # what column is the target - table_path = 'datasets/yahoo_sub_5.csv' target_index = 6 # what column is the target -#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset time_limit = 30 # How many seconds you wanna search -#metric = 'F1' # F1 on label 1 metric = 'F1_MACRO' # F1 on both label 0 and 1 # Read data and generate dataset and problem df = pd.read_csv(table_path) -dataset, problem_description = generate_dataset_problem(df, target_index=target_index, metric=metric) +dataset = generate_dataset(df, target_index=target_index) +problem_description = generate_problem(dataset, metric) # Start backend backend = SimpleRunner(random_seed=0) # Start search algorithm -search = BruteForceSearch(problem_description=problem_description, backend=backend) +search = BruteForceSearch(problem_description=problem_description, + backend=backend) # Find the best pipeline best_runtime, best_pipeline_result = search.search_fit(input_data=[dataset], time_limit=time_limit)