Browse Source

Merge branch 'dev'

Former-commit-id: 2ea7191643 [formerly 34f73880dc] [formerly c61a214f78 [formerly 142892aa58]] [formerly f5dead0e0b [formerly 8101346265] [formerly bf88edb7e3 [formerly 76949195d7]]] [formerly df8663a02f [formerly 521ea55138] [formerly 8ec9dafd7b [formerly 310bcb29e4]] [formerly f1c2f5b995 [formerly 56fd4252ef] [formerly 96a9106332 [formerly 358367e791]]]] [formerly 4aa0c803b4 [formerly af97b465fc] [formerly e9cb22e928 [formerly 6d3d3a42ea]] [formerly 6b2efdda2c [formerly f977d78d88] [formerly aa8fa776ce [formerly 884fa04872]]] [formerly a01ea898ca [formerly b17442c025] [formerly 14d778b518 [formerly 0bd2ccf583]] [formerly 21312225a1 [formerly df0aa9bb9b] [formerly cc29a57219 [formerly 21d9c54c28]]]]] [formerly fe6885bb39 [formerly cc9433fab5] [formerly e4b1f00998 [formerly 95d6366266]] [formerly d71540edf4 [formerly 2860f0b499] [formerly cffd6850ca [formerly 1f5aa2b8b1]]] [formerly 62981dbc54 [formerly d8e7e40e44] [formerly 7062fac351 [formerly a560d81727]] [formerly fa01b10490 [formerly 841948a68c] [formerly 58e2d71f76 [formerly 5e12fab781]]]] [formerly c41f31f1e0 [formerly d026b5089f] [formerly 611e601bd6 [formerly 652faa3fb8]] [formerly 9f3890037f [formerly fe1f85ae5b] [formerly 73974dfe8c [formerly c3e992352c]]] [formerly d525627410 [formerly 9d161fcffe] [formerly b719c12ea3 [formerly f52365ac47]] [formerly 65cebc10aa [formerly d0334d6bb2] [formerly 9383e9b9c8 [formerly b8999a8312]]]]]]
Former-commit-id: 3a1328c46d [formerly 23c4be820e] [formerly 689fae83b7 [formerly 9a83f53f3a]] [formerly 341649e7ea [formerly b1f2f9f814] [formerly f55570b0b1 [formerly 731396f02f]]] [formerly 646163ff44 [formerly a5ebcb7cb3] [formerly 30d288c447 [formerly d4ab09e565]] [formerly 8c393771fa [formerly c1fd18ce89] [formerly 3449089150 [formerly 499493fa78]]]] [formerly 3089e5d9ad [formerly bd0f15e9de] [formerly 212888a461 [formerly b5b0c11cb0]] [formerly 89243e92c7 [formerly a9191c78de] [formerly 244a62f073 [formerly 43739f8001]]] [formerly 5d9867925c [formerly 509201691b] [formerly 22cd74327e [formerly 7c3ac9aba7]] [formerly 0755333518 [formerly 817422ffe4] [formerly 9383e9b9c8]]]]
Former-commit-id: afb452c8f3 [formerly ea1698add0] [formerly 3d54f808d6 [formerly 4401762ba2]] [formerly 1a761dd333 [formerly ba4a98e11c] [formerly 30454ae6d4 [formerly 1cdd414e1f]]] [formerly e2b351cb48 [formerly b3d0317c6f] [formerly b1a6361251 [formerly 2eb8162586]] [formerly f3c2de3423 [formerly 7e209e0c63] [formerly d112f243d0 [formerly f23851c472]]]]
Former-commit-id: c374d9e437 [formerly 9f45b9ff74] [formerly 9e49ceaa22 [formerly af2d2ff8fa]] [formerly bf9e75f781 [formerly b9562147a0] [formerly f1977a7231 [formerly 8fd275458f]]]
Former-commit-id: 8834bf2d06 [formerly 43045250a0] [formerly 2bac52c32d [formerly 7791218c6a]]
Former-commit-id: 6ae23135cc [formerly ef5f0b5e72]
Former-commit-id: 2aae4970ae
master
lhenry15 4 years ago
parent
commit
6bb473a208
1 changed files with 11 additions and 16 deletions
  1. +11
    -16
      README.md

+ 11
- 16
README.md View File

@@ -24,24 +24,22 @@ Examples are available in [/examples](examples/). For basic usage, you can evalu
import pandas as pd import pandas as pd


from tods import schemas as schemas_utils from tods import schemas as schemas_utils
from tods.utils import generate_dataset_problem, evaluate_pipeline
from tods import generate_dataset, evaluate_pipeline


table_path = 'datasets/yahoo_sub_5.csv' table_path = 'datasets/yahoo_sub_5.csv'
target_index = 6 # what column is the target target_index = 6 # what column is the target
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
time_limit = 30 # How many seconds you wanna search
#metric = 'F1' # F1 on label 1
metric = 'F1_MACRO' # F1 on both label 0 and 1 metric = 'F1_MACRO' # F1 on both label 0 and 1


# Read data and generate dataset and problem
# Read data and generate dataset
df = pd.read_csv(table_path) df = pd.read_csv(table_path)
dataset, problem_description = generate_dataset_problem(df, target_index=target_index, metric=metric)
dataset = generate_dataset(df, target_index)


# Load the default pipeline # Load the default pipeline
pipeline = schemas_utils.load_default_pipeline() pipeline = schemas_utils.load_default_pipeline()


# Run the pipeline # Run the pipeline
pipeline_result = evaluate_pipeline(problem_description, dataset, pipeline)
pipeline_result = evaluate_pipeline(dataset, pipeline, metric)
print(pipeline_result)
``` ```
We also provide AutoML support to help you automatically find a good pipeline for your data. We also provide AutoML support to help you automatically find a good pipeline for your data.
```python ```python
@@ -49,29 +47,26 @@ import pandas as pd


from axolotl.backend.simple import SimpleRunner from axolotl.backend.simple import SimpleRunner


from tods.utils import generate_dataset_problem
from tods.search import BruteForceSearch
from tods import generate_dataset, generate_problem
from tods.searcher import BruteForceSearch


# Some information # Some information
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_GOOG.csv' # The path of the dataset
#target_index = 2 # what column is the target

table_path = 'datasets/yahoo_sub_5.csv' table_path = 'datasets/yahoo_sub_5.csv'
target_index = 6 # what column is the target target_index = 6 # what column is the target
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
time_limit = 30 # How many seconds you wanna search time_limit = 30 # How many seconds you wanna search
#metric = 'F1' # F1 on label 1
metric = 'F1_MACRO' # F1 on both label 0 and 1 metric = 'F1_MACRO' # F1 on both label 0 and 1


# Read data and generate dataset and problem # Read data and generate dataset and problem
df = pd.read_csv(table_path) df = pd.read_csv(table_path)
dataset, problem_description = generate_dataset_problem(df, target_index=target_index, metric=metric)
dataset = generate_dataset(df, target_index=target_index)
problem_description = generate_problem(dataset, metric)


# Start backend # Start backend
backend = SimpleRunner(random_seed=0) backend = SimpleRunner(random_seed=0)


# Start search algorithm # Start search algorithm
search = BruteForceSearch(problem_description=problem_description, backend=backend)
search = BruteForceSearch(problem_description=problem_description,
backend=backend)


# Find the best pipeline # Find the best pipeline
best_runtime, best_pipeline_result = search.search_fit(input_data=[dataset], time_limit=time_limit) best_runtime, best_pipeline_result = search.search_fit(input_data=[dataset], time_limit=time_limit)


Loading…
Cancel
Save