update Automl

Former-commit-id: f329413c23 [formerly fbc9364a04] [formerly 1de9149f55 [formerly 70ce578e28]] [formerly ba7486e129 [formerly 4d0ad761b6] [formerly e0a8c9ef9a [formerly b071749536]]] [formerly 96226aa3fa [formerly 33f1fb955e] [formerly 76a0a7d068 [formerly 7ddf23f692]] [formerly c087662f91 [formerly c48bb57447] [formerly 3a19d1c697 [formerly aff8923318]]]] Former-commit-id: 505ba89edf [formerly 6f1595ef97] [formerly 68902619de [formerly 7d427327cb]] [formerly 4949f72ed6 [formerly 99289aaf57] [formerly 3a19d1c697]] Former-commit-id: 7d86671a3b [formerly 3756e16cc2] [formerly 3ef8be421a [formerly a7849b254d]] Former-commit-id: cfd80b6f0d [formerly 0a4f55a634] Former-commit-id: b245688262
4 years ago · 43ff02cf86
--- a/datasets/yahoo_sub_5.csv
+++ b/datasets/yahoo_sub_5.csv
--- a/examples/run_automl.py
+++ b/examples/run_automl.py
@@ -1,36 +1,32 @@
 import uuid
 import random
 import pandas as pd
 from pprint import pprint
 from sklearn.datasets import make_classification

 from d3m import container
 from d3m.metadata.pipeline import Pipeline
 from d3m.metadata.problem import TaskKeyword, PerformanceMetric

 from axolotl.utils import data_problem
 from axolotl.backend.simple import SimpleRunner
 from axolotl.backend.ray import RayRunner
 from axolotl.algorithms.base import PipelineSearchBase
 from axolotl.utils import pipeline as pipeline_utils, schemas as schemas_utils

 import tods
 from tods.utils import generate_dataset_problem
 from tods.search import BruteForceSearch

 # Configuration for the example run. Every later step reads these values,
 # so the dataset, target column, search budget and metric are set in one place.
 table_path = 'datasets/yahoo_sub_5.csv'  # Path of the CSV dataset
 target_index = 6  # Index of the column that holds the target
 time_limit = 30  # Search budget in seconds, forwarded to search_fit
 # Alternative metric: 'F1' (F1 on label 1)
 metric = 'F1_MACRO'  # F1 on both label 0 and 1

 # Read the data and build the dataset and problem description exactly once,
 # from the configured target column and metric.
 df = pd.read_csv(table_path)
 dataset, problem_description = generate_dataset_problem(
     df,
     target_index=target_index,
     metric=metric,
 )

 # Start the backend (fixed seed, as in the original example)
 backend = SimpleRunner(random_seed=0)

 # Start the search algorithm
 search = BruteForceSearch(problem_description=problem_description, backend=backend)

 # Find the best pipeline. The search is run a single time and honours the
 # configured time_limit rather than a hard-coded budget.
 best_runtime, best_pipeline_result = search.search_fit(
     input_data=[dataset],
     time_limit=time_limit,
 )
 best_pipeline = best_runtime.pipeline
 best_output = best_pipeline_result.output

 # Evaluate the best pipeline
 best_scores = search.evaluate(best_pipeline).scores