"""Example: time-limited random pipeline search with axolotl.

Reads ``learningData.csv`` from the ``iris_dataset_1`` test dataset into a
DataFrame, wraps it as a dataset/problem-description pair, runs
``RandomSearch.search_fit`` on a ``SimpleRunner`` backend with a 30 second
limit, and prints the output produced by the fitted pipeline on the same
dataset.
"""

import os
import time
from pprint import pprint

import pandas as pd
from sklearn.datasets import make_regression

from d3m import container
from d3m.metadata.pipeline import Pipeline

from axolotl.utils import data_problem, pipeline as pipeline_utils
from axolotl.backend.simple import SimpleRunner
from axolotl.backend.ray import RayRunner
from axolotl.algorithms.random_search import RandomSearch

# Initialise the runner (backend) used to execute pipelines.
# To use the Ray backend instead, swap in the line below:
# backend = RayRunner(random_seed=42, volumes_dir=None, n_workers=3)
backend = SimpleRunner(random_seed=42, volumes_dir=None)
# Optional pause kept from the original script (presumably to give Ray
# workers time to start -- TODO confirm):
# time.sleep(30)

# NOTE: this path is relative, so it is resolved against the current working
# directory; run the script from a directory that is a sibling of `tests`.
table_path = os.path.join(
    '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'tables',
    'learningData.csv',
)
df = pd.read_csv(table_path)

# Build the dataset and problem description; column index 5 is the target.
# NOTE(review): the iris data conventionally has three classes -- confirm
# that 'binary_classification' is the intended task here.
dataset, problem_description = data_problem.generate_dataset_problem(
    df,
    task='binary_classification',
    target_index=5,
)

# `search_fit` searches for the best pipeline within the time budget and then
# fits the best-ranked pipeline on `input_data`.
search = RandomSearch(problem_description=problem_description, backend=backend)
fitted_pipeline, fitted_pipeline_result = search.search_fit(
    input_data=[dataset],
    time_limit=30,
)

# Run the fitted pipeline on the same dataset and print what it produced.
produce_results = search.produce(fitted_pipeline, [dataset])
print(produce_results.output)