diff --git a/tods/tods/search/brute_force_search.py b/tods/tods/search/brute_force_search.py
index e2bd486..64a5814 100644
--- a/tods/tods/search/brute_force_search.py
+++ b/tods/tods/search/brute_force_search.py
@@ -12,16 +12,14 @@ class BruteForceSearch(PipelineSearchBase):
         super().__init__(problem_description=problem_description, backend=backend,
                          primitives_blocklist=primitives_blocklist, ranking_function=ranking_function)
         if self.ranking_function is None:
-            self.ranking_function = _f1_rank
+            self.ranking_function = _rank_first_metric
 
         # Find the candidates
         self.task_description = schemas_utils.get_task_description(self.problem_description['problem']['task_keywords'])
-        #print('task_description:', self.task_description)
         self.available_pipelines = self._return_pipelines(
                 self.task_description['task_type'], self.task_description['task_subtype'], self.task_description['data_types'])
-        #print('available_pipelines:', self.available_pipelines)
 
-        self.metrics = _generate_metrics()
+        self.metrics = self.problem_description['problem']['performance_metrics']
         self.data_preparation_pipeline = _generate_data_preparation_pipeline()
         self.scoring_pipeline = _generate_scoring_pipeline()
         self.data_preparation_params = _generate_data_preparation_params()
@@ -57,6 +55,18 @@ class BruteForceSearch(PipelineSearchBase):
                                                              scoring_pipeline=self.scoring_pipeline,
                                                              data_preparation_params=self.data_preparation_params)
 
+        # DEBUG
+        ####################
+        #for pipeline_result in pipeline_results:
+        #    try:
+        #        for error in pipeline_result.error:
+        #            if error is not None:
+        #                raise error
+        #    except:
+        #        import traceback
+        #        traceback.print_exc()
+        ####################
+
         return [self.ranking_function(pipeline_result) for pipeline_result in pipeline_results]
 
     def _return_pipelines(self, task_type, task_subtype, data_type):
@@ -76,22 +86,22 @@ primitive_python_paths = {
         'd3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler',
         'd3m.primitives.tods.timeseries_processing.transformation.standard_scaler',
         'd3m.primitives.tods.timeseries_processing.transformation.power_transformer',
-        #'d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer',
-        #'d3m.primitives.tods.timeseries_processing.transformation.moving_average_transform',
-        #'d3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing',
+        'd3m.primitives.tods.timeseries_processing.transformation.quantile_transformer',
+        'd3m.primitives.tods.timeseries_processing.transformation.moving_average_transform',
+        'd3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing',
         #'d3m.primitives.tods.timeseries_processing.transformation.holt_smoothing',
         #'d3m.primitives.tods.timeseries_processing.transformation.holt_winters_exponential_smoothing',
         #'d3m.primitives.tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition',
     ],
     'feature_analysis': [
-        'd3m.primitives.tods.feature_analysis.auto_correlation',
+        #'d3m.primitives.tods.feature_analysis.auto_correlation',
         'd3m.primitives.tods.feature_analysis.statistical_mean',
         'd3m.primitives.tods.feature_analysis.statistical_median',
-        #'d3m.primitives.tods.feature_analysis.statistical_g_mean',
-        #'d3m.primitives.tods.feature_analysis.statistical_abs_energy',
-        #'d3m.primitives.tods.feature_analysis.statistical_abs_sum',
-        #'d3m.primitives.tods.feature_analysis.statistical_h_mean',
-        #'d3m.primitives.tods.feature_analysis.statistical_maximum',
+        'd3m.primitives.tods.feature_analysis.statistical_g_mean',
+        'd3m.primitives.tods.feature_analysis.statistical_abs_energy',
+        'd3m.primitives.tods.feature_analysis.statistical_abs_sum',
+        'd3m.primitives.tods.feature_analysis.statistical_h_mean',
+        'd3m.primitives.tods.feature_analysis.statistical_maximum',
         #'d3m.primitives.tods.feature_analysis.statistical_minimum',
         #'d3m.primitives.tods.feature_analysis.statistical_mean_abs',
         #'d3m.primitives.tods.feature_analysis.statistical_mean_abs_temporal_derivative',
@@ -119,10 +129,10 @@ primitive_python_paths = {
         'd3m.primitives.tods.detection_algorithm.pyod_ae',
         'd3m.primitives.tods.detection_algorithm.pyod_vae',
         'd3m.primitives.tods.detection_algorithm.pyod_cof',
-        #'d3m.primitives.tods.detection_algorithm.pyod_sod',
-        #'d3m.primitives.tods.detection_algorithm.pyod_abod',
-        #'d3m.primitives.tods.detection_algorithm.pyod_hbos',
-        #'d3m.primitives.tods.detection_algorithm.pyod_iforest',
+        'd3m.primitives.tods.detection_algorithm.pyod_sod',
+        'd3m.primitives.tods.detection_algorithm.pyod_abod',
+        'd3m.primitives.tods.detection_algorithm.pyod_hbos',
+        'd3m.primitives.tods.detection_algorithm.pyod_iforest',
         #'d3m.primitives.tods.detection_algorithm.pyod_lof',
         #'d3m.primitives.tods.detection_algorithm.pyod_knn',
         #'d3m.primitives.tods.detection_algorithm.pyod_ocsvm',
@@ -142,14 +152,7 @@ primitive_python_paths = {
 }
 
 
-def _f1_rank(pipeline_result):
-    #try:
-    #    for error in pipeline_result.error:
-    #        if error is not None:
-    #            raise error
-    #except:
-    #    import traceback
-    #    traceback.print_exc()
+def _rank_first_metric(pipeline_result):
     if pipeline_result.status == 'COMPLETED':
         scores = pipeline_result.scores
         pipeline_result.rank = -scores['value'][0]
@@ -159,12 +162,6 @@ def _f1_rank(pipeline_result):
         pipeline_result.rank = 1
     return pipeline_result
 
-def _generate_metrics():
-    from d3m.metadata.problem import PerformanceMetric
-    metrics = [{'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}},
-    ]
-    return metrics
-
 def _generate_data_preparation_params():
     from axolotl.utils import schemas as schemas_utils
     data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split']
@@ -233,24 +230,24 @@ def _generate_pipline(combinations):
     tods_step_5.add_output('produce')
     pipeline_description.add_step(tods_step_5)
 
-    tods_step_6= PrimitiveStep(primitive=index.get_primitive(combination[1]))
+    tods_step_6= PrimitiveStep(primitive=index.get_primitive(combination[2]))
     tods_step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
     tods_step_6.add_output('produce')
     pipeline_description.add_step(tods_step_6)
 
-    tods_step_7 = PrimitiveStep(primitive=index.get_primitive(combination[3]))
-    tods_step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
-    tods_step_7.add_output('produce')
-    pipeline_description.add_step(tods_step_7)
+    #tods_step_7 = PrimitiveStep(primitive=index.get_primitive(combination[3]))
+    #tods_step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
+    #tods_step_7.add_output('produce')
+    #pipeline_description.add_step(tods_step_7)
 
     # Finalize the pipeline
     final_step = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
-    final_step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce')
+    final_step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
     final_step.add_argument(name='reference',
                             argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
     final_step.add_output('produce')
     pipeline_description.add_step(final_step)
 
-    pipeline_description.add_output(name='output predictions', data_reference='steps.8.produce')
+    pipeline_description.add_output(name='output predictions', data_reference='steps.7.produce')
 
     pipeline_description.id = str(uuid.uuid4())
     pipeline_description.created = Pipeline().created
@@ -269,7 +266,8 @@ def _generate_pipelines(primitive_python_paths, cpu_count=40):
     import itertools
     import multiprocessing as mp
 
-    components = ['data_processing', 'timeseries_processing', 'feature_analysis', 'detection_algorithm']
+    #components = ['data_processing', 'timeseries_processing', 'feature_analysis', 'detection_algorithm']
+    components = ['timeseries_processing', 'feature_analysis', 'detection_algorithm']
 
     combinations = itertools.product(*(primitive_python_paths[k] for k in components))
 
diff --git a/tods/tods/utils.py b/tods/tods/utils.py
new file mode 100644
index 0000000..6e3af4f
--- /dev/null
+++ b/tods/tods/utils.py
@@ -0,0 +1,32 @@
+
+def generate_dataset_problem(df, target_index, metric):
+    """
+    A wrapper for generating the dataset and problem description.
+
+    Args:
+        df (pandas.DataFrame): dataset
+        target_index (int): The column index of the target
+        metric (str): `F1` for computing F1 on label 1, `F1_MACRO` for
+            macro-F1 on both 0 and 1
+
+    Returns:
+        dataset, problem_description
+    """
+    from axolotl.utils import data_problem
+    from d3m.metadata.problem import TaskKeyword, PerformanceMetric
+
+    if metric == 'F1':
+        performance_metrics = [{'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}}]
+    elif metric == 'F1_MACRO':
+        performance_metrics = [{'metric': PerformanceMetric.F1_MACRO, 'params': {}}]
+    else:
+        raise ValueError('The metric {} is not supported.'.format(metric))
+
+
+    dataset, problem_description = data_problem.generate_dataset_problem(df,
+                                                                         target_index=target_index,
+                                                                         task_keywords=[TaskKeyword.ANOMALY_DETECTION,],
+                                                                         performance_metrics=performance_metrics)
+
+    return dataset, problem_description
+
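
A minimal end-to-end sketch of how the new wrapper and the reworked search fit together after this change. This is an illustration rather than part of the patch: it assumes axolotl's SimpleRunner backend and the search_fit entry point inherited from PipelineSearchBase, and the CSV path and label-column layout are hypothetical.

import pandas as pd

from axolotl.backend.simple import SimpleRunner
from tods.search.brute_force_search import BruteForceSearch
from tods.utils import generate_dataset_problem

# Load an anomaly-detection table; here the last column is assumed to hold the 0/1 labels.
df = pd.read_csv('anomaly.csv')  # illustrative path
dataset, problem_description = generate_dataset_problem(
    df, target_index=df.shape[1] - 1, metric='F1_MACRO')

# The metrics now come from the problem description, and the default ranking is
# _rank_first_metric, so pipelines are ordered by the first metric requested
# above instead of a hard-coded F1 on label '1'.
backend = SimpleRunner(random_seed=0)
search = BruteForceSearch(problem_description=problem_description, backend=backend)
best_runtime, best_pipeline_result = search.search_fit(input_data=[dataset], time_limit=60)
print(best_pipeline_result.scores)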