
Rename tods.tods to tods.searcher

master
lhenry15 committed 4 years ago
parent commit 39892aabb6
7 changed files with 414 additions and 0 deletions
  1. tods/searcher/__init__.py (+0, -0)
  2. tods/searcher/resources/default_pipeline.json (+1, -0)
  3. tods/searcher/schemas.py (+10, -0)
  4. tods/searcher/search/__init__.py (+1, -0)
  5. tods/searcher/search/brute_force_search.py (+292, -0)
  6. tods/searcher/tods/utils.py (+59, -0)
  7. tods/searcher/utils.py (+51, -0)

tods/searcher/__init__.py (+0, -0)


tods/searcher/resources/default_pipeline.json (+1, -0)

@@ -0,0 +1 @@
{"id": "384bbfab-4f6d-4001-9f90-684ea5681f5d", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-09-09T23:40:01.756164Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.7.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "b94ee59ccf8db678d506adddbc238fb2049fb664a1e3f3f3f6a6517c0c4f8e5f"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.data_transformation.column_parser.Common", "name": "Parses strings into their types", "digest": "256f0155c7185d747b3b23096e46c40d15844106f9ed6346453f6010891f1896"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "85fe81066e85dbb62eacbe8a96be52d08e7aec22a025a29c81feaaaa72d7f7d0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "85fe81066e85dbb62eacbe8a96be52d08e7aec22a025a29c81feaaaa72d7f7d0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "eaff2f35-978c-4530-a12e-061a5f0beacd", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_mean", "name": "Time Series Decompostional", "digest": "2f2a8c07878643fe29c346096b91b5ba91477baa1e7e78684f07e53d29766ca4"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_vae", "name": "TODS.anomaly_detection_primitives.VariationalAutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", "name": "Construct pipeline predictions output", "digest": 
"d5384857f75090844f367504befb1a854e5088589f6aae0795f66ccf10403e19"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "8d969800816d9596e94cb045aacce43dc3d49e8c5bedb403e35af6c9b8339990"}

tods/searcher/schemas.py (+10, -0)

@@ -0,0 +1,10 @@
import os

resource_dir = os.path.dirname(__file__)

DEFAULT_PIPELINE_DIR = os.path.join(resource_dir, 'resources', 'default_pipeline.json')

def load_default_pipeline():
    from axolotl.utils import pipeline as pipeline_utils
    pipeline = pipeline_utils.load_pipeline(DEFAULT_PIPELINE_DIR)
    return pipeline
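
The helper hides the axolotl import; a minimal usage sketch, assuming axolotl is installed alongside tods:

# Hedged usage sketch for load_default_pipeline (assumes axolotl is installed).
from tods.searcher.schemas import load_default_pipeline

pipeline = load_default_pipeline()  # parses resources/default_pipeline.json
print(pipeline.id)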

tods/searcher/search/__init__.py (+1, -0)

@@ -0,0 +1 @@
from .brute_force_search import BruteForceSearch

tods/searcher/search/brute_force_search.py (+292, -0)

@@ -0,0 +1,292 @@
# A Brute-Force Search
import uuid
import random

from d3m.metadata.pipeline import Pipeline

from axolotl.algorithms.base import PipelineSearchBase
from axolotl.utils import schemas as schemas_utils

class BruteForceSearch(PipelineSearchBase):
    def __init__(self, problem_description, backend, *, primitives_blocklist=None, ranking_function=None):
        super().__init__(problem_description=problem_description, backend=backend,
                         primitives_blocklist=primitives_blocklist, ranking_function=ranking_function)
        if self.ranking_function is None:
            self.ranking_function = _rank_first_metric

        # Find the candidates
        self.task_description = schemas_utils.get_task_description(self.problem_description['problem']['task_keywords'])
        self.available_pipelines = self._return_pipelines(
            self.task_description['task_type'], self.task_description['task_subtype'], self.task_description['data_types'])
        self.metrics = self.problem_description['problem']['performance_metrics']
        self.data_preparation_pipeline = _generate_data_preparation_pipeline()
        self.scoring_pipeline = _generate_scoring_pipeline()
        self.data_preparation_params = _generate_data_preparation_params()

        self.current_pipeline_index = 0
        self.offset = 1

    def evaluate(self, pipeline_to_eval, input_data=None):
        if input_data is None:
            input_data = self.input_data
        pipeline_result = self.backend.evaluate_pipeline(
            problem_description=self.problem_description,
            pipeline=pipeline_to_eval,
            input_data=input_data,
            metrics=self.metrics,
            data_preparation_pipeline=self.data_preparation_pipeline,
            scoring_pipeline=self.scoring_pipeline,
            data_preparation_params=self.data_preparation_params)
        return pipeline_result

    def _search(self, time_left):
        # Evaluate the next slice of candidate pipelines
        pipelines_to_eval = self.available_pipelines[self.current_pipeline_index: self.current_pipeline_index + self.offset]
        self.current_pipeline_index += 1
        pipeline_results = self.backend.evaluate_pipelines(
            problem_description=self.problem_description,
            pipelines=pipelines_to_eval,
            input_data=self.input_data,
            metrics=self.metrics,
            data_preparation_pipeline=self.data_preparation_pipeline,
            scoring_pipeline=self.scoring_pipeline,
            data_preparation_params=self.data_preparation_params)

        # DEBUG: surface any errors raised during evaluation
        ####################
        for pipeline_result in pipeline_results:
            try:
                for error in pipeline_result.error:
                    if error is not None:
                        raise error
            except Exception:
                import traceback
                traceback.print_exc()
        ####################

        return [self.ranking_function(pipeline_result) for pipeline_result in pipeline_results]

    def _return_pipelines(self, task_type, task_subtype, data_type):
        # The task metadata is currently unused; candidates are enumerated
        # from the primitive pools defined below.
        pipeline_candidates = _generate_pipelines(primitive_python_paths)
        return pipeline_candidates

primitive_python_paths = {
    'data_processing': [
        #'d3m.primitives.tods.data_processing.time_interval_transform',
        #'d3m.primitives.tods.data_processing.categorical_to_binary',
        'd3m.primitives.tods.data_processing.column_filter',
        #'d3m.primitives.tods.data_processing.timestamp_validation',
        #'d3m.primitives.tods.data_processing.duplication_validation',
        #'d3m.primitives.tods.data_processing.continuity_validation',
    ],
    'timeseries_processing': [
        'd3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler',
        'd3m.primitives.tods.timeseries_processing.transformation.standard_scaler',
        'd3m.primitives.tods.timeseries_processing.transformation.power_transformer',
        'd3m.primitives.tods.timeseries_processing.transformation.quantile_transformer',
        'd3m.primitives.tods.timeseries_processing.transformation.moving_average_transform',
        'd3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing',
        #'d3m.primitives.tods.timeseries_processing.transformation.holt_smoothing',
        #'d3m.primitives.tods.timeseries_processing.transformation.holt_winters_exponential_smoothing',
        #'d3m.primitives.tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition',
    ],
    'feature_analysis': [
        #'d3m.primitives.tods.feature_analysis.auto_correlation',
        'd3m.primitives.tods.feature_analysis.statistical_mean',
        'd3m.primitives.tods.feature_analysis.statistical_median',
        'd3m.primitives.tods.feature_analysis.statistical_g_mean',
        'd3m.primitives.tods.feature_analysis.statistical_abs_energy',
        'd3m.primitives.tods.feature_analysis.statistical_abs_sum',
        'd3m.primitives.tods.feature_analysis.statistical_h_mean',
        'd3m.primitives.tods.feature_analysis.statistical_maximum',
        #'d3m.primitives.tods.feature_analysis.statistical_minimum',
        #'d3m.primitives.tods.feature_analysis.statistical_mean_abs',
        #'d3m.primitives.tods.feature_analysis.statistical_mean_abs_temporal_derivative',
        #'d3m.primitives.tods.feature_analysis.statistical_mean_temporal_derivative',
        #'d3m.primitives.tods.feature_analysis.statistical_median_abs_deviation',
        #'d3m.primitives.tods.feature_analysis.statistical_kurtosis',
        #'d3m.primitives.tods.feature_analysis.statistical_skew',
        #'d3m.primitives.tods.feature_analysis.statistical_std',
        #'d3m.primitives.tods.feature_analysis.statistical_var',
        #'d3m.primitives.tods.feature_analysis.statistical_variation',
        #'d3m.primitives.tods.feature_analysis.statistical_vec_sum',
        #'d3m.primitives.tods.feature_analysis.statistical_willison_amplitude',
        #'d3m.primitives.tods.feature_analysis.statistical_zero_crossing',
        #'d3m.primitives.tods.feature_analysis.spectral_residual_transform',
        #'d3m.primitives.tods.feature_analysis.fast_fourier_transform',
        #'d3m.primitives.tods.feature_analysis.discrete_cosine_transform',
        #'d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization',
        #'d3m.primitives.tods.feature_analysis.bk_filter',
        #'d3m.primitives.tods.feature_analysis.hp_filter',
        #'d3m.primitives.tods.feature_analysis.truncated_svd',
        #'d3m.primitives.tods.feature_analysis.wavelet_transform',
        #'d3m.primitives.tods.feature_analysis.trmf',
    ],
    'detection_algorithm': [
        'd3m.primitives.tods.detection_algorithm.pyod_ae',
        'd3m.primitives.tods.detection_algorithm.pyod_vae',
        'd3m.primitives.tods.detection_algorithm.pyod_cof',
        'd3m.primitives.tods.detection_algorithm.pyod_sod',
        'd3m.primitives.tods.detection_algorithm.pyod_abod',
        'd3m.primitives.tods.detection_algorithm.pyod_hbos',
        'd3m.primitives.tods.detection_algorithm.pyod_iforest',
        #'d3m.primitives.tods.detection_algorithm.pyod_lof',
        #'d3m.primitives.tods.detection_algorithm.pyod_knn',
        #'d3m.primitives.tods.detection_algorithm.pyod_ocsvm',
        #'d3m.primitives.tods.detection_algorithm.pyod_loda',
        #'d3m.primitives.tods.detection_algorithm.pyod_cblof',
        #'d3m.primitives.tods.detection_algorithm.pyod_sogaal',
        #'d3m.primitives.tods.detection_algorithm.pyod_mogaal',
        #'d3m.primitives.tods.detection_algorithm.matrix_profile',
        #'d3m.primitives.tods.detection_algorithm.AutoRegODetector',
        #'d3m.primitives.tods.detection_algorithm.LSTMODetector',
        #'d3m.primitives.tods.detection_algorithm.AutoRegODetector',
        #'d3m.primitives.tods.detection_algorithm.PCAODetector',
        #'d3m.primitives.tods.detection_algorithm.KDiscordODetector',
        #'d3m.primitives.tods.detection_algorithm.deeplog',
        #'d3m.primitives.tods.detection_algorithm.telemanom',
    ]
}


def _rank_first_metric(pipeline_result):
    # Rank by the first performance metric; lower rank is better, so the
    # score is negated. Failed evaluations get a rank of 1.
    if pipeline_result.status == 'COMPLETED':
        scores = pipeline_result.scores
        pipeline_result.rank = -scores['value'][0]
        return pipeline_result
    else:
        # error
        pipeline_result.rank = 1
        return pipeline_result

def _generate_data_preparation_params():
    from axolotl.utils import schemas as schemas_utils
    data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split']
    return data_preparation_params

def _generate_scoring_pipeline():
    from axolotl.utils import schemas as schemas_utils
    scoring_pipeline = schemas_utils.get_scoring_pipeline()
    return scoring_pipeline

def _generate_data_preparation_pipeline():
    from axolotl.utils import schemas as schemas_utils
    data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA")
    return data_preparation_pipeline

def _generate_pipeline(combinations):
    from d3m import index
    from d3m.metadata.base import ArgumentType
    from d3m.metadata.pipeline import Pipeline, PrimitiveStep

    pipelines = []
    for combination in combinations:
        # Creating pipeline
        pipeline_description = Pipeline()
        pipeline_description.add_input(name='inputs')

        # The first four steps are fixed
        # Step 0: dataset_to_dataframe
        step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
        step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
        step_0.add_output('produce')
        pipeline_description.add_step(step_0)

        # Step 1: column_parser
        step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
        step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
        step_1.add_output('produce')
        pipeline_description.add_step(step_1)

        # Step 2: extract_columns_by_semantic_types(attributes)
        step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
        step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
        step_2.add_output('produce')
        step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                                  data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
        pipeline_description.add_step(step_2)

        # Step 3: extract_columns_by_semantic_types(targets)
        step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
        step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
        step_3.add_output('produce')
        step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                                  data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
        pipeline_description.add_step(step_3)

        attributes = 'steps.2.produce'
        targets = 'steps.3.produce'

        # Steps 4-6: one primitive drawn from each component pool
        tods_step_4 = PrimitiveStep(primitive=index.get_primitive(combination[0]))
        tods_step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
        tods_step_4.add_output('produce')
        pipeline_description.add_step(tods_step_4)

        tods_step_5 = PrimitiveStep(primitive=index.get_primitive(combination[1]))
        tods_step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
        tods_step_5.add_output('produce')
        pipeline_description.add_step(tods_step_5)

        tods_step_6 = PrimitiveStep(primitive=index.get_primitive(combination[2]))
        tods_step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
        tods_step_6.add_output('produce')
        pipeline_description.add_step(tods_step_6)

        #tods_step_7 = PrimitiveStep(primitive=index.get_primitive(combination[3]))
        #tods_step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
        #tods_step_7.add_output('produce')
        #pipeline_description.add_step(tods_step_7)

        # Finalize the pipeline
        final_step = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
        final_step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
        final_step.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
        final_step.add_output('produce')
        pipeline_description.add_step(final_step)

        pipeline_description.add_output(name='output predictions', data_reference='steps.7.produce')
        pipeline_description.id = str(uuid.uuid4())
        pipeline_description.created = Pipeline().created

        pipelines.append(pipeline_description)
    return pipelines

def _generate_pipelines(primitive_python_paths, cpu_count=40):
    """
    Args:
        primitive_python_paths: a dict mapping each pipeline component to a
            list of candidate primitive Python paths
    Returns:
        a list of pipeline descriptions, one per primitive combination
    """
    import itertools
    import multiprocessing as mp

    #components = ['data_processing', 'timeseries_processing', 'feature_analysis', 'detection_algorithm']
    components = ['timeseries_processing', 'feature_analysis', 'detection_algorithm']
    combinations = itertools.product(*(primitive_python_paths[k] for k in components))

    return _generate_pipeline(combinations)

    # Parallel variant, currently disabled:
    ## Allocate tasks
    #combination_each_core_list = [[] for i in range(cpu_count)]
    #for idx, combination in enumerate(combinations):
    #    core = idx % cpu_count
    #    combination_each_core_list[core].append(combination)

    ## Obtain all the pipelines
    #pool = mp.Pool(processes=cpu_count)
    #results = [pool.apply_async(_generate_pipeline,
    #                            args=(combinations,))
    #           for combinations in combination_each_core_list]
    #pipelines = []
    #for p in results:
    #    pipelines.extend(p.get())
    #return pipelines
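
With the commented-out entries excluded, the pools above yield 6 timeseries_processing x 7 feature_analysis x 7 detection_algorithm = 294 candidate pipelines. A small sanity-check sketch, assuming the import path introduced by this commit:

# Hedged sketch: count the candidate pipelines the search will enumerate.
import itertools
from tods.searcher.search.brute_force_search import primitive_python_paths

components = ['timeseries_processing', 'feature_analysis', 'detection_algorithm']
combinations = list(itertools.product(*(primitive_python_paths[k] for k in components)))
print(len(combinations))  # 6 * 7 * 7 = 294

Driving the search end to end might look like the sketch below; SimpleRunner and the search_fit call are assumptions about the surrounding axolotl API, not part of this commit, and dataset/problem_description would come from the generate_dataset_problem helper added in the next file:

# Hedged end-to-end sketch (assumes axolotl's SimpleRunner backend and the
# search_fit method inherited from PipelineSearchBase).
from axolotl.backend.simple import SimpleRunner
from tods.searcher.search import BruteForceSearch

backend = SimpleRunner(random_seed=0)
search = BruteForceSearch(problem_description=problem_description, backend=backend)
best_runtime, best_pipeline_result = search.search_fit(input_data=[dataset], time_limit=60)
print(best_pipeline_result.scores)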

tods/searcher/tods/utils.py (+59, -0)

@@ -0,0 +1,59 @@

def generate_dataset_problem(df, target_index, metric):
    """
    A wrapper for generating the dataset and problem description

    Args:
        df (pandas.DataFrame): dataset
        target_index (int): The column index of the target
        metric (str): 'F1' for computing F1 on label 1, 'F1_MACRO' for
            macro-F1 on both 0 and 1

    Returns:
        dataset, problem_description
    """
    from axolotl.utils import data_problem
    from d3m.metadata.problem import TaskKeyword, PerformanceMetric

    if metric == 'F1':
        performance_metrics = [{'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}}]
    elif metric == 'F1_MACRO':
        performance_metrics = [{'metric': PerformanceMetric.F1_MACRO, 'params': {}}]
    else:
        raise ValueError('The metric {} is not supported.'.format(metric))
    dataset, problem_description = data_problem.generate_dataset_problem(df,
                                                                         target_index=target_index,
                                                                         task_keywords=[TaskKeyword.ANOMALY_DETECTION,],
                                                                         performance_metrics=performance_metrics)

    return dataset, problem_description

def evaluate_pipeline(problem_description, dataset, pipeline):
    from axolotl.utils import schemas as schemas_utils
    from axolotl.backend.simple import SimpleRunner

    data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA")
    scoring_pipeline = schemas_utils.get_scoring_pipeline()
    data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split']
    metrics = problem_description['problem']['performance_metrics']

    backend = SimpleRunner(random_seed=0)
    pipeline_result = backend.evaluate_pipeline(problem_description=problem_description,
                                                pipeline=pipeline,
                                                input_data=[dataset],
                                                metrics=metrics,
                                                data_preparation_pipeline=data_preparation_pipeline,
                                                scoring_pipeline=scoring_pipeline,
                                                data_preparation_params=data_preparation_params)
    # Print any errors raised during evaluation instead of failing silently
    try:
        for error in pipeline_result.error:
            if error is not None:
                raise error
    except Exception:
        import traceback
        traceback.print_exc()

    return pipeline_result
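
Together with schemas.load_default_pipeline, these helpers support a short evaluation loop; a hedged sketch where the synthetic DataFrame and its column layout are illustrative assumptions, not part of this commit:

# Hedged usage sketch for generate_dataset_problem and evaluate_pipeline
# (assumes the tods.searcher.tods.utils module path from this commit).
import numpy as np
import pandas as pd

from tods.searcher.schemas import load_default_pipeline
from tods.searcher.tods.utils import generate_dataset_problem, evaluate_pipeline

# Toy series: timestamp, one value column, binary anomaly label at index 2.
rng = np.random.RandomState(0)
df = pd.DataFrame({
    'timestamp': np.arange(100),
    'value': rng.randn(100),
    'anomaly': rng.randint(0, 2, 100),
})

dataset, problem = generate_dataset_problem(df, target_index=2, metric='F1_MACRO')
pipeline = load_default_pipeline()
result = evaluate_pipeline(problem, dataset, pipeline)
print(result.scores)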



tods/searcher/utils.py (+51, -0)

@@ -0,0 +1,51 @@

def generate_dataset_problem(df, target_index, metric):
    """
    A wrapper for generating the dataset and problem description

    Args:
        df (pandas.DataFrame): dataset
        target_index (int): The column index of the target
        metric (str): 'F1' for computing F1 on label 1, 'F1_MACRO' for
            macro-F1 on both 0 and 1

    Returns:
        dataset, problem_description
    """
    from axolotl.utils import data_problem
    from d3m.metadata.problem import TaskKeyword, PerformanceMetric

    if metric == 'F1':
        performance_metrics = [{'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}}]
    elif metric == 'F1_MACRO':
        performance_metrics = [{'metric': PerformanceMetric.F1_MACRO, 'params': {}}]
    else:
        raise ValueError('The metric {} is not supported.'.format(metric))
    dataset, problem_description = data_problem.generate_dataset_problem(df,
                                                                         target_index=target_index,
                                                                         task_keywords=[TaskKeyword.ANOMALY_DETECTION,],
                                                                         performance_metrics=performance_metrics)

    return dataset, problem_description

def evaluate_pipeline(problem_description, dataset, pipeline):
    from axolotl.utils import schemas as schemas_utils
    from axolotl.backend.simple import SimpleRunner

    data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA")
    scoring_pipeline = schemas_utils.get_scoring_pipeline()
    data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split']
    metrics = problem_description['problem']['performance_metrics']

    backend = SimpleRunner(random_seed=0)
    pipeline_result = backend.evaluate_pipeline(problem_description=problem_description,
                                                pipeline=pipeline,
                                                input_data=[dataset],
                                                metrics=metrics,
                                                data_preparation_pipeline=data_preparation_pipeline,
                                                scoring_pipeline=scoring_pipeline,
                                                data_preparation_params=data_preparation_params)
    return pipeline_result


