From c000f914efe9c4d3bdbd248e287ce00043dce744 Mon Sep 17 00:00:00 2001 From: hwy893747147 Date: Mon, 28 Jun 2021 23:11:48 -0500 Subject: [PATCH] new notebook --- .../TODS Notebook Master-Branch.ipynb | 1975 ++++++++++++++++++++ 1 file changed, 1975 insertions(+) create mode 100644 examples/Demo Notebook/TODS Notebook Master-Branch.ipynb diff --git a/examples/Demo Notebook/TODS Notebook Master-Branch.ipynb b/examples/Demo Notebook/TODS Notebook Master-Branch.ipynb new file mode 100644 index 0000000..98aa202 --- /dev/null +++ b/examples/Demo Notebook/TODS Notebook Master-Branch.ipynb @@ -0,0 +1,1975 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TODS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction Summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "TODS is a full-stack automated machine learning system for outlier detection on multivariate time-series data. TODS provides exhaustive modules for building machine learning-based outlier detection systems, including: data processing, time series processing, feature analysis (extraction), detection algorithms, and a reinforcement module. The functionalities provided via these modules include data preprocessing for general purposes, time series data smoothing/transformation, extracting features from time/frequency domains, various detection algorithms, and involving human expertise to calibrate the system. Three common outlier detection scenarios on time-series data can be performed: point-wise detection (time points as outliers), pattern-wise detection (subsequences as outliers), and system-wise detection (sets of time series as outliers), and a wide range of corresponding algorithms are provided in TODS. This package is developed by DATA Lab @ Texas A&M University." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python 3.6.10 :: Anaconda, Inc.\r\n" + ] + } + ], + "source": [ + "!python -V\n", + "# Make sure python version is 3.6" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1.4.1'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import scipy\n", + "scipy.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TODS Notebook Master-Branch.ipynb TODS Official Demo Notebook.ipynb\r\n" + ] + } + ], + "source": [ + "!ls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d3m.primitives.tods.detection_algorithm.LSTMODetector: Primitive is not providing a description through its docstring.\n" + ] + } + ], + "source": [ + "import sys\n", + "import argparse\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.metrics import precision_recall_curve\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.metrics import classification_report\n", + "import matplotlib.pyplot as plt\n", + "from sklearn import metrics\n", + "from d3m import index\n", + "from d3m.metadata.base import ArgumentType\n", + "from 
d3m.metadata.pipeline import Pipeline, PrimitiveStep\n", + "from axolotl.backend.simple import SimpleRunner\n", + "from tods import generate_dataset, generate_problem\n", + "from tods.searcher import BruteForceSearch\n", + "from tods import generate_dataset, load_pipeline, evaluate_pipeline\n", + "from tods.sk_interface.detection_algorithm.DeepLog_skinterface import DeepLogSKI\n", + "from tods.sk_interface.detection_algorithm.Telemanom_skinterface import TelemanomSKI\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### UCR Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "data_UCR = np.loadtxt(\"../../datasets/anomaly/raw_data/500_UCR_Anomaly_robotDOG1_10000_19280_19360.txt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (20000,)\n", + "datatype of data: float64\n", + "First 5 rows:\n", + " [0.145299 0.128205 0.094017 0.076923 0.111111]\n" + ] + } + ], + "source": [ + "print(\"shape:\", data_UCR.shape)\n", + "print(\"datatype of data:\",data_UCR.dtype)\n", + "print(\"First 5 rows:\\n\", data_UCR[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "X_train = np.expand_dims(data_UCR[:10000], axis=1)\n", + "X_test = np.expand_dims(data_UCR[10000:], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First 5 rows train:\n", + " [[0.145299]\n", + " [0.128205]\n", + " [0.094017]\n", + " [0.076923]\n", + " [0.111111]]\n", + "First 5 rows test:\n", + " [[0.076923]\n", + " [0.076923]\n", + " [0.076923]\n", + " [0.094017]\n", + " [0.145299]]\n" + ] + } + ], + "source": [ + "print(\"First 5 rows 
train:\\n\", X_train[:5])\n", + "print(\"First 5 rows test:\\n\", X_test[:5])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Yahoo Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "data_yahoo = pd.read_csv('../../datasets/anomaly/raw_data/yahoo_sub_5.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (1400, 7)\n", + "First 5 rows:\n", + " timestamp value_0 value_1 value_2 value_3 value_4 anomaly\n", + "0 1 12183 0.000000 3.716667 5 2109 0\n", + "1 2 12715 0.091758 3.610833 60 3229 0\n", + "2 3 12736 0.172297 3.481389 88 3637 0\n", + "3 4 12716 0.226219 3.380278 84 1982 0\n", + "4 5 12739 0.176358 3.193333 111 2751 0\n" + ] + } + ], + "source": [ + "print(\"shape:\", data_yahoo.shape)\n", + "print(\"First 5 rows:\\n\", data_yahoo[:5])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SK Example 1: DeepLog" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "282/282 [==============================] - 1s 5ms/step - loss: 0.4255 - val_loss: 0.2777\n", + "Epoch 2/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3367 - val_loss: 0.2802\n", + "Epoch 3/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3545 - val_loss: 0.2595\n", + "Epoch 4/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3572 - val_loss: 0.2674\n", + "Epoch 5/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3457 - val_loss: 0.2880\n", + "Epoch 6/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3503 - val_loss: 0.2619\n", + "Epoch 7/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 
0.3559 - val_loss: 0.2818\n", + "Epoch 8/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3439 - val_loss: 0.2620\n", + "Epoch 9/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3390 - val_loss: 0.2690\n", + "Epoch 10/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3425 - val_loss: 0.2683\n" + ] + } + ], + "source": [ + "transformer = DeepLogSKI()\n", + "transformer.fit(X_train)\n", + "prediction_labels_train = transformer.predict(X_train)\n", + "prediction_labels_test = transformer.predict(X_test)\n", + "prediction_score = transformer.predict_score(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction Labels\n", + " [[0]\n", + " [0]\n", + " [0]\n", + " ...\n", + " [0]\n", + " [0]\n", + " [0]]\n", + "Prediction Score\n", + " [[0]\n", + " [0]\n", + " [0]\n", + " ...\n", + " [0]\n", + " [0]\n", + " [0]]\n" + ] + } + ], + "source": [ + "print(\"Prediction Labels\\n\", prediction_labels_test)\n", + "print(\"Prediction Score\\n\", prediction_score)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "y_true = prediction_labels_train\n", + "y_pred = prediction_labels_test\n", + "precision, recall, thresholds = precision_recall_curve(y_true, y_pred)\n", + "f1_scores = 2*recall*precision/(recall+precision)\n", + "fpr, tpr, threshold = metrics.roc_curve(y_true, y_pred)\n", + "roc_auc = metrics.auc(fpr, tpr)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy Score: 0.9042\n" + ] + } + ], + "source": [ + "print('Accuracy Score: ', accuracy_score(y_true, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.93 0.97 0.95 9004\n", + " 1 0.53 0.35 0.42 996\n", + "\n", + " accuracy 0.90 10000\n", + " macro avg 0.73 0.66 0.68 10000\n", + "weighted avg 0.89 0.90 0.90 10000\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_true, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best threshold: 1\n", + "Best F1-Score: 0.42219541616405304\n" + ] + } + ], + "source": [ + "print('Best threshold: ', thresholds[np.argmax(f1_scores)])\n", + "print('Best F1-Score: ', np.max(f1_scores))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.title('ROC')\n", + "plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)\n", + "plt.legend(loc = 'lower right')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.xlabel('False Positive Rate')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SK Example 2: Telemanom" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "125/125 [==============================] - 1s 7ms/step - loss: 0.0134 - val_loss: 0.0051\n" + ] + } + ], + "source": [ + "transformer = TelemanomSKI(l_s= 2, n_predictions= 1)\n", + "transformer.fit(X_train)\n", + "prediction_labels_train = transformer.predict(X_train)\n", + "prediction_labels_test = transformer.predict(X_test)\n", + "prediction_score = transformer.predict_score(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction Labels\n", + " [[1]\n", + " [1]\n", + " [1]\n", + " ...\n", + " [1]\n", + " [1]\n", + " [1]]\n", + "Prediction Score\n", + " [[1]\n", + " [1]\n", + " [1]\n", + " ...\n", + " [1]\n", + " [1]\n", + " [1]]\n" + ] + } + ], + "source": [ + "print(\"Prediction Labels\\n\", prediction_labels_test)\n", + "print(\"Prediction Score\\n\", prediction_score)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "y_true = prediction_labels_train\n", + "y_pred = prediction_labels_test\n", + "precision, recall, thresholds = precision_recall_curve(y_true, y_pred)\n", + "f1_scores = 2*recall*precision/(recall+precision)\n", + "fpr, tpr, threshold = metrics.roc_curve(y_true, y_pred)\n", + "roc_auc = metrics.auc(fpr, tpr)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy Score: 0.18055416624987497\n" + ] + } + ], + "source": [ + "print('Accuracy Score: ', accuracy_score(y_true, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 958, 8039],\n", + " [ 153, 847]])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "confusion_matrix(y_true, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.86 0.11 0.19 8997\n", + " 1 0.10 0.85 0.17 1000\n", + "\n", + " accuracy 0.18 9997\n", + " macro avg 0.48 0.48 0.18 9997\n", + "weighted avg 0.79 0.18 0.19 9997\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_true, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best threshold: 0\n", + "Best F1-Score: 0.18186778212239701\n" + ] + } + ], + "source": [ + "print('Best threshold: ', thresholds[np.argmax(f1_scores)])\n", + "print('Best F1-Score: ', np.max(f1_scores))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.title('ROC')\n", + "plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)\n", + "plt.legend(loc = 'lower right')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.xlabel('False Positive Rate')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pipeline Example: AutoEncoder" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'inputs.0'" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Creating pipeline\n", + "pipeline_description = Pipeline()\n", + "pipeline_description.add_input(name='inputs')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 'tods.data_processing.dataset_to_dataframe', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 'tods.data_processing.dataset_to_dataframe' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 0: dataset_to_dataframe\n", + "step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))\n", + "step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')\n", + "step_0.add_output('produce')\n", + "pipeline_description.add_step(step_0)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 
'tods.data_processing.column_parser', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 'tods.data_processing.column_parser' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 1: column_parser\n", + "step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))\n", + "step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')\n", + "step_1.add_output('produce')\n", + "pipeline_description.add_step(step_1)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 'tods.data_processing.extract_columns_by_semantic_types', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 'tods.data_processing.extract_columns_by_semantic_types' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 2: extract_columns_by_semantic_types(attributes)\n", + "step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))\n", + "step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')\n", + "step_2.add_output('produce')\n", + "step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,\n", + "\t\t\t\t\t\t\t data=['https://metadata.datadrivendiscovery.org/types/Attribute'])\n", + "pipeline_description.add_step(step_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: extract_columns_by_semantic_types(targets)\n", + "step_3 = 
PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))\n", + "step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')\n", + "step_3.add_output('produce')\n", + "step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,\n", + "\t\t\t\t\t\t\tdata=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])\n", + "pipeline_description.add_step(step_3)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "attributes = 'steps.2.produce'\n", + "targets = 'steps.3.produce'" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 'tods.feature_analysis.statistical_maximum', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 'tods.feature_analysis.statistical_maximum' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 4: processing\n", + "step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum'))\n", + "step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)\n", + "step_4.add_output('produce')\n", + "pipeline_description.add_step(step_4)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 'tods.detection_algorithm.pyod_ae', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 
'tods.detection_algorithm.pyod_ae' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 5: algorithm\n", + "step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))\n", + "step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')\n", + "step_5.add_output('produce')\n", + "pipeline_description.add_step(step_5)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 'tods.data_processing.construct_predictions', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 'tods.data_processing.construct_predictions' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 6: Predictions\n", + "step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))\n", + "step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')\n", + "step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')\n", + "step_6.add_output('produce')\n", + "pipeline_description.add_step(step_6)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'outputs.0'" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Final Output\n", + "pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"id\": 
\"44caca5f-ed2a-42d6-bede-777fd96e5a90\", \"schema\": \"https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json\", \"created\": \"2021-06-29T04:06:32.108192Z\", \"inputs\": [{\"name\": \"inputs\"}], \"outputs\": [{\"data\": \"steps.6.produce\", \"name\": \"output predictions\"}], \"steps\": [{\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"c78138d9-9377-31dc-aee8-83d9df049c60\", \"version\": \"0.3.0\", \"python_path\": \"d3m.primitives.tods.data_processing.dataset_to_dataframe\", \"name\": \"Extract a DataFrame from a Dataset\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"inputs.0\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"81235c29-aeb9-3828-911a-1b25319b6998\", \"version\": \"0.6.0\", \"python_path\": \"d3m.primitives.tods.data_processing.column_parser\", \"name\": \"Parses strings into their types\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.0.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"a996cd89-ddf0-367f-8e7f-8c013cbc2891\", \"version\": \"0.4.0\", \"python_path\": \"d3m.primitives.tods.data_processing.extract_columns_by_semantic_types\", \"name\": \"Extracts columns by semantic type\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.1.produce\"}}, \"outputs\": [{\"id\": \"produce\"}], \"hyperparams\": {\"semantic_types\": {\"type\": \"VALUE\", \"data\": [\"https://metadata.datadrivendiscovery.org/types/Attribute\"]}}}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"a996cd89-ddf0-367f-8e7f-8c013cbc2891\", \"version\": \"0.4.0\", \"python_path\": \"d3m.primitives.tods.data_processing.extract_columns_by_semantic_types\", \"name\": \"Extracts columns by semantic type\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.0.produce\"}}, \"outputs\": [{\"id\": \"produce\"}], \"hyperparams\": {\"semantic_types\": {\"type\": \"VALUE\", 
\"data\": [\"https://metadata.datadrivendiscovery.org/types/TrueTarget\"]}}}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"f07ce875-bbc7-36c5-9cc1-ba4bfb7cf48e\", \"version\": \"0.1.0\", \"python_path\": \"d3m.primitives.tods.feature_analysis.statistical_maximum\", \"name\": \"Time Series Decompostional\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.2.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"67e7fcdf-d645-3417-9aa4-85cd369487d9\", \"version\": \"0.0.1\", \"python_path\": \"d3m.primitives.tods.detection_algorithm.pyod_ae\", \"name\": \"TODS.anomaly_detection_primitives.AutoEncoder\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.4.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"2530840a-07d4-3874-b7d8-9eb5e4ae2bf3\", \"version\": \"0.3.0\", \"python_path\": \"d3m.primitives.tods.data_processing.construct_predictions\", \"name\": \"Construct pipeline predictions output\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.5.produce\"}, \"reference\": {\"type\": \"CONTAINER\", \"data\": \"steps.1.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}], \"digest\": \"3f4eb364201fc5fc403cc66c847ce1597fe8b0a91d130bded4d21c2a6ef2eef2\"}\n" + ] + } + ], + "source": [ + "# Output to json\n", + "data = pipeline_description.to_json()\n", + "with open('autoencoder_pipeline.json', 'w') as f:\n", + " f.write(data)\n", + " print(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "this_path = os.path.dirname(os.path.abspath(\"__file__\"))\n", + "default_data_path = os.path.join(this_path, '../../datasets/anomaly/raw_data/yahoo_sub_5.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "_StoreAction(option_strings=['--pipeline_path'], dest='pipeline_path', nargs=None, const=None, default='/Users/wangyanghe/Desktop/Research/tods/examples/Demo Notebook/autoencoder_pipeline.json', type=None, choices=None, help='Input the path of the pre-built pipeline description', metavar=None)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "parser = argparse.ArgumentParser(description='Arguments for running predefined pipelin.')\n", + "parser.add_argument('--table_path', type=str, default=default_data_path,\n", + " help='Input the path of the input data table')\n", + "parser.add_argument('--target_index', type=int, default=6,\n", + " help='Index of the ground truth (for evaluation)')\n", + "parser.add_argument('--metric',type=str, default='F1_MACRO',\n", + " help='Evaluation Metric (F1, F1_MACRO)')\n", + "parser.add_argument('--pipeline_path', \n", + " default=os.path.join(this_path, 'autoencoder_pipeline.json'),\n", + " help='Input the path of the pre-built pipeline description')" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "args, unknown = parser.parse_known_args()\n", + "table_path = args.table_path \n", + "target_index = args.target_index # what column is the target\n", + "pipeline_path = args.pipeline_path\n", + "metric = args.metric # F1 on both label 0 and 1" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# Read data and generate dataset\n", + "df = pd.read_csv(table_path)\n", + "dataset = generate_dataset(df, target_index)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the default pipeline\n", + "pipeline = load_pipeline(pipeline_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_2\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_2 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_2 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_3 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_3 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_4 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_4 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_5 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_5 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_6 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_6 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_7 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] 
- 0s 4ms/step - loss: 1.8796 - val_loss: 1.4306\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.7280 - val_loss: 1.3324\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.6184 - val_loss: 1.2660\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.5448 - val_loss: 1.2157\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.4950 - val_loss: 1.1736\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.4282 - val_loss: 1.1391\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3967 - val_loss: 1.1090\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3643 - val_loss: 1.0819\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.3212 - val_loss: 1.0579\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.2965 - val_loss: 1.0358\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2677 - val_loss: 1.0152\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2449 - val_loss: 0.9960\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2246 - val_loss: 0.9778\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2096 - val_loss: 0.9606\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1837 - val_loss: 0.9444\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1703 - val_loss: 0.9288\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1430 - val_loss: 0.9140\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1249 - val_loss: 0.8997\n", + "Epoch 19/20\n", + "40/40 
[==============================] - 0s 2ms/step - loss: 1.1178 - val_loss: 0.8861\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0976 - val_loss: 0.8732\n", + "{'method_called': 'evaluate',\n", + " 'outputs': \"[{'outputs.0': d3mIndex anomaly\"\n", + " '0 0 1'\n", + " '1 1 0'\n", + " '2 2 0'\n", + " '3 3 1'\n", + " '4 4 0'\n", + " '... ... ...'\n", + " '1395 1395 1'\n", + " '1396 1396 0'\n", + " '1397 1397 1'\n", + " '1398 1398 1'\n", + " '1399 1399 1'\n", + " ''\n", + " \"[1400 rows x 2 columns]}, {'outputs.0': d3mIndex anomaly\"\n", + " '0 0 1'\n", + " '1 1 0'\n", + " '2 2 0'\n", + " '3 3 1'\n", + " '4 4 0'\n", + " '... ... ...'\n", + " '1395 1395 1'\n", + " '1396 1396 0'\n", + " '1397 1397 1'\n", + " '1398 1398 1'\n", + " '1399 1399 1'\n", + " ''\n", + " '[1400 rows x 2 columns]}]',\n", + " 'pipeline': '',\n", + " 'scores': ' metric value normalized randomSeed fold'\n", + " '0 F1_MACRO 0.509059 0.509059 0 0',\n", + " 'status': 'COMPLETED'}\n" + ] + } + ], + "source": [ + "# Run the pipeline\n", + "pipeline_result = evaluate_pipeline(dataset, pipeline, metric)\n", + "print(pipeline_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Searcher Example:" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "table_path = '../../datasets/anomaly/raw_data/yahoo_sub_5.csv'\n", + "target_index = 6 # column of the target label\n", + "time_limit = 30 # How many seconds you wanna search" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "metric = 'F1_MACRO' # F1 on both label 0 and 1" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "# Read data and generate dataset and problem\n", + "df = pd.read_csv(table_path)\n", + "dataset = generate_dataset(df, target_index=target_index)\n", + "problem_description = 
generate_problem(dataset, metric)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "# Start backend\n", + "backend = SimpleRunner(random_seed=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "# Start search algorithm\n", + "search = BruteForceSearch(problem_description=problem_description,\n", + " backend=backend)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_3\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_8 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_7 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_9 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_8 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_10 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_9 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_11 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_10 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_12 
(Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_11 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_13 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.4187 - val_loss: 1.0009\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2895 - val_loss: 0.9167\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.2010 - val_loss: 0.8517\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1463 - val_loss: 0.7988\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0777 - val_loss: 0.7531\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0281 - val_loss: 0.7135\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9993 - val_loss: 0.6791\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9634 - val_loss: 0.6496\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9320 - val_loss: 0.6239\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8982 - val_loss: 0.6019\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8760 - val_loss: 0.5825\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8527 - val_loss: 0.5652\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8399 - val_loss: 0.5510\n", + "Epoch 14/20\n", + "40/40 
[==============================] - 0s 2ms/step - loss: 0.8218 - val_loss: 0.5378\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8096 - val_loss: 0.5263\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7945 - val_loss: 0.5162\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7836 - val_loss: 0.5069\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7713 - val_loss: 0.4988\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7561 - val_loss: 0.4908\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.7538 - val_loss: 0.4840\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in _search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n", + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_4\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_14 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_12 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_15 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_13 (Dropout) (None, 12) 0 \n", + 
"_________________________________________________________________\n", + "dense_16 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_14 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_17 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_15 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_18 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_16 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_19 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.4226 - val_loss: 1.0312\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3035 - val_loss: 0.9579\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2140 - val_loss: 0.9087\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1662 - val_loss: 0.8710\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1229 - val_loss: 0.8401\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0874 - val_loss: 0.8141\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0573 - val_loss: 0.7913\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0292 - val_loss: 0.7709\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 1ms/step - 
loss: 1.0038 - val_loss: 0.7525\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9837 - val_loss: 0.7353\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9654 - val_loss: 0.7190\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9444 - val_loss: 0.7040\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9280 - val_loss: 0.6898\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9117 - val_loss: 0.6762\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8950 - val_loss: 0.6634\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8789 - val_loss: 0.6515\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8663 - val_loss: 0.6400\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8541 - val_loss: 0.6290\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8419 - val_loss: 0.6187\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8293 - val_loss: 0.6088\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in _search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n", + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_5\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape 
Param # \n", + "=================================================================\n", + "dense_20 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_17 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_21 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_18 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_22 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_19 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_23 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_20 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_24 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_21 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_25 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.5037 - val_loss: 0.9548\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3846 - val_loss: 0.8790\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2879 - val_loss: 0.8227\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2069 - val_loss: 0.7754\n", + "Epoch 5/20\n", + "40/40 
[==============================] - 0s 1ms/step - loss: 1.1539 - val_loss: 0.7350\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1092 - val_loss: 0.6988\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0573 - val_loss: 0.6661\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0250 - val_loss: 0.6363\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9841 - val_loss: 0.6097\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9457 - val_loss: 0.5857\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9316 - val_loss: 0.5643\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9055 - val_loss: 0.5456\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8854 - val_loss: 0.5292\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8633 - val_loss: 0.5146\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8412 - val_loss: 0.5022\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8335 - val_loss: 0.4911\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8192 - val_loss: 0.4814\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8071 - val_loss: 0.4726\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.7888 - val_loss: 0.4646\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.7846 - val_loss: 0.4576\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in 
_search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n", + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_6\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_26 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_22 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_27 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_23 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_28 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_24 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_29 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_25 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_30 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_26 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_31 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + 
"None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 6ms/step - loss: 1.5385 - val_loss: 0.9377\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.4001 - val_loss: 0.8741\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3243 - val_loss: 0.8293\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2663 - val_loss: 0.7954\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2048 - val_loss: 0.7677\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1459 - val_loss: 0.7439\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1224 - val_loss: 0.7230\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0832 - val_loss: 0.7042\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0554 - val_loss: 0.6868\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0283 - val_loss: 0.6708\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0062 - val_loss: 0.6558\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9902 - val_loss: 0.6417\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9738 - val_loss: 0.6284\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9461 - val_loss: 0.6158\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9323 - val_loss: 0.6038\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9126 - val_loss: 0.5925\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9007 - val_loss: 0.5817\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 1ms/step - 
loss: 0.8846 - val_loss: 0.5715\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8657 - val_loss: 0.5617\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8551 - val_loss: 0.5524\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in _search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n", + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_7\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_32 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_27 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_33 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_28 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_34 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_29 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_35 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_30 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_36 (Dense) (None, 
1) 5 \n", + "_________________________________________________________________\n", + "dropout_31 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_37 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 5ms/step - loss: 1.4187 - val_loss: 1.0796\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.2828 - val_loss: 0.9882\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1966 - val_loss: 0.9252\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1363 - val_loss: 0.8790\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0851 - val_loss: 0.8430\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0490 - val_loss: 0.8141\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0228 - val_loss: 0.7893\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9927 - val_loss: 0.7679\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9690 - val_loss: 0.7490\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9507 - val_loss: 0.7316\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9298 - val_loss: 0.7158\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9132 - val_loss: 0.7011\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8952 - val_loss: 0.6873\n", + "Epoch 14/20\n", + "40/40 [==============================] 
- 0s 1ms/step - loss: 0.8818 - val_loss: 0.6743\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8682 - val_loss: 0.6620\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8537 - val_loss: 0.6504\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8404 - val_loss: 0.6394\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8283 - val_loss: 0.6289\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8162 - val_loss: 0.6190\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8059 - val_loss: 0.6095\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in _search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n", + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_8\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_38 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_32 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_39 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_33 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + 
"dense_40 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_34 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_41 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_35 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_42 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_36 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_43 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.5237 - val_loss: 1.0177\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3755 - val_loss: 0.9350\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2795 - val_loss: 0.8795\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2211 - val_loss: 0.8374\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1686 - val_loss: 0.8039\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1298 - val_loss: 0.7758\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0982 - val_loss: 0.7514\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0670 - val_loss: 0.7298\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0382 - val_loss: 0.7106\n", + "Epoch 10/20\n", + "40/40 
[==============================] - 0s 2ms/step - loss: 1.0145 - val_loss: 0.6931\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9928 - val_loss: 0.6770\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9743 - val_loss: 0.6621\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9573 - val_loss: 0.6483\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9370 - val_loss: 0.6353\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9207 - val_loss: 0.6231\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9041 - val_loss: 0.6116\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8930 - val_loss: 0.6007\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8765 - val_loss: 0.5904\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8633 - val_loss: 0.5806\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8528 - val_loss: 0.5713\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in _search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_9\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_44 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_37 (Dropout) (None, 12) 0 \n", + 
"_________________________________________________________________\n", + "dense_45 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_38 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_46 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_39 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_47 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_40 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_48 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_41 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_49 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.5013 - val_loss: 1.5361\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3749 - val_loss: 1.4108\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2565 - val_loss: 1.3262\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1685 - val_loss: 1.2589\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1140 - val_loss: 1.2080\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0896 - val_loss: 1.1662\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 2ms/step 
- loss: 1.0621 - val_loss: 1.1308\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0299 - val_loss: 1.0962\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9957 - val_loss: 1.0679\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9738 - val_loss: 1.0435\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9496 - val_loss: 1.0196\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9224 - val_loss: 1.0000\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9094 - val_loss: 0.9790\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8959 - val_loss: 0.9610\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8703 - val_loss: 0.9438\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8584 - val_loss: 0.9280\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8485 - val_loss: 0.9134\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8315 - val_loss: 0.8994\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8147 - val_loss: 0.8818\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8000 - val_loss: 0.8617\n" + ] + } + ], + "source": [ + "# Find the best pipeline\n", + "best_runtime, best_pipeline_result = search.search_fit(input_data=[dataset], time_limit=time_limit)\n", + "best_pipeline = best_runtime.pipeline\n", + "best_output = best_pipeline_result.output" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Not all provided hyper-parameters for the data preparation pipeline 
79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_10\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_50 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_42 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_51 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_43 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_52 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_44 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_53 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_45 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_54 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_46 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_55 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.3728 - val_loss: 2.2095\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 
1ms/step - loss: 1.2507 - val_loss: 2.0598\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1434 - val_loss: 1.9599\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0993 - val_loss: 1.8894\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0486 - val_loss: 1.8368\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0115 - val_loss: 1.7958\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9806 - val_loss: 1.7621\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9481 - val_loss: 1.7337\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9264 - val_loss: 1.6992\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8929 - val_loss: 1.6732\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8834 - val_loss: 1.6493\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8608 - val_loss: 1.6288\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8382 - val_loss: 1.6080\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8256 - val_loss: 1.5866\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8124 - val_loss: 1.5684\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7965 - val_loss: 1.5524\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7840 - val_loss: 1.5353\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7678 - val_loss: 1.5211\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7594 - val_loss: 1.5052\n", + "Epoch 20/20\n", + "40/40 
[==============================] - 0s 2ms/step - loss: 0.7455 - val_loss: 1.4914\n" + ] + } + ], + "source": [ + "# Evaluate the best pipeline\n", + "best_scores = search.evaluate(best_pipeline).scores" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Search History:\n", + "----------------------------------------------------\n", + "Pipeline id: f6665410-4d1d-4695-9f00-5d5f457ef95d\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.708549 0.708549 0 0\n", + "----------------------------------------------------\n", + "Pipeline id: 34ae48fe-fb5c-4dbd-940b-00098300cb9f\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.616695 0.616695 0 0\n", + "----------------------------------------------------\n", + "Pipeline id: fc287cdb-2958-4117-8e20-ba7645caa23c\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.55474 0.55474 0 0\n", + "----------------------------------------------------\n", + "Pipeline id: e510c088-369b-4b04-8b25-a320b4a86530\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.531302 0.531302 0 0\n", + "----------------------------------------------------\n", + "Pipeline id: b42e188a-ea92-4dc0-b7d3-8983b0e659e9\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.509059 0.509059 0 0\n", + "----------------------------------------------------\n", + "Pipeline id: 5e641e81-9e0e-46f3-b487-c37ccd1b9573\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.483604 0.483604 0 0\n" + ] + } + ], + "source": [ + "print('Search History:')\n", + "for pipeline_result in search.history:\n", + " print('-' * 52)\n", + " print('Pipeline id:', pipeline_result.pipeline.id)\n", + " print(pipeline_result.scores)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best 
pipeline:\n", + "----------------------------------------------------\n", + "Pipeline id: f6665410-4d1d-4695-9f00-5d5f457ef95d\n", + "Pipeline json: {\"id\": \"f6665410-4d1d-4695-9f00-5d5f457ef95d\", \"schema\": \"https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json\", \"created\": \"2021-06-29T04:06:53.685353Z\", \"inputs\": [{\"name\": \"inputs\"}], \"outputs\": [{\"data\": \"steps.7.produce\", \"name\": \"output predictions\"}], \"steps\": [{\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"c78138d9-9377-31dc-aee8-83d9df049c60\", \"version\": \"0.3.0\", \"python_path\": \"d3m.primitives.tods.data_processing.dataset_to_dataframe\", \"name\": \"Extract a DataFrame from a Dataset\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"inputs.0\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"81235c29-aeb9-3828-911a-1b25319b6998\", \"version\": \"0.6.0\", \"python_path\": \"d3m.primitives.tods.data_processing.column_parser\", \"name\": \"Parses strings into their types\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.0.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"a996cd89-ddf0-367f-8e7f-8c013cbc2891\", \"version\": \"0.4.0\", \"python_path\": \"d3m.primitives.tods.data_processing.extract_columns_by_semantic_types\", \"name\": \"Extracts columns by semantic type\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.1.produce\"}}, \"outputs\": [{\"id\": \"produce\"}], \"hyperparams\": {\"semantic_types\": {\"type\": \"VALUE\", \"data\": [\"https://metadata.datadrivendiscovery.org/types/Attribute\"]}}}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"a996cd89-ddf0-367f-8e7f-8c013cbc2891\", \"version\": \"0.4.0\", \"python_path\": \"d3m.primitives.tods.data_processing.extract_columns_by_semantic_types\", \"name\": \"Extracts columns by semantic type\"}, \"arguments\": {\"inputs\": 
{\"type\": \"CONTAINER\", \"data\": \"steps.0.produce\"}}, \"outputs\": [{\"id\": \"produce\"}], \"hyperparams\": {\"semantic_types\": {\"type\": \"VALUE\", \"data\": [\"https://metadata.datadrivendiscovery.org/types/TrueTarget\"]}}}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"642de2e7-5590-3cab-9266-2a53c326c461\", \"version\": \"0.0.1\", \"python_path\": \"d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler\", \"name\": \"Axis_wise_scale\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.2.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"30bc7cec-2ccc-34bc-9df8-2095bf3b1ae2\", \"version\": \"0.1.0\", \"python_path\": \"d3m.primitives.tods.feature_analysis.statistical_mean\", \"name\": \"Time Series Decompostional\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.4.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"67e7fcdf-d645-3417-9aa4-85cd369487d9\", \"version\": \"0.0.1\", \"python_path\": \"d3m.primitives.tods.detection_algorithm.pyod_ae\", \"name\": \"TODS.anomaly_detection_primitives.AutoEncoder\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.5.produce\"}}, \"outputs\": [{\"id\": \"produce\"}], \"hyperparams\": {\"contamination\": {\"type\": \"VALUE\", \"data\": 0.01}}}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"2530840a-07d4-3874-b7d8-9eb5e4ae2bf3\", \"version\": \"0.3.0\", \"python_path\": \"d3m.primitives.tods.data_processing.construct_predictions\", \"name\": \"Construct pipeline predictions output\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.6.produce\"}, \"reference\": {\"type\": \"CONTAINER\", \"data\": \"steps.1.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}], \"digest\": \"af9c67055b7d50ecf3dce829af31051781b757f648d138836d827b6dfb699e8e\"}\n", + "Output:\n", + " d3mIndex anomaly\n", + "0 0 
0\n", + "1 1 0\n", + "2 2 0\n", + "3 3 0\n", + "4 4 0\n", + "... ... ...\n", + "1395 1395 0\n", + "1396 1396 0\n", + "1397 1397 1\n", + "1398 1398 1\n", + "1399 1399 0\n", + "\n", + "[1400 rows x 2 columns]\n", + "Scores:\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.708549 0.708549 0 0\n" + ] + } + ], + "source": [ + "print('Best pipeline:')\n", + "print('-' * 52)\n", + "print('Pipeline id:', best_pipeline.id)\n", + "print('Pipeline json:', best_pipeline.to_json())\n", + "print('Output:')\n", + "print(best_output)\n", + "print('Scores:')\n", + "print(best_scores)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}