From c000f914efe9c4d3bdbd248e287ce00043dce744 Mon Sep 17 00:00:00 2001 From: hwy893747147 Date: Mon, 28 Jun 2021 23:11:48 -0500 Subject: [PATCH] new notebook --- .../TODS Notebook Master-Branch.ipynb | 1975 ++++++++++++++++++++ 1 file changed, 1975 insertions(+) create mode 100644 examples/Demo Notebook/TODS Notebook Master-Branch.ipynb diff --git a/examples/Demo Notebook/TODS Notebook Master-Branch.ipynb b/examples/Demo Notebook/TODS Notebook Master-Branch.ipynb new file mode 100644 index 0000000..98aa202 --- /dev/null +++ b/examples/Demo Notebook/TODS Notebook Master-Branch.ipynb @@ -0,0 +1,1975 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TODS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction Summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "TODS is a full-stack automated machine learning system for outlier detection on multivariate time-series data. TODS provides exhaustive modules for building machine learning-based outlier detection systems, including: data processing, time series processing, feature analysis (extraction), detection algorithms, and a reinforcement module. The functionalities provided via these modules include data preprocessing for general purposes, time series data smoothing/transformation, extracting features from time/frequency domains, various detection algorithms, and involving human expertise to calibrate the system. Three common outlier detection scenarios on time-series data can be performed: point-wise detection (time points as outliers), pattern-wise detection (subsequences as outliers), and system-wise detection (sets of time series as outliers), and a wide range of corresponding algorithms are provided in TODS. This package is developed by DATA Lab @ Texas A&M University." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Python 3.6.10 :: Anaconda, Inc.\r\n" + ] + } + ], + "source": [ + "!python -V\n", + "# Make sure python version is 3.6" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1.4.1'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import scipy\n", + "scipy.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TODS Notebook Master-Branch.ipynb TODS Official Demo Notebook.ipynb\r\n" + ] + } + ], + "source": [ + "!ls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "d3m.primitives.tods.detection_algorithm.LSTMODetector: Primitive is not providing a description through its docstring.\n" + ] + } + ], + "source": [ + "import sys\n", + "import argparse\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.metrics import precision_recall_curve\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.metrics import confusion_matrix\n", + "from sklearn.metrics import classification_report\n", + "import matplotlib.pyplot as plt\n", + "from sklearn import metrics\n", + "from d3m import index\n", + "from d3m.metadata.base import ArgumentType\n", + "from 
d3m.metadata.pipeline import Pipeline, PrimitiveStep\n", + "from axolotl.backend.simple import SimpleRunner\n", + "from tods import generate_dataset, generate_problem\n", + "from tods.searcher import BruteForceSearch\n", + "from tods import generate_dataset, load_pipeline, evaluate_pipeline\n", + "from tods.sk_interface.detection_algorithm.DeepLog_skinterface import DeepLogSKI\n", + "from tods.sk_interface.detection_algorithm.Telemanom_skinterface import TelemanomSKI\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### UCR Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "data_UCR = np.loadtxt(\"../../datasets/anomaly/raw_data/500_UCR_Anomaly_robotDOG1_10000_19280_19360.txt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (20000,)\n", + "datatype of data: float64\n", + "First 5 rows:\n", + " [0.145299 0.128205 0.094017 0.076923 0.111111]\n" + ] + } + ], + "source": [ + "print(\"shape:\", data_UCR.shape)\n", + "print(\"datatype of data:\",data_UCR.dtype)\n", + "print(\"First 5 rows:\\n\", data_UCR[:5])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "X_train = np.expand_dims(data_UCR[:10000], axis=1)\n", + "X_test = np.expand_dims(data_UCR[10000:], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First 5 rows train:\n", + " [[0.145299]\n", + " [0.128205]\n", + " [0.094017]\n", + " [0.076923]\n", + " [0.111111]]\n", + "First 5 rows test:\n", + " [[0.076923]\n", + " [0.076923]\n", + " [0.076923]\n", + " [0.094017]\n", + " [0.145299]]\n" + ] + } + ], + "source": [ + "print(\"First 5 rows 
train:\\n\", X_train[:5])\n", + "print(\"First 5 rows test:\\n\", X_test[:5])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Yahoo Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "data_yahoo = pd.read_csv('../../datasets/anomaly/raw_data/yahoo_sub_5.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape: (1400, 7)\n", + "First 5 rows:\n", + " timestamp value_0 value_1 value_2 value_3 value_4 anomaly\n", + "0 1 12183 0.000000 3.716667 5 2109 0\n", + "1 2 12715 0.091758 3.610833 60 3229 0\n", + "2 3 12736 0.172297 3.481389 88 3637 0\n", + "3 4 12716 0.226219 3.380278 84 1982 0\n", + "4 5 12739 0.176358 3.193333 111 2751 0\n" + ] + } + ], + "source": [ + "print(\"shape:\", data_yahoo.shape)\n", + "print(\"First 5 rows:\\n\", data_yahoo[:5])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SK Example 1: DeepLog" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "282/282 [==============================] - 1s 5ms/step - loss: 0.4255 - val_loss: 0.2777\n", + "Epoch 2/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3367 - val_loss: 0.2802\n", + "Epoch 3/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3545 - val_loss: 0.2595\n", + "Epoch 4/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3572 - val_loss: 0.2674\n", + "Epoch 5/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3457 - val_loss: 0.2880\n", + "Epoch 6/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3503 - val_loss: 0.2619\n", + "Epoch 7/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 
0.3559 - val_loss: 0.2818\n", + "Epoch 8/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3439 - val_loss: 0.2620\n", + "Epoch 9/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3390 - val_loss: 0.2690\n", + "Epoch 10/10\n", + "282/282 [==============================] - 1s 2ms/step - loss: 0.3425 - val_loss: 0.2683\n" + ] + } + ], + "source": [ + "transformer = DeepLogSKI()\n", + "transformer.fit(X_train)\n", + "prediction_labels_train = transformer.predict(X_train)\n", + "prediction_labels_test = transformer.predict(X_test)\n", + "prediction_score = transformer.predict_score(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction Labels\n", + " [[0]\n", + " [0]\n", + " [0]\n", + " ...\n", + " [0]\n", + " [0]\n", + " [0]]\n", + "Prediction Score\n", + " [[0]\n", + " [0]\n", + " [0]\n", + " ...\n", + " [0]\n", + " [0]\n", + " [0]]\n" + ] + } + ], + "source": [ + "print(\"Prediction Labels\\n\", prediction_labels_test)\n", + "print(\"Prediction Score\\n\", prediction_score)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "y_true = prediction_labels_train\n", + "y_pred = prediction_labels_test\n", + "precision, recall, thresholds = precision_recall_curve(y_true, y_pred)\n", + "f1_scores = 2*recall*precision/(recall+precision)\n", + "fpr, tpr, threshold = metrics.roc_curve(y_true, y_pred)\n", + "roc_auc = metrics.auc(fpr, tpr)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy Score: 0.9042\n" + ] + } + ], + "source": [ + "print('Accuracy Score: ', accuracy_score(y_true, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.93 0.97 0.95 9004\n", + " 1 0.53 0.35 0.42 996\n", + "\n", + " accuracy 0.90 10000\n", + " macro avg 0.73 0.66 0.68 10000\n", + "weighted avg 0.89 0.90 0.90 10000\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_true, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best threshold: 1\n", + "Best F1-Score: 0.42219541616405304\n" + ] + } + ], + "source": [ + "print('Best threshold: ', thresholds[np.argmax(f1_scores)])\n", + "print('Best F1-Score: ', np.max(f1_scores))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAArEklEQVR4nO3de3gV5bn38e/NMQiIbqC0CtmgoKLgBk2pFqposUWLikoBrW7FKr5aVGyrxdLdbtxWW+0uCYoHRIqHgiBWpFaktoKnXVBQFEGDiBQCKIgo0ggSuN8/nkFjyGHFZNYkmd/nutaVmVnPmnVPAute8xzN3RERkfRqlHQAIiKSLCUCEZGUUyIQEUk5JQIRkZRTIhARSTklAhGRlFMiEBFJOSUCkUqY2Roz+8TMtpvZu2Y21cxalXr+m2b2tJl9bGYfmdmfzezIMufY38zyzWxtdJ63o/122b8ikX0pEYhU7XR3bwX0AnoD1wOY2fHAX4HHgIOALsCrwAtmdkhUphnwd+AoYCCwP3A8sAXok9WrEKmAaWSxSMXMbA1wibv/Ldq/BTjK3b9nZs8By9z9ijKvmQtsdvf/NLNLgF8Dh7r79iyHL5IR3RGIZMjMOgKnAqvMbD/gm8DD5RSdCZwSbQ8AnlQSkLpMiUCkarPN7GNgHbAJ+BXwb4T/PxvLKb8R2Fv/37aCMiJ1hhKBSNUGu3troD9wBOFDfiuwB/haOeW/BrwfbW+poIxInaFEIJIhd38GmAr8zt3/BfwD+H45RYcSGogB/gZ818xaZiVIkS9BiUCkevKBU8zsP4AxwIVmdpWZtTazA83sRkKvoHFR+QcIVUqPmNkRZtbIzNqa2c/N7LRErkCkDCUCkWpw983A/cAv3f154LvA2YR2gH8Supf2c/e3ovI7CQ3GbwJPAduAFwnVS4uyfgEi5VD3URGRlNMdgYhIyikRiIiknBKBiEjKKRGIiKRck6QDqK527dp5586dkw5DRKReWbJkyfvu3r685+pdIujcuTOLFy9OOgwRkXrFzP5Z0XOqGhIRSTklAhGRlFMiEBFJOSUCEZGUUyIQEUm52BKBmU0xs01m9noFz5uZTTCzVWb2mpkdE1csIiJSsTjvCKYSFuuuyKlAt+gxErgzxlhERKQCsSUCd38W+KCSImcC93uwEDjAzLSSk4h
IGRs3wi9/CYWF8Zw/yQFlBxMW7NirKDq2z/quZjaScNdAbm5uVoITEUnakiWQnw8zZkBJCXzta3D44bX/PvWisdjdJ7l7nrvntW9f7ghpEZEGoaQEZs2Cb30L8vJg9my4/HJYuTL8jEOSdwTrgU6l9jtGx0REUmfrVrj3XrjtNli7Frp0gfHjYcQIaNMm3vdOMhHMAUaZ2UPAN4CP3H2faiERkYassBAmTICpU6G4GE48EQoK4PTToXHj7MQQWyIws+lAf6CdmRUBvwKaArj7XcATwGnAKqAYGBFXLCIidYk7PPVU+MB/4glo1gzOOw+uvhp69cp+PLElAnc/t4rnHfhRXO8vIlLXFBfDgw+GBLBiBXToAOPGwWWXhe2k1LtpqEVE6puiIpg4ESZNgg8+gN694b77YNgwaN486eiUCEREYrNwYfj2//DDoTpo8GAYPRr69QOzpKP7nBKBiEgt2rULHnkk9P9ftAj23z98+I8aBXV1cUUlAhGRWrBlS6j6mTgR1q+Hbt1CV9ALL4TWrZOOrnJKBCIiNbB8eej+ef/9sGMHDBgAd98Np54KjerFkF0lAhGRatuzB558MlT/PPUU5OTABRfAVVdBjx5JR1d9SgQiIhnavj309pkwIUz5cNBB8Otfw8iR0K5d0tF9eUoEIiJVWLMGbr8dJk+Gjz6CPn1g2jQYMgSaNk06uppTIhARKYc7vPBCqP559NHQ3XPIkNAD6Ljjko6udikRiIiUsnMnzJwZEsDLL8OBB8K118KPfgSdOlX58npJiUBEBNi0Ce66C+68E959F7p3D/vnnw8tWyYdXbyUCEQk1V59NYz+nTYt3A2cemqo/jnllLo1+jdOSgQikjq7d8Pjj4fqnwULYL/94OKLQ/fPI45IOrrsUyIQkdTYtg2mTAkjflevDnX+t9wCl1wS2gLSSolARBq8t98OH/5TpsDHH0PfvvDb34ZJ4JroU1CJQEQaJvdQ7ZOfD3/+c1jta9iwsPjL17+edHR1ixKBiDQoO3aEht+CAnjttTDid+zYsPD7QQclHV3dpEQgIg3Cxo2h6+ddd8HmzdCzZ1gM/txzoUWLpKOr25QIRKReW7IkVP/MmAElJTBoUOj+edJJ6en+WVNKBCJS75SUwOzZIQG88AK0ahWqfq68Erp2TTq6+keJQETqja1bQ3XPbbfB2rXQpQuMHw8jRkCbNklHV38pEYhInVdYGKZ+njoViovhxBNDY/Dpp4feQFIzSgQiUie5h0Vf8vNh7lxo1gzOOy90/+zVK+noGhYlAhGpU4qL4cEHwzf+FSugQwcYNw4uuyxsS+1TIhCROqGoKCz8PmkSfPAB9O4dVgMbNgyaN086uoZNiUBEErVwYaj+mTUrVAcNHhy6f/brp+6f2aJEICJZt2sXPPJISACLFoUeP6NHw6hR0LlzwsGlkBKBiGTNli2h6mfiRFi/Hrp1C2sBX3hhGAsgyVAiEJHYLV8eGn8feCDMBTRgANx9d1gEplGjpKMTJQIRicWePfDkk6H656mnICcHLrggLP7So0fS0UlpSgQiUqu2bw+9fSZMgJUrw4yfN90El14aZgKVuifWmzIzG2hmhWa2yszGlPN8rpnNN7NXzOw1MzstznhEJD5r1sBPfwodO4ZG3wMOCNNBr1kD11+vJFCXxXZHYGaNgYnAKUAR8JKZzXH3FaWK/QKY6e53mtmRwBNA57hiEpHa5R4mfcvPh0cfDd09hwwJPYCOOy7p6CRTcVYN9QFWuftqADN7CDgTKJ0IHNg/2m4DbIgxHhGpJTt3wsyZIQG8/HJY7/e66+CKK8I6wFK/xJkIDgbWldovAr5Rpsx/A381syuBlsCA8k5kZiOBkQC5ubm1HqiIZGbTprDwy513wrvvQvfuYf/886Fly6Sjky8r6Y5b5wJT3b0jcBrwgJntE5O7T3L3PHfPa9++fdaDFEm7V1+Fiy8O3/Z/9asw/cO8eaFb6GWXKQnUd3HeEawHSt8
kdoyOlfZDYCCAu//DzHKAdsCmGOMSkQzs3g2PPx6qfxYsgP32g0suCYu/HHFE0tFJbYrzjuAloJuZdTGzZsBwYE6ZMmuBbwOYWXcgB9gcY0wiUoVt28KH/2GHhXl/3n4bbrnl80nhlAQantjuCNy9xMxGAfOAxsAUd19uZjcAi919DvAT4B4zu4bQcHyRu3tcMYlIxVatCit//eEP8PHH0Lcv/Pa3IRk00YijBi3WP6+7P0HoElr62C9Lba8A+sYZg4hUzD1U++Tnw5//HFb7Gj48LP6Sl5d0dJItyvMiKbRjRxjsVVAAr70WBnuNHRsWgD/ooKSjk2xTIhBJkY0b4Y47woRvmzdDz55hMfhzz4UWLZKOTpKiRCCSAkuWhOqfGTOgpCQs+n711XDSSVr8RZQIRBqskhKYPTskgBdeCPP9X3556P7ZtWvS0UldokQg0sBs3QqTJ4cFX9auhS5dYPx4GDEirAQmUpYSgUgDUVgYpn6eOhWKi6F//9AYfPrpoTeQSEWUCETqMfew6Et+PsydC82awXnnhfr/Xr2Sjk7qCyUCkXqouDgs+zhhAqxYAR06wLhxYd6fDh2Sjk7qGyUCkXpk7zQPkybBBx+Eyd/uuw+GDYPmzZOOTuorJQKRemDhwlD9M2tWqA4aPDgs/tKvn7p/Ss1lnAjMbD93L44zGBH53K5d4YO/oAAWLQo9fkaPDstAdu6cdHTSkFQ5+6iZfdPMVgBvRvv/YWZ3xB6ZSEpt2QI33xy6fZ53XqgCuv32UC30u98pCUjty+SOYDzwXaIppN39VTM7IdaoRFJo+fLw7f+BB8JcQAMGhKkgTj0VGiW9hJQ0aBlVDbn7OvtiReTueMIRSZc9e0K3z4KC0A00JwcuuACuugp69Eg6OkmLTBLBOjP7JuBm1hS4Gngj3rBEGrbt20Nvn4ICeOutMOPnTTfBpZeGmUBFsimTRPD/gALCYvTrgb8CV8QZlEhDtWZNqO+fPBk++gj69AnTQQ8ZAk2bJh2dpFUmieBwd/9B6QNm1hd4IZ6QRBoWd3j++dD9c/bs0N1zyJDQA+i44xIOToTM1iy+LcNjIlLKzp2h4TcvD044AebPh+uug3fegYceUhKQuqPCOwIzOx74JtDezH5c6qn9CWsQi0g5Nm2Cu+6CO++Ed9+F7t3D/vnnQ8uWSUcnsq/KqoaaAa2iMq1LHd8GDIkzKJH6aOnS0Pg7bRp8+mno9jl6NJxyikb/St1WYSJw92eAZ8xsqrv/M4sxidQbu3eHRd8LCsIi8PvtB5dcEhZ/OeKIpKMTyUwmjcXFZnYrcBSQs/egu58cW1Qiddy2bTBlSpj98513oFMnuOWWkAQOPDDp6ESqJ5NE8EdgBjCI0JX0QmBznEGJ1FWrVsFtt8Ef/gAffwx9+4YEMHgwNNEUjlJPZfJPt62732tmV5eqLnop7sBE6gr30OOnoCBUAzVuDMOHh8Vf8vKSjk6k5jJJBLuinxvN7HvABuDf4gtJpG7YsSM0/Obnw7JlYcTv2LFhAfiDDko6OpHak0kiuNHM2gA/IYwf2B8YHWdQIknauBHuuCNM+LZ5M/TsCffeC+eeCy1aJB2dSO2rMhG4++PR5kfASfDZyGKRBmXx4lD9M2MGlJSERd+vvhpOOkndP6Vhq2xAWWNgKGGOoSfd/XUzGwT8HGgB9M5OiCLxKSkJ0z7k58MLL0CrVqHq58oroWvXpKMTyY7K7gjuBToBLwITzGwDkAeMcffZWYhNJDZbt4aJ326/HdauDYvAjB8PI0aElcBE0qSyRJAHHO3ue8wsB3gXONTdt2QnNJHaV1gY+v5PnQrFxdC/f9gfNCj0BhJJo8omnfvU3fcAuPsOYHV1k4CZDTSzQjNbZWZjKigz1MxWmNlyM5tWnfOLZMId/vpXOO20MNp38mQYOhReeSV0Cz3zTCUBSbfK7giOMLPXom0DDo32DXB3P7qyE0dtDBOBU4Ai4CUzm+PuK0qV6QZcD/R
1961m9pUaXIvIFxQXh9k/J0yAFSugQwcYNw4uuyxsi0hQWSLoXsNz9wFWuftqADN7CDgTWFGqzKXARHffCuDum2r4niIUFcHEiTBpUlj4vXdvuP/+cBfQvHnS0YnUPZVNOlfTieYOBtaV2i8CvlGmzGEAZvYCYWrr/3b3J8ueyMxGAiMBcnNzaxiWNFQLF4beP7NmheqgwYPD7J/9+qn7p0hlkp4dpQnQDegPdASeNbOe7v5h6ULuPgmYBJCXl+dZjlHqsF27wgd/QQEsWhR6/IweDaNGQefOSUcnUj/EmQjWE7qf7tUxOlZaEbDI3XcB75jZSkJi0FxGUqktW0LVz8SJsH49dOsWuoJeeGEYCyAimctkqUrMrIWZHV7Nc78EdDOzLmbWDBgOzClTZjbhbgAza0eoKlpdzfeRFFm+HEaOhI4d4ec/D6t/Pf44vPkm/OhHSgIiX0aVicDMTgeWAk9G+73MrOwH+j7cvQQYBcwD3gBmuvtyM7vBzM6Iis0DtpjZCmA+cK3GKUhZe/bAX/4C3/kO9OgRegJdcEGYCO6pp+B734NGGX2lEZHymHvlVe5mtgQ4GVjg7r2jY8vcvWcW4ttHXl6eL168OIm3lizbvj0M/JowAd56K8z4OWoUXHppmAlURDJnZkvcvdyJ0zOahtrdP7IvdrtQg63EZs2aUN8/eTJ89BH06ROmgx4yBJo2TTo6kYYnk0Sw3MzOAxpHA8CuAv4v3rAkbdzh+edD98/Zs0N3zyFDQg+g445LODiRBi6TmtUrCesV7wSmEaajHh1jTJIiO3eGwV55eXDCCWHKh+uuC+sAP/SQkoBINmRyR3CEu48FxsYdjKTHpk1w111hAZj33gu9f+66KzQC77df0tGJpEsmieB/zeyrwCxghru/HnNM0oAtXRoGf02bBp9+CqeeGqp/TjlFo39FkpLJCmUnRYlgKHC3me1PSAg3xh6dNAi7d4dF3wsKYMGC8I3/kkvC4i9HHJF0dCKSUe9rd3/X3ScA/48wpuCXcQYlDcO2baHxt1s3OOssWL0abrnl80nhlARE6oYq7wjMrDswDDgH2ALMICxkL1KuVavgtttgypQwFqBv35AABg+GJknPbiUi+8jkv+UUwof/d919Q8zxSD3lHnr8FBSEaqAmTWDYsLD4e165Q1hEpK7IpI3g+GwEIvXTjh2h4Tc/P0z50K4djB0bFoA/6KCkoxORTFSYCMxsprsPNbNlfHEkcUYrlEnDtnFj6Pp5113w/vvQsyfcey+cey60aJF0dCJSHZXdEVwd/RyUjUCkfli8OFT/zJgBJSVw+umh+uekk9T9U6S+qrDXkLtvjDavcPd/ln4AV2QnPKkLSkrC4i/9+sHXvx6mgLj8cli5Eh57DE4+WUlApD7LpPvoKeUcO7W2A5G6Z+tWuPVWOPRQ+P73YcMGGD8+dP8sKICuXZOOUERqQ2VtBJcTvvkfYmavlXqqNfBC3IFJcgoLw9TPU6dCcTH07x/2Bw2Cxo2Tjk5EaltlbQTTgLnAzcCYUsc/dvcPYo1Kss49LPKSnw9z50KzZnDeeaH+v1evpKMTkThVlgjc3deY2Y/KPmFm/6Zk0DAUF4cVvwoK4I03oEMHGDcOLrssbItIw1fVHcEgYAmh+2jp5kAHDokxLonZ3mkeJk2CDz6A3r3DdNBDh0Lz5klHJyLZVGEicPdB0c8u2QtH4rZwYaj+mTUrVAcNHhxm/+zXTz1/RNIqk7mG+gJL3f1fZnY+cAyQ7+5rY49OasWuXeGDPz8fXnwR2rQJH/6jRkHnzgkHJyKJy6T76J1AsZn9B2GyubeBB2KNSmrFli1w883QpUto+N26NawFXFQEv/udkoCIBJlMOlfi7m5mZwK3u/u9ZvbDuAOTL2/58tD4+8ADYS6gAQPg7rvDIjCNMpp4XETSJJNE8LGZXQ9cAHzLzBoBTeMNS6prz57Q7TM/H/72N8jJCcs+XnUV9OiRdHQiUpdlkgiGAec
BF7v7u2aWC9wab1iSqe3bw8CvCRPgrbfCjJ833QSXXhpmAhURqUom01C/a2Z/BL5uZoOAF939/vhDk8qsWRPq+ydPho8+gj59wnTQQ4ZAU92viUg1VFljbGZDgReB7xPWLV5kZkPiDkz25Q7PPQfnnBPm/8nPh4ED4R//gEWLwhTQSgIiUl2ZVA2NBb7u7psAzKw98DdgVpyByed27gzTPhcUwMsvw4EHwnXXwRVXQKdOSUcnIvVdJomg0d4kENlChoveS81s2hQWfrnjDnjvPejePexfcAHst1/S0YlIQ5FJInjSzOYB06P9YcAT8YUkS5eGb//TpsGnn4Zun6NHwymnaPSviNS+TBqLrzWzs4F+0aFJ7v5ovGGlz+7dYdH3ggJYsCB847/kErjySjjiiKSjE5GGrLL1CLoBvwMOBZYBP3X39dkKLC22bYMpU0L3z3fegdxcuOWWkAQOPDDp6EQkDSqr658CPA6cQ5iB9LbqntzMBppZoZmtMrMxlZQ7x8zczPKq+x711apVYa7/gw+Ga64J/f8ffhjefhuuvVZJQESyp7Kqodbufk+0XWhmL1fnxGbWGJhIWOqyCHjJzOa4+4oy5VoDVwOLqnP++sgd5s8P3T4ffxyaNIFhw0JCyEtNChSRuqayRJBjZr35fB2CFqX33b2qxNAHWOXuqwHM7CHgTGBFmXL/A/wWuLaasdcbO3aEht/8fFi2LIz4HTs2LAB/0EFJRyciaVdZItgI/L7U/rul9h04uYpzHwysK7VfBHyjdAEzOwbo5O5/MbMKE4GZjQRGAuTm5lbxtnXHhg1w552hy+f770PPnnDvvWEm0JycpKMTEQkqW5jmpDjfOJq87vfARVWVdfdJwCSAvLw8jzOu2rB4cfj2P3MmlJTA6aeH6p+TTlL3TxGpezIZR/BlrQdKj3vtGB3bqzXQA1hg4dPxq8AcMzvD3RfHGFcsSkpg9uyQAF54AVq1CiN/R42Crl2Tjk5EpGJxJoKXgG5m1oWQAIYTZjEFwN0/Aj6bH9PMFhC6qNarJLB1a5j47fbbYe3asAjM+PEwYkRYCUxEpK6LLRG4e4mZjQLmAY2BKe6+3MxuABa7+5y43jsb3nwz9P2/7z4oLob+/cP+oEHQuHHS0YmIZC6TNYsN+AFwiLvfEK1H8FV3f7Gq17r7E5SZjsLdf1lB2f4ZRZwgd3jqqVD9M3cuNGsGP/hBWPylV6+koxMR+XIyuSO4A9hD6CV0A/Ax8Ajw9RjjqlOKi8OyjwUF8MYb0KEDjBsHl10WtkVE6rNMEsE33P0YM3sFwN23mlmzmOOqE9atg4kTYdKk0BbQuzfcfz8MHQrNmycdnYhI7cgkEeyKRgk7fLYewZ5Yo0rYa6+F5R5nzQrVQWedFbp/9uun7p8i0vBkkggmAI8CXzGzXwNDgF/EGlWC3MO0z9u3h6mfR42Czp2TjkpEJD6ZTEP9RzNbAnybML3EYHd/I/bIEvL++2FE8O9/HyaDExFp6DLpNZQLFAN/Ln3M3dfGGVhSCgvDz8MPTzYOEZFsyaRq6C+E9gEDcoAuQCFwVIxxJUaJQETSJpOqoZ6l96OJ4q6ILaKEFRaG8QFqFxCRtKj2IvTR9NPfqLJgPVVYGOYG0uhgEUmLTNoIflxqtxFwDLAhtogStnIldO+edBQiItmTyR1B61KP5oQ2gzPjDCopJSVhqUi1D4hImlR6RxANJGvt7j/NUjyJeucd2LVLiUBE0qXCOwIza+Luu4G+WYwnUXt7DB12WLJxiIhkU2V3BC8S2gOWmtkc4GHgX3ufdPc/xRxb1qnrqIikUSbjCHKALYTZR/eOJ3CgQSaCtm3DQ0QkLSpLBF+Jegy9zucJYK86v27wl1FYqLsBEUmfyhJBY6AVX0wAezXIRLByZZhwTkQkTSpLBBvd/YasRZKwbdvg3Xd1RyAi6VPZOIJUzbyvhmIRSavKEsG3sxZFHaCuoyKSVhUmAnf/IJuBJK2wEBo1gkM
PTToSEZHsqvakcw1VYSF06aK1iEUkfZQIIuo6KiJppUQA7NkDb72lRCAi6aREABQVwSefKBGISDopEaCuoyKSbkoEKBGISLopERASQatW8NWvJh2JiEj2KRHweY8hS9VYahGRQIkAdR0VkXSLNRGY2UAzKzSzVWY2ppznf2xmK8zsNTP7u5n9e5zxlKe4GNauVSIQkfSKLRFE6x1PBE4FjgTONbMjyxR7Bchz96OBWcAtccVTkbfeCj+VCEQkreK8I+gDrHL31e7+KfAQcGbpAu4+392Lo92FQMcY4ynXypXhpxKBiKRVnIngYGBdqf2i6FhFfgjMLe8JMxtpZovNbPHmzZtrMcTPu45261arpxURqTfqRGOxmZ0P5AG3lve8u09y9zx3z2vfvn2tvndhIXTqBC1b1uppRUTqjUwWr/+y1gOdSu13jI59gZkNAMYCJ7r7zhjjKVdhodYgEJF0i/OO4CWgm5l1MbNmwHBgTukCZtYbuBs4w903xRhLudzVdVREJLZE4O4lwChgHvAGMNPdl5vZDWZ2RlTsVqAV8LCZLTWzORWcLhbvvRfWKlYiEJE0i7NqCHd/AniizLFfltoeEOf7V0VzDImI1JHG4qQoEYiIpDwRrFwJOTmQm5t0JCIiyUl1IigsDOMHGqX6tyAiaZfqj0D1GBIRSXEi+PRTWL1aYwhERFKbCFavht27dUcgIpLaRKAeQyIigRKBEoGIpFxqE8HKlfCVr8ABByQdiYhIslKbCNRjSEQkUCIQEUm5VCaCrVth82YlAhERSGki2NtQrDEEIiIpTwS6IxARSXEiaNIEDjkk6UhERJKX2kRwyCHQtGnSkYiIJC+ViWDlSlULiYjslbpEsHs3vPWWEoGIyF6pSwRr18LOnUoEIiJ7pS4RqMeQiMgXpTYRaAyBiEjQJOkAsq2wENq0CRPOiUi8du3aRVFRETt27Eg6lNTIycmhY8eONK1Gt8hUJoLDDwezpCMRafiKiopo3bo1nTt3xvSfLnbuzpYtWygqKqJLly4Zvy6VVUNqHxDJjh07dtC2bVslgSwxM9q2bVvtO7BUJYLt22H9eiUCkWxSEsiuL/P7TlUieOut8FOJQETkc6lKBOo6KpJOs2fPxsx48803Pzu2YMECBg0a9IVyF110EbNmzQJCQ/eYMWPo1q0bxxxzDMcffzxz586tcSw333wzXbt25fDDD2fevHnllnF3xo4dy2GHHUb37t2ZMGHCF+Lu1asXRx11FCeeeGKN44GUNRYXFoZG4q5dk45ERLJp+vTp9OvXj+nTpzNu3LiMXvNf//VfbNy4kddff53mzZvz3nvv8cwzz9QojhUrVvDQQw+xfPlyNmzYwIABA1i5ciWNGzf+QrmpU6eybt063nzzTRo1asSmTZsA+PDDD7niiit48sknyc3N/ex4TaUuEeTmQosWSUcikj6jR8PSpbV7zl69ID+/8jLbt2/n+eefZ/78+Zx++ukZJYLi4mLuuece3nnnHZo3bw5Ahw4dGDp0aI3ifeyxxxg+fDjNmzenS5cudO3alRdffJHjjz/+C+XuvPNOpk2bRqNGodLmK1F/92nTpnH22WeTm5v7heM1lbqqIVULiaTLY489xsCBAznssMNo27YtS5YsqfI1q1atIjc3l/3337/Kstdccw29evXa5/Gb3/xmn7Lr16+nU6dOn+137NiR9evX71Pu7bffZsaMGeTl5XHqqafyVtTAuXLlSrZu3Ur//v059thjuf/++6uMLxOpuSNwD7OO9u2bdCQi6VTVN/e4TJ8+nauvvhqA4cOHM336dI499tgKe9dUt9fN+PHjaxxjWTt37iQnJ4fFixfzpz/9iYsvvpjnnnuOkpISlixZwt///nc++eQTjj/+eI477jgOq+FUCbEmAjMbCBQAjYHJ7v6bMs83B+4HjgW2AMPcfU0csWzYELqP6o5AJD0++OADnn76aZYtW4aZsXv3bsyMW2+9lbZt27J169Z9yrd
r146uXbuydu1atm3bVuVdwTXXXMP8+fP3OT58+HDGjBnzhWMHH3ww69at+2y/qKiIgw8+eJ/XduzYkbPPPhuAs846ixEjRnx2vG3btrRs2ZKWLVtywgkn8Oqrr9Y4EeDusTwIH/5vA4cAzYBXgSPLlLkCuCvaHg7MqOq8xx57rH8ZTz/tDu5PPfWlXi4iX8KKFSsSff+7777bR44c+YVjJ5xwgj/zzDO+Y8cO79y582cxrlmzxnNzc/3DDz90d/drr73WL7roIt+5c6e7u2/atMlnzpxZo3hef/11P/roo33Hjh2+evVq79Kli5eUlOxT7mc/+5nfe++97u4+f/58z8vLc/fw+zz55JN9165d/q9//cuPOuooX7Zs2T6vL+/3Diz2Cj5X42wj6AOscvfV7v4p8BBwZpkyZwL3RduzgG9bTKNP1HVUJH2mT5/OWWed9YVj55xzDtOnT6d58+Y8+OCDjBgxgl69ejFkyBAmT55MmzZtALjxxhtp3749Rx55JD169GDQoEEZtRlU5qijjmLo0KEceeSRDBw4kIkTJ37WY+i0005jw4YNAIwZM4ZHHnmEnj17cv311zN58mQAunfvzsCBAzn66KPp06cPl1xyCT169KhRTAAWEkXtM7MhwEB3vyTavwD4hruPKlXm9ahMUbT/dlTm/TLnGgmMBMjNzT32n//8Z7Xjeewx+MMf4E9/gkapaiIXSc4bb7xB9+7dkw4jdcr7vZvZEnfPK698vfhIdPdJ7p7n7nnt27f/Uuc480yYPVtJQESkrDg/FtcDnUrtd4yOlVvGzJoAbQiNxiIikiVxJoKXgG5m1sXMmhEag+eUKTMHuDDaHgI87XHVVYlIIvRfOru+zO87tkTg7iXAKGAe8AYw092Xm9kNZnZGVOxeoK2ZrQJ+DIwp/2wiUh/l5OSwZcsWJYMs8Wg9gpycnGq9LrbG4rjk5eX54sWLkw5DRDKgFcqyr6IVyiprLE7NyGIRyb6mTZtWa6UsSYb60IiIpJwSgYhIyikRiIikXL1rLDazzUD1hxYH7YD3qyzVsOia00HXnA41ueZ/d/dyR+TWu0RQE2a2uKJW84ZK15wOuuZ0iOuaVTUkIpJySgQiIimXtkQwKekAEqBrTgddczrEcs2paiMQEZF9pe2OQEREylAiEBFJuQaZCMxsoJkVmtkqM9tnRlMza25mM6LnF5lZ5wTCrFUZXPOPzWyFmb1mZn83s39PIs7aVNU1lyp3jpm5mdX7roaZXLOZDY3+1svNbFq2Y6xtGfzbzjWz+Wb2SvTv+7Qk4qwtZjbFzDZFKziW97yZ2YTo9/GamR1T4zetaDHj+voAGgNvA4cAzYBXgSPLlLkCuCvaHg7MSDruLFzzScB+0fblabjmqFxr4FlgIZCXdNxZ+Dt3A14BDoz2v5J03Fm45knA5dH2kcCapOOu4TWfABwDvF7B86cBcwEDjgMW1fQ9G+IdQR9glbuvdvdPgYeAM8uUORO4L9qeBXzbzCyLMda2Kq/Z3ee7e3G0u5CwYlx9lsnfGeB/gN8CDWEe5Eyu+VJgortvBXD3TVmOsbZlcs0O7F1Vvg2wIYvx1Tp3fxb4oJIiZwL3e7AQOMDMvlaT92yIieBgYF2p/aLoWLllPCyg8xHQNivRxSOTay7th4RvFPVZldcc3TJ3cve/ZDOwGGXydz4MOMzMXjCzhWY2MGvRxSOTa/5v4HwzKwKeAK7MTmiJqe7/9yppPYKUMbPzgTzgxKRjiZOZNQJ+D1yUcCjZ1oRQPdSfcNf3rJn1dPcPkwwqZucCU939f83seOABM+vh7nuSDqy+aIh3BOuBTqX2O0bHyi1jZk0It5NbshJdPDK5ZsxsADAWOMPdd2YptrhUdc2tgR7AAjNbQ6hLnVPPG4wz+TsXAXPcfZe7vwOsJCSG+iqTa/4hMBPA3f8B5BAmZ2uoMvr/Xh0NMRG8BHQzsy5m1ozQGDynTJk5wIXR9hDgaY9aYeqpKq/ZzHo
DdxOSQH2vN4YqrtndP3L3du7e2d07E9pFznD3+rzOaSb/tmcT7gYws3aEqqLVWYyxtmVyzWuBbwOYWXdCItic1Sizaw7wn1HvoeOAj9x9Y01O2OCqhty9xMxGAfMIPQ6muPtyM7sBWOzuc4B7CbePqwiNMsOTi7jmMrzmW4FWwMNRu/hadz8jsaBrKMNrblAyvOZ5wHfMbAWwG7jW3evt3W6G1/wT4B4zu4bQcHxRff5iZ2bTCcm8XdTu8SugKYC730VoBzkNWAUUAyNq/J71+PclIiK1oCFWDYmISDUoEYiIpJwSgYhIyikRiIiknBKBiEjKKRFInWRmu81saalH50rKbq+F95tqZu9E7/VyNEK1uueYbGZHRts/L/Pc/9U0xug8e38vr5vZn83sgCrK96rvs3FK/NR9VOokM9vu7q1qu2wl55gKPO7us8zsO8Dv3P3oGpyvxjFVdV4zuw9Y6e6/rqT8RYRZV0fVdizScOiOQOoFM2sVraPwspktM7N9Zho1s6+Z2bOlvjF/Kzr+HTP7R/Tah82sqg/oZ4Gu0Wt/HJ3rdTMbHR1raWZ/MbNXo+PDouMLzCzPzH4DtIji+GP03Pbo50Nm9r1SMU81syFm1tjMbjWzl6I55i/L4NfyD6LJxsysT3SNr5jZ/5nZ4dFI3BuAYVEsw6LYp5jZi1HZ8mZslbRJeu5tPfQo70EYFbs0ejxKGAW/f/RcO8Koyr13tNujnz8BxkbbjQnzDbUjfLC3jI7/DPhlOe83FRgSbX8fWAQcCywDWhJGZS8HegPnAPeUem2b6OcCojUP9sZUqszeGM8C7ou2mxFmkWwBjAR+ER1vDiwGupQT5/ZS1/cwMDDa3x9oEm0PAB6Jti8Cbi/1+puA86PtAwhzEbVM+u+tR7KPBjfFhDQYn7h7r707ZtYUuMnMTgD2EL4JdwDeLfWal4ApUdnZ7r7UzE4kLFbyQjS1RjPCN+ny3GpmvyDMU/NDwvw1j7r7v6IY/gR8C3gS+F8z+y2hOum5alzXXKDAzJoDA4Fn3f2TqDrqaDMbEpVrQ5gs7p0yr29hZkuj638DeKpU+fvMrBthmoWmFbz/d4AzzOyn0X4OkBudS1JKiUDqix8A7YFj3X2XhRlFc0oXcPdno0TxPWCqmf0e2Ao85e7nZvAe17r7rL07Zvbt8gq5+0oLax2cBtxoZn939xsyuQh332FmC4DvAsMIC61AWG3qSnefV8UpPnH3Xma2H2H+nR8BEwgL8Mx397OihvUFFbzegHPcvTCTeCUd1EYg9UUbYFOUBE4C9llz2cI6zO+5+z3AZMJyfwuBvma2t86/pZkdluF7PgcMNrP9zKwloVrnOTM7CCh29wcJk/mVt2bsrujOpDwzCBOF7b27gPChfvne15jZYdF7lsvDanNXAT+xz6dS3zsV8UWlin5MqCLbax5wpUW3RxZmpZWUUyKQ+uKPQJ6ZLQP+E3iznDL9gVfN7BXCt+0Cd99M+GCcbmavEaqFjsjkDd39ZULbwYuENoPJ7v4K0BN4Maqi+RVwYzkvnwS8trexuIy/EhYG+puH5RchJK4VwMsWFi2/myru2KNYXiMszHILcHN07aVfNx84cm9jMeHOoWkU2/JoX1JO3UdFRFJOdwQiIimnRCAiknJKBCIiKadEICKSckoEIiIpp0QgIpJySgQiIin3/wHXo1tTCI8mXQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.title('ROC')\n", + "plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)\n", + "plt.legend(loc = 'lower right')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.xlabel('False Positive Rate')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SK Example 2: Telemanom" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "125/125 [==============================] - 1s 7ms/step - loss: 0.0134 - val_loss: 0.0051\n" + ] + } + ], + "source": [ + "transformer = TelemanomSKI(l_s= 2, n_predictions= 1)\n", + "transformer.fit(X_train)\n", + "prediction_labels_train = transformer.predict(X_train)\n", + "prediction_labels_test = transformer.predict(X_test)\n", + "prediction_score = transformer.predict_score(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction Labels\n", + " [[1]\n", + " [1]\n", + " [1]\n", + " ...\n", + " [1]\n", + " [1]\n", + " [1]]\n", + "Prediction Score\n", + " [[1]\n", + " [1]\n", + " [1]\n", + " ...\n", + " [1]\n", + " [1]\n", + " [1]]\n" + ] + } + ], + "source": [ + "print(\"Prediction Labels\\n\", prediction_labels_test)\n", + "print(\"Prediction Score\\n\", prediction_score)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "y_true = prediction_labels_train\n", + "y_pred = prediction_labels_test\n", + "precision, recall, thresholds = precision_recall_curve(y_true, y_pred)\n", + "f1_scores = 2*recall*precision/(recall+precision)\n", + "fpr, tpr, threshold = metrics.roc_curve(y_true, y_pred)\n", + "roc_auc = metrics.auc(fpr, tpr)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy Score: 0.18055416624987497\n" + ] + } + ], + "source": [ + "print('Accuracy Score: ', accuracy_score(y_true, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 958, 8039],\n", + " [ 153, 847]])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "confusion_matrix(y_true, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.86 0.11 0.19 8997\n", + " 1 0.10 0.85 0.17 1000\n", + "\n", + " accuracy 0.18 9997\n", + " macro avg 0.48 0.48 0.18 9997\n", + "weighted avg 0.79 0.18 0.19 9997\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_true, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best threshold: 0\n", + "Best F1-Score: 0.18186778212239701\n" + ] + } + ], + "source": [ + "print('Best threshold: ', thresholds[np.argmax(f1_scores)])\n", + "print('Best F1-Score: ', np.max(f1_scores))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAq0UlEQVR4nO3dd5xU1fnH8c8jIhjEBlgiEoggiERBNiqaGBULEsQkGhRbiEaiWFAI+RELYjf2hgUsGAuKqIANLHQUqUrXrA0WNCAoUkQpz++Pc1fHZcssu3fuzs73/Xrti7l3zsw8d4F99txznnPM3RERkdy1TdIBiIhIspQIRERynBKBiEiOUyIQEclxSgQiIjlOiUBEJMcpEYiI5DglApFSmNmnZvatma0xsy/MbLCZ7ZDy/GFmNsbMVpvZKjN7ycxaFnmPHc3sLjNbFL3PR9Fx/cxfkciWlAhEynaiu+8AtAbaAP8CMLN2wOvACODnQBPgfWCymf0yarMd8BawP9AB2BFoB6wADs7oVYiUwFRZLFIyM/sU+Ju7vxkd3wLs7+6/N7OJwBx371HkNa8By939bDP7G3ADsI+7r8lw+CJpUY9AJE1m1hA4Acg3s58BhwHPFdN0KHBs9PgYYJSSgFRlSgQiZRtuZquBxcAy4GpgV8L/n8+Laf85UHj/v14JbUSqDCUCkbL9wd3rAkcCLQg/5L8CNgN7FtN+T+DL6PGKEtqIVBlKBCJpcvfxwGDgNndfC7wD/LmYpl0IA8QAbwLHm1mdjAQpshWUCETK5y7gWDM7EOgL/MXMLjGzuma2i5ldT5gVdE3U/gnCLaXnzayFmW1jZvXM7HIz65jIFYgUoUQgUg7uvhz4D9DP3ScBxwN/IowDfEaYXvobd/9v1P47woDxQuAN4BtgKuH20rsZvwCRYmj6qIhIjlOPQEQkxykRiIjkOCUCEZEcp0QgIpLjtk06gPKqX7++N27cOOkwRESyyowZM7509wbFPZd1iaBx48ZMnz496TBERLKKmX1W0nO6NSQikuOUCEREcpwSgYhIjlMiEBHJcUoEIiI5LrZEYGaPmtkyM5tbwvNmZveYWb6ZzTazg+KKRUREShZnj2AwYbPukpwANIu+ugMPxBiLiIiUILZE4O4TgJWlNDkJ+I8HU4CdzUw7OYmIFLFxI/TpA4sXx/P+SY4R7EXYsKNQQXRuC2bW3cymm9n05cuXZyQ4EZGqYP16OPlkuO02eOWVeD4jKwaL3X2gu+e5e16DBsVWSIuIVDurV0PHjjByJNx3H5x/fjyfk+QSE0uAvVOOG0bnRERy3ooVcMIJMHMmPPEEnHlmfJ+VZI9gJHB2NHvoUGCVu3+eYDwiIlXCkiVwxBEweza8+GK8SQBi7BGY2RDgSKC+mRUAVwM1Adz9QeBVoCOQD6wD/hpXLCIi2eKjj+CYY+DLL2HUKDjyyPg/M7ZE4O5dy3jegQvj+nwRkWwzZw4cdxxs2ABjx0JeXmY+NysGi0VEqrspU+B3v4MaNWDChMwlAVAiEBFJ3BtvQPv2sOuuMGkStGyZ2c9XIhARSdALL0CnTrDPPiEJJLEBoxKBiEhCHnsM/vxnaNsWxo+HPfZIJg4lAhGRBNx5J5xzTpgh9MYbsMsuycWiRCAikkHu0K8f9OoVlo4YORLq1Ek2pqzbvF5EJFtt3gw9e4blIs45Bx56CLatAj+F1SMQEcmADRvgL38JSaB3b3j44aqRBEA9AhGR2K1fD6eeGm4DXX89XH45mCUd1Y+UCEREYrR6NXTuDOPGhd7AhVVwPQUlAhGRmHz5ZVhBdNYsePJJOOOMpCMqnhKBiEgMliwJ6wZ99FFYQfTEE5OOqGRKBCIilSw/H449NuwpkKkVRCtCiUBEpBLNnh16Ahs3wpgxmV08bmtp+qiISCV5552wgui222Z+BdGKUCIQEakEb7wRlouoXz+ZFUQrQolARKSCnn8efv97aNoUJk5MZgXRilAiEBGpgEcfhS5
dwm2gceOSW0G0IpQIRES20h13wLnnVo0VRCtCiUBEpJzc4aqrwppBp5xSNVYQrQhNHxURKYfUFUTPPTesIFqjRtJRVYx6BCIiaSq6guigQdmfBEA9AhGRtKxfHwaFX3oJbrgB/vWvqrWCaEUoEYiIlOGbb+Ckk8K+wgMGQI8eSUdUuXRrSESkBO7wwguhOGzixLCCaHVLAqBEICJSrE8+CSuGnnzyj9XCp5+edFTxUCIQEUnx/fdw002w//7hVtAdd8D06XDooUlHFh+NEYiIRMaPhwsugAULQk/grrugYcOko4qfegQikvOWLQvTQo88MswOeuUVGDYsN5IAKBGISA7bvBkGDoQWLWDIkLCp/Ny50LFj0pFllm4NiUhOev/9cBuocA+BBx6A/fZLOqpkxNojMLMOZvaBmeWbWd9inm9kZmPNbJaZzTazHMvDIpJpa9bAP/4BbduGLSUffxzGjs3dJAAxJgIzqwEMAE4AWgJdzazoVg1XAkPdvQ1wGnB/XPGISG5zD5vI77cf3H57WCdo4UI4++zqUyG8teLsERwM5Lv7x+7+PfAMcFKRNg7sGD3eCVgaYzwikqMKawL+9CfYdVd4++2wWNyuuyYdWdUQZyLYC1icclwQnUvVHzjTzAqAV4GLi3sjM+tuZtPNbPry5cvjiFVEqqHUmoBx40JPYMYMaNcu6ciqlqRnDXUFBrt7Q6Aj8ISZbRGTuw909zx3z2vQoEHGgxSR7DNhArRuHWYCnXBCqA3o1StsLC8/FWciWALsnXLcMDqX6lxgKIC7vwPUBurHGJOIVHPLl8Nf/xpmAn37Lbz8cthTeO+9y35troozEUwDmplZEzPbjjAYPLJIm0VAewAz24+QCHTvR0TKbfNmePhhaN4cnnoqLBM9b17YVF5KF1snyd03mtlFwGigBvCou88zs2uB6e4+EugNDDKzywgDx93c3eOKSUSqp9mz4fzzQ03AEUeEmoCWRecoSolivVvm7q8SBoFTz/VLeTwfODzOGESk+lqzBvr3D2sC7bILDB6s6aBbQ8MmIpJ13GH4cLjkEigogPPOg5tv1nTQrZX0rCERkXL59FPo3DnUBOyyC0yeHNYLUhLYekoEIpIVvv8e/v3vcO9/7Fi47bZQE3DYYUlHlv10a0hEqrwJE8ICcfPnwx//CHffremglUk9AhGpslJrAtauhZdeCnsIKwlULiUCEalyCmsCWrQIG8b37Rt6A506JR1Z9aRbQyJSpcyZE2oC3n4bfvvbUBOw//5JR1W9qUcgIlXCmjXQpw+0aQMffgiPPRb2EFYSiJ96BCKSuBEj4OKLYfFi+NvfQk1AvXpJR5U71CMQkcR89lmoCfjDH2DnnWHSJBg0SEkg09JOBGb2szgDEZHcsWHDjzUBY8b8WBNwuBacSUSZicDMDjOz+cDC6PhAM9OWkiKyVSZODOMAffvCcceF2UC9e0PNmklHlrvS6RHcCRwPrABw9/eBI+IMSkSqny+/hHPOCauDrlkDI0eGPYQbNUo6Mknr1pC7Ly5yalMMsYhINbR5MzzySNgn4Ikn4P/+L+wTcOKJSUcmhdKZNbTYzA4D3MxqAj2BBfGGJSLVwZw5YWmIyZNVE1CVpdMjOB+4kLDx/BKgNdAjxphEJMutXQv//CccdBAsXKiagKounR5Bc3c/I/WEmR0OTI4nJBHJZiNGhH0CFi1STUC2SKdHcG+a50Qkh332GZx0UqgJ2HFH1QRkkxJ7BGbWDjgMaGBmvVKe2pGwB7GICBs2wJ13wjXXhONbboFLL9V00GxS2q2h7YAdojZ1U85/A5wSZ1Aikh0mTQqDwXPnht7APfdoOmg2KjERuPt4YLyZDXb3zzIYk4hUcV9+GaaBPvpo+ME/YkRYKkKyUzqDxevM7FZgf6B24Ul3Pzq2qESkStq8GQYPDjOCVq0KyeCqq6BOnaQjk4pIZ7D4KcLyEk2Aa4BPgWkxxiQiVdDcuWGnsHPPhf32g1mzwow
gJYHsl04iqOfujwAb3H28u58DqDcgkiPWrg2/+bdpAwsWhNtB48dDq1ZJRyaVJZ1bQxuiPz83s98DS4Fd4wtJRKqKkSPDPgGLFoV1gv79b6hfP+mopLKlkwiuN7OdgN6E+oEdgUvjDEpEkrVoUSgKGzEiVANPnAi/+U3SUUlcykwE7v5y9HAVcBT8UFksItXMhg1w113Qv384Vk1AbiitoKwG0IWwxtAod59rZp2Ay4HtgTaZCVFEMmHy5LBpfGFNwN13wy9+kXRUkgml9QgeAfYGpgL3mNlSIA/o6+7DMxCbiGTAihVhMPiRR2DvvWH48JAIJHeUlgjygAPcfbOZ1Qa+APZx9xWZCU1E4uQeagL69Ak1AX36QL9+sMMOSUcmmVba9NHv3X0zgLuvBz4ubxIwsw5m9oGZ5ZtZ3xLadDGz+WY2z8yeLs/7i8jWmTcv1ASccw60aAEzZ4bxACWB3FRaj6CFmc2OHhuwT3RsgLv7AaW9cTTGMAA4FigAppnZSHefn9KmGfAv4HB3/8rMdqvAtYhIGdauheuug9tvDyuEPvIIdOsG26S1V6FUV6Ulgv0q+N4HA/nu/jGAmT0DnATMT2lzHjDA3b8CcPdlFfxMESnBSy+FmoDPPlNNgPxUaYvOVXShub2A1L2OC4BDirTZF8DMJhOWtu7v7qOKvpGZdQe6AzTS0oYi5bJoEfTsGQaBVRMgxUm6Q7gt0Aw4EugKDDKznYs2cveB7p7n7nkNGjTIbIQiWWrDBrjtNmjZEkaPDj2AWbOUBGRL6VQWb60lhOmnhRpG51IVAO+6+wbgEzP7kJAYtKidSAW8/XaoCZgzB048MewT0Lhx0lFJVZVWj8DMtjez5uV872lAMzNrYmbbAacBI4u0GU7oDWBm9Qm3ij4u5+eISGTFCjjvPDj8cPj663A7aORIJQEpXZmJwMxOBN4DRkXHrc2s6A/0Lbj7RuAiYDSwABjq7vPM7FozK9zCYjSwwszmA2OBPqpTECm/wpqAFi3gscdCTcD8+SoMk/SYu5fewGwGYdnpce7eJjo3x91/lYH4tpCXl+fTp09P4qNFqqR588J2kRMnhp7AAw/ArxL53ylVmZnNcPe84p5L59bQBndfVeRc6dlDRGK3bh3861/QunVIBg8/DBMmKAlI+aUzWDzPzE4HakQFYJcAb8cbloiU5uWXQ03Ap5+GgrBbbgFNqJOtlU6P4GLCfsXfAU8TlqO+NMaYRKQEixfDn/4UZgL97Gdhp7DHHlMSkIpJp0fQwt2vAK6IOxgRKd6GDWEK6NVXhw3kb74ZLrsMttsu6cikOkgnEdxuZnsAw4Bn3X1uzDGJSIrUmoBOneDeezUdVCpXmbeG3P0ows5ky4GHzGyOmV0Ze2QiOW7lSujePcwE+uorePFF1QRIPNIqKHP3L9z9HuB8Qk1BvziDEsll7vD449C8OTz6KPzjH7BgAfzhD2CWdHRSHaVTULafmfU3szmEzevfJiwXISKVbP58OOqoMBOoWbOwT8Ctt2qfAIlXOmMEjwLPAse7+9KY4xHJSevWwfXXhx/6devCoEFhqWjtEyCZUGYicPd2mQhEJFe98gpcdJFqAiQ5JSYCMxvq7l2iW0KplcRp7VAmIqVbvDjsE/Dii2Gp6PHj4Ygjko5KclFpPYKe0Z+dMhGISK7YuDHUBPTrF2oCbroJevVSTYAkp8Q7kO7+efSwh7t/lvoF9MhMeCLVyzvvQF4e9O4NRx4Z1gjq21dJQJKVzlDUscWcO6GyAxGpzlauhL//HQ47LOwZ8MILYQ/hJk2Sjkyk9DGCCwi/+f/SzGanPFUXmBx3YCLVgTs88USoBVi5MvQE+vfXdFCpWkobI3gaeA24Ceibcn61u6+MNSqRamDBgrBPwPjx0K4dPPggHKApFlIFlXZryN39U+BCYHXKF2a2a/yhiWSndevgiivgwANh9mwYOBAmTVISkKqrrB5BJ2AGYfpoanG7A7+MMS6RrPTqq6E
m4JNP4C9/CTUBu+2WdFQipSsxEbh7p+hPDWeJlKGgAC69FJ5/HvbbD8aNg9/9LumoRNKTzlpDh5tZnejxmWZ2h5k1ij80kapv40a4887ww/+VV+DGG+G995QEJLukM330AWCdmR0I9AY+Ap6INSqRLDBlSqgJ6NUrVATPnx/2EFZNgGSbdBLBRnd34CTgPncfQJhCKpKTitYEPP982ENYNQGSrdJJBKvN7F/AWcArZrYNUDPesESqnsKagBYt4JFHwlaR8+eHPYS1T4Bks3QSwamEjevPcfcvCHsR3BprVCJVzMKFcPTRcPbZsM8+MGMG3H57WDJaJNuls1XlF8BTwE5m1glY7+7/iT0ykSrg22/hyitDDcD778NDD8HkyaFGQKS6SGfWUBdgKvBnoAvwrpmdEndgIkl79VXYf3+44Qbo2jX0Crp312YxUv2ks0PZFcCv3X0ZgJk1AN4EhsUZmEhSUmsCWrSAsWPDSqEi1VU6v9tsU5gEIivSfJ1IVtm4Ee6668eagBtuCLeDlASkukunRzDKzEYDQ6LjU4FX4wtJJPPefRfOPz8Ug51wAtx3H/xSi6hIjkhnz+I+ZvYn4DfRqYHu/mK8YYlkxldfweWXh0Hgn/8chg3TdFDJPaXtR9AMuA3YB5gD/MPdl2QqMJE4ucNTT4X9AVasCGMC11yj6aCSm0q71/8o8DJwMmEF0nvL++Zm1sHMPjCzfDPrW0q7k83MzSyvvJ8hUl4LF0L79nDWWaEaePp0uOMOJQHJXaXdGqrr7oOixx+Y2czyvLGZ1QAGELa6LACmmdlId59fpF1doCfwbnneX6S8vv02DADfcgvUqRM2ijnvPE0HFSktEdQ2szb8uA/B9qnH7l5WYjgYyHf3jwHM7BnCekXzi7S7Dvg30KecsYukbdQouPBC+Pjj0BO47TbtEyBSqLRE8DlwR8rxFynHDhxdxnvvBSxOOS4ADkltYGYHAXu7+ytmVmIiMLPuQHeARo20Arakb8mScP9/2DBo3hzGjIGjjko6KpGqpbSNaWL97xItXncH0K2stu4+EBgIkJeX53HGJdXDxo0wYEBYHmLjRrj++rCBfK1aSUcmUvWkU0ewtZYAe6ccN4zOFaoLtALGWZirtwcw0sw6u/v0GOOSak41ASLlE+cw2TSgmZk1MbPtgNOAkYVPuvsqd6/v7o3dvTEwBVASkK321VdwwQXQrh0sWxZuB73yipKASFliSwTuvhG4CBgNLACGuvs8M7vWzDrH9bmSewprAlq0gIEDoWfPMEX05JNVGCaSjjJvDVm4b3MG8Et3vzbar3gPd59a1mvd/VWKLEfh7v1KaHtkWhGLpPjgA+jRIwwCH3xwmB3Upk3SUYlkl3R6BPcD7YCu0fFqQn2ASGK+/RauuirsEzBzJjzwALz9tpKAyNZIZ7D4EHc/yMxmAbj7V9E9f5FEpNYEnHlmqAnYffekoxLJXun0CDZEVcIOP+xHsDnWqESKsWQJdOkSZgLVrAlvvRX2EFYSEKmYdBLBPcCLwG5mdgMwCbgx1qhEUmzcCHffHfYJeOkluO66sE/A0WWVNIpIWtJZhvopM5sBtCcsL/EHd18Qe2QiwNSpoSZg1izo0CHUBOyzT9JRiVQv6exZ3AhYB7xEqANYG50Tic3XX4fZQIceCv/7HwwdGvYQVhIQqXzpDBa/QhgfMKA20AT4ANg/xrgkR7nD00+HfQKWL4dLLoFrr4Udd0w6MpHqK51bQ79KPY4WiusRW0SSs4rWBLz2mqaDimRCuSuLo+WnDymzoUiavv0W+vULNQEzZqgmQCTT0qks7pVyuA1wELA0togkp4weHWoCPvoIzjgDbr9d00FFMi2dHkHdlK9ahDGDk+IMSqq/pUvh1FPDTKAaNeDNN+HJJ5UERJJQao8gKiSr6+7/yFA8Us1t2vTjPgHffx8Ggv/5T+0TIJKkEhOBmW3r7hvN7PBMBiTV17RpoSZg5kw4/vhQE9C0adJRiUh
pPYKphPGA98xsJPAcsLbwSXd/IebYpJr4+mu44oowCLzHHqEm4JRTtES0SFWRTh1BbWAFYY/iwnoCB5QIpFTuMGQI9OoVagIuvjgsD6GaAJGqpbREsFs0Y2guPyaAQto3WEr14YehJuCtt+DXvw5VwQcdlHRUIlKc0hJBDWAHfpoACikRSLHWr4ebboKbb4bttw8Dw3//e5gZJCJVU2mJ4HN3vzZjkUjWe/31UBOQnw+nnx5qAvbYI+moRKQspdURaChP0rJ0KZx2WpgJtM02oSbgqaeUBESyRWmJoH3GopCstGkT3Htv2DR++PBQEzB7NrTXvxyRrFLirSF3X5nJQCS7TJ8eagJmzIDjjgtjAaoJEMlO5V50TnLbqlVw0UVhddClS+GZZ8IewkoCItlLiUDSUlgT0KJFKAy76CJYsCCsF6TCMJHslk5BmeS4//431AS8+Sbk5cHLL0PbtklHJSKVRT0CKdH69dC/P7RqFfYOHjAApkxREhCpbtQjkGKpJkAkd6hHID/x+efQtWuoCTCDN95QTYBIdadEIECoCbjvvjAY/OKL4ZbQ7NlwzDFJRyYicdOtIflJTcCxx4axgGbNko5KRDJFPYIctmpVWBr64INhyZJQEzB6tJKASK6JNRGYWQcz+8DM8s2sbzHP9zKz+WY228zeMrNfxBmPBO7hh36LFnD//aEmYOFC1QSI5KrYEkG03/EA4ASgJdDVzFoWaTYLyHP3A4BhwC1xxSPBf/8bBoK7doW99oJ334V77oGddko6MhFJSpw9goOBfHf/2N2/B54BTkpt4O5j3X1ddDgFaBhjPDlt/Xq45hr41a/CD//77gt/5uUlHZmIJC3OweK9gMUpxwXAIaW0Pxd4rbgnzKw70B2gUaNGlRVfznjzzVAZ/N//huWi77gD9twz6ahEpKqoEoPFZnYmkAfcWtzz7j7Q3fPcPa9BgwaZDS6LffFFKAY79tgwLvD662G9ICUBEUkVZyJYAuydctwwOvcTZnYMcAXQ2d2/izGenLFpU5gC2rw5PP98qAmYMyckBBGRouK8NTQNaGZmTQgJ4DTg9NQGZtYGeAjo4O7LYowlZ8yYEWoCpk8PxWD336/poCJSuth6BO6+EbgIGA0sAIa6+zwzu9bMOkfNbgV2AJ4zs/fMbGRc8VR3q1bBJZeEmoCCgnAL6PXXlQREpGyxVha7+6vAq0XO9Ut5rAUMKsgdhg6Fyy4LYwIXXgjXX6/poCKSPi0xkcXy88MP/tdfD0tDjxyp6aAiUn5VYtaQlM9334WN4lu1CvsD3HuvagJEZOupR5BlVBMgIpVNPYIs8cUXcMYZYQro5s1hcTjVBIhIZVAiqOI2bQpTQFu0gGHD4OqrYe5cOO64pCMTkepCt4aqsJkzQ03AtGmhJmDAANh336SjEpHqRj2CKuibb6BnT/j1r2HRInj66TAzSElAROKgHkEV4g7PPQeXXhrGBHr0CDUBO++cdGQiUp0pEVQRqTUBBx0EI0aEHoGISNx0ayhh330H110XagLeeSdsEjN1qpKAiGSOegQJGjMGLrgAPvwwbBN5xx3w858nHZWI5Br1CBLwv//BmWdC+/Zheujo0WEPYSUBEUmCEkEGbdoEDzwQ9gl47jno1y/sE6CaABFJkm4NZUhqTUD79qFITNNBRaQqUI8gZt98E6aD/vrX8Nln8NRT8MYbSgIiUnWoRxAT97AkxKWXwuefh0HhG25QTYCIVD3qEcTgo4+gY0fo0gV23z0sFT1ggJKAiFRNSgSV6LvvQiVwq1YweTLcfXeoCTj44KQjExEpmW4NVZIxY8KSEB98EHoCd96p6aAikh3UI6ig//0PzjorzATasAFGjYJnn1USEJHsoUSwlTZvhgcfDPsEPPssXHVV2Cfg+OOTjkxEpHx0a2grzJoVagKmToWjjw41Ac2bJx2VSNWzYcMGCgoKWL9+fdKh5IzatWvTsGFDatasmfZrlAjKYfXqUA18zz1Qvz4
8+SScfjqYJR2ZSNVUUFBA3bp1ady4Mab/KLFzd1asWEFBQQFNmjRJ+3W6NZSGwpqAFi3CTKC//x0WLgx7COvftkjJ1q9fT7169ZQEMsTMqFevXrl7YEoEZSisCfjzn2G33UJNwP33wy67JB2ZSHZQEsisrfl+KxGU4LvvQiVwq1YwaRLcdVdYJ0g1ASJS3SgRFGPsWDjwQLjySjjxxHAbqGdP2FYjKiJZafjw4ZgZCxcu/OHcuHHj6NSp00/adevWjWHDhgFhoLtv3740a9aMgw46iHbt2vHaa69VOJabbrqJpk2b0rx5c0aPHl1q20suuYQddtjhh+NFixZx1FFH0aZNGw444ABeffXVCscDSgQ/UVgTcPTRoSbgtddg6FDYa6+kIxORihgyZAi/+c1vGDJkSNqvueqqq/j888+ZO3cuM2fOZPjw4axevbpCccyfP59nnnmGefPmMWrUKHr06MGmTZuKbTt9+nS++uqrn5y7/vrr6dKlC7NmzeKZZ56hR48eFYqnkH7HJdQEDBoEffvC2rWhJ3D55bD99klHJlJ9XHopvPde5b5n69bhtm1p1qxZw6RJkxg7diwnnngi11xzTZnvu27dOgYNGsQnn3xCrVq1ANh9993p0qVLheIdMWIEp512GrVq1aJJkyY0bdqUqVOn0q5du5+027RpE3369OHpp5/mxRdf/OG8mfHNN98AsGrVKn5eSZWrOZ8I3nsv1AS8+y4cdVQYCG7RIumoRKSyjBgxgg4dOrDvvvtSr149ZsyYQdu2bUt9TX5+Po0aNWLHHXcs8/0vu+wyxo4du8X50047jb59+/7k3JIlSzj00EN/OG7YsCFLlizZ4rX33XcfnTt3Zs899/zJ+f79+3Pcccdx7733snbtWt58880y40tHziaC1avh6qvDdFDVBIjEr6zf3OMyZMgQevbsCYQfzkOGDKFt27Ylzq4p76ybO++8s8Ixplq6dCnPPfcc48aN2+K5IUOG0K1bN3r37s0777zDWWedxdy5c9lmm4rd5Y81EZhZB+BuoAbwsLvfXOT5WsB/gLbACuBUd/80zpjc4YUXwuDv0qWhJuDGGzUdVKQ6WrlyJWPGjGHOnDmYGZs2bcLMuPXWW6lXr94W9+BXrlxJ/fr1adq0KYsWLeKbb74ps1dQnh7BXnvtxeLFi384LigoYK8ig5CzZs0iPz+fpk2bAuE2VdOmTcnPz+eRRx5h1KhRALRr147169fz5Zdfsttuu6X/TSmOu8fyRfjh/xHwS2A74H2gZZE2PYAHo8enAc+W9b5t27b1rfXRR+4nnOAO7q1bu0+ZstVvJSJpmD9/fqKf/9BDD3n37t1/cu6II47w8ePH+/r1671x48Y/xPjpp596o0aN/Ouvv3Z39z59+ni3bt38u+++c3f3ZcuW+dChQysUz9y5c/2AAw7w9evX+8cff+xNmjTxjRs3lvqaOnXq/PC4Q4cO/thjj7l7+N7uueeevnnz5i1eU9z3HZjuJfxcjXPW0MFAvrt/7O7fA88AJxVpcxLwePR4GNDeYqo+eewx2H9/mDgxLBE9bRocckgcnyQiVcWQIUP44x//+JNzJ598MkOGDKFWrVo8+eST/PWvf6V169accsopPPzww+y0005AmKHToEEDWrZsSatWrejUqVNaYwal2X///enSpQstW7akQ4cODBgwgBo1agDQsWNHli5dWurrb7/9dgYNGsSBBx5I165dGTx4cKUU7FlIFJXPzE4BOrj736Ljs4BD3P2ilDZzozYF0fFHUZsvi7xXd6A7QKNGjdp+9tln5Y5n4sSwRtBdd2k6qEimLFiwgP322y/pMHJOcd93M5vh7nnFtc+KwWJ3HwgMBMjLy9uqzPXb34YvERH5qThvDS0B9k45bhidK7aNmW0L7EQYNBYRkQyJMxFMA5qZWRMz244wGDyySJuRwF+ix6cAYzyue1Uikgj9l86srfl+x5YI3H0jcBEwGlgADHX3eWZ2rZl
1jpo9AtQzs3ygF9C3+HcTkWxUu3ZtVqxYoWSQIR7tR1C7du1yvS62weK45OXl+fTp05MOQ0TSoB3KMq+kHcqyfrBYRLJTzZo1y7VTliRDq4+KiOQ4JQIRkRynRCAikuOybrDYzJYD5S8tDuoDX5bZqnrRNecGXXNuqMg1/8LdGxT3RNYlgoows+kljZpXV7rm3KBrzg1xXbNuDYmI5DglAhGRHJdriWBg0gEkQNecG3TNuSGWa86pMQIREdlSrvUIRESkCCUCEZEcVy0TgZl1MLMPzCzfzLZY0dTMapnZs9Hz75pZ4wTCrFRpXHMvM5tvZrPN7C0z+0UScVamsq45pd3JZuZmlvVTDdO5ZjPrEv1dzzOzpzMdY2VL4992IzMba2azon/fHZOIs7KY2aNmtizawbG4583M7om+H7PN7KAKf2hJmxln6xdQA/gI+CWwHfA+0LJImx7Ag9Hj04Bnk447A9d8FPCz6PEFuXDNUbu6wARgCpCXdNwZ+HtuBswCdomOd0s67gxc80DgguhxS+DTpOOu4DUfARwEzC3h+Y7Aa4ABhwLvVvQzq2OP4GAg390/dvfvgWeAk4q0OQl4PHo8DGhvlbEDdHLKvGZ3H+vu66LDKYQd47JZOn/PANcB/waqwzrI6VzzecAAd/8KwN2XZTjGypbONTtQuKv8TkDpO8BXce4+AVhZSpOTgP94MAXY2cz2rMhnVsdEsBewOOW4IDpXbBsPG+isAuplJLp4pHPNqc4l/EaRzcq85qjLvLe7v5LJwGKUzt/zvsC+ZjbZzKaYWYeMRRePdK65P3CmmRUArwIXZya0xJT3/3uZtB9BjjGzM4E84HdJxxInM9sGuAPolnAombYt4fbQkYRe3wQz+5W7f51kUDHrCgx299vNrB3whJm1cvfNSQeWLapjj2AJsHfKccPoXLFtzGxbQndyRUaii0c614yZHQNcAXR29+8yFFtcyrrmukArYJyZfUq4lzoyyweM0/l7LgBGuvsGd/8E+JCQGLJVOtd8LjAUwN3fAWoTFmerrtL6/14e1TERTAOamVkTM9uOMBg8skibkcBfosenAGM8GoXJUmVes5m1AR4iJIFsv28MZVyzu69y9/ru3tjdGxPGRTq7ezbvc5rOv+3hhN4AZlafcKvo4wzGWNnSueZFQHsAM9uPkAiWZzTKzBoJnB3NHjoUWOXun1fkDavdrSF332hmFwGjCTMOHnX3eWZ2LTDd3UcCjxC6j/mEQZnTkou44tK85luBHYDnonHxRe7eObGgKyjNa65W0rzm0cBxZjYf2AT0cfes7e2mec29gUFmdhlh4LhbNv9iZ2ZDCMm8fjTucTVQE8DdHySMg3QE8oF1wF8r/JlZ/P0SEZFKUB1vDYmISDkoEYiI5DglAhGRHKdEICKS45QIRERynBKBVElmtsnM3kv5alxK2zWV8HmDzeyT6LNmRhWq5X2Ph82sZfT48iLPvV3RGKP3Kfy+zDWzl8xs5zLat8721Tglfpo+KlWSma1x9x0qu20p7zEYeNndh5nZccBt7n5ABd6vwjGV9b5m9jjwobvfUEr7boRVVy+q7Fik+lCPQLKCme0Q7aMw08zmmNkWK42a2Z5mNiHlN+bfRuePM7N3otc+Z2Zl/YCeADSNXtsreq+5ZnZpdK6Omb1iZu9H50+Nzo8zszwzuxnYPorjqei5NdGfz5jZ71NiHmxmp5hZDTO71cymRWvM/z2Nb8s7RIuNmdnB0TXOMrO3zax5VIl7LXBqFMupUeyPmtnUqG1xK7ZKrkl67W196au4L0JV7HvR14uEKvgdo+fqE6oqC3u0a6I/ewNXRI9rENYbqk/4wV4nOv9/QL9iPm8wcEr0+M/Au0BbYA5Qh1CVPQ9oA5wMDEp57U7Rn+OI9jwojCmlTWGMfwQejx5vR1hFcnugO3BldL4WMB1oUkyca1Ku7zmgQ3S8I7Bt9PgY4PnocTfgvpTX3wicGT3embAWUZ2k/771lex
XtVtiQqqNb929deGBmdUEbjSzI4DNhN+Edwe+SHnNNODRqO1wd3/PzH5H2KxkcrS0xnaE36SLc6uZXUlYp+Zcwvo1L7r72iiGF4DfAqOA283s34TbSRPLcV2vAXebWS2gAzDB3b+NbkcdYGanRO12IiwW90mR129vZu9F178AeCOl/eNm1oywzELNEj7/OKCzmf0jOq4NNIreS3KUEoFkizOABkBbd99gYUXR2qkN3H1ClCh+Dww2szuAr4A33L1rGp/Rx92HFR6YWfviGrn7hxb2OugIXG9mb7n7telchLuvN7NxwPHAqYSNViDsNnWxu48u4y2+dffWZvYzwvo7FwL3EDbgGevuf4wG1seV8HoDTnb3D9KJV3KDxggkW+wELIuSwFHAFnsuW9iH+X/uPgh4mLDd3xTgcDMrvOdfx8z2TfMzJwJ/MLOfmVkdwm2diWb2c2Cduz9JWMyvuD1jN0Q9k+I8S1gorLB3AeGH+gWFrzGzfaPPLJaH3eYuAXrbj0upFy5F3C2l6WrCLbJCo4GLLeoeWViVVnKcEoFki6eAPDObA5wNLCymzZHA+2Y2i/Db9t3uvpzwg3GImc0m3BZqkc4HuvtMwtjBVMKYwcPuPgv4FTA1ukVzNXB9MS8fCMwuHCwu4nXCxkBveth+EULimg/MtLBp+UOU0WOPYplN2JjlFuCm6NpTXzcWaFk4WEzoOdSMYpsXHUuO0/RREZEcpx6BiEiOUyIQEclxSgQiIjlOiUBEJMcpEYiI5DglAhGRHKdEICKS4/4ff8mafMc7RFwAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.title('ROC')\n", + "plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)\n", + "plt.legend(loc = 'lower right')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.xlabel('False Positive Rate')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pipline Example: AutoEncoder" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'inputs.0'" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Creating pipeline\n", + "pipeline_description = Pipeline()\n", + "pipeline_description.add_input(name='inputs')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 'tods.data_processing.dataset_to_dataframe', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 'tods.data_processing.dataset_to_dataframe' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 0: dataset_to_dataframe\n", + "step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))\n", + "step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')\n", + "step_0.add_output('produce')\n", + "pipeline_description.add_step(step_0)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 
'tods.data_processing.column_parser', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 'tods.data_processing.column_parser' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 1: column_parser\n", + "step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))\n", + "step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')\n", + "step_1.add_output('produce')\n", + "pipeline_description.add_step(step_1)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 'tods.data_processing.extract_columns_by_semantic_types', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 'tods.data_processing.extract_columns_by_semantic_types' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 2: extract_columns_by_semantic_types(attributes)\n", + "step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))\n", + "step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')\n", + "step_2.add_output('produce')\n", + "step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,\n", + "\t\t\t\t\t\t\t data=['https://metadata.datadrivendiscovery.org/types/Attribute'])\n", + "pipeline_description.add_step(step_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 3: extract_columns_by_semantic_types(targets)\n", + "step_3 = 
PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))\n", + "step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')\n", + "step_3.add_output('produce')\n", + "step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,\n", + "\t\t\t\t\t\t\tdata=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])\n", + "pipeline_description.add_step(step_3)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "attributes = 'steps.2.produce'\n", + "targets = 'steps.3.produce'" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 'tods.feature_analysis.statistical_maximum', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 'tods.feature_analysis.statistical_maximum' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 4: processing\n", + "step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum'))\n", + "step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)\n", + "step_4.add_output('produce')\n", + "pipeline_description.add_step(step_4)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 'tods.detection_algorithm.pyod_ae', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 
'tods.detection_algorithm.pyod_ae' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 5: algorithm\n", + "step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))\n", + "step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')\n", + "step_5.add_output('produce')\n", + "pipeline_description.add_step(step_5)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "While loading primitive 'tods.data_processing.construct_predictions', an error has been detected: (scikit-learn 0.22.2.post1 (/Users/wangyanghe/anaconda3/envs/tods2/lib/python3.6/site-packages), Requirement.parse('scikit-learn==0.22.0'))\n", + "Attempting to load primitive 'tods.data_processing.construct_predictions' without checking requirements.\n" + ] + } + ], + "source": [ + "# Step 6: Predictions\n", + "step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))\n", + "step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')\n", + "step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')\n", + "step_6.add_output('produce')\n", + "pipeline_description.add_step(step_6)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'outputs.0'" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Final Output\n", + "pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"id\": 
\"44caca5f-ed2a-42d6-bede-777fd96e5a90\", \"schema\": \"https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json\", \"created\": \"2021-06-29T04:06:32.108192Z\", \"inputs\": [{\"name\": \"inputs\"}], \"outputs\": [{\"data\": \"steps.6.produce\", \"name\": \"output predictions\"}], \"steps\": [{\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"c78138d9-9377-31dc-aee8-83d9df049c60\", \"version\": \"0.3.0\", \"python_path\": \"d3m.primitives.tods.data_processing.dataset_to_dataframe\", \"name\": \"Extract a DataFrame from a Dataset\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"inputs.0\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"81235c29-aeb9-3828-911a-1b25319b6998\", \"version\": \"0.6.0\", \"python_path\": \"d3m.primitives.tods.data_processing.column_parser\", \"name\": \"Parses strings into their types\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.0.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"a996cd89-ddf0-367f-8e7f-8c013cbc2891\", \"version\": \"0.4.0\", \"python_path\": \"d3m.primitives.tods.data_processing.extract_columns_by_semantic_types\", \"name\": \"Extracts columns by semantic type\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.1.produce\"}}, \"outputs\": [{\"id\": \"produce\"}], \"hyperparams\": {\"semantic_types\": {\"type\": \"VALUE\", \"data\": [\"https://metadata.datadrivendiscovery.org/types/Attribute\"]}}}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"a996cd89-ddf0-367f-8e7f-8c013cbc2891\", \"version\": \"0.4.0\", \"python_path\": \"d3m.primitives.tods.data_processing.extract_columns_by_semantic_types\", \"name\": \"Extracts columns by semantic type\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.0.produce\"}}, \"outputs\": [{\"id\": \"produce\"}], \"hyperparams\": {\"semantic_types\": {\"type\": \"VALUE\", 
\"data\": [\"https://metadata.datadrivendiscovery.org/types/TrueTarget\"]}}}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"f07ce875-bbc7-36c5-9cc1-ba4bfb7cf48e\", \"version\": \"0.1.0\", \"python_path\": \"d3m.primitives.tods.feature_analysis.statistical_maximum\", \"name\": \"Time Series Decompostional\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.2.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"67e7fcdf-d645-3417-9aa4-85cd369487d9\", \"version\": \"0.0.1\", \"python_path\": \"d3m.primitives.tods.detection_algorithm.pyod_ae\", \"name\": \"TODS.anomaly_detection_primitives.AutoEncoder\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.4.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"2530840a-07d4-3874-b7d8-9eb5e4ae2bf3\", \"version\": \"0.3.0\", \"python_path\": \"d3m.primitives.tods.data_processing.construct_predictions\", \"name\": \"Construct pipeline predictions output\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.5.produce\"}, \"reference\": {\"type\": \"CONTAINER\", \"data\": \"steps.1.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}], \"digest\": \"3f4eb364201fc5fc403cc66c847ce1597fe8b0a91d130bded4d21c2a6ef2eef2\"}\n" + ] + } + ], + "source": [ + "# Output to json\n", + "data = pipeline_description.to_json()\n", + "with open('autoencoder_pipeline.json', 'w') as f:\n", + " f.write(data)\n", + " print(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "this_path = os.path.dirname(os.path.abspath(\"__file__\"))\n", + "default_data_path = os.path.join(this_path, '../../datasets/anomaly/raw_data/yahoo_sub_5.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "_StoreAction(option_strings=['--pipeline_path'], dest='pipeline_path', nargs=None, const=None, default='/Users/wangyanghe/Desktop/Research/tods/examples/Demo Notebook/autoencoder_pipeline.json', type=None, choices=None, help='Input the path of the pre-built pipeline description', metavar=None)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "parser = argparse.ArgumentParser(description='Arguments for running predefined pipelin.')\n", + "parser.add_argument('--table_path', type=str, default=default_data_path,\n", + " help='Input the path of the input data table')\n", + "parser.add_argument('--target_index', type=int, default=6,\n", + " help='Index of the ground truth (for evaluation)')\n", + "parser.add_argument('--metric',type=str, default='F1_MACRO',\n", + " help='Evaluation Metric (F1, F1_MACRO)')\n", + "parser.add_argument('--pipeline_path', \n", + " default=os.path.join(this_path, 'autoencoder_pipeline.json'),\n", + " help='Input the path of the pre-built pipeline description')" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "args, unknown = parser.parse_known_args()\n", + "table_path = args.table_path \n", + "target_index = args.target_index # what column is the target\n", + "pipeline_path = args.pipeline_path\n", + "metric = args.metric # F1 on both label 0 and 1" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# Read data and generate dataset\n", + "df = pd.read_csv(table_path)\n", + "dataset = generate_dataset(df, target_index)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the default pipeline\n", + "pipeline = load_pipeline(pipeline_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_2\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_2 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_2 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_3 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_3 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_4 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_4 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_5 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_5 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_6 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_6 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_7 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] 
- 0s 4ms/step - loss: 1.8796 - val_loss: 1.4306\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.7280 - val_loss: 1.3324\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.6184 - val_loss: 1.2660\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.5448 - val_loss: 1.2157\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.4950 - val_loss: 1.1736\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.4282 - val_loss: 1.1391\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3967 - val_loss: 1.1090\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3643 - val_loss: 1.0819\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.3212 - val_loss: 1.0579\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.2965 - val_loss: 1.0358\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2677 - val_loss: 1.0152\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2449 - val_loss: 0.9960\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2246 - val_loss: 0.9778\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2096 - val_loss: 0.9606\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1837 - val_loss: 0.9444\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1703 - val_loss: 0.9288\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1430 - val_loss: 0.9140\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1249 - val_loss: 0.8997\n", + "Epoch 19/20\n", + "40/40 
[==============================] - 0s 2ms/step - loss: 1.1178 - val_loss: 0.8861\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0976 - val_loss: 0.8732\n", + "{'method_called': 'evaluate',\n", + " 'outputs': \"[{'outputs.0': d3mIndex anomaly\"\n", + " '0 0 1'\n", + " '1 1 0'\n", + " '2 2 0'\n", + " '3 3 1'\n", + " '4 4 0'\n", + " '... ... ...'\n", + " '1395 1395 1'\n", + " '1396 1396 0'\n", + " '1397 1397 1'\n", + " '1398 1398 1'\n", + " '1399 1399 1'\n", + " ''\n", + " \"[1400 rows x 2 columns]}, {'outputs.0': d3mIndex anomaly\"\n", + " '0 0 1'\n", + " '1 1 0'\n", + " '2 2 0'\n", + " '3 3 1'\n", + " '4 4 0'\n", + " '... ... ...'\n", + " '1395 1395 1'\n", + " '1396 1396 0'\n", + " '1397 1397 1'\n", + " '1398 1398 1'\n", + " '1399 1399 1'\n", + " ''\n", + " '[1400 rows x 2 columns]}]',\n", + " 'pipeline': '',\n", + " 'scores': ' metric value normalized randomSeed fold'\n", + " '0 F1_MACRO 0.509059 0.509059 0 0',\n", + " 'status': 'COMPLETED'}\n" + ] + } + ], + "source": [ + "# Run the pipeline\n", + "pipeline_result = evaluate_pipeline(dataset, pipeline, metric)\n", + "print(pipeline_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Searcher Example: Brute-Force Search" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "table_path = '../../datasets/anomaly/raw_data/yahoo_sub_5.csv'\n", + "target_index = 6 # column of the target label\n", + "time_limit = 30 # Search time budget in seconds" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "metric = 'F1_MACRO' # F1 on both label 0 and 1" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "# Read data and generate dataset and problem\n", + "df = pd.read_csv(table_path)\n", + "dataset = generate_dataset(df, target_index=target_index)\n", + "problem_description = 
generate_problem(dataset, metric)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "# Start backend\n", + "backend = SimpleRunner(random_seed=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "# Start search algorithm\n", + "search = BruteForceSearch(problem_description=problem_description,\n", + " backend=backend)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_3\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_8 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_7 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_9 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_8 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_10 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_9 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_11 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_10 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_12 
(Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_11 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_13 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.4187 - val_loss: 1.0009\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2895 - val_loss: 0.9167\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.2010 - val_loss: 0.8517\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1463 - val_loss: 0.7988\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0777 - val_loss: 0.7531\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0281 - val_loss: 0.7135\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9993 - val_loss: 0.6791\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9634 - val_loss: 0.6496\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9320 - val_loss: 0.6239\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8982 - val_loss: 0.6019\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8760 - val_loss: 0.5825\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8527 - val_loss: 0.5652\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8399 - val_loss: 0.5510\n", + "Epoch 14/20\n", + "40/40 
[==============================] - 0s 2ms/step - loss: 0.8218 - val_loss: 0.5378\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8096 - val_loss: 0.5263\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7945 - val_loss: 0.5162\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7836 - val_loss: 0.5069\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7713 - val_loss: 0.4988\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7561 - val_loss: 0.4908\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.7538 - val_loss: 0.4840\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in _search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n", + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_4\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_14 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_12 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_15 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_13 (Dropout) (None, 12) 0 \n", + 
"_________________________________________________________________\n", + "dense_16 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_14 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_17 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_15 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_18 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_16 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_19 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.4226 - val_loss: 1.0312\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3035 - val_loss: 0.9579\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2140 - val_loss: 0.9087\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1662 - val_loss: 0.8710\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1229 - val_loss: 0.8401\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0874 - val_loss: 0.8141\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0573 - val_loss: 0.7913\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0292 - val_loss: 0.7709\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 1ms/step - 
loss: 1.0038 - val_loss: 0.7525\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9837 - val_loss: 0.7353\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9654 - val_loss: 0.7190\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9444 - val_loss: 0.7040\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9280 - val_loss: 0.6898\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9117 - val_loss: 0.6762\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8950 - val_loss: 0.6634\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8789 - val_loss: 0.6515\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8663 - val_loss: 0.6400\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8541 - val_loss: 0.6290\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8419 - val_loss: 0.6187\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8293 - val_loss: 0.6088\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in _search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n", + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_5\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape 
Param # \n", + "=================================================================\n", + "dense_20 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_17 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_21 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_18 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_22 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_19 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_23 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_20 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_24 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_21 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_25 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.5037 - val_loss: 0.9548\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3846 - val_loss: 0.8790\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2879 - val_loss: 0.8227\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2069 - val_loss: 0.7754\n", + "Epoch 5/20\n", + "40/40 
[==============================] - 0s 1ms/step - loss: 1.1539 - val_loss: 0.7350\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1092 - val_loss: 0.6988\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0573 - val_loss: 0.6661\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0250 - val_loss: 0.6363\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9841 - val_loss: 0.6097\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9457 - val_loss: 0.5857\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9316 - val_loss: 0.5643\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9055 - val_loss: 0.5456\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8854 - val_loss: 0.5292\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8633 - val_loss: 0.5146\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8412 - val_loss: 0.5022\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8335 - val_loss: 0.4911\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8192 - val_loss: 0.4814\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8071 - val_loss: 0.4726\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.7888 - val_loss: 0.4646\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.7846 - val_loss: 0.4576\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in 
_search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n", + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_6\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_26 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_22 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_27 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_23 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_28 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_24 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_29 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_25 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_30 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_26 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_31 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + 
"None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 6ms/step - loss: 1.5385 - val_loss: 0.9377\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.4001 - val_loss: 0.8741\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3243 - val_loss: 0.8293\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2663 - val_loss: 0.7954\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2048 - val_loss: 0.7677\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1459 - val_loss: 0.7439\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1224 - val_loss: 0.7230\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0832 - val_loss: 0.7042\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0554 - val_loss: 0.6868\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0283 - val_loss: 0.6708\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0062 - val_loss: 0.6558\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9902 - val_loss: 0.6417\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9738 - val_loss: 0.6284\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9461 - val_loss: 0.6158\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9323 - val_loss: 0.6038\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9126 - val_loss: 0.5925\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9007 - val_loss: 0.5817\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 1ms/step - 
loss: 0.8846 - val_loss: 0.5715\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8657 - val_loss: 0.5617\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8551 - val_loss: 0.5524\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in _search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n", + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_7\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_32 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_27 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_33 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_28 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_34 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_29 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_35 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_30 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_36 (Dense) (None, 
1) 5 \n", + "_________________________________________________________________\n", + "dropout_31 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_37 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 5ms/step - loss: 1.4187 - val_loss: 1.0796\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.2828 - val_loss: 0.9882\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1966 - val_loss: 0.9252\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1363 - val_loss: 0.8790\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0851 - val_loss: 0.8430\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0490 - val_loss: 0.8141\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0228 - val_loss: 0.7893\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9927 - val_loss: 0.7679\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9690 - val_loss: 0.7490\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9507 - val_loss: 0.7316\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9298 - val_loss: 0.7158\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9132 - val_loss: 0.7011\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8952 - val_loss: 0.6873\n", + "Epoch 14/20\n", + "40/40 [==============================] 
- 0s 1ms/step - loss: 0.8818 - val_loss: 0.6743\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8682 - val_loss: 0.6620\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8537 - val_loss: 0.6504\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8404 - val_loss: 0.6394\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8283 - val_loss: 0.6289\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8162 - val_loss: 0.6190\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8059 - val_loss: 0.6095\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in _search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n", + "Not all provided hyper-parameters for the data preparation pipeline 79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_8\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_38 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_32 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_39 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_33 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + 
"dense_40 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_34 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_41 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_35 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_42 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_36 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_43 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.5237 - val_loss: 1.0177\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3755 - val_loss: 0.9350\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2795 - val_loss: 0.8795\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2211 - val_loss: 0.8374\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1686 - val_loss: 0.8039\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.1298 - val_loss: 0.7758\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0982 - val_loss: 0.7514\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0670 - val_loss: 0.7298\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0382 - val_loss: 0.7106\n", + "Epoch 10/20\n", + "40/40 
[==============================] - 0s 2ms/step - loss: 1.0145 - val_loss: 0.6931\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9928 - val_loss: 0.6770\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9743 - val_loss: 0.6621\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9573 - val_loss: 0.6483\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9370 - val_loss: 0.6353\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9207 - val_loss: 0.6231\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9041 - val_loss: 0.6116\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8930 - val_loss: 0.6007\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8765 - val_loss: 0.5904\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8633 - val_loss: 0.5806\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8528 - val_loss: 0.5713\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/wangyanghe/Desktop/Research/tods/tods/searcher/brute_force_search.py\", line 62, in _search\n", + " for error in pipeline_result.error:\n", + "TypeError: 'NoneType' object is not iterable\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_9\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_44 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_37 (Dropout) (None, 12) 0 \n", + 
"_________________________________________________________________\n", + "dense_45 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_38 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_46 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_39 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_47 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_40 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_48 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_41 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_49 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.5013 - val_loss: 1.5361\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.3749 - val_loss: 1.4108\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.2565 - val_loss: 1.3262\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1685 - val_loss: 1.2589\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1140 - val_loss: 1.2080\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0896 - val_loss: 1.1662\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 2ms/step 
- loss: 1.0621 - val_loss: 1.1308\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0299 - val_loss: 1.0962\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9957 - val_loss: 1.0679\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9738 - val_loss: 1.0435\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9496 - val_loss: 1.0196\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9224 - val_loss: 1.0000\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9094 - val_loss: 0.9790\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8959 - val_loss: 0.9610\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8703 - val_loss: 0.9438\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8584 - val_loss: 0.9280\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8485 - val_loss: 0.9134\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8315 - val_loss: 0.8994\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8147 - val_loss: 0.8818\n", + "Epoch 20/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8000 - val_loss: 0.8617\n" + ] + } + ], + "source": [ + "# Find the best pipeline\n", + "best_runtime, best_pipeline_result = search.search_fit(input_data=[dataset], time_limit=time_limit)\n", + "best_pipeline = best_runtime.pipeline\n", + "best_output = best_pipeline_result.output" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Not all provided hyper-parameters for the data preparation pipeline 
79ce71bd-db96-494b-a455-14f2e2ac5040 were used: ['method', 'number_of_folds', 'randomSeed', 'shuffle', 'stratified']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential_10\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_50 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_42 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_51 (Dense) (None, 12) 156 \n", + "_________________________________________________________________\n", + "dropout_43 (Dropout) (None, 12) 0 \n", + "_________________________________________________________________\n", + "dense_52 (Dense) (None, 1) 13 \n", + "_________________________________________________________________\n", + "dropout_44 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_53 (Dense) (None, 4) 8 \n", + "_________________________________________________________________\n", + "dropout_45 (Dropout) (None, 4) 0 \n", + "_________________________________________________________________\n", + "dense_54 (Dense) (None, 1) 5 \n", + "_________________________________________________________________\n", + "dropout_46 (Dropout) (None, 1) 0 \n", + "_________________________________________________________________\n", + "dense_55 (Dense) (None, 12) 24 \n", + "=================================================================\n", + "Total params: 362\n", + "Trainable params: 362\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Epoch 1/20\n", + "40/40 [==============================] - 0s 4ms/step - loss: 1.3728 - val_loss: 2.2095\n", + "Epoch 2/20\n", + "40/40 [==============================] - 0s 
1ms/step - loss: 1.2507 - val_loss: 2.0598\n", + "Epoch 3/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.1434 - val_loss: 1.9599\n", + "Epoch 4/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 1.0993 - val_loss: 1.8894\n", + "Epoch 5/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0486 - val_loss: 1.8368\n", + "Epoch 6/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 1.0115 - val_loss: 1.7958\n", + "Epoch 7/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9806 - val_loss: 1.7621\n", + "Epoch 8/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.9481 - val_loss: 1.7337\n", + "Epoch 9/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.9264 - val_loss: 1.6992\n", + "Epoch 10/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8929 - val_loss: 1.6732\n", + "Epoch 11/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8834 - val_loss: 1.6493\n", + "Epoch 12/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8608 - val_loss: 1.6288\n", + "Epoch 13/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.8382 - val_loss: 1.6080\n", + "Epoch 14/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8256 - val_loss: 1.5866\n", + "Epoch 15/20\n", + "40/40 [==============================] - 0s 1ms/step - loss: 0.8124 - val_loss: 1.5684\n", + "Epoch 16/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7965 - val_loss: 1.5524\n", + "Epoch 17/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7840 - val_loss: 1.5353\n", + "Epoch 18/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7678 - val_loss: 1.5211\n", + "Epoch 19/20\n", + "40/40 [==============================] - 0s 2ms/step - loss: 0.7594 - val_loss: 1.5052\n", + "Epoch 20/20\n", + "40/40 
[==============================] - 0s 2ms/step - loss: 0.7455 - val_loss: 1.4914\n" + ] + } + ], + "source": [ + "# Evaluate the best pipeline\n", + "best_scores = search.evaluate(best_pipeline).scores" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Search History:\n", + "----------------------------------------------------\n", + "Pipeline id: f6665410-4d1d-4695-9f00-5d5f457ef95d\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.708549 0.708549 0 0\n", + "----------------------------------------------------\n", + "Pipeline id: 34ae48fe-fb5c-4dbd-940b-00098300cb9f\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.616695 0.616695 0 0\n", + "----------------------------------------------------\n", + "Pipeline id: fc287cdb-2958-4117-8e20-ba7645caa23c\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.55474 0.55474 0 0\n", + "----------------------------------------------------\n", + "Pipeline id: e510c088-369b-4b04-8b25-a320b4a86530\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.531302 0.531302 0 0\n", + "----------------------------------------------------\n", + "Pipeline id: b42e188a-ea92-4dc0-b7d3-8983b0e659e9\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.509059 0.509059 0 0\n", + "----------------------------------------------------\n", + "Pipeline id: 5e641e81-9e0e-46f3-b487-c37ccd1b9573\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.483604 0.483604 0 0\n" + ] + } + ], + "source": [ + "print('Search History:')\n", + "for pipeline_result in search.history:\n", + " print('-' * 52)\n", + " print('Pipeline id:', pipeline_result.pipeline.id)\n", + " print(pipeline_result.scores)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best 
pipeline:\n", + "----------------------------------------------------\n", + "Pipeline id: f6665410-4d1d-4695-9f00-5d5f457ef95d\n", + "Pipeline json: {\"id\": \"f6665410-4d1d-4695-9f00-5d5f457ef95d\", \"schema\": \"https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json\", \"created\": \"2021-06-29T04:06:53.685353Z\", \"inputs\": [{\"name\": \"inputs\"}], \"outputs\": [{\"data\": \"steps.7.produce\", \"name\": \"output predictions\"}], \"steps\": [{\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"c78138d9-9377-31dc-aee8-83d9df049c60\", \"version\": \"0.3.0\", \"python_path\": \"d3m.primitives.tods.data_processing.dataset_to_dataframe\", \"name\": \"Extract a DataFrame from a Dataset\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"inputs.0\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"81235c29-aeb9-3828-911a-1b25319b6998\", \"version\": \"0.6.0\", \"python_path\": \"d3m.primitives.tods.data_processing.column_parser\", \"name\": \"Parses strings into their types\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.0.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"a996cd89-ddf0-367f-8e7f-8c013cbc2891\", \"version\": \"0.4.0\", \"python_path\": \"d3m.primitives.tods.data_processing.extract_columns_by_semantic_types\", \"name\": \"Extracts columns by semantic type\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.1.produce\"}}, \"outputs\": [{\"id\": \"produce\"}], \"hyperparams\": {\"semantic_types\": {\"type\": \"VALUE\", \"data\": [\"https://metadata.datadrivendiscovery.org/types/Attribute\"]}}}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"a996cd89-ddf0-367f-8e7f-8c013cbc2891\", \"version\": \"0.4.0\", \"python_path\": \"d3m.primitives.tods.data_processing.extract_columns_by_semantic_types\", \"name\": \"Extracts columns by semantic type\"}, \"arguments\": {\"inputs\": 
{\"type\": \"CONTAINER\", \"data\": \"steps.0.produce\"}}, \"outputs\": [{\"id\": \"produce\"}], \"hyperparams\": {\"semantic_types\": {\"type\": \"VALUE\", \"data\": [\"https://metadata.datadrivendiscovery.org/types/TrueTarget\"]}}}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"642de2e7-5590-3cab-9266-2a53c326c461\", \"version\": \"0.0.1\", \"python_path\": \"d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler\", \"name\": \"Axis_wise_scale\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.2.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"30bc7cec-2ccc-34bc-9df8-2095bf3b1ae2\", \"version\": \"0.1.0\", \"python_path\": \"d3m.primitives.tods.feature_analysis.statistical_mean\", \"name\": \"Time Series Decompostional\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.4.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"67e7fcdf-d645-3417-9aa4-85cd369487d9\", \"version\": \"0.0.1\", \"python_path\": \"d3m.primitives.tods.detection_algorithm.pyod_ae\", \"name\": \"TODS.anomaly_detection_primitives.AutoEncoder\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.5.produce\"}}, \"outputs\": [{\"id\": \"produce\"}], \"hyperparams\": {\"contamination\": {\"type\": \"VALUE\", \"data\": 0.01}}}, {\"type\": \"PRIMITIVE\", \"primitive\": {\"id\": \"2530840a-07d4-3874-b7d8-9eb5e4ae2bf3\", \"version\": \"0.3.0\", \"python_path\": \"d3m.primitives.tods.data_processing.construct_predictions\", \"name\": \"Construct pipeline predictions output\"}, \"arguments\": {\"inputs\": {\"type\": \"CONTAINER\", \"data\": \"steps.6.produce\"}, \"reference\": {\"type\": \"CONTAINER\", \"data\": \"steps.1.produce\"}}, \"outputs\": [{\"id\": \"produce\"}]}], \"digest\": \"af9c67055b7d50ecf3dce829af31051781b757f648d138836d827b6dfb699e8e\"}\n", + "Output:\n", + " d3mIndex anomaly\n", + "0 0 
0\n", + "1 1 0\n", + "2 2 0\n", + "3 3 0\n", + "4 4 0\n", + "... ... ...\n", + "1395 1395 0\n", + "1396 1396 0\n", + "1397 1397 1\n", + "1398 1398 1\n", + "1399 1399 0\n", + "\n", + "[1400 rows x 2 columns]\n", + "Scores:\n", + " metric value normalized randomSeed fold\n", + "0 F1_MACRO 0.708549 0.708549 0 0\n" + ] + } + ], + "source": [ + "print('Best pipeline:')\n", + "print('-' * 52)\n", + "print('Pipeline id:', best_pipeline.id)\n", + "print('Pipeline json:', best_pipeline.to_json())\n", + "print('Output:')\n", + "print(best_output)\n", + "print('Scores:')\n", + "print(best_scores)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}