From eb74379fd99f5aa59e0011ff7c2eafece1922c63 Mon Sep 17 00:00:00 2001
From: ChenXin <will131@foxmail.com>
Date: Fri, 28 Feb 2020 01:00:58 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=20tutorial=5F8?=
 =?UTF-8?q?=EF=BC=8C=E5=B9=B6=E6=B7=BB=E5=8A=A0=E4=BA=86=20ipynb?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../source/tutorials/tutorial_8_modules_models.rst |   69 +-
 tutorials/tutorial_8_modules_models.ipynb          | 1014 ++++++++++++++++++++
 2 files changed, 1037 insertions(+), 46 deletions(-)
 create mode 100644 tutorials/tutorial_8_modules_models.ipynb

diff --git a/docs/source/tutorials/tutorial_8_modules_models.rst b/docs/source/tutorials/tutorial_8_modules_models.rst
index 0b26e0bd..267841fb 100644
--- a/docs/source/tutorials/tutorial_8_modules_models.rst
+++ b/docs/source/tutorials/tutorial_8_modules_models.rst
@@ -11,39 +11,16 @@
 
 fastNLP 在 :mod:`~fastNLP.models` 模块中内置了如 :class:`~fastNLP.models.CNNText` 、
 :class:`~fastNLP.models.SeqLabeling` 等完整的模型，以供用户直接使用。
-以 :class:`~fastNLP.models.CNNText` 为例，我们看一个简单的文本分类的任务的实现过程。
-
-首先是数据读入和处理部分，这里的代码和 :doc:`快速入门 </user/quickstart>` 中一致。
-
-.. code-block:: python
-
-    from fastNLP.io import CSVLoader
-    from fastNLP import Vocabulary, CrossEntropyLoss, AccuracyMetric
-
-    loader = CSVLoader(headers=('raw_sentence', 'label'), sep='\t')
-    dataset = loader.load("./sample_data/tutorial_sample_dataset.csv")
-
-    dataset.apply(lambda x: x['raw_sentence'].lower(), new_field_name='sentence')
-    dataset.apply_field(lambda x: x.split(), field_name='sentence', new_field_name='words', is_input=True)
-    dataset.apply(lambda x: int(x['label']), new_field_name='target', is_target=True)
-
-    train_dev_data, test_data = dataset.split(0.1)
-    train_data, dev_data = train_dev_data.split(0.1)
-
-    vocab = Vocabulary(min_freq=2).from_dataset(train_data, field_name='words')
-    vocab.index_dataset(train_data, dev_data, test_data, field_name='words', new_field_name='words')
-
-然后我们从 :mod:`~fastNLP.models` 中导入 ``CNNText`` 模型，用它进行训练
+以文本分类的任务为例，我们从 models 中导入 :class:`~fastNLP.models.CNNText` 模型，用它进行训练。
 
 .. code-block:: python
 
-    from fastNLP.models import CNNText
-    from fastNLP import Trainer
+   from fastNLP.models import CNNText
 
-    model_cnn = CNNText((len(vocab),50), num_classes=5, padding=2, dropout=0.1)
+    model_cnn = CNNText((len(vocab),100), num_classes=2, dropout=0.1)
 
-    trainer = Trainer(model=model_cnn, train_data=train_data, dev_data=dev_data,
-                      loss=CrossEntropyLoss(), metrics=AccuracyMetric())
+    trainer = Trainer(train_data=train_data, dev_data=dev_data, metrics=metric,
+                      loss=loss, device=device, model=model_cnn)
     trainer.train()
 
 在 iPython 环境输入 `model_cnn` ，我们可以看到 ``model_cnn`` 的网络结构
@@ -52,18 +29,18 @@ fastNLP 在 :mod:`~fastNLP.models` 模块中内置了如 :class:`~fastNLP.models
 
     CNNText(
       (embed): Embedding(
-        169, 50
-        (dropout): Dropout(p=0.0)
+        (embed): Embedding(16292, 100)
+        (dropout): Dropout(p=0.0, inplace=False)
       )
       (conv_pool): ConvMaxpool(
         (convs): ModuleList(
-          (0): Conv1d(50, 3, kernel_size=(3,), stride=(1,), padding=(2,))
-          (1): Conv1d(50, 4, kernel_size=(4,), stride=(1,), padding=(2,))
-          (2): Conv1d(50, 5, kernel_size=(5,), stride=(1,), padding=(2,))
+          (0): Conv1d(100, 30, kernel_size=(1,), stride=(1,), bias=False)
+          (1): Conv1d(100, 40, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
+          (2): Conv1d(100, 50, kernel_size=(5,), stride=(1,), padding=(2,), bias=False)
         )
       )
-      (dropout): Dropout(p=0.1)
-      (fc): Linear(in_features=12, out_features=5, bias=True)
+      (dropout): Dropout(p=0.1, inplace=False)
+      (fc): Linear(in_features=120, out_features=2, bias=True)
     )
 
 FastNLP 中内置的 models 如下表所示，您可以点击具体的名称查看详细的 API：
@@ -131,10 +108,10 @@ FastNLP 完全支持使用 pyTorch 编写的模型，但与 pyTorch 中编写模
 .. parsed-literal::
 
     LSTMText(
-      (embedding): Embedding(169, 50)
-      (lstm): LSTM(50, 64, num_layers=2, dropout=0.5, bidirectional=True)
-      (fc): Linear(in_features=128, out_features=5, bias=True)
-      (dropout): Dropout(p=0.5)
+      (embedding): Embedding(16292, 100)
+      (lstm): LSTM(100, 64, num_layers=2, dropout=0.5, bidirectional=True)
+      (fc): Linear(in_features=128, out_features=2, bias=True)
+      (dropout): Dropout(p=0.5, inplace=False)
     )
 
 
@@ -148,7 +125,7 @@ FastNLP 完全支持使用 pyTorch 编写的模型，但与 pyTorch 中编写模
 
     from fastNLP.modules import Embedding, LSTM, MLP
 
-    class Model(nn.Module):
+    class MyText(nn.Module):
         def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.5):
             super().__init__()
 
@@ -166,18 +143,18 @@ FastNLP 完全支持使用 pyTorch 编写的模型，但与 pyTorch 中编写模
 
 .. parsed-literal::
 
-    Model(
+    MyText(
       (embedding): Embedding(
-        169, 50
-        (dropout): Dropout(p=0.0)
+        (embed): Embedding(16292, 100)
+        (dropout): Dropout(p=0.0, inplace=False)
       )
       (lstm): LSTM(
-        (lstm): LSTM(50, 64, num_layers=2, batch_first=True, bidirectional=True)
+        (lstm): LSTM(100, 64, num_layers=2, batch_first=True, bidirectional=True)
       )
       (mlp): MLP(
         (hiddens): ModuleList()
-        (output): Linear(in_features=128, out_features=5, bias=True)
-        (dropout): Dropout(p=0.5)
+        (output): Linear(in_features=128, out_features=2, bias=True)
+        (dropout): Dropout(p=0.5, inplace=False)
       )
     )
 
diff --git a/tutorials/tutorial_8_modules_models.ipynb b/tutorials/tutorial_8_modules_models.ipynb
new file mode 100644
index 00000000..2784cca1
--- /dev/null
+++ b/tutorials/tutorial_8_modules_models.ipynb
@@ -0,0 +1,1014 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 使用Modules和Models快速搭建自定义模型\n",
+    "\n",
+    "modules 和 models 用于构建 fastNLP 所需的神经网络模型，它可以和 torch.nn 中的模型一起使用。 下面我们会分三节介绍编写构建模型的具体方法。\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "我们首先准备好和上篇教程一样的基础实验代码"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fastNLP.io import SST2Pipe\n",
+    "from fastNLP import Trainer, CrossEntropyLoss, AccuracyMetric\n",
+    "import torch\n",
+    "\n",
+    "databundle = SST2Pipe().process_from_file()\n",
+    "vocab = databundle.get_vocab('words')\n",
+    "train_data = databundle.get_dataset('train')[:5000]\n",
+    "train_data, test_data = train_data.split(0.015)\n",
+    "dev_data = databundle.get_dataset('dev')\n",
+    "\n",
+    "loss = CrossEntropyLoss()\n",
+    "metric = AccuracyMetric()\n",
+    "device = 0 if torch.cuda.is_available() else 'cpu'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 使用 models 中的模型\n",
+    "\n",
+    "fastNLP 在 models 模块中内置了如 CNNText 、 SeqLabeling 等完整的模型，以供用户直接使用。 以文本分类的任务为例，我们从 models 中导入 CNNText 模型，用它进行训练。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "input fields after batch(if batch size is 2):\n",
+      "\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 41]) \n",
+      "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
+      "target fields after batch(if batch size is 2):\n",
+      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
+      "\n",
+      "training epochs started 2020-02-28-00-56-04\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=1540.0), HTML(value='')), layout=Layout(d…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.22 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 1/10. Step:154/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.760321\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.29 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 2/10. Step:308/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.727064\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.48 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 3/10. Step:462/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.758028\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.24 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 4/10. Step:616/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.759174\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.47 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 5/10. Step:770/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.743119\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.22 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 6/10. Step:924/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.756881\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.21 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 7/10. Step:1078/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.752294\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.21 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 8/10. Step:1232/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.756881\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.15 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 9/10. Step:1386/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.75344\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.12 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 10/10. Step:1540/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.752294\n",
+      "\n",
+      "\r\n",
+      "In Epoch:1/Step:154, got best dev performance:\n",
+      "AccuracyMetric: acc=0.760321\n",
+      "Reloaded the best model.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'best_eval': {'AccuracyMetric': {'acc': 0.760321}},\n",
+       " 'best_epoch': 1,\n",
+       " 'best_step': 154,\n",
+       " 'seconds': 29.3}"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from fastNLP.models import CNNText\n",
+    "\n",
+    "model_cnn = CNNText((len(vocab),100), num_classes=2, dropout=0.1)\n",
+    "\n",
+    "trainer = Trainer(train_data=train_data, dev_data=dev_data, metrics=metric,\n",
+    "                  loss=loss, device=device, model=model_cnn)\n",
+    "trainer.train()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "在 iPython 环境输入 model_cnn ，我们可以看到 model_cnn 的网络结构"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "CNNText(\n",
+       "  (embed): Embedding(\n",
+       "    (embed): Embedding(16292, 100)\n",
+       "    (dropout): Dropout(p=0.0, inplace=False)\n",
+       "  )\n",
+       "  (conv_pool): ConvMaxpool(\n",
+       "    (convs): ModuleList(\n",
+       "      (0): Conv1d(100, 30, kernel_size=(1,), stride=(1,), bias=False)\n",
+       "      (1): Conv1d(100, 40, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)\n",
+       "      (2): Conv1d(100, 50, kernel_size=(5,), stride=(1,), padding=(2,), bias=False)\n",
+       "    )\n",
+       "  )\n",
+       "  (dropout): Dropout(p=0.1, inplace=False)\n",
+       "  (fc): Linear(in_features=120, out_features=2, bias=True)\n",
+       ")"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model_cnn"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 使用 nn.torch 编写模型\n",
+    "\n",
+    "FastNLP 完全支持使用 pyTorch 编写的模型，但与 pyTorch 中编写模型的常见方法不同， 用于 fastNLP 的模型中 forward 函数需要返回一个字典，字典中至少需要包含 pred 这个字段。\n",
+    "\n",
+    "下面是使用 pyTorch 中的 torch.nn 模块编写的文本分类，注意观察代码中标注的向量维度。 由于 pyTorch 使用了约定俗成的维度设置，使得 forward 中需要多次处理维度顺序"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "\n",
+    "class LSTMText(nn.Module):\n",
+    "    def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.5):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.embedding = nn.Embedding(vocab_size, embedding_dim)\n",
+    "        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=True, dropout=dropout)\n",
+    "        self.fc = nn.Linear(hidden_dim * 2, output_dim)\n",
+    "        self.dropout = nn.Dropout(dropout)\n",
+    "\n",
+    "    def forward(self, words):\n",
+    "        # (input) words : (batch_size, seq_len)\n",
+    "        words = words.permute(1,0)\n",
+    "        # words : (seq_len, batch_size)\n",
+    "\n",
+    "        embedded = self.dropout(self.embedding(words))\n",
+    "        # embedded : (seq_len, batch_size, embedding_dim)\n",
+    "        output, (hidden, cell) = self.lstm(embedded)\n",
+    "        # output: (seq_len, batch_size, hidden_dim * 2)\n",
+    "        # hidden: (num_layers * 2, batch_size, hidden_dim)\n",
+    "        # cell: (num_layers * 2, batch_size, hidden_dim)\n",
+    "\n",
+    "        hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)\n",
+    "        hidden = self.dropout(hidden)\n",
+    "        # hidden: (batch_size, hidden_dim * 2)\n",
+    "\n",
+    "        pred = self.fc(hidden.squeeze(0))\n",
+    "        # result: (batch_size, output_dim)\n",
+    "        return {\"pred\":pred}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "我们同样可以在 iPython 环境中查看这个模型的网络结构"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LSTMText(\n",
+       "  (embedding): Embedding(16292, 100)\n",
+       "  (lstm): LSTM(100, 64, num_layers=2, dropout=0.5, bidirectional=True)\n",
+       "  (fc): Linear(in_features=128, out_features=2, bias=True)\n",
+       "  (dropout): Dropout(p=0.5, inplace=False)\n",
+       ")"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model_lstm = LSTMText(len(vocab), 100, 2)\n",
+    "model_lstm                      "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "input fields after batch(if batch size is 2):\n",
+      "\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 41]) \n",
+      "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
+      "target fields after batch(if batch size is 2):\n",
+      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
+      "\n",
+      "training epochs started 2020-02-28-00-56-34\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=1540.0), HTML(value='')), layout=Layout(d…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.36 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 1/10. Step:154/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.59289\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.35 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 2/10. Step:308/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.674312\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.21 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 3/10. Step:462/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.724771\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.4 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 4/10. Step:616/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.748853\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.24 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 5/10. Step:770/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.756881\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.29 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 6/10. Step:924/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.741972\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.32 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 7/10. Step:1078/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.754587\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.24 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 8/10. Step:1232/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.756881\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.28 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 9/10. Step:1386/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.740826\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.23 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 10/10. Step:1540/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.751147\n",
+      "\n",
+      "\r\n",
+      "In Epoch:5/Step:770, got best dev performance:\n",
+      "AccuracyMetric: acc=0.756881\n",
+      "Reloaded the best model.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'best_eval': {'AccuracyMetric': {'acc': 0.756881}},\n",
+       " 'best_epoch': 5,\n",
+       " 'best_step': 770,\n",
+       " 'seconds': 45.69}"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "trainer = Trainer(train_data=train_data, dev_data=dev_data, metrics=metric,\n",
+    "                  loss=loss, device=device, model=model_lstm)\n",
+    "trainer.train()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 使用 modules 编写模型\n",
+    "\n",
+    "下面我们使用 fastNLP.modules 中的组件来构建同样的网络。由于 fastNLP 统一把 batch_size 放在第一维， 在编写代码的过程中会有一定的便利。"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "MyText(\n",
+       "  (embedding): Embedding(\n",
+       "    (embed): Embedding(16292, 100)\n",
+       "    (dropout): Dropout(p=0.0, inplace=False)\n",
+       "  )\n",
+       "  (lstm): LSTM(\n",
+       "    (lstm): LSTM(100, 64, num_layers=2, batch_first=True, bidirectional=True)\n",
+       "  )\n",
+       "  (mlp): MLP(\n",
+       "    (hiddens): ModuleList()\n",
+       "    (output): Linear(in_features=128, out_features=2, bias=True)\n",
+       "    (dropout): Dropout(p=0.5, inplace=False)\n",
+       "  )\n",
+       ")"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from fastNLP.modules import LSTM, MLP\n",
+    "from fastNLP.embeddings import Embedding\n",
+    "\n",
+    "\n",
+    "class MyText(nn.Module):\n",
+    "    def __init__(self, vocab_size, embedding_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.5):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.embedding = Embedding((vocab_size, embedding_dim))\n",
+    "        self.lstm = LSTM(embedding_dim, hidden_dim, num_layers=num_layers, bidirectional=True)\n",
+    "        self.mlp = MLP([hidden_dim*2,output_dim], dropout=dropout)\n",
+    "\n",
+    "    def forward(self, words):\n",
+    "        embedded = self.embedding(words)\n",
+    "        _,(hidden,_) = self.lstm(embedded)\n",
+    "        pred = self.mlp(torch.cat((hidden[-1],hidden[-2]),dim=1))\n",
+    "        return {\"pred\":pred}\n",
+    "    \n",
+    "model_text = MyText(len(vocab), 100, 2)\n",
+    "model_text"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "input fields after batch(if batch size is 2):\n",
+      "\twords: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2, 41]) \n",
+      "\tseq_len: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
+      "target fields after batch(if batch size is 2):\n",
+      "\ttarget: (1)type:torch.Tensor (2)dtype:torch.int64, (3)shape:torch.Size([2]) \n",
+      "\n",
+      "training epochs started 2020-02-28-00-57-19\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "16a35f2b0ef0457dae15c5f240a19a3a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=1540.0), HTML(value='')), layout=Layout(d…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.38 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 1/10. Step:154/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.767202\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=28.0), HTML(value='')), layout=Layout(dis…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Evaluate data in 0.22 seconds!\n",
+      "\r",
+      "Evaluation on dev at Epoch 2/10. Step:308/1540: \n",
+      "\r",
+      "AccuracyMetric: acc=0.743119\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "trainer = Trainer(train_data=train_data, dev_data=dev_data, metrics=metric,\n",
+    "                  loss=loss, device=device, model=model_lstm)\n",
+    "trainer.train()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python Now",
+   "language": "python",
+   "name": "now"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}