From 24fe2569173973caacfaba9d97f4fde11f739be6 Mon Sep 17 00:00:00 2001
From: ChenXin
Date: Thu, 27 Feb 2020 15:22:42 +0800
Subject: [PATCH] Fix the spelling errors in tutorial_3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docs/source/tutorials/tutorial_3_embedding.rst     |   4 +-
 .../source/tutorials/tutorial_5_loss_optimizer.rst |   7 +-
 tutorials/tutorial_3_embedding.ipynb               | 524 +++++++++++++++++++++
 3 files changed, 529 insertions(+), 6 deletions(-)
 create mode 100644 tutorials/tutorial_3_embedding.ipynb

diff --git a/docs/source/tutorials/tutorial_3_embedding.rst b/docs/source/tutorials/tutorial_3_embedding.rst
index 95c0105e..b719041e 100644
--- a/docs/source/tutorials/tutorial_3_embedding.rst
+++ b/docs/source/tutorials/tutorial_3_embedding.rst
@@ -254,14 +254,14 @@ CNNCharEmbedding的使用例子如下:
 
 .. code-block:: python
 
-    from fastNLP.embeddings import LSTMCharEmbeddding
+    from fastNLP.embeddings import LSTMCharEmbedding
     from fastNLP import Vocabulary
 
     vocab = Vocabulary()
     vocab.add_word_lst("this is a demo .".split())
 
     # character的embedding维度大小为50,返回的embedding结果维度大小为64。
-    embed = LSTMCharEmbeddding(vocab, embed_size=64, char_emb_size=50)
+    embed = LSTMCharEmbedding(vocab, embed_size=64, char_emb_size=50)
     words = torch.LongTensor([[vocab.to_index(word) for word in "this is a demo .".split()]])
     print(embed(words).size())
 
diff --git a/docs/source/tutorials/tutorial_5_loss_optimizer.rst b/docs/source/tutorials/tutorial_5_loss_optimizer.rst
index 5874f66d..77d67e07 100644
--- a/docs/source/tutorials/tutorial_5_loss_optimizer.rst
+++ b/docs/source/tutorials/tutorial_5_loss_optimizer.rst
@@ -9,8 +9,8 @@
 数据读入
     我们可以使用 fastNLP :mod:`fastNLP.io` 模块中的 :class:`~fastNLP.io.SST2Pipe` 类,轻松地读取以及预处理SST2数据集。:class:`~fastNLP.io.SST2Pipe` 对象的
-    :meth:`~fastNLP.io.SST2Pipe.process_from_file` 方法能够对读入的SST2数据集进行数据的预处理,方法的参数为paths, 指要处理的文件所在目录,如果paths为None,则会自动下载数 据集,函数默认paths值为None。
-    此函数返回一个 :class:`~fastNLP.io.DataBundle`,包含SST2数据集的训练集、测试集、验证集以及source端和target端的字典。其训练、测试、验证数据集含有四个 :mod:`~fastNLP.core.field` :
+    :meth:`~fastNLP.io.SST2Pipe.process_from_file` 方法能够对读入的SST2数据集进行数据的预处理,方法的参数为paths, 指要处理的文件所在目录,如果paths为None,则会自动下载数据集,函数默认paths值为None。
+    此函数返回一个 :class:`~fastNLP.io.DataBundle`,包含SST2数据集的训练集、测试集、验证集以及source端和target端的字典。其训练、测试、验证数据集含有四个 :mod:`~fastNLP.core.field` :
 
     * raw_words: 原source句子
     * target: 标签值
@@ -69,8 +69,7 @@
 数据集 :meth:`~fastNLP.DataSet.set_input` 和 :meth:`~fastNLP.DataSet.set_target` 函数
     :class:`~fastNLP.io.SST2Pipe` 类的 :meth:`~fastNLP.io.SST2Pipe.process_from_file` 方法在预处理过程中还将训练、测试、验证
     集的 `words` 、`seq_len` :mod:`~fastNLP.core.field` 设定为input,同时将 `target` :mod:`~fastNLP.core.field` 设定
-    为target。我们可以通过 :class:`~fastNLP.core.Dataset` 类的 :meth:`~fastNLP.core.Dataset.print_field_meta` 方法查看各个
-    :mod:`~fastNLP.core.field` 的设定情况,代码如下:
+    为target。我们可以通过 :class:`~fastNLP.core.Dataset` 类的 :meth:`~fastNLP.core.Dataset.print_field_meta` 方法查看各个 :mod:`~fastNLP.core.field` 的设定情况,代码如下:
 
     .. code-block:: python
 
diff --git a/tutorials/tutorial_3_embedding.ipynb b/tutorials/tutorial_3_embedding.ipynb
new file mode 100644
index 00000000..154a0756
--- /dev/null
+++ b/tutorials/tutorial_3_embedding.ipynb
@@ -0,0 +1,524 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 5 out of 7 words in the pre-training embedding.\n",
+      "torch.Size([1, 5, 50])\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "from fastNLP.embeddings import StaticEmbedding\n",
+    "from fastNLP import Vocabulary\n",
+    "\n",
+    "vocab = Vocabulary()\n",
+    "vocab.add_word_lst(\"this is a demo .\".split())\n",
+    "\n",
+    "embed = StaticEmbedding(vocab, model_dir_or_name='en-glove-6b-50d')\n",
+    "\n",
+    "words = torch.LongTensor([[vocab.to_index(word) for word in \"this is a demo .\".split()]]) # convert the text to indices\n",
+    "print(embed(words).size()) # StaticEmbedding is used much like pytorch's nn.Embedding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([1, 5, 30])\n"
+     ]
+    }
+   ],
+   "source": [
+    "from fastNLP.embeddings import StaticEmbedding\n",
+    "from fastNLP import Vocabulary\n",
+    "\n",
+    "vocab = Vocabulary()\n",
+    "vocab.add_word_lst(\"this is a demo .\".split())\n",
+    "\n",
+    "embed = StaticEmbedding(vocab, model_dir_or_name=None, embedding_dim=30)\n",
+    "\n",
+    "words = torch.LongTensor([[vocab.to_index(word) for word in \"this is a demo .\".split()]])\n",
+    "print(embed(words).size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "22 out of 22 characters were found in pretrained elmo embedding.\n",
+      "torch.Size([1, 5, 256])\n"
+     ]
+    }
+   ],
+   "source": [
+    "from fastNLP.embeddings import ElmoEmbedding\n",
+    "from fastNLP import Vocabulary\n",
+    "\n",
+    "vocab = Vocabulary()\n",
+    "vocab.add_word_lst(\"this is a demo .\".split())\n",
+    "\n",
+    "embed = ElmoEmbedding(vocab, model_dir_or_name='en-small', requires_grad=False)\n",
+    "words = torch.LongTensor([[vocab.to_index(word) for word in \"this is a demo .\".split()]])\n",
+    "print(embed(words).size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "22 out of 22 characters were found in pretrained elmo embedding.\n",
+      "torch.Size([1, 5, 512])\n"
+     ]
+    }
+   ],
+   "source": [
+    "embed = ElmoEmbedding(vocab, model_dir_or_name='en-small', requires_grad=False, layers='1,2')\n",
+    "print(embed(words).size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "22 out of 22 characters were found in pretrained elmo embedding.\n",
+      "torch.Size([1, 5, 256])\n"
+     ]
+    }
+   ],
+   "source": [
+    "embed = ElmoEmbedding(vocab, model_dir_or_name='en-small', requires_grad=True, layers='mix')\n",
+    "print(embed(words).size()) # the three layers' outputs are weighted and summed element-wise"
+   ]
+  },
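+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A quick sketch (not executed here): with layers='mix' the weighted sum is\n",
+    "# taken over the layers, so the output dimension stays that of a single\n",
+    "# layer, and requires_grad=True leaves the ELMo weights, including the\n",
+    "# mixing scalars, trainable.\n",
+    "print(embed.embedding_dim) # 256, same as a single layer\n",
+    "print(any(p.requires_grad for p in embed.parameters())) # True"
+   ]
+  },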
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loading vocabulary file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/vocab.txt\n",
+      "Load pre-trained BERT parameters from file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/pytorch_model.bin.\n",
+      "Start to generate word pieces for word.\n",
+      "Found(Or segment into word pieces) 7 words out of 7.\n",
+      "torch.Size([1, 5, 768])\n"
+     ]
+    }
+   ],
+   "source": [
+    "from fastNLP.embeddings import BertEmbedding\n",
+    "from fastNLP import Vocabulary\n",
+    "\n",
+    "vocab = Vocabulary()\n",
+    "vocab.add_word_lst(\"this is a demo .\".split())\n",
+    "\n",
+    "embed = BertEmbedding(vocab, model_dir_or_name='en-base-cased')\n",
+    "words = torch.LongTensor([[vocab.to_index(word) for word in \"this is a demo .\".split()]])\n",
+    "print(embed(words).size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loading vocabulary file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/vocab.txt\n",
+      "Load pre-trained BERT parameters from file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/pytorch_model.bin.\n",
+      "Start to generate word pieces for word.\n",
+      "Found(Or segment into word pieces) 7 words out of 7.\n",
+      "torch.Size([1, 5, 1536])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# use the outputs of the last two layers\n",
+    "embed = BertEmbedding(vocab, model_dir_or_name='en-base-cased', layers='10,11')\n",
+    "print(embed(words).size()) # the layers are concatenated along the last dimension"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loading vocabulary file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/vocab.txt\n",
+      "Load pre-trained BERT parameters from file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/pytorch_model.bin.\n",
+      "Start to generate word pieces for word.\n",
+      "Found(Or segment into word pieces) 7 words out of 7.\n",
+      "torch.Size([1, 7, 768])\n"
+     ]
+    }
+   ],
+   "source": [
+    "embed = BertEmbedding(vocab, model_dir_or_name='en-base-cased', layers='-1', include_cls_sep=True)\n",
+    "print(embed(words).size()) # the sequence dimension grows by 2\n",
+    "# take the sentence's [CLS] representation\n",
+    "cls_reps = embed(words)[:, 0] # shape: [batch_size, 768]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loading vocabulary file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/vocab.txt\n",
+      "Load pre-trained BERT parameters from file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/pytorch_model.bin.\n",
+      "Start to generate word pieces for word.\n",
+      "Found(Or segment into word pieces) 7 words out of 7.\n",
+      "torch.Size([1, 5, 768])\n"
+     ]
+    }
+   ],
+   "source": [
+    "embed = BertEmbedding(vocab, model_dir_or_name='en-base-cased', layers='-1', pool_method='max')\n",
+    "print(embed(words).size())"
+   ]
+  },
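+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A quick sketch (not executed here): pool_method controls how a word's\n",
+    "# word pieces are pooled back to word level. Besides 'max', the fastNLP\n",
+    "# docstring also lists 'first' (the default), 'last' and 'avg'; the output\n",
+    "# size is unchanged either way.\n",
+    "embed = BertEmbedding(vocab, model_dir_or_name='en-base-cased', layers='-1', pool_method='avg')\n",
+    "print(embed(words).size()) # torch.Size([1, 5, 768])"
+   ]
+  },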
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "loading vocabulary file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/vocab.txt\n",
+      "Load pre-trained BERT parameters from file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/pytorch_model.bin.\n",
+      "Start to generate word pieces for word.\n",
+      "Found(Or segment into word pieces) 10 words out of 10.\n",
+      "torch.Size([1, 9, 768])\n"
+     ]
+    }
+   ],
+   "source": [
+    "vocab = Vocabulary()\n",
+    "vocab.add_word_lst(\"this is a demo . [SEP] another sentence .\".split())\n",
+    "\n",
+    "embed = BertEmbedding(vocab, model_dir_or_name='en-base-cased', layers='-1', pool_method='max')\n",
+    "words = torch.LongTensor([[vocab.to_index(word) for word in \"this is a demo . [SEP] another sentence .\".split()]])\n",
+    "print(embed(words).size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Start constructing character vocabulary.\n",
+      "In total, there are 8 distinct characters.\n",
+      "torch.Size([1, 5, 64])\n"
+     ]
+    }
+   ],
+   "source": [
+    "from fastNLP.embeddings import CNNCharEmbedding\n",
+    "from fastNLP import Vocabulary\n",
+    "\n",
+    "vocab = Vocabulary()\n",
+    "vocab.add_word_lst(\"this is a demo .\".split())\n",
+    "\n",
+    "# the character embeddings have size 50; the returned word embeddings have size 64.\n",
+    "embed = CNNCharEmbedding(vocab, embed_size=64, char_emb_size=50)\n",
+    "words = torch.LongTensor([[vocab.to_index(word) for word in \"this is a demo .\".split()]])\n",
+    "print(embed(words).size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Start constructing character vocabulary.\n",
+      "In total, there are 8 distinct characters.\n",
+      "torch.Size([1, 5, 64])\n"
+     ]
+    }
+   ],
+   "source": [
+    "from fastNLP.embeddings import LSTMCharEmbedding\n",
+    "from fastNLP import Vocabulary\n",
+    "\n",
+    "vocab = Vocabulary()\n",
+    "vocab.add_word_lst(\"this is a demo .\".split())\n",
+    "\n",
+    "# the character embeddings have size 50; the returned word embeddings have size 64.\n",
+    "embed = LSTMCharEmbedding(vocab, embed_size=64, char_emb_size=50)\n",
+    "words = torch.LongTensor([[vocab.to_index(word) for word in \"this is a demo .\".split()]])\n",
+    "print(embed(words).size())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 5 out of 7 words in the pre-training embedding.\n",
+      "50\n",
+      "Start constructing character vocabulary.\n",
+      "In total, there are 8 distinct characters.\n",
+      "30\n",
+      "22 out of 22 characters were found in pretrained elmo embedding.\n",
+      "256\n",
+      "22 out of 22 characters were found in pretrained elmo embedding.\n",
+      "512\n",
+      "loading vocabulary file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/vocab.txt\n",
+      "Load pre-trained BERT parameters from file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/pytorch_model.bin.\n",
+      "Start to generate word pieces for word.\n",
+      "Found(Or segment into word pieces) 7 words out of 7.\n",
+      "768\n",
+      "loading vocabulary file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/vocab.txt\n",
+      "Load pre-trained BERT parameters from file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/pytorch_model.bin.\n",
+      "Start to generate word pieces for word.\n",
+      "Found(Or segment into word pieces) 7 words out of 7.\n",
+      "1536\n",
+      "80\n"
+     ]
+    }
+   ],
+   "source": [
+    "from fastNLP.embeddings import *\n",
+    "\n",
+    "vocab = Vocabulary()\n",
+    "vocab.add_word_lst(\"this is a demo .\".split())\n",
+    "\n",
+    "static_embed = StaticEmbedding(vocab, model_dir_or_name='en-glove-6b-50d')\n",
+    "print(static_embed.embedding_dim) # 50\n",
+    "char_embed = CNNCharEmbedding(vocab, embed_size=30)\n",
+    "print(char_embed.embedding_dim) # 30\n",
+    "elmo_embed_1 = ElmoEmbedding(vocab, model_dir_or_name='en-small', layers='2')\n",
+    "print(elmo_embed_1.embedding_dim) # 256\n",
+    "elmo_embed_2 = ElmoEmbedding(vocab, model_dir_or_name='en-small', layers='1,2')\n",
+    "print(elmo_embed_2.embedding_dim) # 512\n",
+    "bert_embed_1 = BertEmbedding(vocab, layers='-1', model_dir_or_name='en-base-cased')\n",
+    "print(bert_embed_1.embedding_dim) # 768\n",
+    "bert_embed_2 = BertEmbedding(vocab, layers='2,-1', model_dir_or_name='en-base-cased')\n",
+    "print(bert_embed_2.embedding_dim) # 1536\n",
+    "stack_embed = StackEmbedding([static_embed, char_embed])\n",
+    "print(stack_embed.embedding_dim) # 80"
+   ]
+  },
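+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A quick sketch (not executed here): StackEmbedding concatenates its\n",
+    "# component embeddings along the last dimension, so the 50-dim static\n",
+    "# embedding and the 30-dim character embedding give 80-dim word vectors.\n",
+    "words = torch.LongTensor([[vocab.to_index(word) for word in \"this is a demo .\".split()]])\n",
+    "print(stack_embed(words).size()) # torch.Size([1, 5, 80])"
+   ]
+  },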
"print(bert_embed_1.embedding_dim) # 768\n", + "bert_embed_2 = BertEmbedding(vocab, layers='2,-1', model_dir_or_name='en-base-cased')\n", + "print(bert_embed_2.embedding_dim) # 1536\n", + "stack_embed = StackEmbedding([static_embed, char_embed])\n", + "print(stack_embed.embedding_dim) # 80" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "loading vocabulary file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/vocab.txt\n", + "Load pre-trained BERT parameters from file /remote-home/ynzheng/.fastNLP/embedding/bert-base-cased/pytorch_model.bin.\n", + "Start to generate word pieces for word.\n", + "Found(Or segment into word pieces) 7 words out of 7.\n" + ] + } + ], + "source": [ + "from fastNLP.embeddings import *\n", + "\n", + "vocab = Vocabulary()\n", + "vocab.add_word_lst(\"this is a demo .\".split())\n", + "\n", + "embed = BertEmbedding(vocab, model_dir_or_name='en-base-cased', requires_grad=True) # 初始化时设定为需要更新\n", + "embed.requires_grad = False # 修改BertEmbedding的权重为不更新" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[ 0.3633, -0.2091, -0.0353, -0.3771, -0.5193]],\n", + " grad_fn=)\n", + "tensor([[ 0.0926, -0.4812, -0.7744, 0.4836, -0.5475]],\n", + " grad_fn=)\n" + ] + } + ], + "source": [ + "from fastNLP.embeddings import StaticEmbedding\n", + "from fastNLP import Vocabulary\n", + "\n", + "vocab = Vocabulary().add_word_lst(\"The the a A\".split())\n", + "# 下面用随机的StaticEmbedding演示,但与使用预训练词向量时效果是一致的\n", + "embed = StaticEmbedding(vocab, model_name_or_dir=None, embedding_dim=5)\n", + "print(embed(torch.LongTensor([vocab.to_index('The')])))\n", + "print(embed(torch.LongTensor([vocab.to_index('the')])))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All word in the vocab have been lowered. 
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}