From 43300ad90bc3cf4eb4ac0eb06216e951c0285806 Mon Sep 17 00:00:00 2001
From: yh_cc
Date: Sun, 7 Jun 2020 20:35:09 +0800
Subject: [PATCH] Update some comments; prepare for 0.5.6 development
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fastNLP/__init__.py             |  2 +-
 fastNLP/models/bert.py          |  7 +++++++
 fastNLP/modules/encoder/bert.py | 14 +++++++++++++-
 setup.py                        |  2 +-
 4 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/fastNLP/__init__.py b/fastNLP/__init__.py
index 4436e565..46d83a9d 100644
--- a/fastNLP/__init__.py
+++ b/fastNLP/__init__.py
@@ -81,7 +81,7 @@ __all__ = [
     'logger',
     "init_logger_dist",
 ]
-__version__ = '0.5.5'
+__version__ = '0.5.6'
 
 import sys
 
diff --git a/fastNLP/models/bert.py b/fastNLP/models/bert.py
index 976b4638..88dd021c 100644
--- a/fastNLP/models/bert.py
+++ b/fastNLP/models/bert.py
@@ -70,6 +70,7 @@ class BertForSequenceClassification(BaseModel):
 
     def forward(self, words):
         r"""
+        The input is [[w1, w2, w3, ...], [...]]; BertEmbedding additionally adds [CLS] at the beginning and [SEP] at the end
         :param torch.LongTensor words: [batch_size, seq_len]
         :return: { :attr:`fastNLP.Const.OUTPUT` : logits}: torch.Tensor [batch_size, num_labels]
         """
@@ -115,6 +116,8 @@ class BertForSentenceMatching(BaseModel):
 
     def forward(self, words):
         r"""
+        The input words take the form [sent1] + [SEP] + [sent2] (BertEmbedding adds [CLS] at the beginning and [SEP] at the end); the output is batch_size x num_labels
+
         :param torch.LongTensor words: [batch_size, seq_len]
         :return: { :attr:`fastNLP.Const.OUTPUT` : logits}: torch.Tensor [batch_size, num_labels]
         """
@@ -247,6 +250,10 @@ class BertForQuestionAnswering(BaseModel):
 
     def forward(self, words):
         r"""
+        The input words are question + [SEP] + [paragraph]; BertEmbedding will later add [CLS] at the beginning and [SEP] at the end.
+        Note: if include_cls_sep=True in BertEmbedding, the returned start and end indices are shifted one position
+        relative to the input words; if include_cls_sep=False, they align exactly with the positions in the input words
+
         :param torch.LongTensor words: [batch_size, seq_len]
         :return: a dict containing num_labels logits, each of shape [batch_size, seq_len + 2]
         """
diff --git a/fastNLP/modules/encoder/bert.py b/fastNLP/modules/encoder/bert.py
index 4e8d644f..62c18d48 100644
--- a/fastNLP/modules/encoder/bert.py
+++ b/fastNLP/modules/encoder/bert.py
@@ -473,6 +473,17 @@ class BertModel(nn.Module):
             module.bias.data.zero_()
 
     def forward(self, input_ids, token_type_ids=None, attention_mask=None, output_all_encoded_layers=True):
+        """
+
+        :param torch.LongTensor input_ids: input ids of shape bsz x max_len
+        :param torch.LongTensor token_type_ids: bsz x max_len; treated as all zeros if omitted. Typically 0 up to and including the first [SEP], 1 after it
+        :param attention_mask: 1 for positions that should be attended to, 0 for positions that should not
+        :param bool output_all_encoded_layers: whether to return all layers. By default the token embeddings (the sum of BPE, position
+            and type embeddings) and every layer's hidden states are returned; if False, only the last layer's result is returned
+        :return: encoded_layers: if output_all_encoded_layers is True, a list of num_layers + 1 elements, each of shape
+            bsz x max_len x hidden_size; otherwise a single bsz x max_len x hidden_size tensor;
+            pooled_output: bsz x hidden_size, the representation of [CLS], usable for sentence classification
+        """
         if attention_mask is None:
             attention_mask = torch.ones_like(input_ids)
         if token_type_ids is None:
@@ -504,7 +515,8 @@ class BertModel(nn.Module):
         pooled_output = sequence_output[:, 0]
         if not output_all_encoded_layers:
             encoded_layers = encoded_layers[-1]
-        encoded_layers.insert(0, embedding_output)
+        else:
+            encoded_layers.insert(0, embedding_output)
         return encoded_layers, pooled_output
 
     @classmethod
diff --git a/setup.py b/setup.py
index f10ec93d..150e8e22 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@ print(pkgs)
 
 setup(
     name='FastNLP',
-    version='0.5.0',
+    version='0.5.6',
     url='https://github.com/fastnlp/fastNLP',
     description='fastNLP: Deep Learning Toolkit for NLP, developed by Fudan FastNLP Team',
     long_description=readme,
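
Reviewer note: the guard added around encoded_layers.insert(0, embedding_output) changes what BertModel.forward returns when output_all_encoded_layers=False. A minimal sketch of the two call modes follows; it is not part of the patch, `model` is assumed to be an already-constructed fastNLP BertModel, and the vocabulary size and sequence lengths are made up:

    import torch

    # Assumption: `model` is a fastNLP.modules.encoder.bert.BertModel instance
    # (construction elided); vocab size 21128 and the lengths are illustrative.
    input_ids = torch.randint(0, 21128, (2, 10))  # bsz=2, max_len=10

    # output_all_encoded_layers=True: a list of num_layers + 1 tensors, with
    # the token embeddings (BPE + position + type) at index 0, followed by
    # each layer's hidden states.
    encoded_layers, pooled_output = model(input_ids, output_all_encoded_layers=True)
    assert isinstance(encoded_layers, list)

    # output_all_encoded_layers=False: a single bsz x max_len x hidden_size
    # tensor; the patch's else branch keeps insert(0, ...) from being applied
    # to this tensor.
    last_layer, pooled_output = model(input_ids, output_all_encoded_layers=False)
    assert last_layer.dim() == 3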
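The include_cls_sep note on BertForQuestionAnswering.forward is easy to misread, so here is a small hypothetical helper (the name and function are mine, not fastNLP's) showing the index bookkeeping it describes:

    def to_words_index(pred_index: int, include_cls_sep: bool) -> int:
        # Hypothetical helper: with include_cls_sep=True the prepended [CLS]
        # shifts every predicted start/end index one position to the right,
        # so subtract 1 to recover the position in the raw `words` sequence;
        # with include_cls_sep=False the indices already line up.
        return pred_index - 1 if include_cls_sep else pred_index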