From 7398f2b0b20b7725295ca01feaaad437d3d331d2 Mon Sep 17 00:00:00 2001
From: yh_cc
Date: Sat, 11 Apr 2020 22:55:54 +0800
Subject: [PATCH] 1. Add RobertaEmbedding and GPT2Embedding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fastNLP/core/batch.py | 29 +-
 fastNLP/core/dist_trainer.py | 7 +-
 fastNLP/core/tester.py | 4 +-
 fastNLP/core/trainer.py | 4 +-
 fastNLP/embeddings/__init__.py | 11 +-
 fastNLP/embeddings/bert_embedding.py | 244 ++-
 fastNLP/embeddings/gpt2_embedding.py | 649 ++++++++
 fastNLP/embeddings/roberta_embedding.py | 316 +++-
 fastNLP/io/file_utils.py | 67 +-
 fastNLP/modules/__init__.py | 11 +-
 fastNLP/modules/decoder/seq2seq_decoder.py | 109 ++
 fastNLP/modules/encoder/__init__.py | 4 +
 fastNLP/modules/encoder/bert.py | 483 +-----
 fastNLP/modules/encoder/gpt2.py | 1664 ++++++++++++--------
 fastNLP/modules/encoder/roberta.py | 221 +--
 fastNLP/modules/generator/__init__.py | 0
 fastNLP/modules/generator/seq2seq_generator.py | 444 ++++++
 fastNLP/modules/tokenizer/__init__.py | 14 +
 fastNLP/modules/tokenizer/bert_tokenizer.py | 447 ++++++
 fastNLP/modules/tokenizer/gpt2_tokenizer.py | 758 +++++++++
 fastNLP/modules/tokenizer/roberta_tokenizer.py | 102 ++
 fastNLP/modules/utils.py | 12 +-
 .../Summarization/Baseline/train_origin.py | 6 +-
 reproduction/multi-criteria-cws/main.py | 14 +-
 test/core/test_batch.py | 11 +-
 test/core/test_dataset.py | 1 -
 .../embedding/small_gpt2/config.json | 1 +
 .../data_for_tests/embedding/small_gpt2/merges.txt | 39 +
 .../embedding/small_gpt2/small_pytorch_model.bin | Bin 0 -> 40749 bytes
 .../data_for_tests/embedding/small_gpt2/vocab.json | 1 +
 .../embedding/small_roberta/config.json | 1 +
 .../embedding/small_roberta/merges.txt | 39 +
 .../small_roberta/small_pytorch_model.bin | Bin 0 -> 29928 bytes
 .../embedding/small_roberta/vocab.json | 1 +
 test/embeddings/test_bert_embedding.py | 75 +-
 test/embeddings/test_gpt2_embedding.py | 268 ++++
 test/embeddings/test_roberta_embedding.py | 252 +++
 test/modules/encoder/test_bert.py | 24 +
 38 files changed, 4793 insertions(+), 1540 deletions(-)
 create mode 100644 fastNLP/embeddings/gpt2_embedding.py
 create mode 100755 fastNLP/modules/decoder/seq2seq_decoder.py
 create mode 100644 fastNLP/modules/generator/__init__.py
 create mode 100755 fastNLP/modules/generator/seq2seq_generator.py
 create mode 100644 fastNLP/modules/tokenizer/__init__.py
 create mode 100644 fastNLP/modules/tokenizer/bert_tokenizer.py
 create mode 100644 fastNLP/modules/tokenizer/gpt2_tokenizer.py
 create mode 100644 fastNLP/modules/tokenizer/roberta_tokenizer.py
 create mode 100644 test/data_for_tests/embedding/small_gpt2/config.json
 create mode 100644 test/data_for_tests/embedding/small_gpt2/merges.txt
 create mode 100644 test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin
 create mode 100644 test/data_for_tests/embedding/small_gpt2/vocab.json
 create mode 100644 test/data_for_tests/embedding/small_roberta/config.json
 create mode 100644 test/data_for_tests/embedding/small_roberta/merges.txt
 create mode 100644 test/data_for_tests/embedding/small_roberta/small_pytorch_model.bin
 create mode 100644 test/data_for_tests/embedding/small_roberta/vocab.json
 create mode 100644 test/embeddings/test_gpt2_embedding.py
 create mode 100644 test/embeddings/test_roberta_embedding.py
 create mode 100644 test/modules/encoder/test_bert.py

diff --git a/fastNLP/core/batch.py b/fastNLP/core/batch.py
index 7c1e64ee..94942f09 100644
--- a/fastNLP/core/batch.py
+++ b/fastNLP/core/batch.py
@@ -217,7 +217,8 @@ class BatchIter: class DataSetIter(BatchIter): r""" - DataSetIter 用于从 `DataSet` 中按一定的顺序, 依次按 ``batch_size`` 的大小将数据取出, + DataSetIter 用于从 `DataSet` 中按一定的顺序, 依次按 ``batch_size`` 的大小将数据取出,通过使用DataSetIter,可以不需要考虑 + 输入的padding(由DataSet中每列的Padder决定了)以及不需要考虑将数据转为tensor。 组成 `x` 和 `y`:: batch = DataSetIter(data_set, batch_size=16, sampler=SequentialSampler()) @@ -226,10 +227,8 @@ class DataSetIter(BatchIter): # do stuff ... """ - def __init__(self, dataset, batch_size=1, sampler=None, as_numpy=False, - num_workers=0, pin_memory=False, drop_last=False, - timeout=0, worker_init_fn=None, collate_fn=None, - batch_sampler=None): + def __init__(self, dataset, batch_size=1, sampler=None, as_numpy=False, num_workers=0, pin_memory=False, + drop_last=False, timeout=0, worker_init_fn=None, batch_sampler=None): r""" :param dataset: :class:`~fastNLP.DataSet` 对象, 数据集 @@ -245,13 +244,12 @@ class DataSetIter(BatchIter): :param bool drop_last: 如果最后一个batch没有batch_size这么多sample,就扔掉最后一个 :param timeout: 生成一个batch的timeout值 :param worker_init_fn: 在每个worker启动时调用该函数,会传入一个值,该值是worker的index。 - :param collate_fn: 用于将样本组合成batch的函数 :param batch_sampler: 当每次batch取出的数据数量不一致时,可以使用该sampler。batch_sampler每次iter应该输出一个list的index。 当batch_sampler不为None时,参数batch_size, sampler, drop_last会被忽略。 """ assert isinstance(dataset, DataSet) dataset = DataSetGetter(dataset, as_numpy) - collate_fn = dataset.collate_fn if collate_fn is None else collate_fn + collate_fn = dataset.collate_fn if batch_sampler is not None: batch_size = 1 sampler = None @@ -272,8 +270,9 @@ class DataSetIter(BatchIter): class TorchLoaderIter(BatchIter): r""" - 与DataSetIter类似,但可以用于非fastNLP的数据容器对象,然后将其传入到Trainer中。 - 只需要保证数据容器实现了实现了以下的方法 + 与DataSetIter类似,但可以用于非fastNLP的数据容器对象,以及可以实现完全自定义的生成batch的方式,然后与Trainer,Tester可以实现 + 与DataSetIter一样的对接。 + 需要保证传入的数据容器实现了实现了以下的方法 Example:: @@ -293,7 +292,7 @@ class TorchLoaderIter(BatchIter): return self.num_samples # 需要实现collact_fn将数据转换为tensor - def collact_fn(data_list): + def collate_fn(data_list): # [(x1,y1), (x2,y2), ...], 这里的输入实际上是将UdfDataSet的__getitem__输入结合为list xs, ys = [], [] for l in data_list: @@ -302,10 +301,10 @@ class TorchLoaderIter(BatchIter): ys.append(y) # 不需要转移到gpu,Trainer或Tester会将其转移到model所在的device x,y = torch.FloatTensor(xs), torch.FloatTensor(ys) - return {'x':x, 'y':y}, {'y':y} + return {'x':x, 'y':y}, {'y':y} # 第一个dict中内容类似于DataSet中的input列,第二个dict的内容类似于target列 udf_dataset = UdfDataSet(10) - dataset = TorchLoaderIter(udf_dataset, collate_fn=collact_fn) + dataset = TorchLoaderIter(udf_dataset, collate_fn=collate_fn) class Model(nn.Module): def __init__(self): super().__init__() @@ -362,7 +361,7 @@ class TorchLoaderIter(BatchIter): def __len__(self): return self.num_samples - def collact_fn(data_list): + def collate_fn(data_list): # [(x1,y1), (x2,y2), ...], 这里的输入实际上是将UdfDataSet的__getitem__输入结合为list xs, ys = [], [] for l in data_list: @@ -370,10 +369,10 @@ class TorchLoaderIter(BatchIter): xs.append(x) ys.append(y) x, y = torch.FloatTensor(xs), torch.FloatTensor(ys) - return {'x': x, 'y': y}, {'y': y} + return {'x': x, 'y': y}, {'y': y} # 第一个dict中内容类似于DataSet中的input列,第二个dict的内容类似于target列 file_data = FileDataSet(tmp_file_path) - dataset = TorchLoaderIter(file_data, collate_fn=collact_fn) + dataset = TorchLoaderIter(file_data, collate_fn=collate_fn) class Model(nn.Module): def __init__(self): diff --git a/fastNLP/core/dist_trainer.py b/fastNLP/core/dist_trainer.py index f5c0f229..680c4f80 100644 --- a/fastNLP/core/dist_trainer.py +++ b/fastNLP/core/dist_trainer.py @@ -205,11 +205,8 @@ class DistTrainer(): 
def _get_data_iter(self, dataset): if isinstance(dataset, DataSet): - return DataSetIter( - dataset=dataset, batch_size=self.batch_size_per_gpu, - num_workers=self.num_data_workers, sampler=self.sampler, - drop_last=self.drop_last - ) + return DataSetIter(dataset=dataset, batch_size=self.batch_size_per_gpu, sampler=self.sampler, + num_workers=self.num_data_workers, drop_last=self.drop_last) elif isinstance(dataset, BatchIter): return dataset else: diff --git a/fastNLP/core/tester.py b/fastNLP/core/tester.py index b223d35f..680782b1 100644 --- a/fastNLP/core/tester.py +++ b/fastNLP/core/tester.py @@ -107,8 +107,8 @@ class Tester(object): self.logger = logger if isinstance(data, DataSet): - self.data_iterator = DataSetIter( - dataset=data, batch_size=batch_size, num_workers=num_workers, sampler=SequentialSampler()) + self.data_iterator = DataSetIter(dataset=data, batch_size=batch_size, sampler=SequentialSampler(), + num_workers=num_workers) elif isinstance(data, BatchIter): self.data_iterator = data else: diff --git a/fastNLP/core/trainer.py b/fastNLP/core/trainer.py index c6390b22..b16f5ddb 100644 --- a/fastNLP/core/trainer.py +++ b/fastNLP/core/trainer.py @@ -487,8 +487,8 @@ class Trainer(object): sampler.set_batch_size(batch_size) if isinstance(train_data, DataSet): - self.data_iterator = DataSetIter( - dataset=train_data, batch_size=batch_size, num_workers=num_workers, sampler=sampler, drop_last=drop_last) + self.data_iterator = DataSetIter(dataset=train_data, batch_size=batch_size, sampler=sampler, + num_workers=num_workers, drop_last=drop_last) elif isinstance(train_data, BatchIter): self.data_iterator = train_data train_data = train_data.dataset diff --git a/fastNLP/embeddings/__init__.py b/fastNLP/embeddings/__init__.py index 3b3b2dce..bf35b7d4 100644 --- a/fastNLP/embeddings/__init__.py +++ b/fastNLP/embeddings/__init__.py @@ -12,17 +12,26 @@ __all__ = [ "ElmoEmbedding", "BertEmbedding", "BertWordPieceEncoder", + + "RobertaEmbedding", + "RobertaWordPieceEncoder", + + "GPT2Embedding", + "GPT2WordPieceEncoder", + "StackEmbedding", "LSTMCharEmbedding", "CNNCharEmbedding", "get_embeddings", + ] from .embedding import Embedding, TokenEmbedding from .static_embedding import StaticEmbedding from .elmo_embedding import ElmoEmbedding from .bert_embedding import BertEmbedding, BertWordPieceEncoder -from .roberta_embedding import RobertaEmbedding +from .roberta_embedding import RobertaEmbedding, RobertaWordPieceEncoder +from .gpt2_embedding import GPT2WordPieceEncoder, GPT2Embedding from .char_embedding import CNNCharEmbedding, LSTMCharEmbedding from .stack_embedding import StackEmbedding from .utils import get_embeddings diff --git a/fastNLP/embeddings/bert_embedding.py b/fastNLP/embeddings/bert_embedding.py index 3bd448aa..3ad8cd39 100644 --- a/fastNLP/embeddings/bert_embedding.py +++ b/fastNLP/embeddings/bert_embedding.py @@ -11,6 +11,7 @@ __all__ = [ import collections import warnings from itertools import chain +from functools import partial import numpy as np import torch @@ -20,7 +21,8 @@ from .contextual_embedding import ContextualEmbedding from ..core import logger from ..core.vocabulary import Vocabulary from ..io.file_utils import PRETRAINED_BERT_MODEL_DIR -from ..modules.encoder.bert import _WordPieceBertModel, BertModel, BertTokenizer +from ..modules.encoder.bert import BertModel +from ..modules.tokenizer import BertTokenizer class BertEmbedding(ContextualEmbedding): @@ -31,6 +33,7 @@ class BertEmbedding(ContextualEmbedding): BertEmbedding可以支持自动下载权重,当前支持的模型: en: base-cased + 
en-base-uncased: en-large-cased-wwm: en-large-cased: en-large-uncased: @@ -63,7 +66,8 @@ class BertEmbedding(ContextualEmbedding): :param str model_dir_or_name: 模型所在目录或者模型的名称。当传入模型所在目录时,目录中应该包含一个词表文件(以.txt作为后缀名), 权重文件(以.bin作为文件后缀名), 配置文件(以.json作为后缀名)。 :param str layers: 输出embedding表示来自于哪些层,不同层的结果按照layers中的顺序在最后一维concat起来。以','隔开层数,层的序号是 - 从0开始,可以以负数去索引倒数几层。 + 从0开始,可以以负数去索引倒数几层。 layer=0为embedding层(包括wordpiece embedding, + position embedding和segment embedding) :param str pool_method: 因为在bert中,每个word会被表示为多个word pieces, 当获取一个word的表示的时候,怎样从它的word pieces 中计算得到它对应的表示。支持 ``last`` , ``first`` , ``avg`` , ``max``。 :param float word_dropout: 以多大的概率将一个词替换为unk。这样既可以训练unk也是一定的regularize。 @@ -80,6 +84,8 @@ class BertEmbedding(ContextualEmbedding): :param kwargs: bool only_use_pretrain_bpe: 仅使用出现在pretrain词表中的bpe,如果该词没法tokenize则使用unk。如果embedding不需要更新 建议设置为True。 + int min_freq: 仅在only_use_pretrain_bpe为False有效,大于等于该次数的词会被新加入BERT的BPE词表中 + bool truncate_embed: 是否仅保留用到的bpe(这样会减内存占用和加快速度) """ super(BertEmbedding, self).__init__(vocab, word_dropout=word_dropout, dropout=dropout) @@ -92,25 +98,28 @@ class BertEmbedding(ContextualEmbedding): " faster speed.") warnings.warn("For Chinese bert, pooled_method should choose from 'first', 'last' in order to achieve" " faster speed.") - - self._word_sep_index = None + + self._word_sep_index = -100 if '[SEP]' in vocab: self._word_sep_index = vocab['[SEP]'] + self._word_cls_index = -100 + if '[CLS]' in vocab: + self._word_cls_index = vocab['CLS'] only_use_pretrain_bpe = kwargs.get('only_use_pretrain_bpe', False) - - self.model = _WordBertModel(model_dir_or_name=model_dir_or_name, vocab=vocab, layers=layers, + truncate_embed = kwargs.get('truncate_embed', True) + min_freq = kwargs.get('min_freq', 2) + + self.model = _BertWordModel(model_dir_or_name=model_dir_or_name, vocab=vocab, layers=layers, pool_method=pool_method, include_cls_sep=include_cls_sep, - pooled_cls=pooled_cls, auto_truncate=auto_truncate, min_freq=2, - only_use_pretrain_bpe=only_use_pretrain_bpe) - self._sep_index = self.model._sep_index - self._cls_index = self.model._cls_index + pooled_cls=pooled_cls, auto_truncate=auto_truncate, min_freq=min_freq, + only_use_pretrain_bpe=only_use_pretrain_bpe, truncate_embed=truncate_embed) self.requires_grad = requires_grad self._embed_size = len(self.model.layers) * self.model.encoder.hidden_size - + def _delete_model_weights(self): del self.model - + def forward(self, words): r""" 计算words的bert embedding表示。计算之前会在每句话的开始增加[CLS]在结束增加[SEP], 并根据include_cls_sep判断要不要 @@ -125,9 +134,9 @@ class BertEmbedding(ContextualEmbedding): return self.dropout(outputs) outputs = self.model(words) outputs = torch.cat([*outputs], dim=-1) - + return self.dropout(outputs) - + def drop_word(self, words): r""" 按照设定随机将words设置为unknown_index。 @@ -137,15 +146,16 @@ class BertEmbedding(ContextualEmbedding): """ if self.word_dropout > 0 and self.training: with torch.no_grad(): - not_sep_mask = words.ne(self._sep_index) - not_cls_mask = words.ne(self._cls_index) - if self._word_sep_index: - not_sep_mask = not_sep_mask.__and__(words.ne(self._word_sep_index)) - replaceable_mask = not_sep_mask.__and__(not_cls_mask) mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device) mask = torch.bernoulli(mask).eq(1) # dropout_word越大,越多位置为1 - pad_mask = words.ne(0) - mask = pad_mask.__and__(mask).__and__(replaceable_mask) # pad的位置不为unk + pad_mask = words.ne(self._word_pad_index) + mask = pad_mask.__and__(mask) # pad的位置不为unk + if self._word_sep_index!=-100: + not_sep_mask = 
words.ne(self._word_sep_index) + mask = mask.__and__(not_sep_mask) + if self._word_cls_index!=-100: + not_cls_mask = words.ne(self._word_cls_index) + mask = mask.__and__(not_cls_mask) words = words.masked_fill(mask, self._word_unk_index) return words @@ -167,21 +177,22 @@ class BertWordPieceEncoder(nn.Module): multi-base-uncased: multilingual uncased """ - + def __init__(self, model_dir_or_name: str = 'en-base-uncased', layers: str = '-1', pooled_cls: bool = False, word_dropout=0, dropout=0, requires_grad: bool = True): r""" - + :param str model_dir_or_name: 模型所在目录或者模型的名称。默认值为 ``en-base-uncased`` - :param str layers: 最终结果中的表示。以','隔开层数,可以以负数去索引倒数几层 + :param str layers: 最终结果中的表示。以','隔开层数,可以以负数去索引倒数几层。layer=0为embedding层(包括wordpiece embedding, + position embedding和segment embedding) :param bool pooled_cls: 返回的句子开头的[CLS]是否使用预训练中的BertPool映射一下。如果下游任务取[CLS]做预测,一般该值为True。 :param float word_dropout: 以多大的概率将一个词替换为unk。这样既可以训练unk也是一定的regularize。 :param float dropout: 以多大的概率对embedding的表示进行Dropout。0.1即随机将10%的值置为0。 :param bool requires_grad: 是否需要gradient。 """ super().__init__() - - self.model = _WordPieceBertModel(model_dir_or_name=model_dir_or_name, layers=layers, pooled_cls=pooled_cls) + + self.model = _BertWordPieceModel(model_dir_or_name=model_dir_or_name, layers=layers, pooled_cls=pooled_cls) self._sep_index = self.model._sep_index self._cls_index = self.model._cls_index self._wordpiece_pad_index = self.model._wordpiece_pad_index @@ -190,19 +201,19 @@ class BertWordPieceEncoder(nn.Module): self.requires_grad = requires_grad self.word_dropout = word_dropout self.dropout_layer = nn.Dropout(dropout) - + @property def embed_size(self): return self._embed_size - + @property def embedding_dim(self): return self._embed_size - + @property def num_embedding(self): return self.model.encoder.config.vocab_size - + def index_datasets(self, *datasets, field_name, add_cls_sep=True): r""" 使用bert的tokenizer新生成word_pieces列加入到datasets中,并将他们设置为input,且将word_pieces这一列的pad value设置为了 @@ -213,8 +224,8 @@ class BertWordPieceEncoder(nn.Module): :param bool add_cls_sep: 如果首尾不是[CLS]与[SEP]会在首尾额外加入[CLS]与[SEP]。 :return: """ - self.model.index_dataset(*datasets, field_name=field_name, add_cls_sep=add_cls_sep) - + self.model.index_datasets(*datasets, field_name=field_name, add_cls_sep=add_cls_sep) + def forward(self, word_pieces, token_type_ids=None): r""" 计算words的bert embedding表示。传入的words中应该自行包含[CLS]与[SEP]的tag。 @@ -224,20 +235,20 @@ class BertWordPieceEncoder(nn.Module): 第一个[SEP]及之前为0, 第二个[SEP]及到第一个[SEP]之间为1; 第三个[SEP]及到第二个[SEP]之间为0,依次往后推。 :return: torch.FloatTensor. 
batch_size x max_len x (768*len(self.layers)) """ - with torch.no_grad(): - sep_mask = word_pieces.eq(self._sep_index) # batch_size x max_len - if token_type_ids is None: + if token_type_ids is None: + with torch.no_grad(): + sep_mask = word_pieces.eq(self._sep_index) # batch_size x max_len sep_mask_cumsum = sep_mask.long().flip(dims=[-1]).cumsum(dim=-1).flip(dims=[-1]) token_type_ids = sep_mask_cumsum.fmod(2) if token_type_ids[0, 0].item(): # 如果开头是奇数,则需要flip一下结果,因为需要保证开头为0 token_type_ids = token_type_ids.eq(0).long() - + word_pieces = self.drop_word(word_pieces) outputs = self.model(word_pieces, token_type_ids) outputs = torch.cat([*outputs], dim=-1) - + return self.dropout_layer(outputs) - + def drop_word(self, words): r""" 按照设定随机将words设置为unknown_index。 @@ -258,38 +269,45 @@ class BertWordPieceEncoder(nn.Module): return words -class _WordBertModel(nn.Module): +class _BertWordModel(nn.Module): def __init__(self, model_dir_or_name: str, vocab: Vocabulary, layers: str = '-1', pool_method: str = 'first', include_cls_sep: bool = False, pooled_cls: bool = False, auto_truncate: bool = False, min_freq=2, - only_use_pretrain_bpe=False): + only_use_pretrain_bpe=False, truncate_embed=True): super().__init__() - + self.tokenzier = BertTokenizer.from_pretrained(model_dir_or_name) self.encoder = BertModel.from_pretrained(model_dir_or_name) self._max_position_embeddings = self.encoder.config.max_position_embeddings # 检查encoder_layer_number是否合理 encoder_layer_number = len(self.encoder.encoder.layer) - self.layers = list(map(int, layers.split(','))) + if isinstance(layers, list): + self.layers = [int(l) for l in layers] + elif isinstance(layers, str): + self.layers = list(map(int, layers.split(','))) + else: + raise TypeError("`layers` only supports str or list[int]") for layer in self.layers: if layer < 0: assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ f"a bert model with {encoder_layer_number} layers." else: - assert layer < encoder_layer_number, f"The layer index:{layer} is out of scope for " \ + assert layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ f"a bert model with {encoder_layer_number} layers." 
- + assert pool_method in ('avg', 'max', 'first', 'last') self.pool_method = pool_method self.include_cls_sep = include_cls_sep self.pooled_cls = pooled_cls self.auto_truncate = auto_truncate - + # 将所有vocab中word的wordpiece计算出来, 需要额外考虑[CLS]和[SEP] logger.info("Start to generate word pieces for word.") + self._has_sep_in_vocab = '[SEP]' in vocab # 用来判断传入的数据是否需要生成token_ids + # 第一步统计出需要的word_piece, 然后创建新的embed和word_piece_vocab, 然后填入值 word_piece_dict = {'[CLS]': 1, '[SEP]': 1} # 用到的word_piece以及新增的 - found_count = 0 - self._has_sep_in_vocab = '[SEP]' in vocab # 用来判断传入的数据是否需要生成token_ids + new_add_to_bpe_vocab = 0 + unsegment_count = 0 if '[sep]' in vocab: warnings.warn("Lower cased [sep] detected, it cannot be correctly recognized as [SEP] by BertEmbedding.") if "[CLS]" in vocab: @@ -311,27 +329,42 @@ class _WordBertModel(nn.Module): if vocab.word_count[word] >= min_freq and not vocab._is_word_no_create_entry( word) and not only_use_pretrain_bpe: # 出现次数大于这个次数才新增 word_piece_dict[word] = 1 # 新增一个值 + new_add_to_bpe_vocab += 1 + unsegment_count += 1 continue for word_piece in word_pieces: word_piece_dict[word_piece] = 1 - found_count += 1 original_embed = self.encoder.embeddings.word_embeddings.weight.data + # 特殊词汇要特殊处理 + if not truncate_embed:# 如果不删除的话需要将已有的加上 + word_piece_dict.update(self.tokenzier.vocab) embed = nn.Embedding(len(word_piece_dict), original_embed.size(1)) # 新的embed new_word_piece_vocab = collections.OrderedDict() + for index, token in enumerate(['[PAD]', '[UNK]']): - word_piece_dict.pop(token, None) - embed.weight.data[index] = original_embed[self.tokenzier.vocab[token]] - new_word_piece_vocab[token] = index + index = word_piece_dict.pop(token, None) + if index is not None: + new_word_piece_vocab[token] = len(new_word_piece_vocab) + embed.weight.data[new_word_piece_vocab[token]] = original_embed[self.tokenzier.vocab[token]] for token in word_piece_dict.keys(): + if token not in new_word_piece_vocab: + new_word_piece_vocab[token] = len(new_word_piece_vocab) + index = new_word_piece_vocab[token] if token in self.tokenzier.vocab: - embed.weight.data[len(new_word_piece_vocab)] = original_embed[self.tokenzier.vocab[token]] + embed.weight.data[index] = original_embed[self.tokenzier.vocab[token]] else: - embed.weight.data[len(new_word_piece_vocab)] = original_embed[self.tokenzier.vocab['[UNK]']] - new_word_piece_vocab[token] = len(new_word_piece_vocab) + embed.weight.data[index] = original_embed[self.tokenzier.vocab['[UNK]']] + self.tokenzier._reinit_on_new_vocab(new_word_piece_vocab) self.encoder.embeddings.word_embeddings = embed - + self.encoder.config.vocab_size = len(new_word_piece_vocab) + if unsegment_count>0: + if only_use_pretrain_bpe or new_add_to_bpe_vocab==0: + logger.info(f"{unsegment_count} words are unsegmented.") + else: + logger.info(f"{unsegment_count} words are unsegmented. 
Among them, {new_add_to_bpe_vocab} added to the BPE vocab.") + word_to_wordpieces = [] word_pieces_lengths = [] for word, index in vocab: @@ -347,11 +380,10 @@ class _WordBertModel(nn.Module): self._sep_index = self.tokenzier.vocab['[SEP]'] self._word_pad_index = vocab.padding_idx self._wordpiece_pad_index = self.tokenzier.vocab['[PAD]'] # 需要用于生成word_piece - logger.info("Found(Or segment into word pieces) {} words out of {}.".format(found_count, len(vocab))) self.word_to_wordpieces = np.array(word_to_wordpieces) self.register_buffer('word_pieces_lengths', torch.LongTensor(word_pieces_lengths)) logger.debug("Successfully generate word pieces.") - + def forward(self, words): r""" @@ -365,8 +397,8 @@ class _WordBertModel(nn.Module): batch_word_pieces_length = self.word_pieces_lengths[words].masked_fill(word_mask.eq(False), 0) # batch_size x max_len word_pieces_lengths = batch_word_pieces_length.sum(dim=-1) # batch_size - word_piece_length = batch_word_pieces_length.sum(dim=-1).max().item() # 表示word piece的长度(包括padding) - if word_piece_length + 2 > self._max_position_embeddings: + max_word_piece_length = batch_word_pieces_length.sum(dim=-1).max().item() # 表示word piece的长度(包括padding) + if max_word_piece_length + 2 > self._max_position_embeddings: if self.auto_truncate: word_pieces_lengths = word_pieces_lengths.masked_fill( word_pieces_lengths + 2 > self._max_position_embeddings, @@ -376,9 +408,9 @@ class _WordBertModel(nn.Module): "After split words into word pieces, the lengths of word pieces are longer than the " f"maximum allowed sequence length:{self._max_position_embeddings} of bert. You can set " f"`auto_truncate=True` for BertEmbedding to automatically truncate overlong input.") - + # +2是由于需要加入[CLS]与[SEP] - word_pieces = words.new_full((batch_size, min(word_piece_length + 2, self._max_position_embeddings)), + word_pieces = words.new_full((batch_size, min(max_word_piece_length + 2, self._max_position_embeddings)), fill_value=self._wordpiece_pad_index) attn_masks = torch.zeros_like(word_pieces) # 1. 
获取words的word_pieces的id,以及对应的span范围 @@ -406,7 +438,7 @@ class _WordBertModel(nn.Module): bert_outputs, pooled_cls = self.encoder(word_pieces, token_type_ids=token_type_ids, attention_mask=attn_masks, output_all_encoded_layers=True) # output_layers = [self.layers] # len(self.layers) x batch_size x real_word_piece_length x hidden_size - + if self.include_cls_sep: s_shift = 1 outputs = bert_outputs[-1].new_zeros(len(self.layers), batch_size, max_word_len + 2, @@ -421,19 +453,19 @@ class _WordBertModel(nn.Module): if self.pool_method == 'first': batch_word_pieces_cum_length = batch_word_pieces_cum_length[:, :seq_len.max()] - batch_word_pieces_cum_length.masked_fill_(batch_word_pieces_cum_length.ge(word_piece_length), 0) + batch_word_pieces_cum_length.masked_fill_(batch_word_pieces_cum_length.ge(max_word_piece_length), 0) _batch_indexes = batch_indexes[:, None].expand((batch_size, batch_word_pieces_cum_length.size(1))) elif self.pool_method == 'last': batch_word_pieces_cum_length = batch_word_pieces_cum_length[:, 1:seq_len.max()+1] - 1 - batch_word_pieces_cum_length.masked_fill_(batch_word_pieces_cum_length.ge(word_piece_length), 0) + batch_word_pieces_cum_length.masked_fill_(batch_word_pieces_cum_length.ge(max_word_piece_length), 0) _batch_indexes = batch_indexes[:, None].expand((batch_size, batch_word_pieces_cum_length.size(1))) for l_index, l in enumerate(self.layers): output_layer = bert_outputs[l] real_word_piece_length = output_layer.size(1) - 2 - if word_piece_length > real_word_piece_length: # 如果实际上是截取出来的 + if max_word_piece_length > real_word_piece_length: # 如果实际上是截取出来的 paddings = output_layer.new_zeros(batch_size, - word_piece_length - real_word_piece_length, + max_word_piece_length - real_word_piece_length, output_layer.size(2)) output_layer = torch.cat((output_layer, paddings), dim=1).contiguous() # 从word_piece collapse到word的表示 @@ -462,7 +494,85 @@ class _WordBertModel(nn.Module): outputs[l_index, :, 0] = pooled_cls else: outputs[l_index, :, 0] = output_layer[:, 0] - outputs[l_index, batch_indexes, seq_len + s_shift] = output_layer[batch_indexes, seq_len + s_shift] + outputs[l_index, batch_indexes, seq_len + s_shift] = output_layer[batch_indexes, word_pieces_lengths + s_shift] # 3. 最终的embedding结果 return outputs + + +class _BertWordPieceModel(nn.Module): + r""" + 这个模块用于直接计算word_piece的结果. + + """ + + def __init__(self, model_dir_or_name: str, layers: str = '-1', pooled_cls: bool=False): + super().__init__() + + self.tokenzier = BertTokenizer.from_pretrained(model_dir_or_name) + self.encoder = BertModel.from_pretrained(model_dir_or_name) + # 检查encoder_layer_number是否合理 + encoder_layer_number = len(self.encoder.encoder.layer) + + if isinstance(layers, list): + self.layers = [int(l) for l in layers] + elif isinstance(layers, str): + self.layers = list(map(int, layers.split(','))) + else: + raise TypeError("`layers` only supports str or list[int]") + + for layer in self.layers: + if layer < 0: + assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ + f"a bert model with {encoder_layer_number} layers." + else: + assert layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ + f"a bert model with {encoder_layer_number} layers." 
+ + self._cls_index = self.tokenzier.cls_index + self._sep_index = self.tokenzier.sep_index + self._wordpiece_unknown_index = self.tokenzier.unk_index + self._wordpiece_pad_index = self.tokenzier.pad_index # 需要用于生成word_piece + self.pooled_cls = pooled_cls + + def index_datasets(self, *datasets, field_name, add_cls_sep=True): + r""" + 使用bert的tokenizer新生成word_pieces列加入到datasets中,并将他们设置为input。如果首尾不是 + [CLS]与[SEP]会在首尾额外加入[CLS]与[SEP], 且将word_pieces这一列的pad value设置为了bert的pad value。 + + :param datasets: DataSet对象 + :param field_name: 基于哪一列index + :return: + """ + + encode_func = partial(self.tokenzier.encode, add_special_tokens=add_cls_sep) + + for index, dataset in enumerate(datasets): + try: + dataset.apply_field(encode_func, field_name=field_name, new_field_name='word_pieces', + is_input=True) + dataset.set_pad_val('word_pieces', self._wordpiece_pad_index) + except Exception as e: + logger.error(f"Exception happens when processing the {index} dataset.") + raise e + + def forward(self, word_pieces, token_type_ids=None): + r""" + + :param word_pieces: torch.LongTensor, batch_size x max_len + :param token_type_ids: torch.LongTensor, batch_size x max_len + :return: num_layers x batch_size x max_len x hidden_size或者num_layers x batch_size x (max_len+2) x hidden_size + """ + batch_size, max_len = word_pieces.size() + + attn_masks = word_pieces.ne(self._wordpiece_pad_index) + bert_outputs, pooled_cls = self.encoder(word_pieces, token_type_ids=token_type_ids, attention_mask=attn_masks, + output_all_encoded_layers=True) + # output_layers = [self.layers] # len(self.layers) x batch_size x max_word_piece_length x hidden_size + outputs = bert_outputs[0].new_zeros((len(self.layers), batch_size, max_len, bert_outputs[0].size(-1))) + for l_index, l in enumerate(self.layers): + bert_output = bert_outputs[l] + if l in (len(bert_outputs)-1, -1) and self.pooled_cls: + bert_output[:, 0] = pooled_cls + outputs[l_index] = bert_output + return outputs \ No newline at end of file diff --git a/fastNLP/embeddings/gpt2_embedding.py b/fastNLP/embeddings/gpt2_embedding.py new file mode 100644 index 00000000..fdae4240 --- /dev/null +++ b/fastNLP/embeddings/gpt2_embedding.py @@ -0,0 +1,649 @@ +""" +.. 
todo:: + doc +""" + +__all__ = [ + "GPT2Embedding", + "GPT2WordPieceEncoder" +] + +import warnings +from functools import partial +from itertools import chain +from collections import OrderedDict + +import torch +from torch import nn +import numpy as np + +from .contextual_embedding import ContextualEmbedding +from ..core import logger +from ..core.utils import _get_model_device +from ..core.vocabulary import Vocabulary +from ..io.file_utils import PRETRAINED_BERT_MODEL_DIR +from ..modules.tokenizer import GPT2Tokenizer +from ..modules.encoder.gpt2 import GPT2LMHeadModel, GPT2Model + + +class GPT2Embedding(ContextualEmbedding): + """ + 使用GPT2对words进行编码的Embedding。 + + Example:: + + >>> import torch + >>> from fastNLP import Vocabulary + >>> from fastNLP.embeddings import BertEmbedding + >>> vocab = Vocabulary().add_word_lst("The whether is good .".split()) + >>> embed = GPT2Embedding(vocab, model_dir_or_name='en-small', requires_grad=False, layers='4,-2,-1') + >>> words = torch.LongTensor([[vocab.to_index(word) for word in "The whether is good .".split()]]) + >>> outputs = embed(words) + >>> outputs.size() + >>> # torch.Size([1, 5, 3096]) + """ + + def __init__(self, vocab: Vocabulary, model_dir_or_name: str = 'en-small', layers: str = '-1', + pool_method: str = 'first', dropout=0, requires_grad: bool = True, + auto_truncate: bool = False, language_model: bool = False, **kwargs): + """ + + :param ~fastNLP.Vocabulary vocab: 词表 + :param str model_dir_or_name: 模型所在目录或者模型的名称。当传入模型所在目录时,目录中应该包含一个词表文件(以.txt作为后缀名), + 权重文件(以.bin作为文件后缀名), 配置文件(以.json作为后缀名)。 + :param str layers: 输出embedding表示来自于哪些层,不同层的结果按照layers中的顺序在最后一维concat起来。以','隔开层数,层的序号是 + 从0开始,可以以负数去索引倒数几层。 + :param str pool_method: 因为在bert中,每个word会被表示为多个word pieces, 当获取一个word的表示的时候,怎样从它的word pieces + 中计算得到它对应的表示。支持 ``last`` , ``first`` , ``avg`` , ``max``。 + :param float dropout: 以多大的概率对embedding的表示进行Dropout。0.1即随机将10%的值置为0。 + :param bool requires_grad: 是否需要gradient以更新Bert的权重。 + :param bool auto_truncate: 当句子words拆分为word pieces长度超过bert最大允许长度(一般为512), 自动截掉拆分后的超过510个 + word pieces后的内容,并将第512个word piece置为[SEP]。超过长度的部分的encode结果直接全部置零。一般仅有只使用[CLS] + 来进行分类的任务将auto_truncate置为True。 + :param bool language_model: 是否计算gpt2的lm loss,可以通过get_loss()获取,输入一个batch之后的get_loss调用即为batch的language + model的loss + :param **kwargs: + bool only_use_pretrain_bpe: 仅使用出现在pretrain词表中的bpe,如果该词没法tokenize则使用unk。如果embedding不需要更新 + 建议设置为True。 + int min_freq: 仅在only_use_pretrain_bpe为False有效,大于等于该次数的词会被新加入GPT2的BPE词表中 + bool truncate_embed: 是否仅保留用到的bpe(这样会减内存占用和加快速度) + """ + super().__init__(vocab, word_dropout=0, dropout=dropout) + + if model_dir_or_name.lower() in PRETRAINED_BERT_MODEL_DIR: + if 'cn' in model_dir_or_name.lower() and pool_method not in ('first', 'last'): + logger.warning("For Chinese GPT, pooled_method should choose from 'first', 'last' in order to achieve" + " faster speed.") + warnings.warn("For Chinese GPT, pooled_method should choose from 'first', 'last' in order to achieve" + " faster speed.") + + only_use_pretrain_bpe = kwargs.get('only_use_pretrain_bpe', False) + truncate_embed = kwargs.get('truncate_embed', True) + min_freq = kwargs.get('min_freq', 2) + + self.lm_loss =language_model + self.model = _GPT2Model(model_dir_or_name=model_dir_or_name, vocab=vocab, layers=layers, + pool_method=pool_method, auto_truncate=auto_truncate, language_model=language_model, + only_use_pretrain_bpe=only_use_pretrain_bpe, truncate_embed=truncate_embed, + min_freq=min_freq) + + self.requires_grad = requires_grad + self._embed_size = len(self.model.layers) * 
self.model.encoder.config.n_embd + + def _delete_model_weights(self): + del self.model + + def forward(self, words): + """ + 计算words的bert embedding表示。计算之前会在每句话的开始增加[CLS]在结束增加[SEP], 并根据include_cls_sep判断要不要 + 删除这两个token的表示。 + + :param torch.LongTensor words: [batch_size, max_len] + :return: torch.FloatTensor. batch_size x max_len x (768*len(self.layers)) + """ + outputs = self._get_sent_reprs(words) + if outputs is not None: + return self.dropout(outputs) + outputs = self.model(words) + outputs = torch.cat([*outputs], dim=-1) + + return self.dropout(outputs) + + def drop_word(self, words): + """ + :param torch.LongTensor words: batch_size x max_len + :return: + """ + if self.word_dropout > 0 and self.training: + with torch.no_grad(): + mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device) + mask = torch.bernoulli(mask).eq(1) # dropout_word越大,越多位置为1 + words = words.masked_fill(mask, self._word_unk_index) + return words + + def get_lm_loss(self, release=True): + """ + 当language_model=True时,可以通过该接口获取当前batch的language model loss的大小 + + :param bool release: 如果为True,获取了lm_loss后在下一次forward完成之前都无法获取lm_loss了 + :return: torch.FloatTensor([]) + """ + if hasattr(self.model, '_lm_loss_value'): + lm_loss_value = self.model._lm_loss_value + if release: + delattr(self.model, '_lm_loss_value') + return lm_loss_value + elif self.lm_loss: + raise RuntimeError("Make sure you have passed a batch into GPT2Embdding before accessing loss.") + else: + raise RuntimeError("Initialize your GPT2Embedding with language_model=True.") + + +class GPT2WordPieceEncoder(nn.Module): + """ + GPT2模型,使用时先使用本模型对应的Tokenizer对数据进行tokenize + + """ + + def __init__(self, model_dir_or_name: str = 'en-small', layers: str = '-1', + word_dropout=0, dropout=0, requires_grad: bool = True, language_model:bool=False): + """ + + :param str model_dir_or_name: 模型所在目录或者模型的名称。 + :param str,list layers: 最终结果中的表示。以','隔开层数,可以以负数去索引倒数几层 + :param float word_dropout: 多大概率将word piece置为<|endoftext|> + :param float dropout: 以多大的概率对embedding的表示进行Dropout。0.1即随机将10%的值置为0。 + :param bool language_model: 是否使用language model + :param bool requires_grad: 是否需要gradient。 + """ + super().__init__() + + self.model = _GPT2WordPieceModel(model_dir_or_name=model_dir_or_name, layers=layers, language_model=language_model) + self._wordpiece_pad_index = self.model._wordpiece_pad_index + self._embed_size = len(self.model.layers) * self.model.encoder.config.n_embd + self.requires_grad = requires_grad + self.dropout_layer = nn.Dropout(dropout) + self._wordpiece_endoftext_index = self.model._endoftext_index + self.word_dropout = word_dropout + self.language_model = language_model + + @property + def embed_size(self): + return self._embed_size + + @property + def embedding_dim(self): + return self._embed_size + + @property + def num_embedding(self): + return self.model.encoder.config.vocab_size + + def index_datasets(self, *datasets, field_name, add_endoftext=False, add_prefix_space=True): + """ + 使用bert的tokenizer新生成word_pieces列加入到datasets中,并将他们设置为input,且将word_pieces这一列的pad value设置为了 + bert的pad value。 + + :param ~fastNLP.DataSet datasets: DataSet对象 + :param list[str] field_name: 基于哪一列的内容生成word_pieces列。这一列中每个数据应该是List[str]的形式。 + :param bool add_endoftext: 在句子开头加入<|endofline|>。 + :param bool add_prefix_space: 是否在句首增加空格 + :return: + """ + self.model.index_datasets(*datasets, field_name=field_name, add_endoftext=add_endoftext, + add_prefix_space=add_prefix_space) + + def forward(self, word_pieces, token_type_ids=None): + """ + 计算words的bert 
embedding表示。传入的words中应该在开头包含<|endofline|>。 + + :param word_pieces: batch_size x max_len + :param token_type_ids: batch_size x max_len, + :return: torch.FloatTensor. + """ + + outputs = self.model(word_pieces) + outputs = torch.cat([*outputs], dim=-1) + + return self.dropout_layer(outputs) + + def drop_word(self, words): + """ + + :param torch.LongTensor words: batch_size x max_len + :return: + """ + if self.word_dropout > 0 and self.training: + with torch.no_grad(): + mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device) + mask = torch.bernoulli(mask).eq(1) # dropout_word越大,越多位置为1 + endoftext_mask = words.ne(self._wordpiece_endoftext_index) + mask = endoftext_mask.__and__(mask) # pad的位置不为unk + words = words.masked_fill(mask, self._wordpiece_unk_index) + return words + + def generate_from_str(self, text='', max_len=40, do_sample=True, num_beams=1, temperature=1, top_k=50, top_p=1.0, + repetition_penalty=1.0, length_penalty=1.0): + """ + + :param str text: 故事的开头 + :param int max_len: 生成多长的句子 + :param bool do_sample: 是否使用采样的方式生成,如果使用采样,相同的参数可能出现不同的句子。 + :param int num_beams: 使用多大的beam size + :param float temperature: 用以调节采样分布的 + :param int top_k: 只保留此表中top_k个词进行生成。范围1-infinity + :param float top_p: 保留概率累积为top_p的词汇,范围0-1. + :param float repetition_penalty: 对重复token的惩罚 + :param float length_penalty: 惩罚过长的句子 + :return: list[str] + """ + if len(text)==0: + word_pieces = torch.LongTensor([[self.model.tokenizer.bos_index]]) + start_idx = 1 + else: + assert isinstance(text, str), "Only string input allowed." + assert self.language_model, "You must set `language_model=True`." + word_pieces = self.model.convert_words_to_word_pieces(text, add_prefix_space=True) + word_pieces = torch.LongTensor([word_pieces]) + start_idx = 0 + device = _get_model_device(self) + word_pieces = word_pieces.to(device) + outputs = self.model.encoder.generate(input_ids=word_pieces, + max_length=max_len, + do_sample=do_sample, + num_beams=num_beams, + temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + bos_token_id=self.model.tokenizer.bos_index, + pad_token_id=self.model.tokenizer.eos_index, # 使用<|endoftext|>代替pad + eos_token_ids=self.model.tokenizer.eos_index, + length_penalty=length_penalty).squeeze(0) + + output_strs = [] + if outputs.dim()==1: + outputs = outputs[None] + outputs = outputs[:, start_idx:] + for i in range(len(outputs)): + str_ = self.model.tokenizer.convert_tokens_to_string(self.model.tokenizer.convert_ids_to_tokens(outputs[i].tolist())) + output_strs.append(str_) + + return output_strs + + def generate(self, word_pieces, max_len=40, do_sample=True, num_beams=1, temperature=1, top_k=50, top_p=1.0, + repetition_penalty=1.0, length_penalty=1.0): + """ + + :param word_pieces: + :param int max_len: 生成多长的句子 + :param bool do_sample: 是否使用采样的方式生成,如果使用采样,相同的参数可能出现不同的句子。 + :param int num_beams: 使用多大的beam size + :param float temperature: 用以调节采样分布的 + :param int top_k: 只保留此表中top_k个词进行生成。范围1-infinity + :param float top_p: 保留概率累积为top_p的词汇,范围0-1. 
+ :param float repetition_penalty: 对重复token的惩罚 + :param float length_penalty: 惩罚过长的句子 + :return: + """ + pass + + def get_lm_loss(self, release=True): + """ + 当language_model=True时,可以通过该接口获取当前batch的language model loss的大小 + + :param bool release: 如果为True,获取了lm_loss后在下一次forward完成之前都无法获取lm_loss了 + :return: torch.FloatTensor([]) + """ + if hasattr(self.model, '_lm_loss_value'): + lm_loss_value = self.model._lm_loss_value + if release: + delattr(self.model, '_lm_loss_value') + return lm_loss_value + elif self.lm_loss: + raise RuntimeError("Make sure you have passed a batch into GPT2Embdding before accessing loss.") + else: + raise RuntimeError("Initialize your GPT2Embedding with language_model=True.") + + +class _GPT2Model(nn.Module): + def __init__(self, model_dir_or_name, vocab, layers, pool_method='first', auto_truncate=True, language_model=False, + only_use_pretrain_bpe=False, min_freq=2, truncate_embed=False): + super().__init__() + + self.tokenzier = GPT2Tokenizer.from_pretrained(model_dir_or_name) + if language_model: + self.encoder = GPT2LMHeadModel.from_pretrained(model_dir_or_name) + else: + self.encoder = GPT2Model.from_pretrained(model_dir_or_name) + + self.lm_loss = language_model + self._max_position_embeddings = self.encoder.config.max_position_embeddings + # 检查encoder_layer_number是否合理 + encoder_layer_number = self.encoder.config.n_layer + if isinstance(layers, list): + self.layers = [int(l) for l in layers] + elif isinstance(layers, str): + self.layers = list(map(int, layers.split(','))) + else: + raise TypeError("`layers` only supports str or list[int]") + for layer in self.layers: + if layer < 0: + assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ + f"a GPT2 model with {encoder_layer_number} layers." + else: + assert layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ + f"a GPT2 model with {encoder_layer_number} layers." + + assert pool_method in ('avg', 'max', 'first', 'last') + self.pool_method = pool_method + self.auto_truncate = auto_truncate + + # 将所有vocab中word的wordpiece计算出来, 需要额外考虑 + logger.info("Start to generate word pieces for word.") + # 第一步统计出需要的word_piece, 然后创建新的embed和word_piece_vocab, 然后填入值 + word_piece_dict = {'<|endoftext|>': 1} # 用到的word_piece以及新增的 + found_count = 0 + new_add_to_bpe_vocab = 0 + unsegment_count = 0 + + for word, index in vocab: + if index == vocab.padding_idx: # pad是个特殊的符号 + word = '<|endoftext|>' + elif index == vocab.unknown_idx: + word = '<|endoftext|>' + # _words = self.tokenzier.basic_tokenizer._tokenize_chinese_chars(word).split() # 这里暂时不考虑中文内容 + word_pieces = [] + word_pieces.extend(self.tokenzier.tokenize(word, add_prefix_space=True)) + if len(word_pieces) == 1: + if not vocab._is_word_no_create_entry(word): # 如果是train中的值, 但是却没有找到 + if index not in (vocab.unknown_idx, vocab.padding_idx) and word_pieces[0] == '<|endoftext|>': # 说明这个词不在原始的word里面 + if vocab.word_count[word] >= min_freq and not vocab._is_word_no_create_entry( + word) and not only_use_pretrain_bpe: # 出现次数大于这个次数才新增 + word_piece_dict[word] = 1 # 新增一个值 + new_add_to_bpe_vocab += 1 + unsegment_count += 1 + continue + for word_piece in word_pieces: + word_piece_dict[word_piece] = 1 + found_count += 1 + + if unsegment_count>0: + if only_use_pretrain_bpe or new_add_to_bpe_vocab==0: + logger.info(f"{unsegment_count} words are unsegmented.") + else: + logger.info(f"{unsegment_count} words are unsegmented. 
Among them, {new_add_to_bpe_vocab} added to the BPE vocab.") + + original_embed = self.encoder.get_input_embeddings().weight + # 特殊词汇要特殊处理 + if not truncate_embed: # 如果不删除的话需要将已有的加上 + word_piece_dict.update(self.tokenzier.encoder) + + embed = nn.Embedding(len(word_piece_dict), original_embed.size(1)) # 新的embed + new_word_piece_vocab = OrderedDict() + + for index, token in enumerate(['<|endoftext|>']): + index = word_piece_dict.pop(token, None) + if index is not None: + new_word_piece_vocab[token] = len(new_word_piece_vocab) + embed.weight.data[new_word_piece_vocab[token]] = original_embed[self.tokenzier.encoder[token]] + + for token in word_piece_dict.keys(): + if token not in new_word_piece_vocab: + new_word_piece_vocab[token] = len(new_word_piece_vocab) + index = new_word_piece_vocab[token] + if token in self.tokenzier.encoder: + embed.weight.data[index] = original_embed[self.tokenzier.encoder[token]] + else: + embed.weight.data[index] = original_embed[self.tokenzier.encoder['<|endoftext|>']] + + self.tokenzier._reinit_on_new_vocab(new_word_piece_vocab) + self.encoder.set_input_embeddings(embed) + self.encoder.tie_weights() + self.encoder.config.vocab_size = len(new_word_piece_vocab) + + word_to_wordpieces = [] + word_pieces_lengths = [] + for word, index in vocab: + if index == vocab.padding_idx: # pad是个特殊的符号 + word = '<|endoftext|>' + elif index == vocab.unknown_idx: + word = '<|endoftext|>' + word_pieces = self.tokenzier.tokenize(word) + word_pieces = self.tokenzier.convert_tokens_to_ids(word_pieces) + word_to_wordpieces.append(word_pieces) + word_pieces_lengths.append(len(word_pieces)) + self._word_pad_index = vocab.padding_idx + self._endoftext_index = self.tokenzier.encoder.get('<|endoftext|>') + self._wordpiece_pad_index = self.tokenzier.encoder.get('<|endoftext|>') # 需要用于生成word_piece + self.word_to_wordpieces = np.array(word_to_wordpieces) + self.register_buffer('word_pieces_lengths', torch.LongTensor(word_pieces_lengths)) + logger.debug("Successfully generate word pieces.") + + def forward(self, words): + """ + + :param words: torch.LongTensor, batch_size x max_len + :return: num_layers x batch_size x max_len x hidden_size或者num_layers x batch_size x (max_len+2) x hidden_size + """ + with torch.no_grad(): + batch_size, max_word_len = words.size() + word_mask = words.ne(self._word_pad_index) # 为1的地方有word + seq_len = word_mask.sum(dim=-1) + batch_word_pieces_length = self.word_pieces_lengths[words].masked_fill(word_mask.eq(False), + 0) # batch_size x max_len + word_pieces_lengths = batch_word_pieces_length.sum(dim=-1) # batch_size + max_word_piece_length = batch_word_pieces_length.sum(dim=-1).max().item() # 表示word piece的长度(包括padding) + if max_word_piece_length > self._max_position_embeddings: + if self.auto_truncate: + word_pieces_lengths = word_pieces_lengths.masked_fill( + word_pieces_lengths > self._max_position_embeddings, + self._max_position_embeddings) + else: + raise RuntimeError( + "After split words into word pieces, the lengths of word pieces are longer than the " + f"maximum allowed sequence length:{self._max_position_embeddings} of GPT2. You can set " + f"`auto_truncate=True` for BertEmbedding to automatically truncate overlong input.") + + word_pieces = words.new_full((batch_size, min(max_word_piece_length, self._max_position_embeddings)), + fill_value=self._wordpiece_pad_index) + word_labels = word_pieces.clone() + attn_masks = torch.zeros_like(word_pieces) + # 1. 
获取words的word_pieces的id,以及对应的span范围 + word_indexes = words.cpu().numpy() + for i in range(batch_size): + word_pieces_i = list(chain(*self.word_to_wordpieces[word_indexes[i, :seq_len[i]]])) + if self.auto_truncate and len(word_pieces_i) > self._max_position_embeddings: + word_pieces_i = word_pieces_i[:self._max_position_embeddings] + word_pieces[i, :word_pieces_lengths[i]] = torch.LongTensor(word_pieces_i) + word_labels[i, word_pieces_lengths[i]:].fill_(-100) # 计算lm_loss用的 + attn_masks[i, :word_pieces_lengths[i]].fill_(1) + # 添加<|endoftext|>, 默认不添加了 + # word_pieces[:, 0].fill_(self._endoftext_index) + batch_indexes = torch.arange(batch_size).to(words) + # 2. 获取hidden的结果,根据word_pieces进行对应的pool计算 + # all_outputs: [batch_size x max_len x hidden_size, batch_size x max_len x hidden_size, ...] + if self.lm_loss: + gpt2_outputs = self.encoder(word_pieces, token_type_ids=None, attention_mask=attn_masks, labels=word_labels, + output_attentions=False) + gpt2_outputs, self._lm_loss_value = gpt2_outputs[-1], gpt2_outputs[0] # n_layers x batch_size x max_len x hidden_size + else: + gpt2_outputs = self.encoder(word_pieces, token_type_ids=None, attention_mask=attn_masks, + output_attentions=False)[-1] + outputs = gpt2_outputs[-1].new_zeros(len(self.layers), batch_size, max_word_len, + gpt2_outputs[-1].size(-1)) + + batch_word_pieces_cum_length = batch_word_pieces_length.new_zeros(batch_size, max_word_len+1) + batch_word_pieces_cum_length[:, 1:] = batch_word_pieces_length.cumsum(dim=-1) # batch_size x max_len + + if self.pool_method == 'first': + batch_word_pieces_cum_length = batch_word_pieces_cum_length[:, :seq_len.max()] + batch_word_pieces_cum_length.masked_fill_(batch_word_pieces_cum_length.ge(max_word_piece_length), 0) + _batch_indexes = batch_indexes[:, None].expand((batch_size, batch_word_pieces_cum_length.size(1))) + elif self.pool_method == 'last': + batch_word_pieces_cum_length = batch_word_pieces_cum_length[:, :seq_len.max()] - 1 + batch_word_pieces_cum_length.masked_fill_(batch_word_pieces_cum_length.ge(max_word_piece_length), 0) + _batch_indexes = batch_indexes[:, None].expand((batch_size, batch_word_pieces_cum_length.size(1))) + + for l_index, l in enumerate(self.layers): + output_layer = gpt2_outputs[l] + real_word_piece_length = output_layer.size(1) + if max_word_piece_length > real_word_piece_length: # 如果实际上是截取出来的 + paddings = output_layer.new_zeros(batch_size, + max_word_piece_length - real_word_piece_length, + output_layer.size(2)) + output_layer = torch.cat((output_layer, paddings), dim=1).contiguous() + # 从word_piece collapse到word的表示 + # truncate_output_layer = output_layer # 删除endoftext batch_size x len x hidden_size + if self.pool_method == 'first': + tmp = output_layer[_batch_indexes, batch_word_pieces_cum_length] + tmp = tmp.masked_fill(word_mask[:, :batch_word_pieces_cum_length.size(1), None].eq(False), 0) + outputs[l_index, :, :batch_word_pieces_cum_length.size(1)] = tmp + elif self.pool_method == 'last': + tmp = output_layer[_batch_indexes, batch_word_pieces_cum_length] + tmp = tmp.masked_fill(word_mask[:, :batch_word_pieces_cum_length.size(1), None].eq(False), 0) + outputs[l_index, :, :batch_word_pieces_cum_length.size(1)] = tmp + elif self.pool_method == 'max': + for i in range(batch_size): + for j in range(seq_len[i]): + start, end = batch_word_pieces_cum_length[i, j], batch_word_pieces_cum_length[i, j + 1] + outputs[l_index, i, j], _ = torch.max(output_layer[i, start:end], dim=-2) + else: + for i in range(batch_size): + for j in range(seq_len[i]): + start, end = 
batch_word_pieces_cum_length[i, j], batch_word_pieces_cum_length[i, j + 1] + outputs[l_index, i, j] = torch.mean(output_layer[i, start:end], dim=-2) + + # 3. 最终的embedding结果 + return outputs + + def get_lm_loss(self): + """ + 当language_model为True时,通过该接口可以获取最近传入的一个batch的lanuage model loss + + :return: + """ + return self._lm_loss_value + + +class _GPT2WordPieceModel(nn.Module): + """ + 这个模块用于直接计算word_piece的结果. + + """ + + def __init__(self, model_dir_or_name: str, layers: str = '-1', language_model: bool=False): + super().__init__() + + self.tokenizer = GPT2Tokenizer.from_pretrained(model_dir_or_name) + if language_model: + self.encoder = GPT2LMHeadModel.from_pretrained(model_dir_or_name) + else: + self.encoder = GPT2Model.from_pretrained(model_dir_or_name) + + self.lm_loss = language_model + + # 检查encoder_layer_number是否合理 + encoder_layer_number = self.encoder.config.n_layer + + if isinstance(layers, list): + self.layers = [int(l) for l in layers] + elif isinstance(layers, str): + self.layers = list(map(int, layers.split(','))) + else: + raise TypeError("`layers` only supports str or list[int]") + + for layer in self.layers: + if layer < 0: + assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ + f"a gpt2 model with {encoder_layer_number} layers." + else: + assert layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ + f"a gpt2 model with {encoder_layer_number} layers." + + self._endoftext_index = self.tokenizer.encoder.get('<|endoftext|>') + self._wordpiece_pad_index = self.tokenizer.encoder.get('<|endoftext|>') # 原来并没有pad,使用这个值替代一下。这个pad值并不重要,因为是从左到右计算的 + self._max_position_embeddings = self.encoder.config.max_position_embeddings + + def index_datasets(self, *datasets, field_name, add_endoftext=False, add_prefix_space=True): + """ + 使用gpt2的tokenizer新生成word_pieces列加入到datasets中,并将他们设置为input。如果开头不是<|endoftext|>, 且将 + word_pieces这一列的pad value设置为了bert的pad value。 + + :param datasets: DataSet对象 + :param field_name: 基于哪一列index + :param bool add_prefix_space: 是否添加句首的空格 + :return: + """ + convert_words_to_word_pieces = partial(self.convert_words_to_word_pieces, add_endoftext=add_endoftext, + add_prefix_space=add_prefix_space) + for index, dataset in enumerate(datasets): + try: + dataset.apply_field(convert_words_to_word_pieces, field_name=field_name, new_field_name='word_pieces', + is_input=True) + dataset.set_pad_val('word_pieces', self._wordpiece_pad_index) + except Exception as e: + logger.error(f"Exception happens when processing the {index} dataset.") + raise e + + def convert_words_to_word_pieces(self, words, add_endoftext=False, add_prefix_space=True): + """ + + :param list[str],str words: 将str数据转换为index + :param bool add_endoftext: 是否在句首增加endoftext + :param bool add_prefix_space: 是否添加句首的空格 + :return: + """ + word_pieces = [] + if isinstance(words, str): + words = self.tokenizer.tokenize(words, add_prefix_space=add_prefix_space) + word_piece_ids = self.tokenizer.convert_tokens_to_ids(words) + word_pieces.extend(word_piece_ids) + else: + for word in words: + tokens = self.tokenizer.tokenize(word, add_prefix_space=add_prefix_space) + word_piece_ids = self.tokenizer.convert_tokens_to_ids(tokens) + word_pieces.extend(word_piece_ids) + if add_endoftext: + if word_pieces[0] != self._endoftext_index: + word_pieces.insert(0, self._endoftext_index) + if len(word_pieces) > self._max_position_embeddings: + word_pieces[self._max_position_embeddings - 1] = word_pieces[-1] + word_pieces = word_pieces[:self._max_position_embeddings] + return 
word_pieces + + def forward(self, word_pieces, token_type_ids=None): + """ + + :param word_pieces: torch.LongTensor, batch_size x max_len + :param token_type_ids: torch.LongTensor, batch_size x max_len + :return: num_layers x batch_size x max_len x hidden_size或者num_layers x batch_size x (max_len+2) x hidden_size + """ + batch_size, max_len = word_pieces.size() + + attn_masks = word_pieces.ne(self._wordpiece_pad_index) # 可能会错误导致开头的词被mask掉 + word_pieces = word_pieces.masked_fill(attn_masks.eq(0), self._endoftext_index) # 替换pad的值 + if self.lm_loss: + labels = word_pieces.clone() + labels = labels.masked_fill(labels.eq(self._wordpiece_pad_index), -100) + gpt_outputs = self.encoder(word_pieces, token_type_ids=token_type_ids, attention_mask=attn_masks, + output_attentions=False, labels=labels) + gpt_outputs, self._lm_loss_value = gpt_outputs[-1], gpt_outputs[0] # n_layers x batch_size x max_len x hidden_size + else: + gpt_outputs = self.encoder(word_pieces, token_type_ids=token_type_ids, attention_mask=attn_masks, + output_attentions=False) + gpt_outputs = gpt_outputs[-1] + # output_layers = [self.layers] # len(self.layers) x batch_size x max_word_piece_length x hidden_size + outputs = gpt_outputs[0].new_zeros((len(self.layers), batch_size, max_len, gpt_outputs[0].size(-1))) + for l_index, l in enumerate(self.layers): + outputs[l_index] = gpt_outputs[l] # 删除开头 + return outputs + + def get_lm_loss(self): + """ + 当language_model为True时,通过该接口可以获取最近传入的一个batch的lanuage model loss + + :return: + """ + return self._lm_loss_value + diff --git a/fastNLP/embeddings/roberta_embedding.py b/fastNLP/embeddings/roberta_embedding.py index 46b4ebb2..4e77a310 100644 --- a/fastNLP/embeddings/roberta_embedding.py +++ b/fastNLP/embeddings/roberta_embedding.py @@ -1,5 +1,10 @@ +r""" +.. 
todo:: + doc +""" -import os + +from functools import partial import collections import warnings from itertools import chain @@ -10,7 +15,8 @@ import torch.nn as nn from .contextual_embedding import ContextualEmbedding from ..core import logger, Vocabulary -from ..modules.encoder.roberta import RobertaModel, RobertaTokenizer +from ..modules.encoder.roberta import RobertaModel +from ..modules.tokenizer import RobertaTokenizer class RobertaEmbedding(ContextualEmbedding): @@ -20,7 +26,8 @@ class RobertaEmbedding(ContextualEmbedding): 时切分),在分割之后长度可能会超过最大长度限制。 RobertaEmbedding可以支持自动下载权重,当前支持的模型: - ..TODO + en: roberta-base + en-large: roberta-large Example:: @@ -43,8 +50,8 @@ class RobertaEmbedding(ContextualEmbedding): :param ~fastNLP.Vocabulary vocab: 词表 :param str model_dir_or_name: 模型所在目录或者模型的名称。当传入模型所在目录时,目录中应该包含一个词表文件 (以vocab.json作为后缀名), 权重文件(以.bin作为文件后缀名), 配置文件(以config.json作为后缀名)。 - :param str layers: 输出embedding表示来自于哪些层,不同层的结果按照layers中的顺序在最后一维concat起来。以','隔开层数,层的序号是 - 从0开始,可以以负数去索引倒数几层。 + :param str,list layers: 输出embedding表示来自于哪些层,不同层的结果按照layers中的顺序在最后一维concat起来。以','隔开层数,层的序号是 + 从0开始,可以以负数去索引倒数几层。layer=0为embedding层(包括wordpiece embedding, position embedding) :param str pool_method: 因为在bert中,每个word会被表示为多个word pieces, 当获取一个word的表示的时候,怎样从它的word pieces 中计算得到它对应的表示。支持 ``last`` , ``first`` , ``avg`` , ``max``。 :param float word_dropout: 以多大的概率将一个词替换为unk。这样既可以训练unk也是一定的regularize。 @@ -61,24 +68,30 @@ class RobertaEmbedding(ContextualEmbedding): :param kwargs: bool only_use_pretrain_bpe: 仅使用出现在pretrain词表中的bpe,如果该词没法tokenize则使用unk。如果embedding不需要更新 建议设置为True。 + int min_freq: 仅在only_use_pretrain_bpe为False有效,大于等于该次数的词会被新加入BERT的BPE词表中 + bool truncate_embed: 是否仅保留用到的bpe(这样会减内存占用和加快速度) """ super().__init__(vocab, word_dropout=word_dropout, dropout=dropout) if word_dropout > 0: assert vocab.unknown is not None, "When word_drop > 0, Vocabulary must contain the unknown token." 
- self._word_sep_index = None + self._word_sep_index = -100 if '' in vocab: self._word_sep_index = vocab[''] + self._word_cls_index = -100 + if '' in vocab: + self._word_cls_index = vocab[''] + only_use_pretrain_bpe = kwargs.get('only_use_pretrain_bpe', False) + truncate_embed = kwargs.get('truncate_embed', True) + min_freq = kwargs.get('min_freq', 2) - self.model = _WordRobertaModel(model_dir_or_name=model_dir_or_name, vocab=vocab, layers=layers, + self.model = _RobertaWordModel(model_dir_or_name=model_dir_or_name, vocab=vocab, layers=layers, pool_method=pool_method, include_cls_sep=include_cls_sep, - pooled_cls=pooled_cls, auto_truncate=auto_truncate, min_freq=2, - only_use_pretrain_bpe=only_use_pretrain_bpe) - self._sep_index = self.model._sep_index - self._cls_index = self.model._cls_index + pooled_cls=pooled_cls, auto_truncate=auto_truncate, min_freq=min_freq, + only_use_pretrain_bpe=only_use_pretrain_bpe, truncate_embed=truncate_embed) self.requires_grad = requires_grad self._embed_size = len(self.model.layers) * self.model.encoder.hidden_size @@ -111,37 +124,46 @@ class RobertaEmbedding(ContextualEmbedding): """ if self.word_dropout > 0 and self.training: with torch.no_grad(): - not_sep_mask = words.ne(self._sep_index) - not_cls_mask = words.ne(self._cls_index) - if self._word_sep_index: - not_sep_mask = not_sep_mask.__and__(words.ne(self._word_sep_index)) - replaceable_mask = not_sep_mask.__and__(not_cls_mask) mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device) mask = torch.bernoulli(mask).eq(1) # dropout_word越大,越多位置为1 pad_mask = words.ne(self._word_pad_index) - mask = pad_mask.__and__(mask).__and__(replaceable_mask) # pad的位置不为unk + mask = pad_mask.__and__(mask) # pad的位置不为unk + if self._word_sep_index!=-100: + not_sep_mask = words.ne(self._word_sep_index) + mask = mask.__and__(not_sep_mask) + if self._word_cls_index!=-100: + not_cls_mask = words.ne(self._word_cls_index) + mask = mask.__and__(not_cls_mask) words = words.masked_fill(mask, self._word_unk_index) return words -class _WordRobertaModel(nn.Module): +class _RobertaWordModel(nn.Module): def __init__(self, model_dir_or_name: str, vocab: Vocabulary, layers: str = '-1', pool_method: str = 'first', include_cls_sep: bool = False, pooled_cls: bool = False, auto_truncate: bool = False, min_freq=2, - only_use_pretrain_bpe=False): + only_use_pretrain_bpe=False, truncate_embed=True): super().__init__() self.tokenzier = RobertaTokenizer.from_pretrained(model_dir_or_name) self.encoder = RobertaModel.from_pretrained(model_dir_or_name) - self._max_position_embeddings = self.encoder.config.max_position_embeddings + # 由于RobertaEmbedding中设置了padding_idx为1, 且使用了非常神奇的position计算方式,所以-2 + self._max_position_embeddings = self.encoder.config.max_position_embeddings - 2 # 检查encoder_layer_number是否合理 encoder_layer_number = len(self.encoder.encoder.layer) - self.layers = list(map(int, layers.split(','))) + + if isinstance(layers, list): + self.layers = [int(l) for l in layers] + elif isinstance(layers, str): + self.layers = list(map(int, layers.split(','))) + else: + raise TypeError("`layers` only supports str or list[int]") + for layer in self.layers: if layer < 0: assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ f"a roberta model with {encoder_layer_number} layers." 
else: - assert layer < encoder_layer_number, f"The layer index:{layer} is out of scope for " \ + assert layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ f"a roberta model with {encoder_layer_number} layers." assert pool_method in ('avg', 'max', 'first', 'last') @@ -155,7 +177,8 @@ class _WordRobertaModel(nn.Module): # 第一步统计出需要的word_piece, 然后创建新的embed和word_piece_vocab, 然后填入值 word_piece_dict = {'': 1, '': 1} # 用到的word_piece以及新增的 found_count = 0 - self._has_sep_in_vocab = '' in vocab # 用来判断传入的数据是否需要生成token_ids + new_add_to_bpe_vocab = 0 + unsegment_count = 0 if "" in vocab: warnings.warn(" detected in your vocabulary. RobertaEmbedding will add and to the begin " "and end of the input automatically, make sure you don't add and at the begin" @@ -167,33 +190,53 @@ class _WordRobertaModel(nn.Module): word = '' # _words = self.tokenzier.basic_tokenizer._tokenize_chinese_chars(word).split() # 这里暂时不考虑中文内容 word_pieces = [] - word_pieces.extend(self.tokenzier.tokenize(word)) + # 如果这个word不是在句子开头 + word_pieces.extend(self.tokenzier.tokenize(word, add_prefix_space=True)) if len(word_pieces) == 1: if not vocab._is_word_no_create_entry(word): # 如果是train中的值, 但是却没有找到 if index != vocab.unknown_idx and word_pieces[0] == '': # 说明这个词不在原始的word里面 if vocab.word_count[word] >= min_freq and not vocab._is_word_no_create_entry( word) and not only_use_pretrain_bpe: # 出现次数大于这个次数才新增 word_piece_dict[word] = 1 # 新增一个值 + new_add_to_bpe_vocab += 1 + unsegment_count += 1 continue + found_count += 1 for word_piece in word_pieces: word_piece_dict[word_piece] = 1 - found_count += 1 + # 如果这个word是在句子开头 + original_embed = self.encoder.embeddings.word_embeddings.weight.data # 特殊词汇要特殊处理 + if not truncate_embed: # 如果不删除的话需要将已有的加上 + word_piece_dict.update(self.tokenzier.encoder) + embed = nn.Embedding(len(word_piece_dict), original_embed.size(1)) # 新的embed new_word_piece_vocab = collections.OrderedDict() - for index, token in enumerate(['', '']): - word_piece_dict.pop(token, None) - embed.weight.data[index] = original_embed[self.tokenzier.encoder[token]] - new_word_piece_vocab[token] = index + + for index, token in enumerate(['', '', '', '']): + index = word_piece_dict.pop(token, None) + if index is not None: + new_word_piece_vocab[token] = len(new_word_piece_vocab) + embed.weight.data[new_word_piece_vocab[token]] = original_embed[self.tokenzier.encoder[token]] for token in word_piece_dict.keys(): + if token not in new_word_piece_vocab: + new_word_piece_vocab[token] = len(new_word_piece_vocab) + index = new_word_piece_vocab[token] if token in self.tokenzier.encoder: - embed.weight.data[len(new_word_piece_vocab)] = original_embed[self.tokenzier.encoder[token]] + embed.weight.data[index] = original_embed[self.tokenzier.encoder[token]] else: - embed.weight.data[len(new_word_piece_vocab)] = original_embed[self.tokenzier.encoder['']] - new_word_piece_vocab[token] = len(new_word_piece_vocab) - self._reinit_on_new_vocab(new_word_piece_vocab, model_dir_or_name) + embed.weight.data[index] = original_embed[self.tokenzier.encoder['']] + + self.tokenzier._reinit_on_new_vocab(new_word_piece_vocab) self.encoder.embeddings.word_embeddings = embed + self.encoder.config.vocab_size = len(new_word_piece_vocab) + + if unsegment_count>0: + if only_use_pretrain_bpe or new_add_to_bpe_vocab==0: + logger.info(f"{unsegment_count} words are unsegmented.") + else: + logger.info(f"{unsegment_count} words are unsegmented. 
Among them, {new_add_to_bpe_vocab} added to the BPE vocab.") word_to_wordpieces = [] word_pieces_lengths = [] @@ -210,18 +253,10 @@ class _WordRobertaModel(nn.Module): self._sep_index = self.tokenzier.encoder[''] self._word_pad_index = vocab.padding_idx self._wordpiece_pad_index = self.tokenzier.encoder[''] # 需要用于生成word_piece - logger.info("Found(Or segment into word pieces) {} words out of {}.".format(found_count, len(vocab))) self.word_to_wordpieces = np.array(word_to_wordpieces) self.register_buffer('word_pieces_lengths', torch.LongTensor(word_pieces_lengths)) logger.debug("Successfully generate word pieces.") - def _reinit_on_new_vocab(self, vocab, model_dir_or_name): - import json - with open('./.tmp-new-vocab-file.json', 'w') as f: - json.dump(vocab, f) - self.tokenzier = RobertaTokenizer.from_pretrained(model_dir_or_name, vocab_file='./.tmp-new-vocab-file.json') - os.remove('./.tmp-new-vocab-file.json') - def forward(self, words): r""" @@ -232,15 +267,13 @@ class _WordRobertaModel(nn.Module): batch_size, max_word_len = words.size() word_mask = words.ne(self._word_pad_index) # 为1的地方有word seq_len = word_mask.sum(dim=-1) - batch_word_pieces_length = self.word_pieces_lengths[words].masked_fill(word_mask.eq(False), - 0) # batch_size x max_len + batch_word_pieces_length = self.word_pieces_lengths[words].masked_fill(word_mask.eq(False), 0) # batch_size x max_len word_pieces_lengths = batch_word_pieces_length.sum(dim=-1) # batch_size - word_piece_length = batch_word_pieces_length.sum(dim=-1).max().item() # 表示word piece的长度(包括padding) - if word_piece_length + 2 > self._max_position_embeddings: + max_word_piece_length = batch_word_pieces_length.sum(dim=-1).max().item() # 表示word piece的长度(包括padding) + if max_word_piece_length + 2 > self._max_position_embeddings: if self.auto_truncate: word_pieces_lengths = word_pieces_lengths.masked_fill( - word_pieces_lengths + 2 > self._max_position_embeddings, - self._max_position_embeddings - 2) + word_pieces_lengths + 2 > self._max_position_embeddings, self._max_position_embeddings - 2) else: raise RuntimeError( "After split words into word pieces, the lengths of word pieces are longer than the " @@ -248,7 +281,7 @@ class _WordRobertaModel(nn.Module): f"`auto_truncate=True` for BertEmbedding to automatically truncate overlong input.") # +2是由于需要加入 - word_pieces = words.new_full((batch_size, min(word_piece_length + 2, self._max_position_embeddings)), + word_pieces = words.new_full((batch_size, min(max_word_piece_length + 2, self._max_position_embeddings)), fill_value=self._wordpiece_pad_index) attn_masks = torch.zeros_like(word_pieces) # 1. 
获取words的word_pieces的id,以及对应的span范围 @@ -259,17 +292,9 @@ class _WordRobertaModel(nn.Module): word_pieces_i = word_pieces_i[:self._max_position_embeddings - 2] word_pieces[i, 1:word_pieces_lengths[i] + 1] = torch.LongTensor(word_pieces_i) attn_masks[i, :word_pieces_lengths[i] + 2].fill_(1) - # 添加[cls]和[sep] word_pieces[:, 0].fill_(self._cls_index) batch_indexes = torch.arange(batch_size).to(words) word_pieces[batch_indexes, word_pieces_lengths + 1] = self._sep_index - # if self._has_sep_in_vocab: # 但在vocab中出现应该才会需要token_ids - # sep_mask = word_pieces.eq(self._sep_index).long() # batch_size x max_len - # sep_mask_cumsum = sep_mask.flip(dims=[-1]).cumsum(dim=-1).flip(dims=[-1]) - # token_type_ids = sep_mask_cumsum.fmod(2) - # if token_type_ids[0, 0].item(): # 如果开头是奇数,则需要flip一下结果,因为需要保证开头为0 - # token_type_ids = token_type_ids.eq(0).long() - # else: # RoBERTa不需要额外设置token_type_ids token_type_ids = torch.zeros_like(word_pieces) # 2. 获取hidden的结果,根据word_pieces进行对应的pool计算 # all_outputs: [batch_size x max_len x hidden_size, batch_size x max_len x hidden_size, ...] @@ -292,19 +317,19 @@ class _WordRobertaModel(nn.Module): if self.pool_method == 'first': batch_word_pieces_cum_length = batch_word_pieces_cum_length[:, :seq_len.max()] - batch_word_pieces_cum_length.masked_fill_(batch_word_pieces_cum_length.ge(word_piece_length), 0) + batch_word_pieces_cum_length.masked_fill_(batch_word_pieces_cum_length.ge(max_word_piece_length), 0) _batch_indexes = batch_indexes[:, None].expand((batch_size, batch_word_pieces_cum_length.size(1))) elif self.pool_method == 'last': batch_word_pieces_cum_length = batch_word_pieces_cum_length[:, 1:seq_len.max() + 1] - 1 - batch_word_pieces_cum_length.masked_fill_(batch_word_pieces_cum_length.ge(word_piece_length), 0) + batch_word_pieces_cum_length.masked_fill_(batch_word_pieces_cum_length.ge(max_word_piece_length), 0) _batch_indexes = batch_indexes[:, None].expand((batch_size, batch_word_pieces_cum_length.size(1))) for l_index, l in enumerate(self.layers): output_layer = bert_outputs[l] real_word_piece_length = output_layer.size(1) - 2 - if word_piece_length > real_word_piece_length: # 如果实际上是截取出来的 + if max_word_piece_length > real_word_piece_length: # 如果实际上是截取出来的 paddings = output_layer.new_zeros(batch_size, - word_piece_length - real_word_piece_length, + max_word_piece_length - real_word_piece_length, output_layer.size(2)) output_layer = torch.cat((output_layer, paddings), dim=1).contiguous() # 从word_piece collapse到word的表示 @@ -333,7 +358,176 @@ class _WordRobertaModel(nn.Module): outputs[l_index, :, 0] = pooled_cls else: outputs[l_index, :, 0] = output_layer[:, 0] - outputs[l_index, batch_indexes, seq_len + s_shift] = output_layer[batch_indexes, seq_len + s_shift] + outputs[l_index, batch_indexes, seq_len + s_shift] = output_layer[batch_indexes, word_pieces_lengths + s_shift] # 3. 
最终的embedding结果 return outputs + + +class RobertaWordPieceEncoder(nn.Module): + r""" + 读取bert模型,读取之后调用index_dataset方法在dataset中生成word_pieces这一列。 + + BertWordPieceEncoder可以支持自动下载权重,当前支持的模型: + en: roberta-base + en-large: roberta-large + + """ + + def __init__(self, model_dir_or_name: str = 'en-base-uncased', layers: str = '-1', pooled_cls: bool = False, + word_dropout=0, dropout=0, requires_grad: bool = True): + r""" + + :param str model_dir_or_name: 模型所在目录或者模型的名称。默认值为 ``en-base-uncased`` + :param str layers: 最终结果中的表示。以','隔开层数,可以以负数去索引倒数几层。layer=0为embedding层(包括wordpiece embedding, + position embedding) + :param bool pooled_cls: 返回的句子开头的是否使用预训练中的BertPool映射一下。如果下游任务取做预测,一般该值为True。 + :param float word_dropout: 以多大的概率将一个词替换为unk。这样既可以训练unk也是一定的regularize。 + :param float dropout: 以多大的概率对embedding的表示进行Dropout。0.1即随机将10%的值置为0。 + :param bool requires_grad: 是否需要gradient。 + """ + super().__init__() + + self.model = _WordPieceRobertaModel(model_dir_or_name=model_dir_or_name, layers=layers, pooled_cls=pooled_cls) + self._sep_index = self.model._sep_index + self._cls_index = self.model._cls_index + self._wordpiece_pad_index = self.model._wordpiece_pad_index + self._wordpiece_unk_index = self.model._wordpiece_unknown_index + self._embed_size = len(self.model.layers) * self.model.encoder.hidden_size + self.requires_grad = requires_grad + self.word_dropout = word_dropout + self.dropout_layer = nn.Dropout(dropout) + + @property + def embed_size(self): + return self._embed_size + + @property + def embedding_dim(self): + return self._embed_size + + @property + def num_embedding(self): + return self.model.encoder.config.vocab_size + + def index_datasets(self, *datasets, field_name, add_cls_sep=True, add_prefix_space=True): + r""" + 使用bert的tokenizer新生成word_pieces列加入到datasets中,并将他们设置为input,且将word_pieces这一列的pad value设置为了 + bert的pad value。 + + :param ~fastNLP.DataSet datasets: DataSet对象 + :param str field_name: 基于哪一列的内容生成word_pieces列。这一列中每个数据应该是List[str]的形式。 + :param bool add_cls_sep: 如果首尾不是会在首尾额外加入。 + :param bool add_prefix_spance: 是否在句首添加额外的空格,RoBERTa预训练时该值为True + :return: + """ + self.model.index_datasets(*datasets, field_name=field_name, add_cls_sep=add_cls_sep, add_prefix_space=add_prefix_space) + + def forward(self, word_pieces, token_type_ids=None): + r""" + 计算words的bert embedding表示。传入的words中应该自行包含[CLS]与[SEP]的tag。 + + :param words: batch_size x max_len + :param token_type_ids: batch_size x max_len, 用于区分前一句和后一句话. 如果不传入,则自动生成(大部分情况,都不需要输入), + 第一个[SEP]及之前为0, 第二个[SEP]及到第一个[SEP]之间为1; 第三个[SEP]及到第二个[SEP]之间为0,依次往后推。 + :return: torch.FloatTensor. 
batch_size x max_len x (768*len(self.layers)) + """ + word_pieces = self.drop_word(word_pieces) + outputs = self.model(word_pieces) + outputs = torch.cat([*outputs], dim=-1) + + return self.dropout_layer(outputs) + + def drop_word(self, words): + r""" + 按照设定随机将words设置为unknown_index。 + + :param torch.LongTensor words: batch_size x max_len + :return: + """ + if self.word_dropout > 0 and self.training: + with torch.no_grad(): + not_sep_mask = words.ne(self._sep_index) + not_cls_mask = words.ne(self._cls_index) + replaceable_mask = not_sep_mask.__and__(not_cls_mask) + mask = torch.full_like(words, fill_value=self.word_dropout, dtype=torch.float, device=words.device) + mask = torch.bernoulli(mask).eq(1) # dropout_word越大,越多位置为1 + pad_mask = words.ne(self._wordpiece_pad_index) + mask = pad_mask.__and__(mask).__and__(replaceable_mask) # pad的位置不为unk + words = words.masked_fill(mask, self._wordpiece_unk_index) + return words + + +class _WordPieceRobertaModel(nn.Module): + def __init__(self, model_dir_or_name: str, layers: str = '-1', pooled_cls: bool=False): + super().__init__() + + self.tokenzier = RobertaTokenizer.from_pretrained(model_dir_or_name) + self.encoder = RobertaModel.from_pretrained(model_dir_or_name) + # 检查encoder_layer_number是否合理 + encoder_layer_number = len(self.encoder.encoder.layer) + + if isinstance(layers, list): + self.layers = [int(l) for l in layers] + elif isinstance(layers, str): + self.layers = list(map(int, layers.split(','))) + else: + raise TypeError("`layers` only supports str or list[int]") + + for layer in self.layers: + if layer < 0: + assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ + f"a RoBERTa model with {encoder_layer_number} layers." + else: + assert layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ + f"a RoBERTa model with {encoder_layer_number} layers." 
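A usage sketch for the `RobertaWordPieceEncoder` defined above: index a `DataSet` first, then feed the generated `word_pieces` batches to `forward`. The toy `DataSet`, field name, and batching comment are assumptions for illustration only:

    from fastNLP import DataSet
    from fastNLP.embeddings.roberta_embedding import RobertaWordPieceEncoder

    ds = DataSet({'words': [['hello', 'world'], ['this', 'is', 'a', 'test']]})
    encoder = RobertaWordPieceEncoder(model_dir_or_name='en', layers='-1')
    # Adds an input field 'word_pieces' to ds, padded with RoBERTa's pad id.
    encoder.index_datasets(ds, field_name='words', add_cls_sep=True, add_prefix_space=True)
    # During training, each batch's 'word_pieces' (a batch_size x max_len LongTensor) is passed
    # straight to forward():
    #     reps = encoder(word_pieces)  # batch_size x max_len x (hidden_size * len(layers))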
+ + self._cls_index = self.tokenzier.encoder[''] + self._sep_index = self.tokenzier.encoder[''] + self._wordpiece_pad_index = self.tokenzier.encoder[''] # 需要用于生成word_piece + self._wordpiece_unknown_index = self.tokenzier.encoder[''] + self.pooled_cls = pooled_cls + + def index_datasets(self, *datasets, field_name, add_cls_sep=True, add_prefix_space=True): + r""" + 使用bert的tokenizer新生成word_pieces列加入到datasets中,并将他们设置为input。如果首尾不是 + [CLS]与[SEP]会在首尾额外加入[CLS]与[SEP], 且将word_pieces这一列的pad value设置为了bert的pad value。 + + :param datasets: DataSet对象 + :param field_name: 基于哪一列index + :param bool add_cls_sep: 是否在句首句尾添加cls和sep的index + :param bool add_prefix_space: 是否在句子开头添加空格,预训练时RoBERTa该值为True + :return: + """ + + encode_func = partial(self.tokenzier.encode, add_special_tokens=add_cls_sep, add_prefix_space=add_prefix_space) + + for index, dataset in enumerate(datasets): + try: + dataset.apply_field(encode_func, field_name=field_name, new_field_name='word_pieces', + is_input=True) + dataset.set_pad_val('word_pieces', self._wordpiece_pad_index) + except Exception as e: + logger.error(f"Exception happens when processing the {index} dataset.") + raise e + + def forward(self, word_pieces): + r""" + + :param word_pieces: torch.LongTensor, batch_size x max_len + :return: num_layers x batch_size x max_len x hidden_size或者num_layers x batch_size x (max_len+2) x hidden_size + """ + batch_size, max_len = word_pieces.size() + + attn_masks = word_pieces.ne(self._wordpiece_pad_index) + roberta_outputs, pooled_cls = self.encoder(word_pieces, token_type_ids=torch.zeros_like(word_pieces), + attention_mask=attn_masks, + output_all_encoded_layers=True) + # output_layers = [self.layers] # len(self.layers) x batch_size x max_word_piece_length x hidden_size + outputs = roberta_outputs[0].new_zeros((len(self.layers), batch_size, max_len, roberta_outputs[0].size(-1))) + for l_index, l in enumerate(self.layers): + roberta_output = roberta_outputs[l] + if l in (len(roberta_output)-1, -1) and self.pooled_cls: + roberta_output[:, 0] = pooled_cls + outputs[l_index] = roberta_output + return outputs \ No newline at end of file diff --git a/fastNLP/io/file_utils.py b/fastNLP/io/file_utils.py index fe697699..96a9c1ed 100644 --- a/fastNLP/io/file_utils.py +++ b/fastNLP/io/file_utils.py @@ -48,6 +48,18 @@ PRETRAINED_BERT_MODEL_DIR = { 'cn-wwm-ext': "bert-chinese-wwm-ext.zip" } +PRETRAINED_GPT2_MODEL_DIR = { + 'en': 'gpt2.zip', + 'en-medium': 'gpt2-medium.zip', + 'en-large': 'gpt2-large.zip', + 'en-xl': 'gpt2-xl.zip' +} + +PRETRAINED_ROBERTA_MODEL_DIR = { + 'en': 'roberta-base.zip', + 'en-large': 'roberta-large.zip' +} + PRETRAINED_ELMO_MODEL_DIR = { 'en': 'elmo_en_Medium.zip', 'en-small': "elmo_en_Small.zip", @@ -127,14 +139,18 @@ DATASET_DIR = { PRETRAIN_MAP = {'elmo': PRETRAINED_ELMO_MODEL_DIR, "bert": PRETRAINED_BERT_MODEL_DIR, - "static": PRETRAIN_STATIC_FILES} + "static": PRETRAIN_STATIC_FILES, + 'gpt2': PRETRAINED_GPT2_MODEL_DIR, + 'roberta': PRETRAINED_ROBERTA_MODEL_DIR} # 用于扩展fastNLP的下载 FASTNLP_EXTEND_DATASET_URL = 'fastnlp_dataset_url.txt' FASTNLP_EXTEND_EMBEDDING_URL = {'elmo': 'fastnlp_elmo_url.txt', - 'bert':'fastnlp_bert_url.txt', - 'static': 'fastnlp_static_url.txt' -} + 'bert':'fastnlp_bert_url.txt', + 'static': 'fastnlp_static_url.txt', + 'gpt2': 'fastnlp_gpt2_url.txt', + 'roberta': 'fastnlp_roberta_url.txt' + } def cached_path(url_or_filename: str, cache_dir: str = None, name=None) -> Path: @@ -273,7 +289,7 @@ def _get_embedding_url(embed_type, name): return url raise KeyError("There is no {}. 
Only supports {}.".format(name, list(embed_map.keys()))) else: - raise KeyError(f"There is no {embed_type}. Only supports bert, elmo, static") + raise KeyError(f"There is no {embed_type}. Only supports bert, elmo, static, gpt2, roberta") def _read_extend_url_file(filename, name)->str: r""" @@ -281,7 +297,7 @@ def _read_extend_url_file(filename, name)->str: :param str filename: 在默认的路径下寻找file这个文件 :param str name: 需要寻找的资源的名称 - :return: str or None + :return: str,None """ cache_dir = get_cache_path() filepath = os.path.join(cache_dir, filename) @@ -488,3 +504,42 @@ def match_file(dir_name: str, cache_dir: Path) -> str: return matched_filenames[-1] else: raise RuntimeError(f"Duplicate matched files:{matched_filenames}, this should be caused by a bug.") + + +def _get_bert_dir(model_dir_or_name: str = 'en-base-uncased'): + if model_dir_or_name.lower() in PRETRAINED_BERT_MODEL_DIR: + model_url = _get_embedding_url('bert', model_dir_or_name.lower()) + model_dir = cached_path(model_url, name='embedding') + # 检查是否存在 + elif os.path.isdir(os.path.abspath(os.path.expanduser(model_dir_or_name))): + model_dir = os.path.abspath(os.path.expanduser(model_dir_or_name)) + else: + logger.error(f"Cannot recognize BERT dir or name ``{model_dir_or_name}``.") + raise ValueError(f"Cannot recognize BERT dir or name ``{model_dir_or_name}``.") + return str(model_dir) + + +def _get_gpt2_dir(model_dir_or_name: str = 'en'): + if model_dir_or_name.lower() in PRETRAINED_GPT2_MODEL_DIR: + model_url = _get_embedding_url('gpt2', model_dir_or_name.lower()) + model_dir = cached_path(model_url, name='embedding') + # 检查是否存在 + elif os.path.isdir(os.path.abspath(os.path.expanduser(model_dir_or_name))): + model_dir = os.path.abspath(os.path.expanduser(model_dir_or_name)) + else: + logger.error(f"Cannot recognize GPT2 dir or name ``{model_dir_or_name}``.") + raise ValueError(f"Cannot recognize GPT2 dir or name ``{model_dir_or_name}``.") + return str(model_dir) + + +def _get_roberta_dir(model_dir_or_name: str = 'en'): + if model_dir_or_name.lower() in PRETRAINED_ROBERTA_MODEL_DIR: + model_url = _get_embedding_url('roberta', model_dir_or_name.lower()) + model_dir = cached_path(model_url, name='embedding') + # 检查是否存在 + elif os.path.isdir(os.path.abspath(os.path.expanduser(model_dir_or_name))): + model_dir = os.path.abspath(os.path.expanduser(model_dir_or_name)) + else: + logger.error(f"Cannot recognize RoBERTa dir or name ``{model_dir_or_name}``.") + raise ValueError(f"Cannot recognize RoBERTa dir or name ``{model_dir_or_name}``.") + return str(model_dir) diff --git a/fastNLP/modules/__init__.py b/fastNLP/modules/__init__.py index 53651b59..d8eab276 100644 --- a/fastNLP/modules/__init__.py +++ b/fastNLP/modules/__init__.py @@ -49,7 +49,15 @@ __all__ = [ "TimestepDropout", - 'summary' + 'summary', + + "BertTokenizer", + "BertModel", + + "RobertaTokenizer", + "RobertaModel", + + "GPT2Tokenizer" ] import sys @@ -61,5 +69,6 @@ from .dropout import TimestepDropout from .encoder import * from .utils import summary from ..doc_utils import doc_process +from .tokenizer import * doc_process(sys.modules[__name__]) diff --git a/fastNLP/modules/decoder/seq2seq_decoder.py b/fastNLP/modules/decoder/seq2seq_decoder.py new file mode 100755 index 00000000..3933867a --- /dev/null +++ b/fastNLP/modules/decoder/seq2seq_decoder.py @@ -0,0 +1,109 @@ +# coding=utf-8 +__all__ = [ + "TransformerPast", + "Past", + "Decoder" +] +import torch +from torch import nn +import abc +import torch.nn.functional as F +from ...embeddings import StaticEmbedding +import numpy 
as np +from typing import Union, Tuple +from ...embeddings.utils import get_embeddings +from torch.nn import LayerNorm +import math + + +class Past: + def __init__(self): + pass + + @abc.abstractmethod + def num_samples(self): + pass + + @abc.abstractmethod + def reorder_past(self, indices: torch.LongTensor): + """ + 根据indices中的index,将past的中状态置为正确的顺序。inplace改变 + + :param torch.LongTensor indices: + :param Past past: + :return: + """ + raise NotImplemented + + +class TransformerPast(Past): + def __init__(self, encoder_outputs: torch.Tensor = None, encoder_mask: torch.Tensor = None, + num_decoder_layer: int = 6): + """ + + :param encoder_outputs: (batch,src_seq_len,dim) + :param encoder_mask: (batch,src_seq_len) + :param encoder_key: list of (batch, src_seq_len, dim) + :param encoder_value: + :param decoder_prev_key: + :param decoder_prev_value: + """ + super().__init__() + self.encoder_outputs = encoder_outputs + self.encoder_mask = encoder_mask + self.encoder_key = [None] * num_decoder_layer + self.encoder_value = [None] * num_decoder_layer + self.decoder_prev_key = [None] * num_decoder_layer + self.decoder_prev_value = [None] * num_decoder_layer + + def num_samples(self): + if self.encoder_outputs is not None: + return self.encoder_outputs.size(0) + return None + + def _reorder_state(self, state, indices): + if type(state) == torch.Tensor: + state = state.index_select(index=indices, dim=0) + elif type(state) == list: + for i in range(len(state)): + assert state[i] is not None + state[i] = state[i].index_select(index=indices, dim=0) + else: + raise ValueError('State does not support other format') + + return state + + def reorder_past(self, indices: torch.LongTensor): + self.encoder_outputs = self._reorder_state(self.encoder_outputs, indices) + self.encoder_mask = self._reorder_state(self.encoder_mask, indices) + self.encoder_key = self._reorder_state(self.encoder_key, indices) + self.encoder_value = self._reorder_state(self.encoder_value, indices) + self.decoder_prev_key = self._reorder_state(self.decoder_prev_key, indices) + self.decoder_prev_value = self._reorder_state(self.decoder_prev_value, indices) + return self + + +class Decoder(nn.Module): + def __init__(self): + super().__init__() + + @abc.abstractmethod + def decode(self, *args, **kwargs) -> Tuple[torch.Tensor, Past]: + """ + 当模型进行解码时,使用这个函数。返回一个batch_size x vocab_size的结果与更新的Past状态。需要考虑一种特殊情况,即tokens长度不是1,即给定了 + 解码句子开头的情况,这种情况需要查看Past中是否正确计算了decode的状态。 + + :return: tensor:batch_size x vocab_size, past: Past + """ + raise NotImplemented + + @abc.abstractmethod + def reorder_past(self, indices: torch.LongTensor, past: Past): + """ + 根据indices中的index,将past的中状态置为正确的顺序。inplace改变 + + :param torch.LongTensor indices: + :param Past past: + :return: + """ + raise NotImplemented \ No newline at end of file diff --git a/fastNLP/modules/encoder/__init__.py b/fastNLP/modules/encoder/__init__.py index 3c9af22d..fccb2c00 100644 --- a/fastNLP/modules/encoder/__init__.py +++ b/fastNLP/modules/encoder/__init__.py @@ -30,6 +30,10 @@ __all__ = [ "MultiHeadAttention", "BiAttention", "SelfAttention", + + "BertModel", + + "RobertaModel", ] from .attention import MultiHeadAttention, BiAttention, SelfAttention diff --git a/fastNLP/modules/encoder/bert.py b/fastNLP/modules/encoder/bert.py index 32edafbe..bfa1c6a1 100644 --- a/fastNLP/modules/encoder/bert.py +++ b/fastNLP/modules/encoder/bert.py @@ -4,26 +4,23 @@ r"""undocumented """ __all__ = [ - "BertModel" + "BertModel", ] -import collections import copy import json import math -import os -import 
unicodedata import torch from torch import nn import numpy as np from ..utils import _get_file_name_base_on_postfix -from ...io.file_utils import _get_embedding_url, cached_path, PRETRAINED_BERT_MODEL_DIR +from ...io.file_utils import _get_bert_dir from ...core import logger + CONFIG_FILE = 'bert_config.json' -VOCAB_NAME = 'vocab.txt' BERT_KEY_RENAME_MAP_1 = { 'gamma': 'weight', @@ -152,33 +149,22 @@ def swish(x): ACT2FN = {"gelu": gelu, "relu": torch.nn.functional.relu, "swish": swish} -def _get_bert_dir(model_dir_or_name: str = 'en-base-uncased'): - if model_dir_or_name.lower() in PRETRAINED_BERT_MODEL_DIR: - model_url = _get_embedding_url('bert', model_dir_or_name.lower()) - model_dir = cached_path(model_url, name='embedding') - # 检查是否存在 - elif os.path.isdir(os.path.abspath(os.path.expanduser(model_dir_or_name))): - model_dir = os.path.abspath(os.path.expanduser(model_dir_or_name)) - else: - logger.error(f"Cannot recognize BERT dir or name ``{model_dir_or_name}``.") - raise ValueError(f"Cannot recognize BERT dir or name ``{model_dir_or_name}``.") - return str(model_dir) - - -class BertLayerNorm(nn.Module): - def __init__(self, hidden_size, eps=1e-12): - r"""Construct a layernorm module in the TF style (epsilon inside the square root). - """ - super(BertLayerNorm, self).__init__() - self.weight = nn.Parameter(torch.ones(hidden_size)) - self.bias = nn.Parameter(torch.zeros(hidden_size)) - self.variance_epsilon = eps +# class BertLayerNorm(nn.Module): +# def __init__(self, hidden_size, eps=1e-12): +# r"""Construct a layernorm module in the TF style (epsilon inside the square root). +# """ +# super(BertLayerNorm, self).__init__() +# self.weight = nn.Parameter(torch.ones(hidden_size)) +# self.bias = nn.Parameter(torch.zeros(hidden_size)) +# self.variance_epsilon = eps +# +# def forward(self, x): +# u = x.mean(-1, keepdim=True) +# s = (x - u).pow(2).mean(-1, keepdim=True) +# x = (x - u) / torch.sqrt(s + self.variance_epsilon) +# return self.weight * x + self.bias - def forward(self, x): - u = x.mean(-1, keepdim=True) - s = (x - u).pow(2).mean(-1, keepdim=True) - x = (x - u) / torch.sqrt(s + self.variance_epsilon) - return self.weight * x + self.bias +BertLayerNorm = torch.nn.LayerNorm class DistilBertEmbeddings(nn.Module): @@ -518,6 +504,7 @@ class BertModel(nn.Module): pooled_output = sequence_output[:, 0] if not output_all_encoded_layers: encoded_layers = encoded_layers[-1] + encoded_layers.insert(0, embedding_output) return encoded_layers, pooled_output @classmethod @@ -615,435 +602,3 @@ class BertModel(nn.Module): logger.info(f"Load pre-trained {model_type} parameters from file {weights_path}.") return model - -def whitespace_tokenize(text): - r"""Runs basic whitespace cleaning and splitting on a piece of text.""" - text = text.strip() - if not text: - return [] - tokens = text.split() - return tokens - - -class WordpieceTokenizer(object): - r"""Runs WordPiece tokenization.""" - - def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): - self.vocab = vocab - self.unk_token = unk_token - self.max_input_chars_per_word = max_input_chars_per_word - - def tokenize(self, text): - r"""Tokenizes a piece of text into its word pieces. - - This uses a greedy longest-match-first algorithm to perform tokenization - using the given vocabulary. - - For example: - input = "unaffable" - output = ["un", "##aff", "##able"] - - Args: - text: A single token or whitespace separated tokens. This should have - already been passed through `BasicTokenizer`. 
- - Returns: - A list of wordpiece tokens. - """ - - output_tokens = [] - for token in whitespace_tokenize(text): - chars = list(token) - if len(chars) > self.max_input_chars_per_word: - output_tokens.append(self.unk_token) - continue - - is_bad = False - start = 0 - sub_tokens = [] - while start < len(chars): - end = len(chars) - cur_substr = None - while start < end: - substr = "".join(chars[start:end]) - if start > 0: - substr = "##" + substr - if substr in self.vocab: - cur_substr = substr - break - end -= 1 - if cur_substr is None: - is_bad = True - break - sub_tokens.append(cur_substr) - start = end - - if is_bad: - output_tokens.append(self.unk_token) - else: - output_tokens.extend(sub_tokens) - if len(output_tokens) == 0: # 防止里面全是空格或者回车符号 - return [self.unk_token] - return output_tokens - - -def load_vocab(vocab_file): - r"""Loads a vocabulary file into a dictionary.""" - vocab = collections.OrderedDict() - index = 0 - with open(vocab_file, "r", encoding="utf-8") as reader: - while True: - token = reader.readline() - if not token: - break - token = token.strip() - vocab[token] = index - index += 1 - return vocab - - -class BasicTokenizer(object): - r"""Runs basic tokenization (punctuation splitting, lower casing, etc.).""" - - def __init__(self, - do_lower_case=True, - never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")): - r"""Constructs a BasicTokenizer. - - Args: - do_lower_case: Whether to lower case the input. - """ - self.do_lower_case = do_lower_case - self.never_split = never_split - - def tokenize(self, text): - r"""Tokenizes a piece of text.""" - text = self._clean_text(text) - # This was added on November 1st, 2018 for the multilingual and Chinese - # models. This is also applied to the English models now, but it doesn't - # matter since the English models were not trained on any Chinese data - # and generally don't have any Chinese data in them (there are Chinese - # characters in the vocabulary because Wikipedia does have some Chinese - # words in the English Wikipedia.). 
- text = self._tokenize_chinese_chars(text) - orig_tokens = whitespace_tokenize(text) - split_tokens = [] - for token in orig_tokens: - if self.do_lower_case and token not in self.never_split: - token = token.lower() - token = self._run_strip_accents(token) - split_tokens.extend(self._run_split_on_punc(token)) - - output_tokens = whitespace_tokenize(" ".join(split_tokens)) - return output_tokens - - def _run_strip_accents(self, text): - r"""Strips accents from a piece of text.""" - text = unicodedata.normalize("NFD", text) - output = [] - for char in text: - cat = unicodedata.category(char) - if cat == "Mn": - continue - output.append(char) - return "".join(output) - - def _run_split_on_punc(self, text): - r"""Splits punctuation on a piece of text.""" - if text in self.never_split: - return [text] - chars = list(text) - i = 0 - start_new_word = True - output = [] - while i < len(chars): - char = chars[i] - if _is_punctuation(char): - output.append([char]) - start_new_word = True - else: - if start_new_word: - output.append([]) - start_new_word = False - output[-1].append(char) - i += 1 - - return ["".join(x) for x in output] - - def _tokenize_chinese_chars(self, text): - r"""Adds whitespace around any CJK character.""" - output = [] - for char in text: - cp = ord(char) - if self._is_chinese_char(cp): - output.append(" ") - output.append(char) - output.append(" ") - else: - output.append(char) - return "".join(output) - - def _is_chinese_char(self, cp): - r"""Checks whether CP is the codepoint of a CJK character.""" - # This defines a "chinese character" as anything in the CJK Unicode block: - # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) - # - # Note that the CJK Unicode block is NOT all Japanese and Korean characters, - # despite its name. The modern Korean Hangul alphabet is a different block, - # as is Japanese Hiragana and Katakana. Those alphabets are used to write - # space-separated words, so they are not treated specially and handled - # like the all of the other languages. - if (((cp >= 0x4E00) and (cp <= 0x9FFF)) or # - ((cp >= 0x3400) and (cp <= 0x4DBF)) or # - ((cp >= 0x20000) and (cp <= 0x2A6DF)) or # - ((cp >= 0x2A700) and (cp <= 0x2B73F)) or # - ((cp >= 0x2B740) and (cp <= 0x2B81F)) or # - ((cp >= 0x2B820) and (cp <= 0x2CEAF)) or - ((cp >= 0xF900) and (cp <= 0xFAFF)) or # - ((cp >= 0x2F800) and (cp <= 0x2FA1F))): # - return True - - return False - - def _clean_text(self, text): - r"""Performs invalid character removal and whitespace cleanup on text.""" - output = [] - for char in text: - cp = ord(char) - if cp == 0 or cp == 0xfffd or _is_control(char): - continue - if _is_whitespace(char): - output.append(" ") - else: - output.append(char) - return "".join(output) - - -def _is_whitespace(char): - r"""Checks whether `chars` is a whitespace character.""" - # \t, \n, and \r are technically contorl characters but we treat them - # as whitespace since they are generally considered as such. - if char == " " or char == "\t" or char == "\n" or char == "\r": - return True - cat = unicodedata.category(char) - if cat == "Zs": - return True - return False - - -def _is_control(char): - r"""Checks whether `chars` is a control character.""" - # These are technically control characters but we count them as whitespace - # characters. 
- if char == "\t" or char == "\n" or char == "\r": - return False - cat = unicodedata.category(char) - if cat.startswith("C"): - return True - return False - - -def _is_punctuation(char): - r"""Checks whether `chars` is a punctuation character.""" - cp = ord(char) - # We treat all non-letter/number ASCII as punctuation. - # Characters such as "^", "$", and "`" are not in the Unicode - # Punctuation class but we treat them as punctuation anyways, for - # consistency. - if (((cp >= 33) and (cp <= 47)) or ((cp >= 58) and (cp <= 64)) or - ((cp >= 91) and (cp <= 96)) or ((cp >= 123) and (cp <= 126))): - return True - cat = unicodedata.category(char) - if cat.startswith("P"): - return True - return False - - -class BertTokenizer(object): - r"""Runs end-to-end tokenization: punctuation splitting + wordpiece""" - - def __init__(self, vocab_file, do_lower_case=True, max_len=None, do_basic_tokenize=True, - never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")): - r"""Constructs a BertTokenizer. - - Args: - vocab_file: Path to a one-wordpiece-per-line vocabulary file - do_lower_case: Whether to lower case the input - Only has an effect when do_wordpiece_only=False - do_basic_tokenize: Whether to do basic tokenization before wordpiece. - max_len: An artificial maximum length to truncate tokenized sequences to; - Effective maximum length is always the minimum of this - value (if specified) and the underlying BERT model's - sequence length. - never_split: List of tokens which will never be split during tokenization. - Only has an effect when do_wordpiece_only=False - """ - if not os.path.isfile(vocab_file): - raise ValueError( - "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained " - "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)) - self.vocab = load_vocab(vocab_file) - self.ids_to_tokens = collections.OrderedDict( - [(ids, tok) for tok, ids in self.vocab.items()]) - self.do_basic_tokenize = do_basic_tokenize - if do_basic_tokenize: - self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, - never_split=never_split) - self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) - self.max_len = max_len if max_len is not None else int(1e12) - - def _reinit_on_new_vocab(self, vocab): - r""" - 在load bert之后,可能会对vocab进行重新排列。重新排列之后调用这个函数重新初始化与vocab相关的性质 - - :param vocab: - :return: - """ - self.vocab = vocab - self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) - - def tokenize(self, text): - split_tokens = [] - if self.do_basic_tokenize: - for token in self.basic_tokenizer.tokenize(text): - for sub_token in self.wordpiece_tokenizer.tokenize(token): - split_tokens.append(sub_token) - else: - split_tokens = self.wordpiece_tokenizer.tokenize(text) - return split_tokens - - def convert_tokens_to_ids(self, tokens): - r"""Converts a sequence of tokens into ids using the vocab.""" - ids = [] - for token in tokens: - ids.append(self.vocab[token]) - if len(ids) > self.max_len: - logger.warning( - "Token indices sequence length is longer than the specified maximum " - " sequence length for this BERT model ({} > {}). 
Running this" - " sequence through BERT will result in indexing errors".format(len(ids), self.max_len) - ) - return ids - - def convert_ids_to_tokens(self, ids): - r"""Converts a sequence of ids in wordpiece tokens using the vocab.""" - tokens = [] - for i in ids: - tokens.append(self.ids_to_tokens[i]) - return tokens - - def save_vocabulary(self, vocab_path): - r"""Save the tokenizer vocabulary to a directory or file.""" - index = 0 - if os.path.isdir(vocab_path): - vocab_file = os.path.join(vocab_path, VOCAB_NAME) - else: - vocab_file = vocab_path - with open(vocab_file, "w", encoding="utf-8") as writer: - for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): - if index != token_index: - logger.warning("Saving vocabulary to {}: vocabulary indices are not consecutive." - " Please check that the vocabulary is not corrupted!".format(vocab_file)) - index = token_index - writer.write(token + u'\n') - index += 1 - return vocab_file - - @classmethod - def from_pretrained(cls, model_dir_or_name, *inputs, **kwargs): - r""" - 给定模型的名字或者路径,直接读取vocab. - """ - model_dir = _get_bert_dir(model_dir_or_name) - pretrained_model_name_or_path = _get_file_name_base_on_postfix(model_dir, '.txt') - logger.info("loading vocabulary file {}".format(pretrained_model_name_or_path)) - max_len = 512 - kwargs['max_len'] = min(kwargs.get('max_position_embeddings', int(1e12)), max_len) - # Instantiate tokenizer. - tokenizer = cls(pretrained_model_name_or_path, *inputs, **kwargs) - return tokenizer - - -class _WordPieceBertModel(nn.Module): - r""" - 这个模块用于直接计算word_piece的结果. - - """ - - def __init__(self, model_dir_or_name: str, layers: str = '-1', pooled_cls: bool=False): - super().__init__() - - self.tokenzier = BertTokenizer.from_pretrained(model_dir_or_name) - self.encoder = BertModel.from_pretrained(model_dir_or_name) - # 检查encoder_layer_number是否合理 - encoder_layer_number = len(self.encoder.encoder.layer) - self.layers = list(map(int, layers.split(','))) - for layer in self.layers: - if layer < 0: - assert -layer <= encoder_layer_number, f"The layer index:{layer} is out of scope for " \ - f"a bert model with {encoder_layer_number} layers." - else: - assert layer < encoder_layer_number, f"The layer index:{layer} is out of scope for " \ - f"a bert model with {encoder_layer_number} layers." 
- - self._cls_index = self.tokenzier.vocab['[CLS]'] - self._sep_index = self.tokenzier.vocab['[SEP]'] - self._wordpiece_unknown_index = self.tokenzier.vocab['[UNK]'] - self._wordpiece_pad_index = self.tokenzier.vocab['[PAD]'] # 需要用于生成word_piece - self.pooled_cls = pooled_cls - - def index_dataset(self, *datasets, field_name, add_cls_sep=True): - r""" - 使用bert的tokenizer新生成word_pieces列加入到datasets中,并将他们设置为input。如果首尾不是 - [CLS]与[SEP]会在首尾额外加入[CLS]与[SEP], 且将word_pieces这一列的pad value设置为了bert的pad value。 - - :param datasets: DataSet对象 - :param field_name: 基于哪一列index - :return: - """ - - def convert_words_to_word_pieces(words): - word_pieces = [] - for word in words: - _words = self.tokenzier.basic_tokenizer._tokenize_chinese_chars(word).split() - tokens = [] - for word in _words: - tokens.extend(self.tokenzier.wordpiece_tokenizer.tokenize(word)) - word_piece_ids = self.tokenzier.convert_tokens_to_ids(tokens) - word_pieces.extend(word_piece_ids) - if add_cls_sep: - if word_pieces[0] != self._cls_index: - word_pieces.insert(0, self._cls_index) - if word_pieces[-1] != self._sep_index: - word_pieces.insert(-1, self._sep_index) - return word_pieces - - for index, dataset in enumerate(datasets): - try: - dataset.apply_field(convert_words_to_word_pieces, field_name=field_name, new_field_name='word_pieces', - is_input=True) - dataset.set_pad_val('word_pieces', self._wordpiece_pad_index) - except Exception as e: - logger.error(f"Exception happens when processing the {index} dataset.") - raise e - - def forward(self, word_pieces, token_type_ids=None): - r""" - - :param word_pieces: torch.LongTensor, batch_size x max_len - :param token_type_ids: torch.LongTensor, batch_size x max_len - :return: num_layers x batch_size x max_len x hidden_size或者num_layers x batch_size x (max_len+2) x hidden_size - """ - batch_size, max_len = word_pieces.size() - - attn_masks = word_pieces.ne(self._wordpiece_pad_index) - bert_outputs, pooled_cls = self.encoder(word_pieces, token_type_ids=token_type_ids, attention_mask=attn_masks, - output_all_encoded_layers=True) - # output_layers = [self.layers] # len(self.layers) x batch_size x max_word_piece_length x hidden_size - outputs = bert_outputs[0].new_zeros((len(self.layers), batch_size, max_len, bert_outputs[0].size(-1))) - for l_index, l in enumerate(self.layers): - bert_output = bert_outputs[l] - if l in (len(bert_outputs)-1, -1) and self.pooled_cls: - bert_output[:, 0] = pooled_cls - outputs[l_index] = bert_output - return outputs diff --git a/fastNLP/modules/encoder/gpt2.py b/fastNLP/modules/encoder/gpt2.py index 5b692253..c1d3e2d9 100644 --- a/fastNLP/modules/encoder/gpt2.py +++ b/fastNLP/modules/encoder/gpt2.py @@ -1,773 +1,1069 @@ +r""" -from functools import lru_cache -import json -import regex as re -import itertools - +""" -from ...io.file_utils import _get_embedding_url, cached_path -from ...core import logger +from torch import nn +import torch +from fastNLP.core import logger import os +import copy +import json +import math +from torch.nn import CrossEntropyLoss +from ..utils import _get_file_name_base_on_postfix -PRETRAINED_GPT2_MODEL_DIR = PRETRAINED_BERT_MODEL_DIR = { - 'en-small': 'gpt2-small.zip', - 'en-median': 'gpt2-medium.zip', - 'en': 'gpt2-medium.zip' -} +from fastNLP.modules.decoder.seq2seq_decoder import Decoder, Past +from fastNLP.modules.generator.seq2seq_generator import SequenceGenerator +from typing import Tuple -def _get_gpt2_dir(model_dir_or_name: str = 'en-median'): - if model_dir_or_name.lower() in PRETRAINED_GPT2_MODEL_DIR: - model_url = 
_get_embedding_url('gpt2', model_dir_or_name.lower()) - model_dir = cached_path(model_url, name='embedding') - # 检查是否存在 - elif os.path.isdir(os.path.abspath(os.path.expanduser(model_dir_or_name))): - model_dir = os.path.abspath(os.path.expanduser(model_dir_or_name)) - else: - logger.error(f"Cannot recognize GPT2 dir or name ``{model_dir_or_name}``.") - raise ValueError(f"Cannot recognize GPT2 dir or name ``{model_dir_or_name}``.") - return str(model_dir) +GELU_CONSTANT = math.sqrt(2 / math.pi) -def _get_filepath_based_on_postfix(folder, postfix): - """ - 在folder下寻找结尾为postfix的文件. 比如寻找结尾为vocab.txt的文件。只会匹配第一个,如果有多个不会报错,没有找到会报错。 - 返回该文件的全路径 +from ...io.file_utils import _get_gpt2_dir - :param str folder: - :param str postfix: - :return: - """ - for filename in os.listdir(folder): - if os.path.isfile(os.path.join(folder, filename)): - if filename.endswith(postfix): - return os.path.join(folder, filename) - raise FileNotFoundError(f"File {postfix} is not found in {folder}.") +class GPT2Config: + """Configuration class to store the configuration of a `GPT2Model`. -@lru_cache() -def bytes_to_unicode(): - """ - Returns list of utf-8 byte and a mapping to unicode strings. - We specifically avoids mapping to whitespace/control characters the bpe code barfs on. - - The reversible bpe codes work on unicode strings. - This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. - When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. - This is a signficant percentage of your normal, say, 32K bpe vocab. - To avoid that, we want lookup tables between utf-8 bytes and unicode strings. - """ - bs = ( - list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1)) - ) - cs = bs[:] - n = 0 - for b in range(2 ** 8): - if b not in bs: - bs.append(b) - cs.append(2 ** 8 + n) - n += 1 - cs = [chr(n) for n in cs] - return dict(zip(bs, cs)) - - -def get_pairs(word): - """Return set of symbol pairs in a word. - - Word is represented as tuple of symbols (symbols being variable-length strings). 
- """ - pairs = set() - prev_char = word[0] - for char in word[1:]: - pairs.add((prev_char, char)) - prev_char = char - return pairs - - -VOCAB_FILES_NAMES = { - "vocab_file": "vocab.json", - "merges_file": "merges.txt", -} - - -PRETRAINED_VOCAB_FILES_MAP = { - "vocab_file": { - "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json", - "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-vocab.json", - "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-vocab.json", - "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-vocab.json", - "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-vocab.json", - }, - "merges_file": { - "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt", - "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-merges.txt", - "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-merges.txt", - "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-merges.txt", - "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-merges.txt", - }, -} - - -PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { - "en-small": 1024, - 'en': 1024, - "en-medium": 1024, - "en-large": 1024, - "en-xl": 1024, - "en-distilgpt2": 1024, -} - -PATTERN = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") - - -def gpt2_tokenize(text, add_prefix_space=True): + Args: + vocab_size: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file. + n_positions: Number of positional embeddings. + n_ctx: Size of the causal mask (usually same as n_positions). + n_embd: Dimensionality of the embeddings and hidden states. + n_layer: Number of hidden layers in the Transformer encoder. + n_head: Number of attention heads for each attention layer in + the Transformer encoder. + layer_norm_epsilon: epsilon to use in the layer norm layers + resid_pdrop: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attn_pdrop: The dropout ratio for the attention + probabilities. + embd_pdrop: The dropout ratio for the embeddings. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. """ - :param str text: - :param bool add_prefix_space: 是否在句子前面加上space,如果加上才能保证与GPT2训练时一致 - :return: [] - """ - if text is '': - return [] - if add_prefix_space: - text = ' ' + text - tokens = [] - for token in re.findall(PATTERN, text): - tokens.append(token) - return tokens - - -class GPT2Tokenizer: - """ - GPT-2 BPE tokenizer. Peculiarities: - - Byte-level Byte-Pair-Encoding - - Requires a space to start the input string => the encoding and tokenize methods should be called with the - ``add_prefix_space`` flag set to ``True``. 
- Otherwise, this tokenizer's ``encode``, ``decode``, and ``tokenize`` methods will not conserve - the spaces at the beginning of a string: `tokenizer.decode(tokenizer.encode(" Hello")) = "Hello"` - """ - - vocab_files_names = VOCAB_FILES_NAMES - pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP - - SPECIAL_TOKENS_ATTRIBUTES = [ - "bos_token", - "eos_token", - "unk_token", - "pad_token", - "cls_token", - "mask_token", - ] - - padding_side = "right" - def __init__( self, - vocab_file, - merges_file, - errors="replace", - unk_token="<|endoftext|>", - bos_token="<|endoftext|>", - eos_token="<|endoftext|>", + vocab_size=50257, + n_positions=1024, + n_ctx=1024, + n_embd=768, + n_layer=12, + n_head=12, + resid_pdrop=0.1, + embd_pdrop=0.1, + attn_pdrop=0.1, + layer_norm_epsilon=1e-5, + initializer_range=0.02, + summary_type="cls_index", + summary_use_proj=True, + summary_activation=None, + summary_proj_to_labels=True, + summary_first_dropout=0.1, **kwargs ): - self._bos_token = None - self._eos_token = None - self._unk_token = None - self._sep_token = None - self._pad_token = None - self._cls_token = None - self._mask_token = None - self._pad_token_type_id = 0 - - self.bos_token = bos_token - self.eos_token = eos_token - self.unk_token = unk_token - - self.max_len = int(1e12) - self.padding_side = kwargs.pop("padding_side", self.padding_side) - self.added_tokens_encoder = {} - self.unique_added_tokens_encoder = set() - self.added_tokens_decoder = {} - # inputs and kwargs for saving and re-loading (see ``from_pretrained`` and ``save_pretrained``) - self.init_inputs = () - self.init_kwargs = {} + """Constructs GPT2Config. + Args: + vocab_size: Vocabulary size of `inputs_ids` in `GPT2Model` or a configuration json file. + n_positions: Number of positional embeddings. + n_ctx: Size of the causal mask (usually same as n_positions). + n_embd: Dimensionality of the embeddings and hidden states. + n_layer: Number of hidden layers in the Transformer encoder. + n_head: Number of attention heads for each attention layer in + the Transformer encoder. + layer_norm_epsilon: epsilon to use in the layer norm layers + resid_pdrop: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attn_pdrop: The dropout ratio for the attention + probabilities. + embd_pdrop: The dropout ratio for the embeddings. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. 
+ """ + self.output_attentions = kwargs.pop("output_attentions", False) + self.output_hidden_states = kwargs.pop("output_hidden_states", False) + self.output_past = kwargs.pop("output_past", True) # Not used by all models + self.torchscript = kwargs.pop("torchscript", False) # Only used by PyTorch models + self.use_bfloat16 = kwargs.pop("use_bfloat16", False) + self.pruned_heads = kwargs.pop("pruned_heads", {}) + + # Is decoder is used in encoder-decoder models to differentiate encoder from decoder + self.is_decoder = kwargs.pop("is_decoder", False) + + # Parameters for sequence generation + self.max_length = kwargs.pop("max_length", 20) + self.do_sample = kwargs.pop("do_sample", False) + self.num_beams = kwargs.pop("num_beams", 1) + self.temperature = kwargs.pop("temperature", 1.0) + self.top_k = kwargs.pop("top_k", 50) + self.top_p = kwargs.pop("top_p", 1.0) + self.repetition_penalty = kwargs.pop("repetition_penalty", 1.0) + self.bos_token_id = kwargs.pop("bos_token_id", 0) + self.pad_token_id = kwargs.pop("pad_token_id", 0) + self.eos_token_ids = kwargs.pop("eos_token_ids", 0) + self.length_penalty = kwargs.pop("length_penalty", 1.0) + self.num_return_sequences = kwargs.pop("num_return_sequences", 1) + + # Fine-tuning task arguments + self.finetuning_task = kwargs.pop("finetuning_task", None) + self.num_labels = kwargs.pop("num_labels", 2) + self.id2label = kwargs.pop("id2label", {i: "LABEL_{}".format(i) for i in range(self.num_labels)}) + self.id2label = dict((int(key), value) for key, value in self.id2label.items()) + self.label2id = kwargs.pop("label2id", dict(zip(self.id2label.values(), self.id2label.keys()))) + self.label2id = dict((key, int(value)) for key, value in self.label2id.items()) + + # Additional attributes without default values for key, value in kwargs.items(): - if key in self.SPECIAL_TOKENS_ATTRIBUTES: - if key == "additional_special_tokens": - assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value) - else: - assert isinstance(value, str) + try: setattr(self, key, value) + except AttributeError as err: + logger.error("Can't set {} with value {} for {}".format(key, value, self)) + raise err + + self.vocab_size = vocab_size + self.n_ctx = n_ctx + self.n_positions = n_positions + self.n_embd = n_embd + self.n_layer = n_layer + self.n_head = n_head + self.resid_pdrop = resid_pdrop + self.embd_pdrop = embd_pdrop + self.attn_pdrop = attn_pdrop + self.layer_norm_epsilon = layer_norm_epsilon + self.initializer_range = initializer_range + self.summary_type = summary_type + self.summary_use_proj = summary_use_proj + self.summary_activation = summary_activation + self.summary_first_dropout = summary_first_dropout + self.summary_proj_to_labels = summary_proj_to_labels - self.max_len_single_sentence = ( - self.max_len - ) # no default special tokens - you can update this value if you add special tokens - self.max_len_sentences_pair = ( - self.max_len - ) # no default special tokens - you can update this value if you add special tokens - - with open(vocab_file, encoding="utf-8") as vocab_handle: - self.encoder = json.load(vocab_handle) - self.decoder = {v: k for k, v in self.encoder.items()} - self.errors = errors # how to handle errors in decoding - self.byte_encoder = bytes_to_unicode() - self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} - with open(merges_file, encoding="utf-8") as merges_handle: - bpe_merges = merges_handle.read().split("\n")[1:-1] - bpe_merges = [tuple(merge.split()) for merge in bpe_merges] - self.bpe_ranks = 
dict(zip(bpe_merges, range(len(bpe_merges)))) - self.cache = {} - - def add_special_tokens(self, special_tokens_dict): - """ - Add a dictionary of special tokens (eos, pad, cls...) to the encoder and link them - to class attributes. If special tokens are NOT in the vocabulary, they are added - to it (indexed starting from the last index of the current vocabulary). + @property + def max_position_embeddings(self): + return self.n_positions - Using `add_special_tokens` will ensure your special tokens can be used in several ways: + @property + def hidden_size(self): + return self.n_embd - - special tokens are carefully handled by the tokenizer (they are never split) - - you can easily refer to special tokens using tokenizer class attributes like `tokenizer.cls_token`. This makes it easy to develop model-agnostic training and fine-tuning scripts. + @property + def num_attention_heads(self): + return self.n_head - When possible, special tokens are already registered for provided pretrained models (ex: BertTokenizer cls_token is already registered to be '[CLS]' and XLM's one is also registered to be '') + @property + def num_hidden_layers(self): + return self.n_layer - Args: - special_tokens_dict: dict of string. Keys should be in the list of predefined special attributes: - [``bos_token``, ``eos_token``, ``unk_token``, ``sep_token``, ``pad_token``, ``cls_token``, ``mask_token``, - ``additional_special_tokens``]. + def save_pretrained(self, save_directory): + """ Save a configuration object to the directory `save_directory`, so that it + can be re-loaded using the :func:`~transformers.PretrainedConfig.from_pretrained` class method. + """ + assert os.path.isdir( + save_directory + ), "Saving path should be a directory where the model and configuration can be saved" - Tokens are only added if they are not already in the vocabulary (tested by checking if the tokenizer assign the index of the ``unk_token`` to them). + # If we save using the predefined names, we can load using `from_pretrained` + output_config_file = os.path.join(save_directory, 'config.json') - Returns: - Number of tokens added to the vocabulary. + self.to_json_file(output_config_file) - Examples:: + def to_json_file(self, json_file_path): + """ Save this instance to a json file.""" + with open(json_file_path, "w", encoding="utf-8") as writer: + writer.write(self.to_json_string()) - # Let's see how to add a new classification token to GPT-2 - tokenizer = GPT2Tokenizer.from_pretrained('gpt2') - model = GPT2Model.from_pretrained('gpt2') + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output - special_tokens_dict = {'cls_token': ''} + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" - num_added_toks = tokenizer.add_special_tokens(special_tokens_dict) - print('We have added', num_added_toks, 'tokens') - model.resize_token_embeddings(len(tokenizer)) # Notice: resize_token_embeddings expect to receive the full size of the new vocabulary, i.e. the length of the tokenizer. 
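The JSON helpers above (`to_dict`, `to_json_string`, `to_json_file`), together with `from_json_file` defined just below, are meant to round-trip a configuration through disk. A small sketch, with the file name chosen only for illustration:

    import os

    config = GPT2Config(n_embd=768, n_layer=12, n_head=12)    # defaults from __init__ above
    config.to_json_file('tmp_gpt2_config.json')               # writes to_json_string() output
    restored = GPT2Config.from_json_file('tmp_gpt2_config.json')
    # For these default settings the round-trip should reproduce the same attribute dict.
    print(restored.to_dict() == config.to_dict())
    os.remove('tmp_gpt2_config.json')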
+ @classmethod + def from_json_file(cls, json_file): + """Constructs a `Config` from a json file of parameters.""" + with open(json_file, "r", encoding="utf-8") as reader: + text = reader.read() + dict_obj = json.loads(text) + return cls(**dict_obj) - assert tokenizer.cls_token == '' - """ - if not special_tokens_dict: - return 0 - - added_tokens = 0 - for key, value in special_tokens_dict.items(): - assert key in self.SPECIAL_TOKENS_ATTRIBUTES - if key == "additional_special_tokens": - assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value) - added_tokens += self.add_tokens(value) - else: - assert isinstance(value, str) - added_tokens += self.add_tokens([value]) - logger.debug("Assigning %s to the %s key of the tokenizer", value, key) - setattr(self, key, value) - - return added_tokens - - def add_tokens(self, new_tokens): - """ - Add a list of new tokens to the tokenizer class. If the new tokens are not in the - vocabulary, they are added to it with indices starting from length of the current vocabulary. + @classmethod + def from_pretrained(cls, model_dir_or_name, **kwargs): + r""" Instantiate a :class:`~transformers.PretrainedConfig` (or a derived class) from a pre-trained model configuration. - Args: - new_tokens: list of string. Each string is a token to add. Tokens are only added if they are not already in the vocabulary (tested by checking if the tokenizer assign the index of the ``unk_token`` to them). + Parameters: + model_dir_or_name: - Returns: - Number of tokens added to the vocabulary. + """ + model_dir = _get_gpt2_dir(model_dir_or_name) + tokenizer_config_file = _get_file_name_base_on_postfix(model_dir, 'config.json') - Examples:: + config = cls.from_json_file(tokenizer_config_file) - # Let's see how to increase the vocabulary of Bert model and tokenizer - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') - model = BertModel.from_pretrained('bert-base-uncased') + # if resolved_config_file == config_file: + # logger.info("loading configuration file {}".format(config_file)) + # else: + # logger.info("loading configuration file {} from cache at {}".format(config_file, resolved_config_file)) - num_added_toks = tokenizer.add_tokens(['new_tok1', 'my_new-tok2']) - print('We have added', num_added_toks, 'tokens') - model.resize_token_embeddings(len(tokenizer)) # Notice: resize_token_embeddings expect to receive the full size of the new vocabulary, i.e. the length of the tokenizer. 
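# A minimal illustrative sketch of how the GPT2Config helpers above fit together
# (save_pretrained -> config.json -> from_json_file). It assumes the constructor
# exposes the keyword arguments assigned in __init__ (vocab_size, n_embd, n_layer,
# n_head, ...) with defaults for anything not passed; the directory is temporary.
import os
import tempfile

save_dir = tempfile.mkdtemp()                      # stands in for a real model directory
config = GPT2Config(vocab_size=100, n_embd=16, n_layer=2, n_head=2)
config.save_pretrained(save_dir)                   # writes save_dir/config.json via to_json_file()
reloaded = GPT2Config.from_json_file(os.path.join(save_dir, "config.json"))
assert reloaded.n_embd == config.n_embd == 16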
- """ - if not new_tokens: - return 0 - - to_add_tokens = [] - for token in new_tokens: - assert isinstance(token, str) - if self.init_kwargs.get("do_lower_case", False) and token not in self.all_special_tokens: - token = token.lower() - if ( - token != self.unk_token - and self.convert_tokens_to_ids(token) == self.convert_tokens_to_ids(self.unk_token) - and token not in to_add_tokens - ): - to_add_tokens.append(token) - logger.debug("Adding %s to the vocabulary", token) - - added_tok_encoder = dict((tok, len(self) + i) for i, tok in enumerate(to_add_tokens)) - added_tok_decoder = {v: k for k, v in added_tok_encoder.items()} - self.added_tokens_encoder.update(added_tok_encoder) - self.unique_added_tokens_encoder = set(self.added_tokens_encoder.keys()).union(set(self.all_special_tokens)) - self.added_tokens_decoder.update(added_tok_decoder) - - return len(to_add_tokens) + if hasattr(config, "pruned_heads"): + config.pruned_heads = dict((int(key), value) for key, value in config.pruned_heads.items()) - @property - def bos_token(self): - """ Beginning of sentence token (string). Log an error if used while not having been set. """ - if self._bos_token is None: - logger.error("Using bos_token, but it is not set yet.") - return self._bos_token + # Update config with kwargs if needed + to_remove = [] + for key, value in kwargs.items(): + if hasattr(config, key): + setattr(config, key, value) + to_remove.append(key) + for key in to_remove: + kwargs.pop(key, None) - @property - def eos_token(self): - """ End of sentence token (string). Log an error if used while not having been set. """ - if self._eos_token is None: - logger.error("Using eos_token, but it is not set yet.") - return self._eos_token + return config - @property - def unk_token(self): - """ Unknown token (string). Log an error if used while not having been set. """ - if self._unk_token is None: - logger.error("Using unk_token, but it is not set yet.") - return self._unk_token - @property - def pad_token(self): - """ Padding token (string). Log an error if used while not having been set. """ - if self._pad_token is None: - logger.error("Using pad_token, but it is not set yet.") - return self._pad_token +def gelu(x): + return 0.5 * x * (1 + torch.tanh(GELU_CONSTANT * (x + 0.044715 * torch.pow(x, 3)))) - @property - def cls_token(self): - """ Classification token (string). E.g. to extract a summary of an input sequence leveraging self-attention along the full depth of the model. Log an error if used while not having been set. """ - if self._cls_token is None: - logger.error("Using cls_token, but it is not set yet.") - return self._cls_token - @property - def mask_token(self): - """ Mask token (string). E.g. when training a model with masked-language modeling. Log an error if used while not having been set. """ - if self._mask_token is None: - logger.error("Using mask_token, but it is not set yet.") - return self._mask_token +def prune_conv1d_layer(layer, index, dim=1): + """ Prune a Conv1D layer (a model parameters) to keep only entries in index. + A Conv1D work as a Linear layer (see e.g. BERT) but the weights are transposed. + Return the pruned layer as a new layer with requires_grad=True. + Used to remove heads. 
+ """ + index = index.to(layer.weight.device) + W = layer.weight.index_select(dim, index).clone().detach() + if dim == 0: + b = layer.bias.clone().detach() + else: + b = layer.bias[index].clone().detach() + new_size = list(layer.weight.size()) + new_size[dim] = len(index) + new_layer = Conv1D(new_size[1], new_size[0]).to(layer.weight.device) + new_layer.weight.requires_grad = False + new_layer.weight.copy_(W.contiguous()) + new_layer.weight.requires_grad = True + new_layer.bias.requires_grad = False + new_layer.bias.copy_(b.contiguous()) + new_layer.bias.requires_grad = True + return new_layer + + +class Attention(nn.Module): + def __init__(self, nx, n_ctx, config, scale=False): + super(Attention, self).__init__() + + n_state = nx # in Attention: n_state=768 (nx=n_embd) + # [switch nx => n_state from Block to Attention to keep identical to TF implem] + assert n_state % config.n_head == 0 + self.register_buffer("bias", torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx)) + self.n_head = config.n_head + self.split_size = n_state + self.scale = scale + + self.c_attn = Conv1D(n_state * 3, nx) + self.c_proj = Conv1D(n_state, nx) + self.attn_dropout = nn.Dropout(config.attn_pdrop) + self.resid_dropout = nn.Dropout(config.resid_pdrop) + self.pruned_heads = set() + + def prune_heads(self, heads): + if len(heads) == 0: + return + mask = torch.ones(self.n_head, self.split_size // self.n_head) + heads = set(heads) - self.pruned_heads # Convert to set and emove already pruned heads + for head in heads: + # Compute how many pruned heads are before the head and move the index accordingly + head = head - sum(1 if h < head else 0 for h in self.pruned_heads) + mask[head] = 0 + mask = mask.view(-1).contiguous().eq(1) + index = torch.arange(len(mask))[mask].long() + index_attn = torch.cat([index, index + self.split_size, index + (2 * self.split_size)]) + + # Prune conv1d layers + self.c_attn = prune_conv1d_layer(self.c_attn, index_attn, dim=1) + self.c_proj = prune_conv1d_layer(self.c_proj, index, dim=0) + + # Update hyper params + self.split_size = (self.split_size // self.n_head) * (self.n_head - len(heads)) + self.n_head = self.n_head - len(heads) + self.pruned_heads = self.pruned_heads.union(heads) + + def _attn(self, q, k, v, attention_mask=None, head_mask=None): + w = torch.matmul(q, k) # batch_size x n_head x pre_len x (past_len+pre_len) + if self.scale: + w = w / math.sqrt(v.size(-1)) + nd, ns = w.size(-2), w.size(-1) + b = self.bias[:, :, ns - nd : ns, :ns] # 1 x 1 x pre_len x (past_len + pre_len) + w = w * b - 1e4 * (1 - b) # batch_size x n_head x pre_len x (past_len + pre_len) + + if attention_mask is not None: + # Apply the attention mask + w = w + attention_mask + + w = nn.Softmax(dim=-1)(w) + w = self.attn_dropout(w) + + # Mask heads if we want to + if head_mask is not None: + w = w * head_mask + + outputs = [torch.matmul(w, v)] + outputs.append(w) + return outputs + + def merge_heads(self, x): + x = x.permute(0, 2, 1, 3).contiguous() + new_x_shape = x.size()[:-2] + (x.size(-2) * x.size(-1),) + return x.view(*new_x_shape) # in Tensorflow implem: fct merge_states + + def split_heads(self, x, k=False): + new_x_shape = x.size()[:-1] + (self.n_head, x.size(-1) // self.n_head) + x = x.view(*new_x_shape) # in Tensorflow implem: fct split_states + if k: + return x.permute(0, 2, 3, 1) # (batch, head, head_features, seq_length) + else: + return x.permute(0, 2, 1, 3) # (batch, head, seq_length, head_features) + + def forward(self, x, layer_past=None, attention_mask=None, head_mask=None): + x = 
self.c_attn(x) + query, key, value = x.split(self.split_size, dim=2) + query = self.split_heads(query) # (batch, head, seq_length, head_features) + key = self.split_heads(key, k=True) + value = self.split_heads(value) + if layer_past is not None: + past_key, past_value = layer_past[0].transpose(-2, -1), layer_past[1] # transpose back cf below + # key: (batch, head, head_features, seq_length) + key = torch.cat((past_key, key), dim=-1) + # value: (batch, head, seq_length, head_features) + value = torch.cat((past_value, value), dim=-2) + present = torch.stack((key.transpose(-2, -1), value)) # transpose to have same shapes for stacking + + attn_outputs = self._attn(query, key, value, attention_mask, head_mask) + a = attn_outputs[0] + + a = self.merge_heads(a) + a = self.c_proj(a) + a = self.resid_dropout(a) + + outputs = [a, present] + attn_outputs[1:] + return outputs # a, present, (attentions) + + +class Conv1D(nn.Module): + def __init__(self, nf, nx): + """ Conv1D layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2) + Basically works like a Linear layer but the weights are transposed + """ + super(Conv1D, self).__init__() + self.nf = nf + w = torch.empty(nx, nf) + nn.init.normal_(w, std=0.02) + self.weight = nn.Parameter(w) + self.bias = nn.Parameter(torch.zeros(nf)) + + def forward(self, x): + size_out = x.size()[:-1] + (self.nf,) + x = torch.addmm(self.bias, x.view(-1, x.size(-1)), self.weight) + x = x.view(*size_out) + return x + + +class MLP(nn.Module): + def __init__(self, n_state, config): # in MLP: n_state=3072 (4 * n_embd) + super(MLP, self).__init__() + nx = config.n_embd + self.c_fc = Conv1D(n_state, nx) + self.c_proj = Conv1D(nx, n_state) + self.act = gelu + self.dropout = nn.Dropout(config.resid_pdrop) + + def forward(self, x): + h = self.act(self.c_fc(x)) + h2 = self.c_proj(h) + return self.dropout(h2) + + +class Block(nn.Module): + def __init__(self, n_ctx, config, scale=False): + super(Block, self).__init__() + nx = config.n_embd + self.ln_1 = nn.LayerNorm(nx, eps=config.layer_norm_epsilon) + self.attn = Attention(nx, n_ctx, config, scale) + self.ln_2 = nn.LayerNorm(nx, eps=config.layer_norm_epsilon) + self.mlp = MLP(4 * nx, config) + + def forward(self, x, layer_past=None, attention_mask=None, head_mask=None): + output_attn = self.attn( + self.ln_1(x), layer_past=layer_past, attention_mask=attention_mask, head_mask=head_mask + ) + a = output_attn[0] # output_attn: a, present, (attentions) - @bos_token.setter - def bos_token(self, value): - self._bos_token = value + x = x + a + m = self.mlp(self.ln_2(x)) + x = x + m - @eos_token.setter - def eos_token(self, value): - self._eos_token = value + outputs = [x] + output_attn[1:] + return outputs # x, present, (attentions) - @unk_token.setter - def unk_token(self, value): - self._unk_token = value - @pad_token.setter - def pad_token(self, value): - self._pad_token = value +class GPT2PreTrainedModel(nn.Module): + """ An abstract class to handle weights initialization and + a simple interface for dowloading and loading pretrained models. + """ - @cls_token.setter - def cls_token(self, value): - self._cls_token = value + config_class = GPT2Config + base_model_prefix = "transformer" - @mask_token.setter - def mask_token(self, value): - self._mask_token = value + def _init_weights(self, module): + """ Initialize the weights. 
+ """ + if isinstance(module, (nn.Linear, nn.Embedding, Conv1D)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + if isinstance(module, (nn.Linear, Conv1D)) and module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + + def __init__(self, config, *inputs, **kwargs): + super().__init__() + if not isinstance(config, GPT2Config): + raise ValueError( + "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. " + "To create a model from a pretrained model use " + "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format( + self.__class__.__name__, self.__class__.__name__ + ) + ) + # Save config in model + self.config = config @property - def bos_token_id(self): - """ Id of the beginning of sentence token in the vocabulary. Log an error if used while not having been set. """ - return self.convert_tokens_to_ids(self.bos_token) + def base_model(self): + return getattr(self, self.base_model_prefix, self) - @property - def eos_token_id(self): - """ Id of the end of sentence token in the vocabulary. Log an error if used while not having been set. """ - return self.convert_tokens_to_ids(self.eos_token) + def get_input_embeddings(self): + """ Get model's input embeddings + """ + base_model = getattr(self, self.base_model_prefix, self) + if base_model is not self: + return base_model.get_input_embeddings() + else: + raise NotImplementedError - @property - def unk_token_id(self): - """ Id of the unknown token in the vocabulary. Log an error if used while not having been set. """ - return self.convert_tokens_to_ids(self.unk_token) + def set_input_embeddings(self, value): + """ Set model's input embeddings + """ + base_model = getattr(self, self.base_model_prefix, self) + if base_model is not self: + base_model.set_input_embeddings(value) + else: + raise NotImplementedError - @property - def pad_token_id(self): - """ Id of the padding token in the vocabulary. Log an error if used while not having been set. """ - return self.convert_tokens_to_ids(self.pad_token) + def get_output_embeddings(self): + """ Get model's output embeddings + Return None if the model doesn't have output embeddings + """ + return None # Overwrite for models with output embeddings - @property - def pad_token_type_id(self): - """ Id of the padding token type in the vocabulary.""" - return self._pad_token_type_id + def tie_weights(self): + """ Make sure we are sharing the input and output embeddings. + Export to TorchScript can't handle parameter sharing so we are cloning them instead. + """ + output_embeddings = self.get_output_embeddings() + if output_embeddings is not None: + self._tie_or_clone_weights(output_embeddings, self.get_input_embeddings()) - @property - def cls_token_id(self): - """ Id of the classification token in the vocabulary. E.g. to extract a summary of an input sequence leveraging self-attention along the full depth of the model. Log an error if used while not having been set. 
""" - return self.convert_tokens_to_ids(self.cls_token) + def _tie_or_clone_weights(self, output_embeddings, input_embeddings): + """ Tie or clone module weights depending of weither we are using TorchScript or not + """ + if self.config.torchscript: + output_embeddings.weight = nn.Parameter(input_embeddings.weight.clone()) + else: + output_embeddings.weight = input_embeddings.weight + + if hasattr(output_embeddings, "bias") and output_embeddings.bias is not None: + output_embeddings.bias.data = torch.nn.functional.pad( + output_embeddings.bias.data, + (0, output_embeddings.weight.shape[0] - output_embeddings.bias.shape[0]), + "constant", + 0, + ) + if hasattr(output_embeddings, "out_features") and hasattr(input_embeddings, "num_embeddings"): + output_embeddings.out_features = input_embeddings.num_embeddings - @property - def mask_token_id(self): - """ Id of the mask token in the vocabulary. E.g. when training a model with masked-language modeling. Log an error if used while not having been set. """ - return self.convert_tokens_to_ids(self.mask_token) + def init_weights(self): + """ Initialize and prunes weights if needed. """ + # Initialize weights + self.apply(self._init_weights) - @property - def vocab_size(self): - return len(self.encoder) - - def bpe(self, token): - if token in self.cache: - return self.cache[token] - word = tuple(token) - pairs = get_pairs(word) - - if not pairs: - return token - - while True: - bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf"))) - if bigram not in self.bpe_ranks: - break - first, second = bigram - new_word = [] - i = 0 - while i < len(word): - try: - j = word.index(first, i) - except ValueError: - new_word.extend(word[i:]) - break - else: - new_word.extend(word[i:j]) - i = j - - if word[i] == first and i < len(word) - 1 and word[i + 1] == second: - new_word.append(first + second) - i += 2 - else: - new_word.append(word[i]) - i += 1 - new_word = tuple(new_word) - word = new_word - if len(word) == 1: - break - else: - pairs = get_pairs(word) - word = " ".join(word) - self.cache[token] = word - return word - - def _tokenize(self, text, add_prefix_space=False): - """ Tokenize a string. - Args: - - add_prefix_space (boolean, default False): - Begin the sentence with at least one space to get invariance to word order in GPT-2 (and RoBERTa) tokenizers. + # Prune heads if needed + if self.config.pruned_heads: + self.prune_heads(self.config.pruned_heads) + + # Tie weights if needed + self.tie_weights() + + def prune_heads(self, heads_to_prune): + """ Prunes heads of the base model. + + Arguments: + + heads_to_prune: dict with keys being selected layer indices (`int`) and associated values being the list of heads to prune in said layer (list of `int`). + E.g. {1: [0, 2], 2: [2, 3]} will prune heads 0 and 2 on layer 1 and heads 2 and 3 on layer 2. """ - bpe_tokens = [] - for token in gpt2_tokenize(text, add_prefix_space=add_prefix_space): - token = "".join( - self.byte_encoder[b] for b in token.encode("utf-8") - ) # Maps all our bytes to unicode strings, avoiding controle tokens of the BPE (spaces in our case) - bpe_tokens.extend(bpe_token for bpe_token in self.bpe(token).split(" ")) - return bpe_tokens - - def _convert_token_to_id(self, token): - """ Converts a token (str) in an id using the vocab. 
""" - return self.encoder.get(token, self.encoder.get(self.unk_token)) - - def _convert_id_to_token(self, index): - """Converts an index (integer) in a token (str) using the vocab.""" - return self.decoder.get(index) - - def convert_tokens_to_string(self, tokens): - """ Converts a sequence of tokens (string) in a single string. """ - text = "".join(tokens) - text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors=self.errors) - return text - - def save_vocabulary(self, save_directory): - """Save the tokenizer vocabulary and merge files to a directory.""" - if not os.path.isdir(save_directory): - logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) - return - vocab_file = os.path.join(save_directory, VOCAB_FILES_NAMES["vocab_file"]) - merge_file = os.path.join(save_directory, VOCAB_FILES_NAMES["merges_file"]) - - with open(vocab_file, "w", encoding="utf-8") as f: - f.write(json.dumps(self.encoder, ensure_ascii=False)) - - index = 0 - with open(merge_file, "w", encoding="utf-8") as writer: - writer.write("#version: 0.2\n") - for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]): - if index != token_index: - logger.warning( - "Saving vocabulary to {}: BPE merge indices are not consecutive." - " Please check that the tokenizer is not corrupted!".format(merge_file) - ) - index = token_index - writer.write(" ".join(bpe_tokens) + "\n") - index += 1 - - return vocab_file, merge_file + # save new sets of pruned heads as union of previously stored pruned heads and newly pruned heads + for layer, heads in heads_to_prune.items(): + union_heads = set(self.config.pruned_heads.get(layer, [])) | set(heads) + self.config.pruned_heads[layer] = list(union_heads) # Unfortunately we have to store it as list for JSON - @classmethod - def from_pretrained(cls, model_dir_or_name): - r""" + self.base_model._prune_heads(heads_to_prune) + + def save_pretrained(self, save_directory): + """ Save a model and its configuration file to a directory, so that it + can be re-loaded using the `:func:`~transformers.PreTrainedModel.from_pretrained`` class method. """ - return cls._from_pretrained(model_dir_or_name) + assert os.path.isdir( + save_directory + ), "Saving path should be a directory where the model and configuration can be saved" + + # Only save the model itself if we are using distributed training + model_to_save = self.module if hasattr(self, "module") else self + + # Save configuration file + model_to_save.config.save_pretrained(save_directory) + + # If we save using the predefined names, we can load using `from_pretrained` + output_model_file = os.path.join(save_directory, "pytorch_model.bin") + torch.save(model_to_save.state_dict(), output_model_file) + logger.info("Model weights saved in {}".format(output_model_file)) - # 将它修改一定传入文件夹 @classmethod - def _from_pretrained(cls, model_dir_or_name): - """ + def from_pretrained(cls, model_dir_or_name, *model_args, **kwargs): + r"""Instantiate a pretrained pytorch model from a pre-trained model configuration. + + The model is set in evaluation mode by default using ``model.eval()`` (Dropout modules are deactivated) + To train the model, you should first set it back in training mode with ``model.train()`` + + The warning ``Weights from XXX not initialized from pretrained model`` means that the weights of XXX do not come pre-trained with the rest of the model. + It is up to you to train those weights with a downstream fine-tuning task. 
+ + The warning ``Weights from XXX not used in YYY`` means that the layer XXX is not used by YYY, therefore those weights are discarded. + + Parameters: + model_dir_or_name: either: + + - a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``. + - a string with the `identifier name` of a pre-trained model that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``. + - a path to a `directory` containing model weights saved using :func:`~transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``. + - a path or url to a `tensorflow index checkpoint file` (e.g. `./tf_model/model.ckpt.index`). In this case, ``from_tf`` should be set to True and a configuration object should be provided as ``config`` argument. This loading path is slower than converting the TensorFlow checkpoint in a PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards. + - None if you are both providing the configuration and state dictionary (resp. with keyword arguments ``config`` and ``state_dict``) + + Examples:: + + model = BertModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache. + model = BertModel.from_pretrained('./test/saved_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')` + model = BertModel.from_pretrained('bert-base-uncased', output_attention=True) # Update configuration during loading + assert model.config.output_attention == True + # Loading from a TF checkpoint file instead of a PyTorch model (slower) + config = BertConfig.from_json_file('./tf_model/my_tf_model_config.json') + model = BertModel.from_pretrained('./tf_model/my_tf_checkpoint.ckpt.index', from_tf=True, config=config) - :param str model_dir_or_name: 目录或者缩写名 - :param init_inputs: - :param kwargs: - :return: """ - # 它需要两个文件,第一个是vocab.json,第二个是merge_file? + config = kwargs.pop("config", None) + state_dict = kwargs.pop("state_dict", None) + model_dir = _get_gpt2_dir(model_dir_or_name) - # 里面会包含四个文件vocab.json, merge.txt, config.json, model.bin - - tokenizer_config_file = _get_filepath_based_on_postfix(model_dir, 'config.json') - with open(tokenizer_config_file, encoding="utf-8") as tokenizer_config_handle: - init_kwargs = json.load(tokenizer_config_handle) - # Set max length if needed - if model_dir_or_name in PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES: - # if we're using a pretrained model, ensure the tokenizer - # wont index sequences longer than the number of positional embeddings - max_len = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES[model_dir_or_name] - if max_len is not None and isinstance(max_len, (int, float)): - init_kwargs["max_len"] = min(init_kwargs.get("max_len", int(1e12)), max_len) - - # 将vocab, merge加入到init_kwargs中 - init_kwargs['vocab_file'] = _get_filepath_based_on_postfix(model_dir, 'vocab.json') - init_kwargs['merges_file'] = _get_filepath_based_on_postfix(model_dir, 'merges.txt') - - init_inputs = init_kwargs.pop("init_inputs", ()) - # Instantiate tokenizer. - try: - tokenizer = cls(*init_inputs, **init_kwargs) - except OSError: - OSError( - "Unable to load vocabulary from file. " - "Please check that the provided vocabulary is accessible and not corrupted." + + # Load config if we don't provide a configuration + model_kwargs = {} + if not isinstance(config, GPT2Config): + config = cls.config_class.from_pretrained( + model_dir, + *model_args, + **kwargs + ) + else: + model_kwargs = kwargs + + # Instantiate model. 
+ model = cls(config, *model_args, **model_kwargs) + + model_path = _get_file_name_base_on_postfix(model_dir, 'model.bin') + state_dict = torch.load(model_path, map_location="cpu") + + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + + # Convert old format to new format if needed from a PyTorch state_dict + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if "gamma" in key: + new_key = key.replace("gamma", "weight") + if "beta" in key: + new_key = key.replace("beta", "bias") + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, "_metadata", None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + # PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants + # so we need to apply the function recursively. + def load(module, prefix=""): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs + ) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + ".") + + # Make sure we are able to load base models as well as derived models (with heads) + start_prefix = "" + model_to_load = model + if not hasattr(model, cls.base_model_prefix) and any( + s.startswith(cls.base_model_prefix) for s in state_dict.keys() + ): + start_prefix = cls.base_model_prefix + "." + if hasattr(model, cls.base_model_prefix) and not any( + s.startswith(cls.base_model_prefix) for s in state_dict.keys() + ): + model_to_load = getattr(model, cls.base_model_prefix) + + load(model_to_load, prefix=start_prefix) + if len(missing_keys) > 0: + logger.info( + "Weights of {} not initialized from pretrained model: {}".format( + model.__class__.__name__, missing_keys + ) + ) + if len(unexpected_keys) > 0: + logger.info( + "Weights from pretrained model not used in {}: {}".format( + model.__class__.__name__, unexpected_keys + ) + ) + if len(error_msgs) > 0: + raise RuntimeError( + "Error(s) in loading state_dict for {}:\n\t{}".format( + model.__class__.__name__, "\n\t".join(error_msgs) + ) ) - return tokenizer + model.tie_weights() # make sure word embedding weights are still tied if needed - def __len__(self): - """ Size of the full vocabulary with the added tokens """ - return self.vocab_size + len(self.added_tokens_encoder) - - def tokenize(self, text, add_prefix_space=True): - """ Converts a string in a sequence of tokens (string), using the tokenizer. - Split in words for word-based vocabulary or sub-words for sub-word-based - vocabularies (BPE/SentencePieces/WordPieces). - - Take care of added tokens. - Args: - - text: The sequence to be encoded. - - add_prefix_space (boolean, default True): - Begin the sentence with at least one space to get invariance to word order in GPT-2 (and RoBERTa) tokenizers. 
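# A toy illustration of the gamma/beta remapping above: checkpoints that still use the
# old LayerNorm parameter names load transparently (the tensor values here are placeholders).
old_state = {"h.0.ln_1.gamma": 1.0, "h.0.ln_1.beta": 0.0}
renamed = {k.replace("gamma", "weight").replace("beta", "bias"): v for k, v in old_state.items()}
assert set(renamed) == {"h.0.ln_1.weight", "h.0.ln_1.bias"}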
- """ - all_special_tokens = self.all_special_tokens - - def lowercase_text(t): - # convert non-special tokens to lowercase - escaped_special_toks = [re.escape(s_tok) for s_tok in all_special_tokens] - pattern = r'(' + r'|'.join(escaped_special_toks) + r')|' + \ - r'(.+?)' - return re.sub( - pattern, - lambda m: m.groups()[0] or m.groups()[1].lower(), - t) - - if self.init_kwargs.get('do_lower_case', False): - text = lowercase_text(text) - - def split_on_token(tok, text): - result = [] - split_text = text.split(tok) - for i, sub_text in enumerate(split_text): - sub_text = sub_text.strip() - if i == 0 and not sub_text: - result += [tok] - elif i == len(split_text) - 1: - if sub_text: - result += [sub_text] - else: - pass - else: - if sub_text: - result += [sub_text] - result += [tok] - return result - - def split_on_tokens(tok_list, text): - if not text.strip(): - return [] - if not tok_list: - return self._tokenize(text, add_prefix_space=add_prefix_space) - - tokenized_text = [] - text_list = [text] - for tok in tok_list: - tokenized_text = [] - for sub_text in text_list: - if sub_text not in self.added_tokens_encoder \ - and sub_text not in all_special_tokens: - tokenized_text += split_on_token(tok, sub_text) - else: - tokenized_text += [sub_text] - text_list = tokenized_text - - return list(itertools.chain.from_iterable((self._tokenize(token, add_prefix_space=add_prefix_space) if token not \ - in self.added_tokens_encoder and token not in all_special_tokens \ - else [token] for token in tokenized_text))) - - added_tokens = list(self.added_tokens_encoder.keys()) + all_special_tokens - tokenized_text = split_on_tokens(added_tokens, text) - return tokenized_text - - def convert_tokens_to_ids(self, tokens): - """ Converts a single token, or a sequence of tokens, (str) in a single integer id - (resp. a sequence of ids), using the vocabulary. + # Set model in evaluation mode to desactivate DropOut modules by default + model.eval() + + return model + + def prepare_inputs_for_generation(self, input_ids, **kwargs): + return {"input_ids": input_ids, **kwargs} + + @torch.no_grad() + def generate( + self, + input_ids, + max_length=None, + do_sample=None, + num_beams=None, + temperature=None, + top_k=None, + top_p=None, + repetition_penalty=None, + bos_token_id=None, + pad_token_id=None, + eos_token_ids=None, + length_penalty=None): + """ Sequence generator for models with a LM head. + + The method currently supports greedy or penalized greedy decoding, sampling with top-k or nucleus sampling + and beam-search. + + Params: + **input_ids**: (`optional`) `torch.LongTensor` of shape (1, sequence_length) + The sequence used as a prompt for the generation. If `None` the method initializes + it as an empty `torch.LongTensor` of shape (1,) + **max_length**: (`optional`) int + The max length of the sequence to be generated. Between 1 and infinity. Default to 20. + **do_sample**: (`optional`) bool + If set to `False` we use greedy decoding; otherwise sampling. Default to greedy sampling. + **num_beams**: (`optional`) int + Number of beams for beam search. 1 means no beam serach. Default to 1. + **temperature**: (`optional`) float + The value used to module the next token probabilities. + **top_k**: (`optional`) int + The number of highest probability vocabulary tokens to keep for top-k-filtering. Between 1 and infinity. Default to 50. + **top_p**: (`optional`) float + The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Must be between 0 and 1. 
Default to 1.
+            **repetition_penalty**: (`optional`) float
+                The parameter for repetition penalty. Between 1.0 and
+                infinity. 1.0 means no penalty. Default to 1.
+            **bos_token_id**: (`optional`) int
+                Beginning of sentence token if no prompt is provided. Default to 0.
+            **pad_token_id**: (`optional`) int
+                Token id used for padding. Default to 0.
+            **eos_token_ids**: (`optional`) int or list of int
+                End of sequence token or list of tokens to stop the generation. Default to 0.
+            **length_penalty**: (`optional`) float
+                Exponential penalty to the length. Default to 1.
         """
-        if tokens is None:
-            return None
+        decoder = _GPT2Decoder(self)
+        generator = SequenceGenerator(decoder=decoder, max_length=max_length, num_beams=num_beams,
+                                      do_sample=do_sample, temperature=temperature, top_k=top_k, top_p=top_p,
+                                      bos_token_id=bos_token_id, eos_token_id=eos_token_ids,
+                                      repetition_penalty=repetition_penalty, length_penalty=length_penalty,
+                                      pad_token_id=pad_token_id)
+        results = generator.generate(input_ids, past=None)
+        return results
+
+
+class GPT2Model(GPT2PreTrainedModel):
+    r"""
+    Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
+        **last_hidden_state**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, hidden_size)``
+            Sequence of hidden-states at the last layer of the model.
+        **past**:
+            list of ``torch.FloatTensor`` (one for each layer) of shape ``(2, batch_size, num_heads, sequence_length, embed_size_per_head)``:
+            that contains pre-computed hidden-states (key and values in the attention blocks).
+            Can be used (see `past` input) to speed up sequential decoding. The token ids which have their past given to this model
+            should not be passed as input ids as they have already been computed.
+        **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``)
+            list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings)
+            of shape ``(batch_size, sequence_length, hidden_size)``:
+            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
+        **attentions**: (`optional`, returned when ``config.output_attentions=True``)
+            list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
+            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
+ + Examples:: + + tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + model = GPT2Model.from_pretrained('gpt2') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple - if isinstance(tokens, str): - return self._convert_token_to_id_with_added_voc(tokens) + """ - ids = [] - for token in tokens: - ids.append(self._convert_token_to_id_with_added_voc(token)) - return ids + def __init__(self, config): + super().__init__(config) - def _convert_token_to_id_with_added_voc(self, token): - if token is None: - return None + self.wte = nn.Embedding(config.vocab_size, config.n_embd) + self.wpe = nn.Embedding(config.n_positions, config.n_embd) + self.drop = nn.Dropout(config.embd_pdrop) + self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)]) + self.ln_f = nn.LayerNorm(config.n_embd, eps=config.layer_norm_epsilon) - if token in self.added_tokens_encoder: - return self.added_tokens_encoder[token] - return self._convert_token_to_id(token) + self.init_weights() - def convert_ids_to_tokens(self, ids, skip_special_tokens=False): - """ Converts a single index or a sequence of indices (integers) in a token " - (resp.) a sequence of tokens (str), using the vocabulary and added tokens. + def get_input_embeddings(self): + return self.wte - Args: - skip_special_tokens: Don't decode special tokens (self.all_special_tokens). Default: False + def set_input_embeddings(self, new_embeddings): + self.wte = new_embeddings + + def _prune_heads(self, heads_to_prune): + """ Prunes heads of the model. + heads_to_prune: dict of {layer_num: list of heads to prune in this layer} """ - if isinstance(ids, int): - return self._convert_id_to_token(ids) - tokens = [] - for index in ids: - index = int(index) - if skip_special_tokens and index in self.all_special_ids: - continue - tokens.append(self._convert_id_to_token(index)) - return tokens - - def convert_id_to_tokens(self, token_ids, skip_special_tokens=False, clean_up_tokenization_spaces=True): + for layer, heads in heads_to_prune.items(): + self.h[layer].attn.prune_heads(heads) + + def forward( + self, + input_ids, + past=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + output_attentions=True + ): """ - Converts a sequence of ids (integer) in a string, using the tokenizer and vocabulary - with options to remove special tokens and clean up tokenization spaces. - Similar to doing ``self.convert_tokens_to_string(self.convert_ids_to_tokens(token_ids))``. - Args: - token_ids: list of tokenized input ids. Can be obtained using the `encode` or `encode_plus` methods. - skip_special_tokens: if set to True, will replace special tokens. - clean_up_tokenization_spaces: if set to True, will clean up the tokenization spaces. 
+ :param torch.LongTensor input_ids: batch_size x max_len or batch_size x beam_size x 1 + :param GPT2Past past: 之前的状态 + :param torch.ByteTensor attention_mask: batch_size x (pre_len+past_len), 与input_ids与past的concat一样大。 + 为0的地方为padding。 + :param torch.LongTensor token_type_ids: batch_size x max_len。 + :param torch.LongTensor position_ids: 与input_ids对应的位置 + :param head_mask: + :param bool output_attentions: 是否输出attention状态 + :return: """ - filtered_tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens=skip_special_tokens) - - # To avoid mixing byte-level and unicode for byte-level BPT - # we need to build string separatly for added tokens and byte-level tokens - # cf. https://github.com/huggingface/transformers/issues/1133 - sub_texts = [] - current_sub_text = [] - for token in filtered_tokens: - if skip_special_tokens and token in self.all_special_ids: - continue - if token in self.added_tokens_encoder: - if current_sub_text: - sub_texts.append(self.convert_tokens_to_string(current_sub_text)) - current_sub_text = [] - sub_texts.append(token) - else: - current_sub_text.append(token) - if current_sub_text: - sub_texts.append(self.convert_tokens_to_string(current_sub_text)) - text = " ".join(sub_texts) - - if clean_up_tokenization_spaces: - clean_text = self.clean_up_tokenization(text) - return clean_text + input_shape = input_ids.size() # batch_size x max_len 或 batch_size x beam_size x 1 + input_ids = input_ids.view(-1, input_shape[-1]) # input_shape是 batch_size' x max_len + + if token_type_ids is not None: + token_type_ids = token_type_ids.view(-1, input_shape[-1]) + if position_ids is not None: + position_ids = position_ids.view(-1, input_shape[-1]) + + if past is None or len(past)==0: + past_length = 0 + past = [None] * len(self.h) # len(self.h) 是layer的层数 + else: + past_length = past[0][0].size(-2) + if position_ids is None: # 如果没有position id则生成 + device = input_ids.device + position_ids = torch.arange(past_length, input_shape[-1] + past_length, dtype=torch.long, device=device) + position_ids = position_ids.unsqueeze(0).view(-1, input_shape[-1]) + + # Attention mask. + if attention_mask is not None: + attention_mask = attention_mask.view(-1, input_shape[-1]) + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is more simple than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely. 
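# Concretely: a padding mask of [1, 1, 0] is turned into additive scores [0, 0, -10000]
# by the two lines below; adding that to the attention logits before the softmax drives
# the attention weights on padded positions towards zero.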
+ attention_mask = attention_mask.to(dtype=next(self.parameters()).dtype) # fp16 compatibility + attention_mask = (1.0 - attention_mask) * -10000.0 + # attention_mask = attention_mask.masked_fill(attention_mask.eq(0), -10000.0) + + # Prepare head mask if needed + # 1.0 in head_mask indicate we keep the head + # attention_probs has shape bsz x n_heads x N x N + # head_mask has shape n_layer x batch x n_heads x N x N + if head_mask is not None: + if head_mask.dim() == 1: + head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(-1).unsqueeze(-1) + head_mask = head_mask.expand(self.config.n_layer, -1, -1, -1, -1) + elif head_mask.dim() == 2: + head_mask = ( + head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(-1) + ) # We can specify head_mask for each layer + head_mask = head_mask.to( + dtype=next(self.parameters()).dtype + ) # switch to fload if need + fp16 compatibility else: - return text + head_mask = [None] * self.config.n_layer - @property - def special_tokens_map(self): - """ A dictionary mapping special token class attribute (cls_token, unk_token...) to their - values ('', ''...) - """ - set_attr = {} - for attr in self.SPECIAL_TOKENS_ATTRIBUTES: - attr_value = getattr(self, "_" + attr) - if attr_value: - set_attr[attr] = attr_value - return set_attr + inputs_embeds = self.wte(input_ids) + position_embeds = self.wpe(position_ids) + if token_type_ids is not None: + token_type_embeds = self.wte(token_type_ids) + else: + token_type_embeds = 0 + hidden_states = inputs_embeds + position_embeds + token_type_embeds + hidden_states = self.drop(hidden_states) - @property - def all_special_tokens(self): - """ List all the special tokens ('', ''...) mapped to class attributes - (cls_token, unk_token...). - """ - all_toks = [] - set_attr = self.special_tokens_map - for attr_value in set_attr.values(): - all_toks = all_toks + (list(attr_value) if isinstance(attr_value, (list, tuple)) else [attr_value]) - all_toks = list(set(all_toks)) - return all_toks + # batch_size x max_len x embed_size + output_shape = input_shape + (hidden_states.size(-1),) - @property - def all_special_ids(self): - """ List the vocabulary indices of the special tokens ('', ''...) mapped to - class attributes (cls_token, unk_token...). + presents = () + all_attentions = [] + all_hidden_states = () + for i, (block, layer_past) in enumerate(zip(self.h, past)): + all_hidden_states = all_hidden_states + (hidden_states.view(*output_shape),) + + outputs = block( + hidden_states, layer_past=layer_past, attention_mask=attention_mask, head_mask=head_mask[i] + ) + + hidden_states, present = outputs[:2] + presents = presents + (present,) + + all_attentions.append(outputs[2]) + + hidden_states = self.ln_f(hidden_states) + + hidden_states = hidden_states.view(*output_shape) + # Add last hidden state + all_hidden_states = all_hidden_states + (hidden_states,) + + outputs = (hidden_states,) + outputs = outputs + (presents,) + + outputs = outputs + (all_hidden_states,) + if output_attentions: + # let the number of heads free (-1) so we can extract attention even after head pruning + attention_output_shape = input_shape[:-1] + (-1,) + all_attentions[0].shape[-2:] + all_attentions = tuple(t.view(*attention_output_shape) for t in all_attentions) + outputs = outputs + (all_attentions,) + # 写出所有输出的shape. 
+ # last hidden states, Tensor: batch_size x max_len x embed_size + # presents, tuple: n_layer x 2 x batch_size x n_head x (max_len+past_len) x head_dim, 第二维前一半为key,后一半为value + # all hidden states, tuple: n_layer x batch_size x max_len x embed_size, + # attention, tuple: n_layer x batch_size x n_head' x src_len x tgt_len + return outputs # last hidden state, (presents), (all hidden_states), (attentions) + + +class GPT2Past(Past): + def __init__(self): + super().__init__() + self.past = None # tuple [n_layer, 2 x batch_size x n_head x past_len x head_dim] + + def num_samples(self): + if self.past is not None: + return self.past[0].size(1) + return None + + def reorder_past(self, indices): + for i in range(len(self.past)): + assert self.past[i] is not None + self.past[i] = self.past[i].index_select(index=indices, dim=1) + + def __iter__(self): + for p in self.past: + yield p + + def __getitem__(self, item): + assert isinstance(item, int) + return self.past[item] + + def __len__(self): + if self.past is not None: + return len(self.past) + return 0 + + +class _GPT2Decoder(Decoder): + def __init__(self, gpt_model): + super().__init__() + self.gpt_model = gpt_model + + def decode(self, tokens, past=None) -> Tuple[torch.Tensor, Past]: + if past is None: + past = GPT2Past() + lm_logits, presents, _ = self.gpt_model(input_ids=tokens, + past=past, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + output_attentions=False) + past.past = list(presents) + return lm_logits[:, -1], past + + def reorder_past(self, indices: torch.LongTensor, past: GPT2Past) -> GPT2Past: + past.reorder_past(indices) + return past + + +class GPT2LMHeadModel(GPT2PreTrainedModel): + r""" + **labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``: + Labels for language modeling. + Note that the labels **are shifted** inside the model, i.e. you can set ``lm_labels = input_ids`` + Indices are selected in ``[-1, 0, ..., config.vocab_size]`` + All labels set to ``-100`` are ignored (masked), the loss is only + computed for labels in ``[0, ..., config.vocab_size]`` + + Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs: + **loss**: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``: + Language modeling loss. + **prediction_scores**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, config.vocab_size)`` + Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). + **past**: + list of ``torch.FloatTensor`` (one for each layer) of shape ``(2, batch_size, num_heads, sequence_length, embed_size_per_head)``: + that contains pre-computed hidden-states (key and values in the attention blocks). + Can be used (see `past` input) to speed up sequential decoding. The token ids which have their past given to this model + should not be passed as input ids as they have already been computed. + **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``) + list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings) + of shape ``(batch_size, sequence_length, hidden_size)``: + Hidden-states of the model at the output of each layer plus the initial embedding outputs. 
+ **attentions**: (`optional`, returned when ``config.output_attentions=True``) + list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``: + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads. + """ + + def __init__(self, config): + super(GPT2LMHeadModel, self).__init__(config) + self.transformer = GPT2Model(config) + self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False) + + self.init_weights() + + def get_output_embeddings(self): + return self.lm_head + + def get_input_embeddings(self): + return self.transformer.wte + + def forward( + self, + input_ids, + past=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + labels=None, + output_attentions=False + ): """ - all_toks = self.all_special_tokens - all_ids = self.convert_tokens_to_ids(all_toks) - return all_ids - @staticmethod - def clean_up_tokenization(out_string): - """ Clean up a list of simple English tokenization artifacts like spaces before punctuations and abreviated forms. + :param torch.LongTensor input_ids: batch_size x max_len or batch_size x beam_size x 1 + :param tuple past: num_layers x 2 x batch_size x n_head x max_len' x head_dim. 可以将前一个时刻的presents作为输入 + :param torch.ByteTensor attention_mask: batch_size x max_len, 与input_ids一样大。为0的地方为padding。 + :param torch.LongTensor token_type_ids: batch_size x max_len。 + :param torch.LongTensor position_ids: 与input_ids对应的位置 + :param head_mask: + :param labels: language model应该预测的值。如果为None,则没有language model的额外loss。最好把padding位置设置为-100 + 使得language model不要计算这部分的loss + :param output_attentions: 是否输出output_attentions + :return: """ - out_string = ( - out_string.replace(" .", ".") - .replace(" ?", "?") - .replace(" !", "!") - .replace(" ,", ",") - .replace(" ' ", "'") - .replace(" n't", "n't") - .replace(" 'm", "'m") - .replace(" do not", " don't") - .replace(" 's", "'s") - .replace(" 've", "'ve") - .replace(" 're", "'re") + transformer_outputs = self.transformer( + input_ids, + past=past, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + output_attentions=output_attentions ) - return out_string + hidden_states = transformer_outputs[0] + + lm_logits = self.lm_head(hidden_states) + + outputs = (lm_logits,) + transformer_outputs[1:] + if labels is not None: + # Shift so that tokens < n predict n + shift_logits = lm_logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss() + loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)) + outputs = (loss,) + outputs + + # 返回值 + # loss: torch.FloatTensor, 如果labels为None则没有该loss + # lm_logits: batch_size x max_len x vocab_size + # presents, tuple: n_layer x 2 x batch_size x n_head x (max_len+past_len) x head_dim, 第二维前一半为key,后一半为value + # all hidden states, tuple: n_layer x batch_size x max_len x embed_size, + # attention, tuple: n_layer x batch_size x n_head' x src_len x tgt_len + return outputs # (loss), lm_logits, presents, all hidden_states, (attentions) + + + + + +# 输出每个位置的 + diff --git a/fastNLP/modules/encoder/roberta.py b/fastNLP/modules/encoder/roberta.py index af8795c6..02b9df42 100644 --- a/fastNLP/modules/encoder/roberta.py +++ b/fastNLP/modules/encoder/roberta.py @@ -1,13 +1,19 @@ -from typing import List, Optional -import json +r"""undocumented 
+The code in this file is largely adapted (copy-pasted) from https://github.com/huggingface/pytorch-pretrained-BERT;
+    if you find it useful, please cite them as well.
+"""
+
+__all__ = [
+    'RobertaModel'
+]
 
 import torch
 import torch.nn as nn
 
-from .bert import BertEmbeddings, BertModel, BertConfig, _get_bert_dir
-from .gpt2 import GPT2Tokenizer
-from ..utils import create_position_ids_from_input_ids, _get_file_name_base_on_postfix
+from .bert import BertEmbeddings, BertModel, BertConfig
+from ..utils import _get_file_name_base_on_postfix
+from ...io.file_utils import _get_roberta_dir
 from ...core import logger
 
 PRETRAINED_ROBERTA_POSITIONAL_EMBEDDINGS_SIZES = {
@@ -33,30 +39,24 @@ class RobertaEmbeddings(BertEmbeddings):
             config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
         )
 
-    def forward(self, input_ids=None, token_type_ids=None, position_ids=None, words_embeddings=None):
-        if position_ids is None:
-            if input_ids is not None:
-                # Create the position ids from the input token ids. Any padded tokens remain padded.
-                position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx).to(input_ids.device)
-            else:
-                position_ids = self.create_position_ids_from_inputs_embeds(words_embeddings)
+    def forward(self, input_ids, token_type_ids, words_embeddings=None):
+        position_ids = self.create_position_ids_from_input_ids(input_ids)
 
         return super().forward(
             input_ids, token_type_ids=token_type_ids, position_ids=position_ids, words_embeddings=words_embeddings
         )
 
-    def create_position_ids_from_inputs_embeds(self, inputs_embeds):
-        """
-        :param torch.Tensor inputs_embeds:
+    def create_position_ids_from_input_ids(self, x):
+        """ Replace non-padding symbols with their position numbers. Position numbers begin at
+        padding_idx+1. Padding symbols are ignored. This is modified from fairseq's
+        `utils.make_positions`.
+ + :param torch.Tensor x: :return torch.Tensor: """ - input_shape = inputs_embeds.size()[:-1] - sequence_length = input_shape[1] - - position_ids = torch.arange( - self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device - ) - return position_ids.unsqueeze(0).expand(input_shape) + mask = x.ne(self.padding_idx).long() + incremental_indicies = torch.cumsum(mask, dim=1) * mask + return incremental_indicies + self.padding_idx class RobertaModel(BertModel): @@ -70,12 +70,6 @@ class RobertaModel(BertModel): self.embeddings = RobertaEmbeddings(config) self.apply(self.init_bert_weights) - def get_input_embeddings(self): - return self.embeddings.word_embeddings - - def set_input_embeddings(self, value): - self.embeddings.word_embeddings = value - @classmethod def from_pretrained(cls, model_dir_or_name, *inputs, **kwargs): state_dict = kwargs.get('state_dict', None) @@ -84,7 +78,7 @@ class RobertaModel(BertModel): kwargs.pop('from_tf', None) # get model dir from name or dir - pretrained_model_dir = _get_bert_dir(model_dir_or_name) + pretrained_model_dir = _get_roberta_dir(model_dir_or_name) # Load config config_file = _get_file_name_base_on_postfix(pretrained_model_dir, 'config.json') @@ -186,172 +180,3 @@ class RobertaModel(BertModel): return model -class RobertaTokenizer(GPT2Tokenizer): - - vocab_files_names = { - "vocab_file": "vocab.json", - "merges_file": "merges.txt", - } - - def __init__( - self, - vocab_file, - merges_file, - errors="replace", - bos_token="", - eos_token="", - sep_token="", - cls_token="", - unk_token="", - pad_token="", - mask_token="", - **kwargs - ): - super().__init__( - vocab_file=vocab_file, - merges_file=merges_file, - errors=errors, - bos_token=bos_token, - eos_token=eos_token, - unk_token=unk_token, - sep_token=sep_token, - cls_token=cls_token, - pad_token=pad_token, - mask_token=mask_token, - **kwargs, - ) - self.max_len_single_sentence = self.max_len - 2 # take into account special tokens - self.max_len_sentences_pair = self.max_len - 4 # take into account special tokens - - def build_inputs_with_special_tokens( - self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None - ) -> List[int]: - """ - Build model inputs from a sequence or a pair of sequence for sequence classification tasks - by concatenating and adding special tokens. - A RoBERTa sequence has the following format: - - - single sequence: `` X `` - - pair of sequences: `` A B `` - - Args: - token_ids_0 (:obj:`List[int]`): - List of IDs to which the special tokens will be added - token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`): - Optional second list of IDs for sequence pairs. - - Returns: - :obj:`List[int]`: list of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens. - """ - if token_ids_1 is None: - return [self.cls_token_id] + token_ids_0 + [self.sep_token_id] - cls = [self.cls_token_id] - sep = [self.sep_token_id] - return cls + token_ids_0 + sep + sep + token_ids_1 + sep - - def get_special_tokens_mask( - self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False - ) -> List[int]: - """ - Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding - special tokens using the tokenizer ``prepare_for_model`` or ``encode_plus`` methods. - - Args: - token_ids_0 (:obj:`List[int]`): - List of ids. 
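# A small worked example of the position-id scheme above, assuming padding_idx == 1
# (the usual RoBERTa convention; the real value comes from the embedding's padding_idx):
import torch
padding_idx = 1
x = torch.tensor([[5, 6, 7, 1, 1]])              # 1 marks padding
mask = x.ne(padding_idx).long()                  # [[1, 1, 1, 0, 0]]
position_ids = torch.cumsum(mask, dim=1) * mask + padding_idx
# -> tensor([[2, 3, 4, 1, 1]]): real tokens count up from padding_idx + 1,
#    padded positions keep padding_idx, mirroring fairseq's make_positions.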
- token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`): - Optional second list of IDs for sequence pairs. - already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`): - Set to True if the token list is already formatted with special tokens for the model - - Returns: - :obj:`List[int]`: A list of integers in the range [0, 1]: 0 for a special token, 1 for a sequence token. - """ - if already_has_special_tokens: - if token_ids_1 is not None: - raise ValueError( - "You should not supply a second sequence if the provided sequence of " - "ids is already formated with special tokens for the model." - ) - return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0)) - - if token_ids_1 is None: - return [1] + ([0] * len(token_ids_0)) + [1] - return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1] - - def create_token_type_ids_from_sequences( - self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None - ) -> List[int]: - """ - Creates a mask from the two sequences passed to be used in a sequence-pair classification task. - RoBERTa does not make use of token type ids, therefore a list of zeros is returned. - - Args: - token_ids_0 (:obj:`List[int]`): - List of ids. - token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`): - Optional second list of IDs for sequence pairs. - - Returns: - :obj:`List[int]`: List of zeros. - - """ - sep = [self.sep_token_id] - cls = [self.cls_token_id] - - if token_ids_1 is None: - return len(cls + token_ids_0 + sep) * [0] - return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0] - - def prepare_for_tokenization(self, text, add_special_tokens=False, **kwargs): - if "add_prefix_space" in kwargs: - add_prefix_space = kwargs["add_prefix_space"] - else: - add_prefix_space = add_special_tokens - if add_prefix_space and not text[0].isspace(): - text = " " + text - return text - - @classmethod - def from_pretrained(cls, model_dir_or_name, *inputs, **kwargs): - """ - - :param str model_dir_or_name: 目录或者缩写名 - :param kwargs: - :return: - """ - # 它需要两个文件,第一个是vocab.json,第二个是merge_file? - model_dir = _get_bert_dir(model_dir_or_name) - # 里面会包含四个文件vocab.json, merge.txt, config.json, model.bin - - tokenizer_config_file = _get_file_name_base_on_postfix(model_dir, 'config.json') - with open(tokenizer_config_file, encoding="utf-8") as tokenizer_config_handle: - init_kwargs = json.load(tokenizer_config_handle) - # Set max length if needed - if model_dir_or_name in PRETRAINED_ROBERTA_POSITIONAL_EMBEDDINGS_SIZES: - # if we're using a pretrained model, ensure the tokenizer - # wont index sequences longer than the number of positional embeddings - max_len = PRETRAINED_ROBERTA_POSITIONAL_EMBEDDINGS_SIZES[model_dir_or_name] - if max_len is not None and isinstance(max_len, (int, float)): - init_kwargs["max_len"] = min(init_kwargs.get("max_len", int(1e12)), max_len) - - # 将vocab, merge加入到init_kwargs中 - if 'vocab_file' in kwargs: # 如果指定了词表则用指定词表 - init_kwargs['vocab_file'] = kwargs['vocab_file'] - else: - init_kwargs['vocab_file'] = _get_file_name_base_on_postfix(model_dir, 'vocab.json') - init_kwargs['merges_file'] = _get_file_name_base_on_postfix(model_dir, 'merges.txt') - - init_inputs = init_kwargs.pop("init_inputs", ()) - # Instantiate tokenizer. - try: - tokenizer = cls(*init_inputs, **init_kwargs) - except OSError: - OSError( - "Unable to load vocabulary from file. " - "Please check that the provided vocabulary is accessible and not corrupted." 
- ) - - return tokenizer - - diff --git a/fastNLP/modules/generator/__init__.py b/fastNLP/modules/generator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/fastNLP/modules/generator/seq2seq_generator.py b/fastNLP/modules/generator/seq2seq_generator.py new file mode 100755 index 00000000..d332cc2f --- /dev/null +++ b/fastNLP/modules/generator/seq2seq_generator.py @@ -0,0 +1,444 @@ +import torch +from ..decoder.seq2seq_decoder import Decoder +import torch.nn.functional as F +from fastNLP.core.utils import _get_model_device +from functools import partial + + +class SequenceGenerator: + def __init__(self, decoder: Decoder, max_length=20, num_beams=1, + do_sample=True, temperature=1.0, top_k=50, top_p=1.0, bos_token_id=None, eos_token_id=None, + repetition_penalty=1, length_penalty=1.0, pad_token_id=0): + if do_sample: + self.generate_func = partial(sample_generate, decoder=decoder, max_length=max_length, num_beams=num_beams, + temperature=temperature, top_k=top_k, top_p=top_p, bos_token_id=bos_token_id, + eos_token_id=eos_token_id, repetition_penalty=repetition_penalty, + length_penalty=length_penalty, pad_token_id=pad_token_id) + else: + self.generate_func = partial(greedy_generate, decoder=decoder, max_length=max_length, num_beams=num_beams, + bos_token_id=bos_token_id, eos_token_id=eos_token_id, + repetition_penalty=repetition_penalty, + length_penalty=length_penalty, pad_token_id=pad_token_id) + self.do_sample = do_sample + self.max_length = max_length + self.num_beams = num_beams + self.temperature = temperature + self.top_k = top_k + self.top_p = top_p + self.bos_token_id = bos_token_id + self.eos_token_id = eos_token_id + self.repetition_penalty = repetition_penalty + self.length_penalty = length_penalty + self.decoder = decoder + + @torch.no_grad() + def generate(self, tokens=None, past=None): + """ + + :param torch.LongTensor tokens: batch_size x length, 开始的token + :param past: + :return: + """ + # TODO 需要查看如果tokens长度不是1,decode的时候是否还能够直接decode? 
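+        # A minimal usage sketch (illustrative only; `decoder`, `encoder_past`, `bos_id` and
+        # `eos_id` are placeholders for objects that follow this module's Decoder/Past interfaces):
+        #
+        #     generator = SequenceGenerator(decoder, max_length=20, num_beams=4, do_sample=False,
+        #                                   bos_token_id=bos_id, eos_token_id=eos_id)
+        #     token_ids = generator.generate(tokens=None, past=encoder_past)  # (bsz, <=max_length)
+        #
+        # self.generate_func is greedy_generate or sample_generate depending on do_sample; both
+        # accept the same tokens/past arguments.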
+ return self.generate_func(tokens=tokens, past=past) + + +@torch.no_grad() +def greedy_generate(decoder, tokens=None, past=None, max_length=20, num_beams=1, + bos_token_id=None, eos_token_id=None, pad_token_id=0, + repetition_penalty=1, length_penalty=1.0): + """ + 贪婪地搜索句子 + + :param Decoder decoder: Decoder对象 + :param torch.LongTensor tokens: batch_size x len, decode的输入值,如果为None,则自动从bos_token_id开始生成 + :param Past past: 应该包好encoder的一些输出。 + :param int max_length: 生成句子的最大长度。 + :param int num_beams: 使用多大的beam进行解码。 + :param int bos_token_id: 如果tokens传入为None,则使用bos_token_id开始往后解码。 + :param int eos_token_id: 结束的token,如果为None,则一定会解码到max_length这么长。 + :param int pad_token_id: + :param float repetition_penalty: 对重复出现的token多大的惩罚。 + :param float length_penalty: 对每个token(除了eos)按照长度进行一定的惩罚。 + :return: + """ + if num_beams == 1: + token_ids = _no_beam_search_generate(decoder, tokens, past, max_length, temperature=1, top_k=50, top_p=1, + bos_token_id=bos_token_id, eos_token_id=eos_token_id, do_sample=False, + repetition_penalty=repetition_penalty, length_penalty=length_penalty, + pad_token_id=pad_token_id) + else: + token_ids = _beam_search_generate(decoder, tokens, past, max_length, num_beams=num_beams, + temperature=1, top_k=50, top_p=1, + bos_token_id=bos_token_id, eos_token_id=eos_token_id, do_sample=False, + repetition_penalty=repetition_penalty, length_penalty=length_penalty, + pad_token_id=pad_token_id) + + return token_ids + + +@torch.no_grad() +def sample_generate(decoder, tokens=None, past=None, max_length=20, num_beams=1, temperature=1.0, top_k=50, + top_p=1.0, bos_token_id=None, eos_token_id=None, pad_token_id=0, repetition_penalty=1.0, + length_penalty=1.0): + """ + 使用采样的方法生成句子 + + :param Decoder decoder: Decoder对象 + :param torch.LongTensor tokens: batch_size x len, decode的输入值,如果为None,则自动从bos_token_id开始生成 + :param Past past: 应该包好encoder的一些输出。 + :param int max_length: 生成句子的最大长度。 + :param int num_beam: 使用多大的beam进行解码。 + :param float temperature: 采样时的退火大小 + :param int top_k: 只在top_k的sample里面采样 + :param float top_p: 介于0,1的值。 + :param int bos_token_id: 如果tokens传入为None,则使用bos_token_id开始往后解码。 + :param int eos_token_id: 结束的token,如果为None,则一定会解码到max_length这么长。 + :param int pad_token_id: pad的token id + :param float repetition_penalty: 对重复出现的token多大的惩罚。 + :param float length_penalty: 对每个token(除了eos)按照长度进行一定的惩罚。 + :return: + """ + # 每个位置在生成的时候会sample生成 + if num_beams == 1: + token_ids = _no_beam_search_generate(decoder, tokens, past, max_length, temperature=temperature, + top_k=top_k, top_p=top_p, + bos_token_id=bos_token_id, eos_token_id=eos_token_id, do_sample=True, + repetition_penalty=repetition_penalty, length_penalty=length_penalty, + pad_token_id=pad_token_id) + else: + token_ids = _beam_search_generate(decoder, tokens, past, max_length, num_beams=num_beams, + temperature=temperature, top_k=top_k, top_p=top_p, + bos_token_id=bos_token_id, eos_token_id=eos_token_id, do_sample=True, + repetition_penalty=repetition_penalty, length_penalty=length_penalty, + pad_token_id=pad_token_id) + return token_ids + + +def _no_beam_search_generate(decoder: Decoder, tokens=None, past=None, max_length=20, temperature=1.0, top_k=50, + top_p=1.0, bos_token_id=None, eos_token_id=None, do_sample=True, + repetition_penalty=1.0, length_penalty=1.0, pad_token_id=0): + device = _get_model_device(decoder) + if tokens is None: + if bos_token_id is None: + raise RuntimeError("You have to specify either `tokens` or `bos_token_id`.") + if past is None: + raise RuntimeError("You have to specify either `past` or `tokens`.") + 
batch_size = past.num_samples() + if batch_size is None: + raise RuntimeError("Cannot infer the number of samples from `past`.") + tokens = torch.full([batch_size, 1], fill_value=bos_token_id, dtype=torch.long).to(device) + batch_size = tokens.size(0) + if past is not None: + assert past.num_samples() == batch_size, "The number of samples in `tokens` and `past` should match." + + if eos_token_id is None: + _eos_token_id = float('nan') + else: + _eos_token_id = eos_token_id + + # for i in range(tokens.size(1)): + # scores, past = decoder.decode_one(tokens[:, :i + 1], past) # batch_size x vocab_size, Past + scores, past = decoder.decode(tokens, past) + + token_ids = tokens.clone() + cur_len = token_ids.size(1) + dones = token_ids.new_zeros(batch_size).eq(1) + # tokens = tokens[:, -1:] + + while cur_len < max_length: + # scores, past = decoder.decode_one(tokens, past) # batch_size x vocab_size, Past + scores, past = decoder.decode(tokens, past) # batch_size x vocab_size, Past + + if repetition_penalty != 1.0: + token_scores = scores.gather(dim=1, index=token_ids) + lt_zero_mask = token_scores.lt(0).float() + ge_zero_mask = lt_zero_mask.eq(0).float() + token_scores = lt_zero_mask * repetition_penalty * token_scores + ge_zero_mask / repetition_penalty * token_scores + scores.scatter_(dim=1, index=token_ids, src=token_scores) + + if eos_token_id is not None and length_penalty != 1.0: + token_scores = scores / cur_len ** length_penalty # batch_size x vocab_size + eos_mask = scores.new_ones(scores.size(1)) + eos_mask[eos_token_id] = 0 + eos_mask = eos_mask.unsqueeze(0).eq(1) + scores = scores.masked_scatter(eos_mask, token_scores) # 也即除了eos,其他词的分数经过了放大/缩小 + + if do_sample: + if temperature > 0 and temperature != 1: + scores = scores / temperature + + scores = top_k_top_p_filtering(scores, top_k, top_p, min_tokens_to_keep=2) + probs = F.softmax(scores, dim=-1) + + # 保证至少有一个不是eos的值 + next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1) # batch_size + else: + next_tokens = torch.argmax(scores, dim=-1) # batch_size + + next_tokens = next_tokens.masked_fill(dones, pad_token_id) # 对已经搜索完成的sample做padding + tokens = next_tokens.unsqueeze(1) + + token_ids = torch.cat([token_ids, tokens], dim=-1) # batch_size x max_len + + end_mask = next_tokens.eq(_eos_token_id) + dones = dones.__or__(end_mask) + cur_len += 1 + + if dones.min() == 1: + break + + if eos_token_id is not None: + if cur_len == max_length: + token_ids[:, -1].masked_fill_(~dones, eos_token_id) # 若到最长长度仍未到EOS,则强制将最后一个词替换成eos + + return token_ids + + +def _beam_search_generate(decoder: Decoder, tokens=None, past=None, max_length=20, num_beams=4, temperature=1.0, + top_k=50, top_p=1.0, bos_token_id=None, eos_token_id=None, do_sample=True, + repetition_penalty=1.0, length_penalty=None, pad_token_id=0) -> torch.LongTensor: + # 进行beam search + device = _get_model_device(decoder) + if tokens is None: + if bos_token_id is None: + raise RuntimeError("You have to specify either `tokens` or `bos_token_id`.") + if past is None: + raise RuntimeError("You have to specify either `past` or `tokens`.") + batch_size = past.num_samples() + if batch_size is None: + raise RuntimeError("Cannot infer the number of samples from `past`.") + tokens = torch.full([batch_size, 1], fill_value=bos_token_id, dtype=torch.long).to(device) + batch_size = tokens.size(0) + if past is not None: + assert past.num_samples() == batch_size, "The number of samples in `tokens` and `past` should match." 
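+    # Flow of the code below: decode the first step once, pick the initial num_beams candidates
+    # per sample, tile the batch to batch_size * num_beams via decoder.reorder_past, then loop:
+    # decode, expand every beam, keep the best unfinished continuations and push finished (eos)
+    # hypotheses into BeamHypotheses until every sample is done or max_length is reached.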
+ + # for i in range(tokens.size(1) - 1): # 如果输入的长度较长,先decode + # scores, past = decoder.decode_one(tokens[:, :i + 1], + # past) # (batch_size, vocab_size), Past + # scores, past = decoder.decode_one(tokens, past) # 这里要传入的是整个句子的长度 + scores, past = decoder.decode(tokens, past) # 这里要传入的是整个句子的长度 + vocab_size = scores.size(1) + assert vocab_size >= num_beams, "num_beams should be smaller than the number of vocabulary size." + + if do_sample: + probs = F.softmax(scores, dim=-1) + next_tokens = torch.multinomial(probs, num_samples=num_beams) # (batch_size, num_beams) + logits = probs.log() + next_scores = logits.gather(dim=1, index=next_tokens) # (batch_size, num_beams) + else: + scores = F.log_softmax(scores, dim=-1) # (batch_size, vocab_size) + # 得到(batch_size, num_beams), (batch_size, num_beams) + next_scores, next_tokens = torch.topk(scores, num_beams, dim=1, largest=True, sorted=True) + + indices = torch.arange(batch_size, dtype=torch.long).to(device) + indices = indices.repeat_interleave(num_beams) + decoder.reorder_past(indices, past) + + tokens = tokens.index_select(dim=0, index=indices) # batch_size * num_beams x length + # 记录生成好的token (batch_size', cur_len) + token_ids = torch.cat([tokens, next_tokens.view(-1, 1)], dim=-1) + dones = [False] * batch_size + tokens = next_tokens.view(-1, 1) + + beam_scores = next_scores.view(-1) # batch_size * num_beams + + # 用来记录已经生成好的token的长度 + cur_len = token_ids.size(1) + + hypos = [ + BeamHypotheses(num_beams, max_length, length_penalty, early_stopping=False) for _ in range(batch_size) + ] + # 0,num_beams, 2*num_beams, ... + batch_inds_with_numbeams_interval = (torch.arange(batch_size) * num_beams).view(-1, 1).to(token_ids) + + while cur_len < max_length: + # scores, past = decoder.decode_one(tokens, past) # batch_size * num_beams x vocab_size, Past + scores, past = decoder.decode(tokens, past) + if repetition_penalty != 1.0: + token_scores = scores.gather(dim=1, index=token_ids) + lt_zero_mask = token_scores.lt(0).float() + ge_zero_mask = lt_zero_mask.eq(0).float() + token_scores = lt_zero_mask * repetition_penalty * token_scores + ge_zero_mask / repetition_penalty * token_scores + scores.scatter_(dim=1, index=token_ids, src=token_scores) + + if do_sample: + if temperature > 0 and temperature != 1: + scores = scores / temperature + + # 多召回一个防止eos + scores = top_k_top_p_filtering(scores, top_k, top_p, min_tokens_to_keep=num_beams + 1) + probs = F.softmax(scores, dim=-1) + + # 保证至少有一个不是eos的值 + _tokens = torch.multinomial(probs, num_samples=num_beams + 1) # batch_size' x (num_beams+1) + + logits = probs.log() + # 防止全是这个beam的被选中了,且需要考虑eos被选择的情况 + _scores = logits.gather(dim=1, index=_tokens) # batch_size' x (num_beams+1) + _scores = _scores + beam_scores[:, None] # batch_size' x (num_beams+1) + # 从这里面再选择top的2*num_beam个 + _scores = _scores.view(batch_size, num_beams * (num_beams + 1)) + next_scores, ids = _scores.topk(2 * num_beams, dim=1, largest=True, sorted=True) + _tokens = _tokens.view(batch_size, num_beams * (num_beams + 1)) + next_tokens = _tokens.gather(dim=1, index=ids) # (batch_size, 2*num_beams) + from_which_beam = ids // (num_beams + 1) # (batch_size, 2*num_beams) + else: + scores = F.log_softmax(scores, dim=-1) # (batch_size * num_beams, vocab_size) + _scores = scores + beam_scores[:, None] # (batch_size * num_beams, vocab_size) + _scores = _scores.view(batch_size, -1) # (batch_size, num_beams*vocab_size) + next_scores, ids = torch.topk(_scores, 2 * num_beams, dim=1, largest=True, sorted=True) + from_which_beam = ids // vocab_size # 
(batch_size, 2*num_beams) + next_tokens = ids % vocab_size # (batch_size, 2*num_beams) + + # 接下来需要组装下一个batch的结果。 + # 需要选定哪些留下来 + next_scores, sorted_inds = next_scores.sort(dim=-1, descending=True) + next_tokens = next_tokens.gather(dim=1, index=sorted_inds) + from_which_beam = from_which_beam.gather(dim=1, index=sorted_inds) + + not_eos_mask = next_tokens.ne(eos_token_id) # 为1的地方不是eos + keep_mask = not_eos_mask.cumsum(dim=1).le(num_beams) # 为1的地方需要保留 + keep_mask = not_eos_mask.__and__(keep_mask) # 为1的地方是需要进行下一步search的 + + _next_tokens = next_tokens.masked_select(keep_mask).view(-1, 1) + _from_which_beam = from_which_beam.masked_select(keep_mask).view(batch_size, num_beams) # 上面的token是来自哪个beam + _next_scores = next_scores.masked_select(keep_mask).view(batch_size, num_beams) + beam_scores = _next_scores.view(-1) + + # 更改past状态, 重组token_ids + reorder_inds = (batch_inds_with_numbeams_interval + _from_which_beam).view(-1) # flatten成一维 + decoder.reorder_past(reorder_inds, past) + + flag = True + if cur_len + 1 == max_length: + eos_batch_idx = torch.arange(batch_size).to(next_tokens).repeat_interleave(repeats=num_beams, dim=0) + eos_beam_ind = torch.arange(num_beams).to(token_ids).repeat(batch_size) # 表示的是indice + eos_beam_idx = from_which_beam[:, :num_beams].reshape(-1) # 表示的是从哪个beam获取得到的 + else: + # 将每个batch中在num_beam内的序列添加到结束中, 为1的地方需要结束了 + effective_eos_mask = next_tokens[:, :num_beams].eq(eos_token_id) # batch_size x num_beams + if effective_eos_mask.sum().gt(0): + eos_batch_idx, eos_beam_ind = effective_eos_mask.nonzero(as_tuple=True) + # 是由于from_which_beam是 (batch_size, 2*num_beams)的,所以需要2*num_beams + eos_beam_idx = eos_batch_idx * num_beams * 2 + eos_beam_ind + eos_beam_idx = from_which_beam.view(-1)[eos_beam_idx] # 获取真实的从哪个beam获取的eos + else: + flag = False + if flag: + for batch_idx, beam_ind, beam_idx in zip(eos_batch_idx.tolist(), eos_beam_ind.tolist(), + eos_beam_idx.tolist()): + if not dones[batch_idx]: + score = next_scores[batch_idx, beam_ind].item() + hypos[batch_idx].add(token_ids[batch_idx * num_beams + beam_idx, :cur_len].clone(), score) + + # 重新组织token_ids的状态 + tokens = _next_tokens + token_ids = torch.cat([token_ids.index_select(index=reorder_inds, dim=0), tokens], dim=-1) + + for batch_idx in range(batch_size): + dones[batch_idx] = dones[batch_idx] or hypos[batch_idx].is_done(next_scores[batch_idx, 0].item()) + + cur_len += 1 + + if all(dones): + break + + # select the best hypotheses + tgt_len = token_ids.new(batch_size) + best = [] + + for i, hypotheses in enumerate(hypos): + best_hyp = max(hypotheses.hyp, key=lambda x: x[0])[1] + tgt_len[i] = len(best_hyp) + 1 # +1 for the symbol + best.append(best_hyp) + + # generate target batch + decoded = token_ids.new(batch_size, tgt_len.max().item()).fill_(pad_token_id) + for i, hypo in enumerate(best): + decoded[i, :tgt_len[i] - 1] = hypo + if eos_token_id is not None: + decoded[i, tgt_len[i] - 1] = eos_token_id + + return decoded + + +class BeamHypotheses(object): + def __init__(self, num_beams, max_length, length_penalty, early_stopping): + """ + Initialize n-best list of hypotheses. + """ + self.max_length = max_length - 1 # ignoring bos_token + self.length_penalty = length_penalty + self.early_stopping = early_stopping + self.num_beams = num_beams + self.hyp = [] + self.worst_score = 1e9 + + def __len__(self): + """ + Number of hypotheses in the list. + """ + return len(self.hyp) + + def add(self, hyp, sum_logprobs): + """ + Add a new hypothesis to the list. 
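+
+        The hypothesis is scored by its length-normalized log-probability,
+        ``sum_logprobs / len(hyp) ** self.length_penalty``; once ``num_beams`` hypotheses are
+        stored, a new one is only kept if it beats the current worst score.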
+ """ + score = sum_logprobs / len(hyp) ** self.length_penalty + if len(self) < self.num_beams or score > self.worst_score: + self.hyp.append((score, hyp)) + if len(self) > self.num_beams: + sorted_scores = sorted([(s, idx) for idx, (s, _) in enumerate(self.hyp)]) + del self.hyp[sorted_scores[0][1]] + self.worst_score = sorted_scores[1][0] + else: + self.worst_score = min(score, self.worst_score) + + def is_done(self, best_sum_logprobs): + """ + If there are enough hypotheses and that none of the hypotheses being generated + can become better than the worst one in the heap, then we are done with this sentence. + """ + if len(self) < self.num_beams: + return False + elif self.early_stopping: + return True + else: + return self.worst_score >= best_sum_logprobs / self.max_length ** self.length_penalty + + +def top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-float("Inf"), min_tokens_to_keep=1): + """ + 根据top_k, top_p的值,将不满足的值置为filter_value的值 + + :param torch.Tensor logits: bsz x vocab_size + :param int top_k: 如果大于0,则只保留最top_k的词汇的概率,剩下的位置被置为filter_value + :param int top_p: 根据(http://arxiv.org/abs/1904.09751)设置的筛选方式 + :param float filter_value: + :param int min_tokens_to_keep: 每个sample返回的分布中有概率的词不会低于这个值 + :return: + """ + if top_k > 0: + top_k = min(max(top_k, min_tokens_to_keep), logits.size(-1)) # Safety check + # Remove all tokens with a probability less than the last token of the top-k + indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None] + logits[indices_to_remove] = filter_value + + if top_p < 1.0: + sorted_logits, sorted_indices = torch.sort(logits, descending=True) + cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) + + # Remove tokens with cumulative probability above the threshold (token with 0 are kept) + sorted_indices_to_remove = cumulative_probs > top_p + if min_tokens_to_keep > 1: + # Keep at least min_tokens_to_keep (set to min_tokens_to_keep-1 because we add the first one below) + sorted_indices_to_remove[..., :min_tokens_to_keep] = 0 + # Shift the indices to the right to keep also the first token above the threshold + sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() + sorted_indices_to_remove[..., 0] = 0 + + # scatter sorted tensors to original indexing + indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove) + logits[indices_to_remove] = filter_value + return logits diff --git a/fastNLP/modules/tokenizer/__init__.py b/fastNLP/modules/tokenizer/__init__.py new file mode 100644 index 00000000..f3c4faae --- /dev/null +++ b/fastNLP/modules/tokenizer/__init__.py @@ -0,0 +1,14 @@ +r""" + +""" +__all__=[ + 'BertTokenizer', + + "GPT2Tokenizer", + + "RobertaTokenizer" +] + +from .bert_tokenizer import BertTokenizer +from .gpt2_tokenizer import GPT2Tokenizer +from .roberta_tokenizer import RobertaTokenizer \ No newline at end of file diff --git a/fastNLP/modules/tokenizer/bert_tokenizer.py b/fastNLP/modules/tokenizer/bert_tokenizer.py new file mode 100644 index 00000000..7df6b52d --- /dev/null +++ b/fastNLP/modules/tokenizer/bert_tokenizer.py @@ -0,0 +1,447 @@ +r""" + +""" + +__all__ = [ + 'BertTokenizer' +] + +import os +import collections +import unicodedata +from ...core import logger +from ..utils import _get_file_name_base_on_postfix +from ...io.file_utils import _get_bert_dir + +VOCAB_NAME = 'vocab.txt' + +PRETRAINED_INIT_CONFIGURATION = { + "en": {"do_lower_case": False}, + "en-base-uncased": {'do_lower_case': True}, + 'en-base-cased': 
{'do_lower_case':False}, + "en-large-cased-wwm": {"do_lower_case": False}, + 'en-large-cased': {'do_lower_case':False}, + 'en-large-uncased': {'do_lower_case':True}, + 'en-large-uncased-wwm': {'do_lower_case':True}, + 'cn': {'do_lower_case':True}, + 'cn-base': {'do_lower_case': True}, + 'cn-wwm-ext': {'do_lower_case': True}, + 'multi-base-cased': {'do_lower_case': False}, + 'multi-base-uncased': {'do_lower_case': True}, +} + +def _is_control(char): + r"""Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + r"""Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if (((cp >= 33) and (cp <= 47)) or ((cp >= 58) and (cp <= 64)) or + ((cp >= 91) and (cp <= 96)) or ((cp >= 123) and (cp <= 126))): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False + + +def _is_whitespace(char): + r"""Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def whitespace_tokenize(text): + r"""Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class BasicTokenizer(object): + r"""Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, + do_lower_case=True, + never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")): + r"""Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + self.never_split = never_split + + def tokenize(self, text): + r"""Tokenizes a piece of text.""" + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). 
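+        # Illustrative example (assuming do_lower_case=True): "Hello,世界!" is cleaned, CJK
+        # characters get surrounded by spaces and punctuation is split off, yielding
+        # ["hello", ",", "世", "界", "!"].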
+ text = self._tokenize_chinese_chars(text) + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case and token not in self.never_split: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + r"""Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + r"""Splits punctuation on a piece of text.""" + if text in self.never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + r"""Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + r"""Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. + if (((cp >= 0x4E00) and (cp <= 0x9FFF)) or # + ((cp >= 0x3400) and (cp <= 0x4DBF)) or # + ((cp >= 0x20000) and (cp <= 0x2A6DF)) or # + ((cp >= 0x2A700) and (cp <= 0x2B73F)) or # + ((cp >= 0x2B740) and (cp <= 0x2B81F)) or # + ((cp >= 0x2B820) and (cp <= 0x2CEAF)) or + ((cp >= 0xF900) and (cp <= 0xFAFF)) or # + ((cp >= 0x2F800) and (cp <= 0x2FA1F))): # + return True + + return False + + def _clean_text(self, text): + r"""Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + + +def load_vocab(vocab_file): + r"""Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with open(vocab_file, "r", encoding="utf-8") as reader: + while True: + token = reader.readline() + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + + +class WordpieceTokenizer(object): + r"""Runs WordPiece tokenization.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + r"""Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. 
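+        Sub-word pieces after the first one carry a "##" prefix so the original token can be
+        reconstructed, as in the example below.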
+ + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer`. + + Returns: + A list of wordpiece tokens. + """ + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + if len(output_tokens) == 0: # 防止里面全是空格或者回车符号 + return [self.unk_token] + return output_tokens + + +class BertTokenizer(object): + r"""Runs end-to-end tokenization: punctuation splitting + wordpiece""" + + def __init__(self, vocab_file, do_lower_case=True, max_len=None, do_basic_tokenize=True, + never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")): + r"""Constructs a BertTokenizer. + + Args: + vocab_file: Path to a one-wordpiece-per-line vocabulary file + do_lower_case: Whether to lower case the input + Only has an effect when do_wordpiece_only=False + do_basic_tokenize: Whether to do basic tokenization before wordpiece. + max_len: An artificial maximum length to truncate tokenized sequences to; + Effective maximum length is always the minimum of this + value (if specified) and the underlying BERT model's + sequence length. + never_split: List of tokens which will never be split during tokenization. + Only has an effect when do_wordpiece_only=False + """ + if not os.path.isfile(vocab_file): + raise ValueError( + "Can't find a vocabulary file at path '{}'. 
To load the vocabulary from a Google pretrained " + "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)) + self.vocab = load_vocab(vocab_file) + self.ids_to_tokens = collections.OrderedDict( + [(ids, tok) for tok, ids in self.vocab.items()]) + self.do_basic_tokenize = do_basic_tokenize + if do_basic_tokenize: + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case, + never_split=never_split) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + self.max_len = max_len if max_len is not None else int(1e12) + + @property + def unk_index(self): + return self.vocab['[UNK]'] + + @property + def pad_index(self): + return self.vocab['[PAD]'] + + @property + def cls_index(self): + return self.vocab['[CLS]'] + + @property + def sep_index(self): + return self.vocab['[SEP]'] + + def _reinit_on_new_vocab(self, vocab): + r""" + 在load bert之后,可能会对vocab进行重新排列。重新排列之后调用这个函数重新初始化与vocab相关的性质 + + :param vocab: + :return: + """ + self.vocab = vocab + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + + def tokenize(self, text): + split_tokens = [] + if self.do_basic_tokenize: + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + else: + split_tokens = self.wordpiece_tokenizer.tokenize(text) + return split_tokens + + def convert_tokens_to_ids(self, tokens): + r"""Converts a sequence of tokens into ids using the vocab.""" + ids = [] + for token in tokens: + ids.append(self.vocab[token]) + if len(ids) > self.max_len: + logger.warning( + "Token indices sequence length is longer than the specified maximum " + " sequence length for this BERT model ({} > {}). Running this" + " sequence through BERT will result in indexing errors".format(len(ids), self.max_len) + ) + return ids + + def convert_ids_to_tokens(self, ids): + r"""将token ids转换为一句话""" + tokens = [] + for i in ids: + tokens.append(self.ids_to_tokens[i]) + return self._convert_tokens_to_string(tokens) + + def _convert_tokens_to_string(self, tokens): + """ Converts a sequence of tokens (string) in a single string. """ + out_string = " ".join(tokens).replace(" ##", "").strip() + return out_string + + def save_vocabulary(self, vocab_path): + r"""Save the tokenizer vocabulary to a directory or file.""" + index = 0 + if os.path.isdir(vocab_path): + vocab_file = os.path.join(vocab_path, VOCAB_NAME) + else: + vocab_file = vocab_path + with open(vocab_file, "w", encoding="utf-8") as writer: + for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]): + if index != token_index: + logger.warning("Saving vocabulary to {}: vocabulary indices are not consecutive." + " Please check that the vocabulary is not corrupted!".format(vocab_file)) + index = token_index + writer.write(token + u'\n') + index += 1 + return vocab_file + + @classmethod + def from_pretrained(cls, model_dir_or_name, *inputs, **kwargs): + r""" + 给定模型的名字或者路径,直接读取vocab. + """ + model_dir = _get_bert_dir(model_dir_or_name) + pretrained_model_name_or_path = _get_file_name_base_on_postfix(model_dir, '.txt') + logger.info("loading vocabulary file {}".format(pretrained_model_name_or_path)) + max_len = 512 + kwargs['max_len'] = min(kwargs.get('max_position_embeddings', int(1e12)), max_len) + # Instantiate tokenizer. 
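+        # If do_lower_case is not passed explicitly, it is inferred below: known shortcut names
+        # use PRETRAINED_INIT_CONFIGURATION, otherwise the model name is inspected (the 'case'
+        # check runs first, so it also matches names that contain 'uncased').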
+ if 'do_lower_case' not in kwargs: + if model_dir_or_name in PRETRAINED_INIT_CONFIGURATION: + kwargs['do_lower_case'] = PRETRAINED_INIT_CONFIGURATION[model_dir_or_name]['do_lower_case'] + else: + if 'case' in model_dir_or_name: + kwargs['do_lower_case'] = False + elif 'uncase' in model_dir_or_name: + kwargs['do_lower_case'] = True + + tokenizer = cls(pretrained_model_name_or_path, *inputs, **kwargs) + return tokenizer + + def encode(self, text, add_special_tokens=True): + """ + 给定text输入将数据encode为index的形式。 + + Example:: + + >>> from fastNLP.modules import BertTokenizer + >>> bert_tokenizer = BertTokenizer.from_pretrained('en') + >>> print(bert_tokenizer.encode('from')) + >>> print(bert_tokenizer.encode("This is a demo sentence")) + >>> print(bert_tokenizer.encode(["This", "is", 'a'])) + + + :param List[str],str text: 输入的一条认为是一句话。 + :param bool add_special_tokens: 是否保证句首和句尾是cls和sep。 + :return: + """ + + word_pieces = [] + if isinstance(text, str): + words = text.split() + elif isinstance(text, list): + words = text + else: + raise TypeError("Only support str or List[str]") + for word in words: + _words = self.basic_tokenizer._tokenize_chinese_chars(word).split() + tokens = [] + for word in _words: + tokens.extend(self.wordpiece_tokenizer.tokenize(word)) + word_piece_ids = self.convert_tokens_to_ids(tokens) + word_pieces.extend(word_piece_ids) + if add_special_tokens: + if word_pieces[0] != self.cls_index: + word_pieces.insert(0, self.cls_index) + if word_pieces[-1] != self.sep_index: + word_pieces.append(self.sep_index) + return word_pieces diff --git a/fastNLP/modules/tokenizer/gpt2_tokenizer.py b/fastNLP/modules/tokenizer/gpt2_tokenizer.py new file mode 100644 index 00000000..08675a23 --- /dev/null +++ b/fastNLP/modules/tokenizer/gpt2_tokenizer.py @@ -0,0 +1,758 @@ +r"""undocumented +这个页面的代码很大程度上参考(复制粘贴)了https://github.com/huggingface/pytorch-pretrained-BERT的代码, 如果你发现该代码对你 + 有用,也请引用一下他们。 +""" + +__all__ = [ + 'GPT2Tokenizer' +] + +from functools import lru_cache +import json +import regex as re +import itertools + + +from ...io.file_utils import _get_gpt2_dir +from ...core import logger +from ..utils import _get_file_name_base_on_postfix + + +import os + +PRETRAINED_GPT2_MODEL_DIR = PRETRAINED_BERT_MODEL_DIR = { + 'en-small': 'gpt2-small.zip', + 'en-median': 'gpt2-medium.zip', + 'en': 'gpt2-medium.zip' +} + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a mapping to unicode strings. + We specifically avoids mapping to whitespace/control characters the bpe code barfs on. + + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + """ + bs = ( + list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1)) + ) + cs = bs[:] + n = 0 + for b in range(2 ** 8): + if b not in bs: + bs.append(b) + cs.append(2 ** 8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + + Word is represented as tuple of symbols (symbols being variable-length strings). 
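+
+    For example, ``get_pairs(("h", "e", "l", "l", "o"))`` returns
+    ``{("h", "e"), ("e", "l"), ("l", "l"), ("l", "o")}``.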
+ """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +VOCAB_FILES_NAMES = { + "vocab_file": "vocab.json", + "merges_file": "merges.txt", +} + + +PRETRAINED_VOCAB_FILES_MAP = { + "vocab_file": { + "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json", + "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-vocab.json", + "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-vocab.json", + "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-vocab.json", + "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-vocab.json", + }, + "merges_file": { + "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt", + "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-merges.txt", + "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-merges.txt", + "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-merges.txt", + "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-merges.txt", + }, +} + + +PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { + "en-small": 1024, + 'en': 1024, + "en-medium": 1024, + "en-large": 1024, + "en-xl": 1024, + "en-distilgpt2": 1024, +} + +PATTERN = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") + + +def gpt2_tokenize(text, add_prefix_space=True): + """ + + :param str text: + :param bool add_prefix_space: 是否在句子前面加上space,如果加上才能保证与GPT2训练时一致 + :return: [] + """ + if text is '': + return [] + if add_prefix_space: + text = ' ' + text + tokens = [] + for token in re.findall(PATTERN, text): + tokens.append(token) + return tokens + + +class GPT2Tokenizer: + """ + GPT-2 BPE tokenizer. Peculiarities: + - Byte-level Byte-Pair-Encoding + - Requires a space to start the input string => the encoding and tokenize methods should be called with the + ``add_prefix_space`` flag set to ``True``. 
+ Otherwise, this tokenizer's ``encode``, ``decode``, and ``tokenize`` methods will not conserve + the spaces at the beginning of a string: `tokenizer.decode(tokenizer.encode(" Hello")) = "Hello"` + """ + + vocab_files_names = VOCAB_FILES_NAMES + pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP + + SPECIAL_TOKENS_ATTRIBUTES = [ + "bos_token", + "eos_token", + "unk_token", + "pad_token", + "cls_token", + "mask_token", + "sep_token", + ] + + padding_side = "right" + + def __init__( + self, + vocab_file, + merges_file, + errors="replace", + unk_token="<|endoftext|>", + bos_token="<|endoftext|>", + eos_token="<|endoftext|>", + **kwargs + ): + self._bos_token = None + self._eos_token = None + self._unk_token = None + self._sep_token = None + self._pad_token = None + self._cls_token = None + self._mask_token = None + self._pad_token_type_id = 0 + + self.bos_token = bos_token + self.eos_token = eos_token + self.unk_token = unk_token + + self.max_len = int(1e12) + self.padding_side = kwargs.pop("padding_side", self.padding_side) + self.added_tokens_encoder = {} + self.unique_added_tokens_encoder = set() + self.added_tokens_decoder = {} + # inputs and kwargs for saving and re-loading (see ``from_pretrained`` and ``save_pretrained``) + self.init_inputs = () + self.init_kwargs = {} + + for key, value in kwargs.items(): + if key in self.SPECIAL_TOKENS_ATTRIBUTES: + if key == "additional_special_tokens": + assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value) + else: + assert isinstance(value, str) + setattr(self, key, value) + + self.max_len_single_sentence = ( + self.max_len + ) # no default special tokens - you can update this value if you add special tokens + self.max_len_sentences_pair = ( + self.max_len + ) # no default special tokens - you can update this value if you add special tokens + + with open(vocab_file, encoding="utf-8") as vocab_handle: + self.encoder = json.load(vocab_handle) + self.decoder = {v: k for k, v in self.encoder.items()} + self.errors = errors # how to handle errors in decoding + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + with open(merges_file, encoding="utf-8") as merges_handle: + bpe_merges = merges_handle.read().split("\n")[1:-1] + bpe_merges = [tuple(merge.split()) for merge in bpe_merges] + self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges)))) + self.cache = {} + + def _reinit_on_new_vocab(self, vocab): + self.encoder = {k:v for k,v in vocab.items()} + self.decoder = {v:k for k,v in vocab.items()} + self.cache = {} + + @property + def bos_token(self): + """ Beginning of sentence token (string). Log an error if used while not having been set. """ + if self._bos_token is None: + logger.error("Using bos_token, but it is not set yet.") + return self._bos_token + + @property + def eos_token(self): + """ End of sentence token (string). Log an error if used while not having been set. """ + if self._eos_token is None: + logger.error("Using eos_token, but it is not set yet.") + return self._eos_token + + @property + def unk_token(self): + """ Unknown token (string). Log an error if used while not having been set. """ + if self._unk_token is None: + logger.error("Using unk_token, but it is not set yet.") + return self._unk_token + + @property + def pad_token(self): + """ Padding token (string). Log an error if used while not having been set. 
""" + if self._pad_token is None: + logger.error("Using pad_token, but it is not set yet.") + return self._pad_token + + @property + def cls_token(self): + """ Classification token (string). E.g. to extract a summary of an input sequence leveraging self-attention along the full depth of the model. Log an error if used while not having been set. """ + if self._cls_token is None: + logger.error("Using cls_token, but it is not set yet.") + return self._cls_token + + @property + def sep_token(self): + if self._sep_token is None: + logger.error("Using sep_token, but it is not set yet.") + return self._sep_token + + @property + def mask_token(self): + """ Mask token (string). E.g. when training a model with masked-language modeling. Log an error if used while not having been set. """ + if self._mask_token is None: + logger.error("Using mask_token, but it is not set yet.") + return self._mask_token + + @bos_token.setter + def bos_token(self, value): + self._bos_token = value + + @eos_token.setter + def eos_token(self, value): + self._eos_token = value + + @unk_token.setter + def unk_token(self, value): + self._unk_token = value + + @pad_token.setter + def pad_token(self, value): + self._pad_token = value + + @cls_token.setter + def cls_token(self, value): + self._cls_token = value + + @sep_token.setter + def sep_token(self, value): + self._sep_token = value + + @mask_token.setter + def mask_token(self, value): + self._mask_token = value + + @property + def bos_index(self): + """ Id of the beginning of sentence token in the vocabulary. Log an error if used while not having been set. """ + return self.convert_tokens_to_ids(self.bos_token) + + @property + def sep_index(self): + return self.convert_tokens_to_ids(self.sep_token) + + @property + def eos_index(self): + """ Id of the end of sentence token in the vocabulary. Log an error if used while not having been set. """ + return self.convert_tokens_to_ids(self.eos_token) + + @property + def unk_index(self): + """ Id of the unknown token in the vocabulary. Log an error if used while not having been set. """ + return self.convert_tokens_to_ids(self.unk_token) + + @property + def pad_index(self): + """ Id of the padding token in the vocabulary. Log an error if used while not having been set. """ + return self.convert_tokens_to_ids(self.pad_token) + + @property + def pad_token_type_id(self): + """ Id of the padding token type in the vocabulary.""" + return self._pad_token_type_id + + @property + def cls_index(self): + """ Id of the classification token in the vocabulary. E.g. to extract a summary of an input sequence leveraging self-attention along the full depth of the model. Log an error if used while not having been set. """ + return self.convert_tokens_to_ids(self.cls_token) + + @property + def mask_index(self): + """ Id of the mask token in the vocabulary. E.g. when training a model with masked-language modeling. Log an error if used while not having been set. 
""" + return self.convert_tokens_to_ids(self.mask_token) + + @property + def vocab_size(self): + return len(self.encoder) + + def bpe(self, token): + # 如果token没有找到,会被拆分成字母返回 + if token in self.cache: + return self.cache[token] + word = tuple(token) + pairs = get_pairs(word) # 如果word是abcd,则((a,b), (b,c), (c, d), (e,f)) + + if not pairs: + return token + + while True: + # 首先找到最常的pair + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf"))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + except ValueError: + new_word.extend(word[i:]) + break + else: + new_word.extend(word[i:j]) #最先找的 + i = j + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = " ".join(word) + self.cache[token] = word + return word + + def _tokenize(self, text, add_prefix_space=False): + """ Tokenize a string. + Args: + - add_prefix_space (boolean, default False): + Begin the sentence with at least one space to get invariance to word order in GPT-2 (and RoBERTa) tokenizers. + """ + bpe_tokens = [] + for token in gpt2_tokenize(text, add_prefix_space=add_prefix_space): + token = "".join( + self.byte_encoder[b] for b in token.encode("utf-8") + ) # Maps all our bytes to unicode strings, avoiding controle tokens of the BPE (spaces in our case) + bpe_tokens.extend(bpe_token for bpe_token in self.bpe(token).split(" ")) + return bpe_tokens + + def _convert_token_to_id(self, token): + """ Converts a token (str) in an id using the vocab. """ + return self.encoder.get(token, self.encoder.get(self.unk_token)) + + def _convert_id_to_token(self, index): + """Converts an index (integer) in a token (str) using the vocab.""" + return self.decoder.get(index) + + def convert_tokens_to_string(self, tokens): + """ Converts a sequence of tokens (string) in a single string. """ + text = "".join(tokens) + text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors=self.errors) + return text + + def save_vocabulary(self, save_directory): + """Save the tokenizer vocabulary and merge files to a directory.""" + if not os.path.isdir(save_directory): + logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) + return + vocab_file = os.path.join(save_directory, VOCAB_FILES_NAMES["vocab_file"]) + merge_file = os.path.join(save_directory, VOCAB_FILES_NAMES["merges_file"]) + + with open(vocab_file, "w", encoding="utf-8") as f: + f.write(json.dumps(self.encoder, ensure_ascii=False)) + + index = 0 + with open(merge_file, "w", encoding="utf-8") as writer: + writer.write("#version: 0.2\n") + for bpe_tokens, token_index in sorted(self.bpe_ranks.items(), key=lambda kv: kv[1]): + if index != token_index: + logger.warning( + "Saving vocabulary to {}: BPE merge indices are not consecutive." 
+ " Please check that the tokenizer is not corrupted!".format(merge_file) + ) + index = token_index + writer.write(" ".join(bpe_tokens) + "\n") + index += 1 + + return vocab_file, merge_file + + @classmethod + def from_pretrained(cls, model_dir_or_name): + r""" + """ + return cls._from_pretrained(model_dir_or_name) + + # 将它修改一定传入文件夹 + @classmethod + def _from_pretrained(cls, model_dir_or_name): + """ + + :param str model_dir_or_name: 目录或者缩写名 + :param init_inputs: + :param kwargs: + :return: + """ + # 它需要两个文件,第一个是vocab.json,第二个是merge_file? + model_dir = _get_gpt2_dir(model_dir_or_name) + # 里面会包含四个文件vocab.json, merge.txt, config.json, model.bin + + tokenizer_config_file = _get_file_name_base_on_postfix(model_dir, 'config.json') + with open(tokenizer_config_file, encoding="utf-8") as tokenizer_config_handle: + init_kwargs = json.load(tokenizer_config_handle) + if 'max_len' not in init_kwargs: + init_kwargs['max_len'] = 1024 + # Set max length if needed + if model_dir_or_name in PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES: + # if we're using a pretrained model, ensure the tokenizer + # wont index sequences longer than the number of positional embeddings + max_len = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES[model_dir_or_name] + if max_len is not None and isinstance(max_len, (int, float)): + init_kwargs["max_len"] = min(init_kwargs.get("max_len", int(1e12)), max_len) + + # 将vocab, merge加入到init_kwargs中 + init_kwargs['vocab_file'] = _get_file_name_base_on_postfix(model_dir, 'vocab.json') + init_kwargs['merges_file'] = _get_file_name_base_on_postfix(model_dir, 'merges.txt') + + init_inputs = init_kwargs.pop("init_inputs", ()) + # Instantiate tokenizer. + try: + tokenizer = cls(*init_inputs, **init_kwargs) + except OSError: + OSError( + "Unable to load vocabulary from file. " + "Please check that the provided vocabulary is accessible and not corrupted." + ) + + return tokenizer + + def __len__(self): + """ Size of the full vocabulary with the added tokens """ + return self.vocab_size + len(self.added_tokens_encoder) + + def tokenize(self, text, add_prefix_space=True): + """ Converts a string in a sequence of tokens (string), using the tokenizer. + Split in words for word-based vocabulary or sub-words for sub-word-based + vocabularies (BPE/SentencePieces/WordPieces). + + Take care of added tokens. + Args: + - text: The sequence to be encoded. + - add_prefix_space (boolean, default True): + Begin the sentence with at least one space to get invariance to word order in GPT-2 (and RoBERTa) tokenizers. 
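+
+        Example (illustrative; the exact sub-tokens depend on the loaded vocabulary)::
+
+            >>> tokenizer = GPT2Tokenizer.from_pretrained('en')
+            >>> tokenizer.tokenize("This is a demo sentence")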
+ """ + all_special_tokens = self.all_special_tokens + + def lowercase_text(t): + # convert non-special tokens to lowercase + escaped_special_toks = [re.escape(s_tok) for s_tok in all_special_tokens] + pattern = r'(' + r'|'.join(escaped_special_toks) + r')|' + \ + r'(.+?)' + return re.sub( + pattern, + lambda m: m.groups()[0] or m.groups()[1].lower(), + t) + + if self.init_kwargs.get('do_lower_case', False): + text = lowercase_text(text) + + def split_on_token(tok, text): + result = [] + split_text = text.split(tok) + for i, sub_text in enumerate(split_text): + sub_text = sub_text.strip() + if i == 0 and not sub_text: + result += [tok] + elif i == len(split_text) - 1: + if sub_text: + result += [sub_text] + else: + pass + else: + if sub_text: + result += [sub_text] + result += [tok] + return result + + def split_on_tokens(tok_list, text): + if not text.strip(): + return [] + if not tok_list: + return self._tokenize(text, add_prefix_space=add_prefix_space) + + tokenized_text = [] + text_list = [text] + for tok in tok_list: + tokenized_text = [] + for sub_text in text_list: + if sub_text not in self.added_tokens_encoder \ + and sub_text not in all_special_tokens: + tokenized_text += split_on_token(tok, sub_text) + else: + tokenized_text += [sub_text] + text_list = tokenized_text + + return list(itertools.chain.from_iterable((self._tokenize(token, add_prefix_space=add_prefix_space) if token not \ + in self.added_tokens_encoder and token not in all_special_tokens \ + else [token] for token in tokenized_text))) + + added_tokens = list(self.added_tokens_encoder.keys()) + all_special_tokens + tokenized_text = split_on_tokens(added_tokens, text) + return tokenized_text + + def convert_tokens_to_ids(self, tokens): + """ Converts a single token, or a sequence of tokens, (str) in a single integer id + (resp. a sequence of ids), using the vocabulary. + """ + if tokens is None: + return None + + if isinstance(tokens, str): + return self._convert_token_to_id_with_added_voc(tokens) + + ids = [] + for token in tokens: + ids.append(self._convert_token_to_id_with_added_voc(token)) + return ids + + def _convert_token_to_id_with_added_voc(self, token): + if token is None: + return None + + if token in self.added_tokens_encoder: + return self.added_tokens_encoder[token] + return self._convert_token_to_id(token) + + def convert_ids_to_tokens(self, ids, skip_special_tokens=False): + """ Converts a single index or a sequence of indices (integers) in a token " + (resp.) a sequence of tokens (str), using the vocabulary and added tokens. + + Args: + skip_special_tokens: Don't decode special tokens (self.all_special_tokens). Default: False + """ + if isinstance(ids, int): + return self._convert_id_to_token(ids) + tokens = [] + for index in ids: + index = int(index) + if skip_special_tokens and index in self.all_special_ids: + continue + tokens.append(self._convert_id_to_token(index)) + return tokens + + def convert_id_to_tokens(self, token_ids, skip_special_tokens=False, clean_up_tokenization_spaces=True): + """ + Converts a sequence of ids (integer) in a string, using the tokenizer and vocabulary + with options to remove special tokens and clean up tokenization spaces. + Similar to doing ``self.convert_tokens_to_string(self.convert_ids_to_tokens(token_ids))``. + + Args: + token_ids: list of tokenized input ids. Can be obtained using the `encode` or `encode_plus` methods. + skip_special_tokens: if set to True, will replace special tokens. 
+ clean_up_tokenization_spaces: if set to True, will clean up the tokenization spaces. + """ + filtered_tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens=skip_special_tokens) + + # To avoid mixing byte-level and unicode for byte-level BPT + # we need to build string separatly for added tokens and byte-level tokens + # cf. https://github.com/huggingface/transformers/issues/1133 + sub_texts = [] + current_sub_text = [] + for token in filtered_tokens: + if skip_special_tokens and token in self.all_special_ids: + continue + if token in self.added_tokens_encoder: + if current_sub_text: + sub_texts.append(self.convert_tokens_to_string(current_sub_text)) + current_sub_text = [] + sub_texts.append(token) + else: + current_sub_text.append(token) + if current_sub_text: + sub_texts.append(self.convert_tokens_to_string(current_sub_text)) + text = " ".join(sub_texts) + + if clean_up_tokenization_spaces: + clean_text = self.clean_up_tokenization(text) + return clean_text + else: + return text + + @property + def special_tokens_map(self): + """ A dictionary mapping special token class attribute (cls_token, unk_token...) to their + values ('', ''...) + """ + set_attr = {} + for attr in self.SPECIAL_TOKENS_ATTRIBUTES: + attr_value = getattr(self, "_" + attr) + if attr_value: + set_attr[attr] = attr_value + return set_attr + + @property + def all_special_tokens(self): + """ List all the special tokens ('', ''...) mapped to class attributes + (cls_token, unk_token...). + """ + all_toks = [] + set_attr = self.special_tokens_map + for attr_value in set_attr.values(): + all_toks = all_toks + (list(attr_value) if isinstance(attr_value, (list, tuple)) else [attr_value]) + all_toks = list(set(all_toks)) + return all_toks + + @property + def all_special_ids(self): + """ List the vocabulary indices of the special tokens ('', ''...) mapped to + class attributes (cls_token, unk_token...). + """ + all_toks = self.all_special_tokens + all_ids = self.convert_tokens_to_ids(all_toks) + return all_ids + + @staticmethod + def clean_up_tokenization(out_string): + """ Clean up a list of simple English tokenization artifacts like spaces before punctuations and abreviated forms. 
+ """ + out_string = ( + out_string.replace(" .", ".") + .replace(" ?", "?") + .replace(" !", "!") + .replace(" ,", ",") + .replace(" ' ", "'") + .replace(" n't", "n't") + .replace(" 'm", "'m") + .replace(" do not", " don't") + .replace(" 's", "'s") + .replace(" 've", "'ve") + .replace(" 're", "'re") + ) + return out_string + + def encode(self, text, add_special_tokens=False, add_prefix_space=True): + """ + 给定text输入将数据encode为index的形式。 + + Example:: + + >>> from fastNLP.modules import GPT2Tokenizer + >>> gpt2_tokenizer = GPT2Tokenizer.from_pretrained('en') + >>> print(gpt2_tokenizer.encode('from')) + >>> print(gpt2_tokenizer.encode("This is a demo sentence")) + >>> print(gpt2_tokenizer.encode(["This", "is", 'a'])) + + + :param List[str],str text: 输入的一条认为是一句话。 + :param bool add_special_tokens: 是否保证句首和句尾是cls和sep。GPT2没有cls和sep这一说 + :return: + """ + if isinstance(text, str): + words = text.split() + elif isinstance(text, list): + words = text + else: + raise TypeError("Only support str or List[str]") + + word_pieces = [] + for word in words: + tokens = self.tokenize(word, add_prefix_space=add_prefix_space) + word_piece_ids = self.convert_tokens_to_ids(tokens) + word_pieces.extend(word_piece_ids) + if add_special_tokens: + if self._cls_token is not None and word_pieces[0] != self.cls_index: + word_pieces.insert(0, self.cls_index) + if self._sep_token is not None and word_pieces[-1] != self.sep_index: + word_pieces.append(self.eos_index) + return word_pieces + + def get_used_merge_pair_vocab(self, token): + # 如果token没有找到,会被拆分成字母返回 TODO need comment + used_pairs = {} + word = tuple(token) + pairs = get_pairs(word) # 如果word是abcd,则((a,b), (b,c), (c, d), (e,f)) + + if not pairs: + return token, used_pairs + + while True: + # 首先找到最常的pair + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf"))) + if bigram not in self.bpe_ranks: + break + used_pairs[bigram] = self.bpe_ranks[bigram] + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + except ValueError: + new_word.extend(word[i:]) + break + else: + new_word.extend(word[i:j]) #最先找的 + i = j + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = " ".join(word) + return word, used_pairs \ No newline at end of file diff --git a/fastNLP/modules/tokenizer/roberta_tokenizer.py b/fastNLP/modules/tokenizer/roberta_tokenizer.py new file mode 100644 index 00000000..ee2e5e97 --- /dev/null +++ b/fastNLP/modules/tokenizer/roberta_tokenizer.py @@ -0,0 +1,102 @@ +r""" + +""" + +__all__ = [ + "RobertaTokenizer" +] + +import json +from .gpt2_tokenizer import GPT2Tokenizer +from ..utils import _get_file_name_base_on_postfix +from ...io.file_utils import _get_roberta_dir + +PRETRAINED_ROBERTA_POSITIONAL_EMBEDDINGS_SIZES = { + "roberta-base": 512, + "roberta-large": 512, + "roberta-large-mnli": 512, + "distilroberta-base": 512, + "roberta-base-openai-detector": 512, + "roberta-large-openai-detector": 512, +} + + +class RobertaTokenizer(GPT2Tokenizer): + + vocab_files_names = { + "vocab_file": "vocab.json", + "merges_file": "merges.txt", + } + + def __init__( + self, + vocab_file, + merges_file, + errors="replace", + bos_token="", + eos_token="", + sep_token="", + cls_token="", + unk_token="", + pad_token="", + mask_token="", + **kwargs + ): + super().__init__( + 
+            vocab_file=vocab_file,
+            merges_file=merges_file,
+            errors=errors,
+            bos_token=bos_token,
+            eos_token=eos_token,
+            unk_token=unk_token,
+            sep_token=sep_token,
+            cls_token=cls_token,
+            pad_token=pad_token,
+            mask_token=mask_token,
+            **kwargs,
+        )
+        self.max_len_single_sentence = self.max_len - 2  # take into account special tokens
+        self.max_len_sentences_pair = self.max_len - 4  # take into account special tokens
+
+    @classmethod
+    def from_pretrained(cls, model_dir_or_name, *inputs, **kwargs):
+        """
+
+        :param str model_dir_or_name: a model directory or a shortcut name.
+        :param kwargs:
+        :return: a RobertaTokenizer instance.
+        """
+        # The tokenizer needs two files: vocab.json and merges.txt.
+        model_dir = _get_roberta_dir(model_dir_or_name)
+        # The directory is expected to contain four files: vocab.json, merges.txt, config.json and model.bin.
+
+        tokenizer_config_file = _get_file_name_base_on_postfix(model_dir, 'config.json')
+        with open(tokenizer_config_file, encoding="utf-8") as tokenizer_config_handle:
+            init_kwargs = json.load(tokenizer_config_handle)
+        # Set max length if needed
+        if model_dir_or_name in PRETRAINED_ROBERTA_POSITIONAL_EMBEDDINGS_SIZES:
+            # if we're using a pretrained model, ensure the tokenizer
+            # won't index sequences longer than the number of positional embeddings
+            max_len = PRETRAINED_ROBERTA_POSITIONAL_EMBEDDINGS_SIZES[model_dir_or_name]
+            if max_len is not None and isinstance(max_len, (int, float)):
+                init_kwargs["max_len"] = min(init_kwargs.get("max_len", int(1e12)), max_len)
+
+        # Add the vocab and merges files to init_kwargs
+        if 'vocab_file' in kwargs:  # use the user-specified vocab file if given
+            init_kwargs['vocab_file'] = kwargs['vocab_file']
+        else:
+            init_kwargs['vocab_file'] = _get_file_name_base_on_postfix(model_dir, RobertaTokenizer.vocab_files_names['vocab_file'])
+        init_kwargs['merges_file'] = _get_file_name_base_on_postfix(model_dir, RobertaTokenizer.vocab_files_names['merges_file'])
+
+        init_inputs = init_kwargs.pop("init_inputs", ())
+        # Instantiate tokenizer.
+        try:
+            tokenizer = cls(*init_inputs, **init_kwargs)
+        except OSError:
+            raise OSError(
+                "Unable to load vocabulary from file. "
+                "Please check that the provided vocabulary is accessible and not corrupted."
+            )
+
+        return tokenizer
+
diff --git a/fastNLP/modules/utils.py b/fastNLP/modules/utils.py
index 79e2a7de..061cd8ae 100644
--- a/fastNLP/modules/utils.py
+++ b/fastNLP/modules/utils.py
@@ -144,18 +144,8 @@ def _get_file_name_base_on_postfix(dir_path, postfix):
     """
     files = list(filter(lambda filename: filename.endswith(postfix), os.listdir(os.path.join(dir_path))))
     if len(files) == 0:
-        raise FileNotFoundError(f"There is no file endswith *{postfix} file in {dir_path}")
+        raise FileNotFoundError(f"There is no file ending with {postfix} in {dir_path}")
     elif len(files) > 1:
         raise FileExistsError(f"There are multiple *{postfix} files in {dir_path}")
     return os.path.join(dir_path, files[0])
-
-def create_position_ids_from_input_ids(input_ids, padding_idx=0):
-    r""" Replace non-padding symbols with their position numbers. Position numbers begin at
-    padding_idx+1. Padding symbols are ignored. This is modified from fairseq's
-    `utils.make_positions`.
-    """
-    # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
- mask = input_ids.ne(padding_idx).int() - incremental_indicies = torch.cumsum(mask, dim=1).type_as(mask) * mask - return incremental_indicies.long() + padding_idx diff --git a/reproduction/Summarization/Baseline/train_origin.py b/reproduction/Summarization/Baseline/train_origin.py index 36a2b716..7c4d2f12 100644 --- a/reproduction/Summarization/Baseline/train_origin.py +++ b/reproduction/Summarization/Baseline/train_origin.py @@ -687,16 +687,16 @@ def main(): if hps.mode == 'train': trainset = dataInfo.datasets["train"] train_sampler = BucketSampler(batch_size=hps.batch_size, seq_len_field_name=Const.INPUT) - train_batch = DataSetIter(batch_size=hps.batch_size, dataset=trainset, sampler=train_sampler) + train_batch = DataSetIter(dataset=trainset, batch_size=hps.batch_size, sampler=train_sampler) validset = dataInfo.datasets["valid"] validset.set_input("text", "summary") - valid_batch = DataSetIter(batch_size=hps.batch_size, dataset=validset) + valid_batch = DataSetIter(dataset=validset, batch_size=hps.batch_size) setup_training(model, train_batch, valid_batch, hps) elif hps.mode == 'test': logger.info("[INFO] Decoding...") testset = dataInfo.datasets["test"] testset.set_input("text", "summary") - test_batch = DataSetIter(batch_size=hps.batch_size, dataset=testset) + test_batch = DataSetIter(dataset=testset, batch_size=hps.batch_size) run_test(model, test_batch, hps, limited=hps.limited) else: logger.error("The 'mode' flag must be one of train/eval/test") diff --git a/reproduction/multi-criteria-cws/main.py b/reproduction/multi-criteria-cws/main.py index 049a1974..8ee1f81e 100644 --- a/reproduction/multi-criteria-cws/main.py +++ b/reproduction/multi-criteria-cws/main.py @@ -406,18 +406,8 @@ if not options.test: logger.info("Number training instances: {}".format(len(train_set))) logger.info("Number dev instances: {}".format(len(dev_set))) - train_batch = DataSetIter( - batch_size=options.batch_size, - dataset=train_set, - sampler=train_sampler, - num_workers=4, - ) - dev_batch = DataSetIter( - batch_size=options.batch_size, - dataset=dev_set, - sampler=dev_sampler, - num_workers=4, - ) + train_batch = DataSetIter(dataset=train_set, batch_size=options.batch_size, sampler=train_sampler, num_workers=4) + dev_batch = DataSetIter(dataset=dev_set, batch_size=options.batch_size, sampler=dev_sampler, num_workers=4) best_f1 = 0.0 for epoch in range(int(options.num_epochs)): diff --git a/test/core/test_batch.py b/test/core/test_batch.py index 18cbf59d..6a340d36 100644 --- a/test/core/test_batch.py +++ b/test/core/test_batch.py @@ -279,7 +279,7 @@ class TestCase1(unittest.TestCase): data.add_collate_fn(concat_collate_fn) - for batch_x, batch_y in DataSetIter(data, sampler=SequentialSampler(), batch_size=2): + for batch_x, batch_y in DataSetIter(data, batch_size=2, sampler=SequentialSampler()): print("batch_x:", batch_x) print("batch_y:", batch_y) # batch_x: {'x': tensor([[0, 1, 3, 0], @@ -302,7 +302,7 @@ class TestCase1(unittest.TestCase): return b_x, b_y data.delete_collate_fn() # 删除之前的collate_fn data.add_collate_fn(ConCollateFn(max_len=3)) - for batch_x, batch_y in DataSetIter(data, sampler=SequentialSampler(), batch_size=2): + for batch_x, batch_y in DataSetIter(data, batch_size=2, sampler=SequentialSampler()): print("batch_x:", batch_x) print("batch_y:", batch_y) # batch_x: {'x': tensor([[0, 1, 3], @@ -362,10 +362,9 @@ class TestCase1(unittest.TestCase): batch_sampler = BatchSampler(ds) - data_iter = DataSetIter(ds, batch_size=10, sampler=batch_sampler, as_numpy=False, - num_workers=0, 
pin_memory=False, drop_last=False, - timeout=0, worker_init_fn=None, collate_fn=None, - batch_sampler=batch_sampler) + data_iter = DataSetIter(ds, batch_size=10, sampler=batch_sampler, as_numpy=False, num_workers=0, + pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None, + batch_sampler=batch_sampler) num_samples = [len(ds)//2, len(ds)-len(ds)//2] for idx, (batch_x, batch_y) in enumerate(data_iter): self.assertEqual(num_samples[idx], len(batch_x['1'])) diff --git a/test/core/test_dataset.py b/test/core/test_dataset.py index d048191f..03f24ad1 100644 --- a/test/core/test_dataset.py +++ b/test/core/test_dataset.py @@ -264,7 +264,6 @@ class TestDataSetMethods(unittest.TestCase): self.assertEqual(ans.content, [[5, 6]] * 10) def test_add_null(self): - # TODO test failed because 'fastNLP\core\field.py:143: RuntimeError' ds = DataSet() with self.assertRaises(RuntimeError) as RE: ds.add_field('test', []) diff --git a/test/data_for_tests/embedding/small_gpt2/config.json b/test/data_for_tests/embedding/small_gpt2/config.json new file mode 100644 index 00000000..b2f61bdc --- /dev/null +++ b/test/data_for_tests/embedding/small_gpt2/config.json @@ -0,0 +1 @@ +{"architectures": ["GPT2LMHeadModel"], "initializer_range": 0.02, "layer_norm_epsilon": 1e-05, "n_ctx": 20, "n_embd": 16, "n_head": 4, "n_layer": 2, "n_positions": 20, "vocab_size": 64} \ No newline at end of file diff --git a/test/data_for_tests/embedding/small_gpt2/merges.txt b/test/data_for_tests/embedding/small_gpt2/merges.txt new file mode 100644 index 00000000..5e4f2b9b --- /dev/null +++ b/test/data_for_tests/embedding/small_gpt2/merges.txt @@ -0,0 +1,39 @@ +#version: small +a b +c e +e l +e m +e n +en ce +en t +h e +he r +i s +o c +o d +o t +ot her +x t +Ġ T +Ġ a +Ġ d +Ġ is +Ġ m +Ġ s +Ġ t +Ġ v +ĠT h +ĠTh is +Ġa n +Ġan other +Ġd em +Ġdem o +Ġm od +Ġmod el +Ġs ent +Ġsent ence +Ġt e +Ġt h +Ġte xt +Ġth is +Ġv oc diff --git a/test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin b/test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ec2f48d77b641cc69f579013e32071d8be9714b8 GIT binary patch literal 40749 zcmV({pVDA}0*VT3{G3MlAZn;k87_bVP3Z$JfC7DS04P`x0001Ra&L5RV{dFV?E0Zr)xSPcLG0BmV=bZli`Wo~3?VQz5(h*$~$004A(aAjX}X?kUIaRYsE1SnVq z0001UXm4_KaRo~PSOWk60BLS?aRy5SSOfq70Bmn=XK@Eh1a);TfC6J zbZ>HFXf9uMbZKmJ3SV+%Vs&Y3WM6b;ZgX#PUv@GIaRVqQSO)+A0CRM2a$#p>aRg&@ zZ*pU33Px;iVRTb;Z*pO0WeRZxSPuXI05LQ$H!(LjFgG(dGBPwUaRyie0001EaCLD9 zO#lQ=ba4n#O8`qiOAv-}2}=-50fuo3iD&>RQgI7(aST#%4OkWc004AyVQzC~Z*pyA zaxQmpWiEGRX=iA3aSmt$C@5$IXa!gg0000nG%zVXJ~YB7H9(~C};#|1y~ON001#GFgGzbI59FbI5;*mFmV@X2TKr6ba5C_ zO8`p{g>e~60fliIiD&>RQgIt}aU4={9atFv004AyVQzC~Z*pyAaxQ2tFfMFvUokFX zX<>759%ut7C};#|1y~ON001#GFgGzbI50RiGd4FeFmWGf2TKr6ba5b2O8`p{g>fNE z0fliQiD&>RQgI`6aU@c4C0H2%004AyVQzC~Z*pyAaxQ2tFfL(qbZ#zUX<>75CTIgF zC};#|1y~ON001#GFgGzbI50ReIW{siHgP9t2ThOxPIPf7QA+?QO94v(OB72Kba5#t zO^^XikO4~+O96CoDv4+SDN=DOba5RQgK6caYRyaMOYvJ004AyVQzC~Z*pyA zaxQ2tFfL(qbZ#zVUvP47YA#}FVRLatXaguHXar~lSPuXI05LQ$H!(LjFgPsaY;)7g>g!WXaFfvaZ7Y@Oj2=8SQ`KU0CaL;ZgXaDa&2XD zE@&<=E^KaJGA?&zX=iA3aZYFhC@5$IXa!gg0000nG%zaaL#pC@5$IXa!gg0000nG%zjG%#^jXa`FWPIPftQA+?z z5QTABO96#(T8U@?DN=D;ba7l#aa~v+0001Va$#RQgLH+ab!|)Wmp{m004AyVQzC~Z*pyAaxQ2tFfMIua4utC zW@9d5X<>75W@rN_C};#|1y~ON001#GFgGzbI50OkI5sdbHgRWY2TMRsba7}=O8`qi zg>h+10fljDiD&>RQgLf^acoj?ZCD`y004AyVQzC~Z*pyAaxQ2tFfMIua4utCaB^>I zE_Y>VXJ~YBZfFB2C};#|1y~ON001#GFgGzbI50OjH8VCiFmZ2a2TcG3PIPf_QA+?z 
zKuZvYadAr!O96&)a*1dFDN=EBba8Z2adlW90001Va$#sad}Gtg>ia`XaFfvaeH)ed{S|JSQ`KU0CaL;ZgXaDa&2XDE@&<>E^KaJF)nvy zX=iA3aein6C@5$IXa!gg0000nG%zafWCE zC@5$IXa!gg0000nG%zagJyMC@5$IXa!gg z0000nG%z04Y*&lXP*EQgM}7A^-pYbaG*Cb7pUHZDn#UXf81>VRUqEE@NL|baZYm zcV%g3XmoLwXaguHXar~lSPuXI05LQ$H!(LjF)=taF*PtWahGTZO#lNRQgNGfahy_dome0M004AyVQzC~Z*pyAaxQ2tF)m?rbZ#zV zUtx4~ZZ2YJVRLbwXaguHXar~lSPuXI05LQ$H!(LjFgH0cGcz|fai3@hOE6A!aiCF4 z08221aiL2Ag>j;ZXaFfvaier`q*8IESRw!b0CaL;ZgXaDa&2XDE@&<>E@5IE_Y>VXJ~YBrf35wC};#|1y~ON001#GFgGzbI59FZGBq$bIB}75u4n@&C};#|1y~ON001#GFgGzbI59FaH8(gkFmbPF2TKr6 zbaAjzO8`p{g>kV<0flk0iD&>RQgO3%akNr#wOAVf004AyVQzC~Z*pyAaxQ2tF)nOw zUotLtWoc(Y;IpNE@Ek6 zb8)_C11KnH1ZV|V4*&oFF*GnYF*i6cI5jphIX5OVbaG*Cb7pUHZDn#UXf81>ZESEZV_#-tE_Y>VXJ~YB z#%Kd5C};#|1y~ON001#GFgGzbI50OkG&MCiG;zmh2TcG3PIPg|QA+?z5KBOYamh zan5K1C@5$IXa!gg0000nG%zOVbaG*Cb7pUHZDn#UXf81>ZESEZV_$G`Z)z@L zX<>75-e?0TC};#|1y~ON001#GFgGzbI59CfI5{~rGI8H%2TKr6baCKOO8`p{g>m6a z0flkmiD&>RQgP#SapY2QvVDKQgQ8A6#xJLbaG*Cb7pUHZDn#UY;Ip>E@Ek6b8+ry11KnH1ZV|V4*&oFF*GnY zF*i6cH#RgjHa9YH?`Q{05KeS)@KH+uOAv)|@k;@Paq@|104Y*&^K^0aQgQWI4gdfE zY;9j?WnpA4cV%g3XmoM*XaguHXar~lSPuXI05LQ$H!(LjFgG(dGBPwUarbBkO#lQ= zbaD7mO8`qiOAv-}`AZN>0fuqRQgQoqar{zo{dIkD{#XeB003WYWprU=VRT_} z|7ZXyQgQ$R001ah00000asdGV0DW=-0RRA42LJ#7c4cyNX>V?F0|5X4O969O3jhEB zbaG*Cb7pUHZDn$D1OWg5eR2f>003$O0RR9?0drUn0001Va$#}2AY6AfP z080ULSP%dJ0CaL;ZgXaDa&2XDE@X0VaB>O(004b*3jqKCY6AfP080ULSPcLG0CaL; zZgXaDa&2XDE@*NL0RRAfat#3h0BQpP002t?b65`m004AyVQzC~Z*pyAaxQ2tFmet7 z004b*4*>uGY6AfP080ULSQG#N0CaL;ZgXaDa&2XDE@&<=E^KaJF>(+A004b*5di=I zY6AfP080ULSQG#N0CaL;ZgXaDa&2XDE@&<=E@550RRAM0|5X4O969O z8vpVau@*s0DW>90RRAM0|5X4 zO969OA^-pYbaG*Cb7pUHZDn#UXf7}=VRUqEE^=jaX=Gnya&K^Nb#!tX0RRAfavK2v z0BQpP002t?b66Ar004AyVQzC~Z*pyAaxQ2tFfMFvUovtW0RRAfavcEx0BQpP002t? zb667q004AyVQzC~Z*pyAaxQ2tFfMIuaB?02004b*9{~UWY6AfP080ULSQr2R0CaL; zZgXaDa&2XDE@&<=E^TaZE@NM2V{#w?004b*AprmYY6AfP080ULSQ-ET0CaL;ZgXaD za&2XDE@&<=E^TaZE@NMCa&KyKA^`vZeR3lK003$O0RR9?0drUz0001Va$#)pW004b*CjkHeY6AfP080ULSQG#N0CaL;ZgXaDa&2XDE@&<>E^KaJF>)vY z004b*DFFZgY6AfP080ULSQG#N0CaL;ZgXaDa&2XDE@&<>E@5E@5T0RRAM z0|5X4O969O8vpVRUqEE@NMCa&KyKE&%`leR3}W003$O z0RR9?0drU(0001Va$#Vaxeh^0DW>X z0RRAM0|5X4O969OA^-pYbaG*Cb7pUHZDn#UXf81>VRUqEE^=jaX=Gnya&K^Nb#!tv z0RRAfax(z{0BQpP002t?b66Ar004AyVQzC~Z*pyAaxQ2tF)nOwUovtu0RRAfay0<} z0BQpP002t?b667q004AyVQzC~Z*pyAaxQ2tF)nRvaB?;Q004b*Hvs?uY6AfP080UL zSQr2R0CaL;ZgXaDa&2XDE@&<>E^TaZE@NM2V{$kF004b*IRO9wY6AfP080ULSQ-ET z0CaL;ZgXaDa&2XDE@&<>E^TaZE@NMCa&KyKIspIxeR4Yi003$O0RR9?0drUz0001V za$#+{&Y6AfP080ULb#r1afC61{04P`w0000nG%z0(gGB@5lX-pBkK9wCh!A%mqFu8s_ zsx1k*pl^t8acVTVb(*N5u5h;0VFB&f-|l8xBAuMHGEW&N_eIB)yBL6!5oRlamEt#^5Bplq07*zR}&aPmOBU6lO1 zC8Hs|3|nHmSfQ)DoS|a72|*;ifY%{BDOJ=w$#(%f7Oy9~;J>*%qLec|l6#A~Wws2x zmOrPw{JxSqAkB)rT%UA1$|6|1Hqsrui*|#%yMe*HXIJtHPNIRbXO>2HoajWwaXQ@<9x%EDbd0!Ao3ulzB( zFSo-y*ayJ8xYH3lfvPpTd5~MYaTJt0n-2cFu!N_)MH~)1sp1N|f{M(%Jq(7p)s@ye z9O~S<)%GE~Rj=T@Pr7crMi$Sz_{?5B%WWIHR40?X=N*x|b&f$hQyE1)fQ?K%mKQF& z1Y9h=1R_$rqN$p@oh;D0GguS6PB+3lj`K=9uMhq_ol2@af1RAX$@Q1KS=!~hRzIn{ z6s?WB{N<;+RH_BN0Y$33faZfcyr~eqG}BW&bUOLFbz@CDr=0;gj>{dq^2WlvJ7Au? 
zDcga&_?%e0X9!Nb()|~_LFQ|`#ML;wsfGDGl+_kHP)F}OR}hc9&jjMUNq8&0vhO%N zDhS9rJ4gOJIFzP4Q+PAH*Ck`SjJ)eSQtOdBQq;mbGTk#h*k2_)9l)o%iTeP(9*jvm z?<{sX;Z4lE-U9l(QX+u7FY@!ez2$8@hareOMBL@P%04H(?!mJ>X@94@J>B#?P##yk zT-)9}VpCweyw7sH`1M@8R>G$|m~pYXFwN$?`(!XVF6a3?dDpGH_xAt0ySZyTKGRFR zmV%x<$HqB4L5kWub3vs$;oXG1QXF=?4ca?78@SIrbN-aNH1!I*tqLkVc&G0?17zqt z$sScX>Mkw3Ff9+gYJ(5F&h9)shWe|#*V-PtHgku({5B~&7tbcVWXl6Qmq744=cfrh z3Q(`R|0j&RDZ8J%WYjIZ3|bz%S#{RDiO~?feVj0=E%pe=7a00aO4 z00000y>h+0G6Dv@%R_`b{8yB_V8v#-&|U+*MNuxil^RAp;mZZQLwyH2lKUsTNj}B7 zWb;+Lcslev>{|dmRiOSn9^~RYdsTnD9hwh3!K-ULO0e`ixNamos{$K6CuvH$)s(|I zzI+$FP`!S;5UX`NOe1u>U2;&o1V@-VLM#zI6!D5Y)S$M!@Y!oSdj|BoE1n8HFtBYr zT!{m{X#qeyz}JvGuP#fw&>9T9s^Ve1z4(zlcT_YzgaxR)5TKhpjM!PcWa@#uf=J@K z@)$0>Ci^JCP|?7N7$DoRDXbMrU6stFK1=HZyUf|pOccZj&{D2?40i*vslNjf_4wACE>mZaoX*{~TgQ>=A z&gTO>QZ0qMMjp>QM+u6&l~DP+Q>gwt^dbj6ueC!wuT!c!Hti<60HsGfTT6^QiYs2b zF;#-Q0>)!0Jd6AI1<)SCOef(rSVsjU~Cq_^_8D5b(_~+NTM=HfUu6Cch@1HC> zF8;4P9q1uDIU5l@RerHNYgt#kWSKL(p@cKLvYKnW+3KLXr{1BtG88|(YOlULHKuyJ z&%#6x;#RUeQt%198BZBJ5>OevJPy9Q z?2qQWPYa~Gy;P?=k8MP}_e=@AIVuS}oR0Fmq1Fey?(IUn;1xl<nbR{ykn-k zEZXBdkZXxN_hFbjM&B5`%MTJddy%6%$3-+eD;L7t>8VXIBC}mjxlb;-#3pv?MLOz-kt~ z+jH!@71RPf*vWRhs~dwnev0F~B(&4JMs~ZrsyDK{5)H3BwT;TW@<*{e>^AE>kB5ak zw6YvMG&qYpSuN2`I4pI-h zN>$vv5NG(jeMFKxfn&M5KVSvDb6YOGxtO23DI?K4N^HJ6q> zhdP-&QfHXFQqJ5w4?530lo+HV~={gZ=$-qNogay0NX@7e6!O! zJFwh5g%AHcTjk3<=Zpl7f3We~yJ(iEWSHB884>vQsL`C8}p0i6l zkD^sOrj!RelcwxEJ+MhOl)TpZ`TbdA0|Cf=ev zhZvbW@cDK*w|>}-MG>pIyamuaIFa_d1dTU5(pok=8Yvh&?xKP_O*;TRsj@#ir8_=7XHD!p zXB!keL^%e%)0><;8Ru6#ez+(+%3MahF2v2dauNu<9?B9s(3Uj3!3xkjeGW%F*35!D z!C%_E`E|EDCk9?T^5fJzuzU_YYSm!8$+qpgjF)%3`GYzYOOOPME_xq4g|1_v(l{U!RG)=v@%IRuh7}m!!zMG98>exT$eG!VW>a8D#goN{f^` zy@^P?LQ1^5E4V_uzURR_APfXMZ)YmJNhs4h0?3BE5A{U7P(y}1CBNi5IQLDuSP2Zg zmoGoN;H>()GS&*bf(hk3`)w||hVU3XyRgYS&N_d*Dx#&li|dR$s<4_nV-JnFx6t4` zNwogFm^(nC@@6zV zAQAn$!?y0cu9r8vEZ*_F#nN6pCz;^8oL%8NW5p1?3Li*3F%FVE=C!%IMKLP84-F5z z^8Q6Tv~~(Tor9P=48oGUZp>A@&a&7%OENmVTNur}SplWH*Z+e&_pvcO?U$y!$s%4n z#iC_A!fQsmLID^&jg_ptV1v}W)n|3Q4go(qWJeP{W^*FCqO_4ZmlFxS*|Jx>lp_(m z`Imvb8ATqw`=gaR+wMs`4_-_<>}J+GK}}XWum7;TI4yQPO9zd-y|ab9d*}weN~`xg zuvkdEmiq#{y}z2jEsh*KeydNomoBinumEa2z2K_6Wjabd)UvHSqq+V& z#*0b3K>hQ)Z>Q_Kn*vyS`^UJSkDTZ_(2`)?kM`JdG^6@=1rh2Mmck{h!J_ z+1iLaPD*4vnD8FFK@e@cn;uBJQ(H+pCb~5{dS4W}Hjk1#gp3lrJLHkPArh0k^cK;) z1Wc+t43e8X7hdMPHAot~W#Z+$ru92Ku6JfTTpJ6z4Yz~5YGZplmd6x5B1xjVUKgo6 zA-zz%9e>`uyPS)=TwZuQGf{lJUM1W-sAY`27+%3UK8J-oI6*8tbmj=WWx=<+{^U=* z*A<96oQQ}#yKJ^Ph?5CD&p9-_BzvK{z{YnwO#WfJu35P}G)+fqk92XCMl^ z6xGMP4b33Dh3y);srGQZ-8R)auZ{LRFHgF>x6>XwcZtZnU>gEG9W@0#yVoGR&`*fG zq>Roxy9&cRbYxdNOymH&i(MAGn|O!3RdN_Tuz1irSID?LbDAQ$CkF|=_}QJjLXp2a zowz1E_dtrg0G$jyFD6JlwtNvizS91@zX1z9AUN1OOhY0(VbQQVU9kW?UO4eQ!TZ%a zqH^;)smT^R+TVh_nJR@l&S5vari_ET=X$ZcWuGX#4vXi!otBWii3=7yUM*%kjGvr3 zpzLzHUB8dKBGV1MJ2T_D3ZJb!O$VzxHold+7qX_ifoX)g5`@h=5+X;u!!_u;>u44{ z6L>GWjJBLS6nqpsMXqB!gYAqwHph@TL52doSM|5KinTF1$=$@f&=2Z6*&YkMsY3ZY z>YY{Sw#9glm6yB;s3Y0fKd25CZV%Dv~`v{tBVi4EP7l# zVy^PMu3r*7Zb99<4@*Womm$48mB4#F4@+~r4iPoHi~Y3j z7L)+IyzRBRN+dF}x`SAoAf3MutF>_1~X>2dG7 zekyA_KD%_h_o%SEt$vBTGJvu?*g5pPNh`y=oR>)3xh@NEY?SEu4U$rp*dChHMBoZs)diEPq3g|w(V#^Ij4u6Nu!M6m(AMY+v8wmK0! 
zdDWOa*fYAkBeVa!7o`onTgDu`Sl**LSTuG!Pd-68_n8X4ZD*9cxMDCo84)eKWjQT8 zVfrS#{t>)8Fx;-Zduy7zDN7=|+8Tj9yRG0nBohWZ5sVDI^&OD_$2&xsl z^1fR;r0KD{Sbdi~m`>un#Mj%rSAdzjghB;8;X60G=c_6_*DUnAT@t*!y$%^Xw@)>^ zx0sx~k7x%y?kq|?=W3rj1@>dTB-Rl;ob=~BOTkw>{X70UE6&lo203^<1nz;m0G5S3 zf-|W*>>Fr29rS{{>in>~Z)lS|!3ZY3r>up$rzWYqt^=h!z=Qd_l#>X(_9LM>ba@uK zY6}uPCmEf*m>aS@;}MO!Q*GWl`EL@uFhvi&O7G&l3vrLSPKCp~9*26oCkNp?lOMdh z$&3O$_ZuC&H?Rr2mnOKpQA7#4q{*v1#kFQUBSVV3KrFyK@ZzpJR=g0rQa+hHmG?cn z;7cJrPk=|fq-YO4)PQa~&h_BD1tK@S6eoGR!U+4jpp=2U{#qeC``H7%0aW2U4#@vI z$JGS7$=@41COXkPE;O_{yMv{?Qy@aS1{UPIv|oKZUjd@K&);;s{J+aQ0{E&tb+ySm zLJQSAv~S@%@*@>E5WkzR$5dG^~)kDOlJ%>;IuV4j!>Q zd5eiT-1WA+40hx^``_U_G_e`Ia~6s`=JvZheN+!TLQ6`!)6BO#A1;u)JJ*{#*ZZA3 zH`RnayN&?8{t}!#dEAmbH&mg#Ngbj(YDPOfBH1y#YDQDM#>+IjS!5)<5C8xG00000 z00000000000000000000000000000000000000000000000000000000000000000 z0000000000000005C8xG00000004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL5C8xG00000000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z00aO400000;EawuLJWXBp?AbQ8rGvc*IO~W^4K=KaNE^9#d24>N3E(n&;U+7ZW>ZN zbcgJ`vZ(pIMhFBwwn3LXRPdU*n9)(Zo}1=8xR01T#wPE)$L5JVo3ji&Oy`?Cl3lL6 z2_u6$Z>5VlTz-K$1!7`6IS{Noj&Pa0K@!!vE^`XJ+vu%3eek}!tRc_5-D#CP{doSo zV(Pg()~W=(t+_uuYE5=LxhLK>FNqF2D@AfVn<*JRjyo|t2@ifdjLZHz42UVrKGjt`qtFK->Vo*4|PV?rw%C%5C;!3eRKwv>T(0M?-+mCKM)^dzIrDV;# z0EbJw%Sz`wot>GxVTJ5G)I@T<=GW~zPNKv*$;Zk%INwe>?;a^VPTNE~X9xv76r{2| zK8s#FGI6B50VFZJlXPM`_*Ub(60#0Gsau&m{gQ;c{m`d8{1~FUd1S{tf#*g%>Q}Hk z9$c6_=FOoxOUfg=@)7jB((r0K$~Q_pMll7v;hWbwMU#3w2_Vn*%ISNd=}Y&i%zAP~+x)3~HO z=V2hdhWI(XEn0!RIc*oZ&I-voE83+y^o!%U^EjHkv-MrPOZG%Pc&R_T8%NK)_gqrE zk6{D6LimWh)M@ZLNt>}eMjH%0P30fGU_CdzMeLtC1ynpd-VP|e$D60TySXjB{UA*} zS;2w3KQ9zI*(cb$$TpokA?D>f1pk4&gL`s36KBsmzL3wm3JWhis#%@8SG2LbWm%Uz z@JX4wJC+Z;Ro$FC+YyAj6Hxv>L2 zzN9TZr*}}iDXF|XfZ!OsjbVwrwm>bsGJdc;)BRCA4D*dUR33Fa683&P9hAL1lN`o8 zAtZ@BY=D+J9JKGd2@{IEq~fuaN9gA~F(CvzIX2xqcp|4fE>{3Ot|B44 z+3p0q$)ec2lxrkYJWmXxPFE;*jOis@6kOSK_89G*M8sgMypva*gm7ZpXl&ev8u zj%J>`T~SuM*HwW#Pe$^*fX0-(m5J*-&soYmLvQrEu%%Qwa1M97D(s6p>wdXA<%NH| zHFDLspsaO0^r}rfNz(y5##sZss$fq%<=oRdaC4_TWTpi@CZ89(P)v!r5@-Uwx65?A z7U-)vxm$xg#v+uwFO?8Ic3Flz6LDlb+B^We50T_N*I2SV%^;V&KgohSP^1yOo9uHu zY4jkxOss=BGeIgm-VYN!TTn;5=Q{_y(__IrCuEmA_E?5J)sn)yauLLmQUa$Xa>&GV@{n>*4xzN2M2-j9YmGSVA7`U9an zorL5&>JO5bdUi7~_nKTT%Ojvxo zMqm@WC7D4y__ZUvauhDSQcP7m+in#+yv8s*WpPcs=`@eL;EgoAS0CU!Avpj%SPJI5 zFc`5ss(u_i4;S@1V4*iWLZ8Y!gSYZLN8BU4ZhA~S5DgJLZD71Su3(V6P*QZeRNNN4 zPPgj31YDN9%l@u90*m#$Mkt9qSd0d}GO$y;)pj1du8JusU?{qRcOY+6KvEdNAl#M98OQ>tR=WRW_`J)p&JVCEJ z=E_pMJlSHq?6zDwGiVIF)HTAqEA$7wL)-?vev3i8?ZJmUAO*ZUPT#J&yOlCL{~`#z z7`=bI;bP-F{bm!q-C(gh04bilwkfK-UDa>A0C@|%Hy#~5#c-{>xO_;wR7p5I=WMaO z_b4De!maK*;+8nQ_4G)&`FT3Lf%u9%;5*nnQj;RP)^3nI@&ahPSRHgcL4Yj0$N0Ov z=Ub9Hl@+r(f$sOZRD?&o>zsePCX$i5QZ9o$0=5l3w0HrTD!& z#U^6A97ixbOG;)teMyA7zDJn6jX;1qP|D@I9J2Pi0mAb;Du?+vO!D$Lh8U4NhF}Rj zMX``PLc2daI9jB<8AvfbuVya1rT;#><|8sY_0+Yy!G$(GgvpIO$6|K8N`pH*-trwg z%$$h4fK=~1c1H=lAi$11gwHU%*s>u!eG!mu$r0AC`!BtHQ?K!Axn_>eq0z)_97 z6n2O_hlvC{eDFHEQv^*seLZ{w}JN6PfO zFfa-`DLZ?-&q_Buowmii(lS51*fN4V+{H?|{P~!eKkSZXCrs zrIPEs0Fx#=w3M+tejBVj&;$xR?%{wv#ynFzqFU#>;>g~qH{!Frjcb8BF#E2& zj}nAD2OWgFERyHDm;TPXuc_X=Ne&CVrXxYSln_h2f<+lUb6o4Z9R;;KJ-uAK{DKQS zZ!S~2M5|;xjE3?%lU>uiCByW)Q#d_5U`E@#l2PWn5Ki7a-VaVZPbG4^c^u+AK(ZXX z&Vi~ruFaP_q9+SG-W!R$W*enEifyqvl&uOq6tvH}57Z_+b1F$ZJ*SJjlZ1>smvx;y z(6Kc=EEW5%R(AFk2Mp$(x@4^(h@j4k94Cu zw4~*_SNQ(B-nFf}nA=mkLhWNb_Z~hx+vNx zn`=3{p6!D?>UNJjaOpBVH)El@a4G$~VdJ^HUWy^StN0{6l#wbt$wK%%<kf$F0LWH{JfcQ~#{I5YY%d*E);3Mjw|v5;3Vfwuvb`-mJYk-~{Qr zhIpnsfe$jd-3Fb!Jq{N< 
zHzEDJ?dCqcGJ3W;`+0u52}#>K7O)??FIy!$q}i{$SGFfSysq~=6wnyFN%W+oSHqoFSFCU*u(q01U;d=!ORpqO?ittk{%E{!b0{t3&|lnVt9$X zBR%RotGcE-&J$w0ka>?hGi2C2cTp8Q05In}hk$}Q+~>4B#uv@--|rDScfiNHcie$Hd9+A8YJYaR?NmIyY#H6W`>s4Zq04W) zO@hh1OJEf{={86`!e<*h&`!5Hr z#|sg?k|&uwB{vYgaKj-ws|tHOxM}UXaRu$h87x4u}srii4xv_g}*CIr_!Kn4apcHES^gwU(J;D(62 z+bt42Yml5g5JNLOb#qxfBN6kulgDMes~dv5~NzLGdRNkxIX$H=g|H=EtO z#Y(L^#9Q6GTra#lRe`=d?j)GIYmn4Dtxd(edyRlR&;SHI#6E^R-kIRMB(e*=w0?~| z=L;dc*qhY6{IFTP+YggGs~t8xM)KOb-2Mf)e9xOa$^QSm(a-`sQ5eKq3x~5h zOI>6NL&SRN8uH#BPWuwwOcd-h)FD~1>xyDvK0`reN7r!68xqOy89z#^ToSq~* zH`E)vO^J%UPhLAMg-3Cth8#)VnDJ1G4-C6(Q}%KRZbi%n5I#=}Cr4-*Q#Ce{Ky zoy@a4asmE3PaGsW-d>G7#Zh^@bYTX)=|i?W{qlgkIkX)-RiDy3qH%>h2IsB3i@H5L zEB%bTvzUH6V$51Q7HpBbS?l<_*;@lVBC$X`w(GAvb9~mk72O2A>Bm()G92(b7j>e$ z2Z)BeMm)47Z5fk1n?e)32?YH+0!i<@#u!4p=ddO`#(FS3 z%oLowz2C*Wa6P3wkBYQB2I?NYT?8n+{>67Z-kpZL_IwkePww(`8#jCIG|g+w`3>0xNfSu2vQWiuEXWHk`%wZmCePw9w_s? zd?-J<0uwbn9q*vK$4Sb&xcN>z=1^(9Nr=uo#8L`9CU+*hm*AN@GhKH(Qf3gmOQ6&| zsW<$)QvkR;cK{DO*#h0XK}r=nMWanTfDPcg4;Tu(#N-z{yhXJ<51#HkDe#Ru1#l_4 zZ9M`!Q+e<_;L<$3OaAmcV%3?u-qy{$nc%s*PY~ZcXA_gWGT4MW1r*Y{$Um3o*%rt-rM~=H$xS?Ux1D~_$b`F zcP1u09hqCZB5ztd+Dsn3xte)A;Z4dsk7dd{th|#v>+dQ(993bwCBe`<3qJU}nWXHx zY%cXY7*jbtEq)xlS>uwsr&WYJR)3*9j}r&HTr8u!d7{p|@_HLQaLJ9lFBhk}D&kJqF;0`T-ZXDsWy;p8hje%?C0>%V9`100;ZOq}t& zq_Idmrk%gL8}b)C;)=OE{1&Uc7>FP|h=YMV%w7e(6Z!Kz{}UlR^@RC503Z=NMlG+r z4tpRx{fHYpI|oTT;zNBr;E~I`GSU@2vq~*I&BYNtJ$913!v7;Yy@z(Y>4*Y5y_NyH ztzWn~$!Wj40>B_V20^5|$@bzrC%e!*$T^$5D>??fLUyjZ2CN>u;BqLugpDw~&3T!; zyvn-BxNNmnF2>_m*aju5jv>n_DS zMk15E4P+I&iU7R4?xY#L+5;fF)DD6?YL5rJVCMn6nMx%+cbetA7)7tUM?u28Em?#- z`Cp5?yt<}5mn~g9Ob38F*^mu9?K7=BIV)MaLJ06Xxo?C#Wz74$j7Mm@2iojBMolri zMv+)No{$7Ry}?#I^w4j-W-oxe&i*aD0;PpKu5VnrImZyZ0O!~|wTKA3iUs()%A(*q z;%hd%1y(XV$6DvSfa&$Tf4mGlg8!mAp-9fWqzz`gBGKx*F>D6BgMt@2iS0Hz(*zE@ zbVN8iQkUGc}~VmeCQsQehuFijBv-CQXUFO~;PBB*aX+K4F18 zozk7W@*qLE#h=JL>jzZ3My>_D;m|-lBZ3t?!f1{=^YFPl{#wDjL_eH6pjE#-g3cej zPh_UNZ&ai_`-(Tbg3{EprA**7~uy!og2Hmab^QMYUZ=M zPtMJ{-Ubl8q+=dEyHdJ4Z|*_7$_}i&-I!TC_2`wm+~`3(O@zEU@b26@u7zPc~=PboXR`4cs~CE9B|tR7Um;N;D^^Mho!>m7?c^Cn_E<(@pWa z+gfzI&mB5EW+0h7Nr{8J{bZTDzmXC=dp9#X{@jW@S-}6i!Q$dNX4v<;_uzoM!mlVj zzQ?z_@}!}=A!g+}T^3rqRL8wMFOxvKin8Ili7XyQd;ah{)3%4lrjxG0vHNBE?MNfqU}*U zUG-Qy!QAn>E|)R9U}2CvW^-V@xhSJN&;$j&oVJ!c?Jm1JVi*)XG!f1`mnHkV8KoLL z>6YL;(34m^3VR7WxI36Ua8xrpRQ3ctE)%u8lPkfz)4y~)hv`i`XXJyttUOJ;Jw*~b z>iCqr0Q2p;nBHqVPA9EA{Db7Y;(l_xRJPB#-69w~2T`j$l6*}&3t=}s7cjWIF>f3^ zl8ei`L6Gjb;icF-olmg3!5a>|su!5N-DMU$8kL1SBRL1W+-L{9Hs_N({eJVj56Yyx zGzC07$*?>uI`t?lm6Aa^!33l`h=qW=S4}>=&S(0&32-AgyxGOO9}tf`4+m*H*p2hNxaKxI z=l}V;3GtG<^HUo@F4!5p62jX&6vYy}CIuY5c7Pi_ z6oP;|Y5bMEz51HGbj*=F?qid@9?WVy#g~dai#1+5Cy=VVI2%MfX53A@{3HpzYeUhz zGG)NK*MW&VU$~yU*|Uf`YcGtvtGoofU}uUv06Aa18{$g4&wkvyM(cAtbu2|ZB#+!X z{vIDW9Vgy8X>ISjMmHQiu$^?fq1Y+C)zmz>64V~NxB^c*XQSx6hAv6G zBzuB9rtHK#OIe~k6%zm9p5pZL<$OSMf5vZ$76x)Y9?16Wne)!g?RPaB>B` zoh(K?!s*Yv{d2#%+6o{%*o@LUiuDgXutNE~E)ixpq!BDUH_mT73GIixG(4R<$Gl{` z%eNW4{A#N_G1iSdyX=*`GKC$z@LgBD(d-2~t2EWTfQyQ|StqQ#FXeu`aY&22eX&J6 z^gUO*6#O^bru&rXO=TNxrBN=%JZ$f#r3GWbhU&# z@HHblz3CP^50~q_kRJHD_?crnuAxx8w;@zJm)A$VGuSjdCTQn7E@8Afm2-1Em1^%h zgIMjm+2OD|bu9C`z*ROpBGOj90-w=Sj*S6lgjdz1Q+418$ zrzF8U@Ff_$uz4aqW>cIzBFCFK;a0RfCy#bJ^vY|zFqDA2c-coh&w*0B0jRFLr5y3R zC};t_2LzxzAC`fd%a1! 
z>a#w*Smw?=*QUoj%g0?jG&Wg06waqSasdrJk~meoe(4H3)pQNKj=q*W!|(yUSseyF zZvoRhh0)18hl(t`>qtC3CZ>SB9$BconVx?<3<;vVQ1OhqkNp@sN{}GDUG_>lJ^L6v zBX9OP=S+jVXC@E4q?z74uR0_=QhpM=(Zr*?v!f+F&(6iWs{jx^S3H%xw$BqJmIOG>yrFE?AeDmD;1RH!~YMYRPyQ>w2#uBMB;yQks2tGB;A z8`g(B>(M4W>&?VG0!Q>b=Xb(9YKD-!>G!KTTbB|&q4y=cTlMQakVauSdxbkZMcEU* zg>x)BXU9{$3Qk8nTlK5FCmn7)jp?gA;M7aJL{JF5H>m48V)_z13znt4o@K$jveuM5 zJyRJynaRJr|KAQhvf6MxEf}>tIfsHgq`i_n;Hnh7J%fR~lz9I;N@V;xSpA*64|gp* zU^J#XdE`vIwnwA8sfS9tZ$po~nHJ4EAkG;*GUO9H9kH~#0*)ZO{LaO@yHnacyiNBz z*KDIa9=kj|_n+asnV3U7H{4=8>{3@eJltG7l6sXq>7iS^lw0OI>u4E0PLG1Tq_JH* zMgb%|T0;Uo(DvQDz(RVu2}$)jG;RSrsb_~f$M}dmS!Jm_rb?1Je37_0b*9QZnLJ55 z`|}UFQU>lik5vM_F232k(08o78G@BN@&-G+tg6&J%__LN)YnlwMyqH$jmI24ObzHf z4%gnilHrBB2Y7-z@Lv}^*F}pwV79Y7{CK6i0>9|Ho#&Q3I7jb0 zz%9r-jL(ide4l*0Au3wDRB0AH&iB|nM~4wTRFKcR!=iA!heQgzXrux?dBGjMrF#%O zJH-^eS=^C4$zdxz3~L}faIFTtZe&|KsFRetFMY?n7w?cf>%~Gn75B6{YDKxa&X-a> z{raQ4S+AcvaGcjX-H$mtunDZZ+;;*xu>MrL*r3KdIlVr-I+Y>3rfU4W?HRT?g3htL zE^nl~_@dst-Cy#%#QF`r*tuvsRrfNz#pQoIiVEpFg7LjPJ!(X}5+u_(3(-0}kVkjA z6`fSP$CimYC$Or!kYwAu(GHos!}t%o(4S(wkM1WuNp1+e;RAv^kLyD_pkwJfj~`pT z0_bVHT6BlJVO>PL-8S00_xtTTJ3-W?F% z)Q<5ydH5MTJv<3LM0mzLDK1{c?UgjJ;62_}LtBfYS zMqsGCa2Cfr((LIxH_GumJzxSn0C3~Hq8d=U7G5X3^Q$bpVz`yOUaVic-51on8RBod zVIQu#?M5iPrjdfY%EDB)t09OvYFaQ7m00000 z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000aO400000ajLD&niUtGi#kf8du{mFDZrW|)X=XZa+t_a&ZRc+|JAde8er4TPV#5kQhsb8f$lsE6Z z_PC+EQ1ib#nVRxEx{C$9ivIk(y%N+q{#)HU3uM+j{y)jO+=CH2HF7yTjlqt*=-ZRL z@QDjNyx*=og?U4}*ABWmqINAjHpifwMF{=4qw8A{MVZkUoXI1qQ>r|0*E69=OCjYu&{<*Oi4lBmw2T zliSU@-k+O1{tc!)&{KW^Bnk0E; z*>PLEoXa#lU`<&(*IuVQ$%u@+{D0y*#5}{iLB_H?ts*LG1n-q>ioTa!e*zI9JJnUz<(C*VN6(vdK{ zJ2dw>dq#n~LagIFVle={k`ms+^U( zW`o1Lhj>4{>QKr&-6sG%IogIi&nFN(Dqrn8U;$_qgbLmFZ`i9 znJY)UMq_+DY$%mHrxrxK0Ni4_Qiegii}t^}*=&M40}%Q=n-7(`YU!i9!KFmJBAY?H z+Z7Bvf(U}Vq(D2oJ>EAwC1X&$t~39UOtA~RrViq~ zJrGJf^uoZqy>0rt!(x5BiwS-_7dLl2g4_4J7B!(f;e--BYg4y8`(l7RWPXi2gaXC9 zCK!!8u%&RkZv7WKMq_ilqdot;5X6f-T;}dP;jAmX`=0E(35KD&h4G3!reXlSa^CDa zd`=@eT@dU%u=KtA+9{U)*7cg_XHw6 zJp2YbQr|^9R}C0F!Z`svW2I?53jqE+N9D6SCkN=eCaP6BH+0K9?@0r_0M1Li*DK$= zC{%*GXC?!@JZrbShPE@k!U&nV)dD*_eTGQ9HcbjV+L@TVxtQWRXe@-gSdv1#yEGd- zA`?!#SYu?oq!}x{PY;_qSsa=?bQOHO7r;L}kM;+=lE{R-Y+LNSET+%A%_^Y0U8b}= z8hZ~sod4pxb3)C#TyRdj09U9y9FI9XplEKq1cj`;2viWecH)4%C%KP12EMmE&CykG=d+zi(ad|DhXP=QomQaW^a$Y3X(UxvxBp|q`#ayvtx7b^} zj=s*iv$x~C)#pMy*9mdEbk3PPA$wQ7CrleW(C*8;%lnqQxK1y)C!DstFT1$8XI5Fe zco}9q0Qo&V(~N36TRm7iDa&9y9u*F~Vg<{*)Pv@_GCAG61mNSk4bZkakR1}cS;sxS zt7-^4L{Y4|(&h3z`CJgZ9dLNO za?s#B?Pq|z%Z4mG=bDPUa#o(aqJT!bS}BA(bk>YKXr7fk%krGNn=OjFzTaZJ8(tAS zFegtuz9I%a=GKY}!tIkeHVF+p6lMcFik7RqVaqVR$R;B_IHm!+x$eL_p220j z)c!g>cWgmCxPCCaj(=CY%L#b8L5n@S)NoV0{I+1cn+u&h{M(>BU$Ou_S?)|cR}}F( zMs=UNE_T2?t}weh=TlodC28+FwanPO3T;9=W=gKSLSqd*^5jgs&Z)4xx7-}P-SfaZ z1;0ML>fqiy6my`wVLq|CEpLfDupf)OP;nYOF2o$XC!40cj(;~in{*Pr;H!eXdGcPo zMu88#;h0Cf>;A$#y90(i14@HDov=c@ogbpQ<_0M|($hOU(%48nF&rU0q*aT%sr{$C zL?YumAn_%<@0fc$dBqexB+ie#6HOw#1&b%WO8etHW&fzWAg5@&*UVKsM&-#o87han zLjm@@QxWOBkl?Yq)H%z%{0cj{Z(3%&l02ciS_A++4f(1(GlfdJpt6FzTZgy0WB4LG zptmV z7ko%OWoz=hG_GMgcK;*1p*Uh?j!9S9YL0-pJxS5VM%PTd>Hx5)3)KE?3;U8L>7! 
zIXD|U97bllZ+~*UzrGi}u&Rc-5V(0fJmnude7;MzzW;H%q!qM0`%;sa#j z_mVKYHBdx6zBL*1@rp1Vf2C0LDQ))V%b& z+nWTu2X7d>{&f4i)e+#lDW(#;j|RXz^?K(#tjvQv?9TVRFEzKk^%y}rX1SfbA>k6e z&znd*FzD#Jj}PWNqJy+NAdH2)lZ+)iW)s^yOqle$llpi(&cZ3Yn_xLS309oETmCRR zk7c8~Kwly}U=A`pYrNG%c8E>$>wiO7y5=9C-x9<76zD2l)|>Ux85DPQ+c;M!Y-OT7h9%0d5^ff=Jzl>!7|{y@Mv#5D>>IZA~iueFL}~C z90xYMn^WgJp~B9*AVW$#eWYT#b%$fTYj-LStkY?|{l^)-doMn_PD_6~0R0*~8nB_f zlk{)9E$4td^!XLLu!nuTC(zwIzGo=BeT$I1xh$bPucyj9D=Klja_vq$ZSayjjc7DH zt1FT`X*gs&9v?+L*m`O^w`1}-$agcXXr>GG~S7Ac23>IUPy8Q4rbry=;fy$+f@ z35qd0sg=gO>zm9xq56M30(u8M6y%vZ{hJ)UF3$`+)8_cR1Mv?!xH>jpu)=JAO< z#vchh@{373MF0po^erpB72h(vbUFsS)QTm%o5L47;R7-}XKG75^`ZH^CMOKN0eH2# zVj;=7Eo#5Jz&ZRpuq3X$D;~i+qz}kEd$f`|p)``bpt9>a&9E=L4s!9jH5yvImGld} zeU9q8*CM$)1bnr;Bn_-Qy6J*D07+E5jw5WmD<>B{-mBa@J&q7P({;}~ReQWUHSPqx zPCpvFycoEn-6A18a@wuD4xx}cy}0hYE={7m)Nu*C9+RTHXWihu{MqBX z`n8=rPVPQE%W}0mXQjcsxY<5DyKf#n$=as8CnzGks!MLXqWb~7c&7h5OXP^WstQ57 zeW*=57%y`?3$Vw#Nq0ubH1q^4H9_&g@NLROi(%N}+-03T&M2QPfQZP=bX z%Z$=HIc=0Y-QXy>AyL3Qd@--P){lw2PCT4E*jY5Z_}R<4WH609qdXkEE(|$5642&6 zqH0q-4pUG(%%1DKNxvC85FB~DPlahah4Cc3_mChvUFFR@XWsL?B3}hNKSPu}RU=!x z0l~#P&SP6THLS+F#>H4W;p|kr*}Sbhv4s!10Ef!EkjAe(@aNsU>#DE3o8SaIM?Xuv zOK#n~xAJnl>guvQPPvvmt%Hs{GMtk=}D!Di~b0HOxGG~ZJ^dm{e3z8%Xv-NE5J zNh3%+Y-3+MS5hfFOHq|ORzi`yk2(iE46^tVe}z9Nijh@0j2D_b$~fM z)4#^N;6s|cFdi?w02P@#vY?~8rL8DE+pvf{0vXjj$}Qi#&1bEOy;`n3x!8NW!d{&_2_Bre z1Bey8n~32&?>f6XS!W@<{$?h;Nj#6Eqq;%MG16|IrdWf~0i2(t+K) zf5x1=R!WDw66(4;d%4^?K@h{aCn6NQB1k^Glu9T(r%fX}Cox++gd?In*T+>niISMR zgbL)mIPA&0IW&tr?cpao1$#8S5Q9Rzff_x$`}P(*UqzF=Y2P_J0H?4#hl3h9j+@Rr znaTgVQ)p^Diz{wAWqA6$eRemzUGYD>4PmD|+Go4X9eq3%l>Y zHBRNb>Oa7|v)cc=W?z}S^8PA3IN%<=27dLs2XNIqko&N_K{au^hD~TZJ57x{YhX@2 z`NSQ(4L}#XkB$PnxIK5feXD!Ci>^Psyv#Q|(eVyE7yU*(TqUVIcaf91?-jH=>f?Dk z4&~^)mp|aViPcj(ep;wJa}7W|yxIx9c~|N@l53zlVyfu7rl2N0!c$7Tyi#R6?sS$r zxc~pVrc!}CU*O~0QiwKLm;LjaBT?Aabi-QcjnTMOW zJH-gS#?Xh*m_O9?4$@iOySl$J&l9B&&qW?5}^V;$JZ-6O05aK`+yWZeS9E2PLI^Pp`)F< zm^4(p8A`vqKq^(_Nfzw9Ckf`esoY?^3-<;*u)mBu zdrG%FCgtipJ(Gewh!vbXA}Me@n>AFr{IcCVo9gvDWEzjV6bQ4s@~(9}-QTG@lC6w9 z{n(AWASXgRq<4R^!nCh*()uc!~rTBY0G!S4s-`$hE)GCp@bzc{~ z;m1}rQdI#vDWg<8GVc<)5Cd>^ymwvjuGqTLQMMq#f z8>q#+`!)$Yj1vqz(@?6sK%mV$!5H(qL||LIGV`;%7td%sf~QD4Y);_1J|lxXdt=GG zbY!GE;;TD66~G<4(~}3i^!MPrU>+Dd)tOa25}6OY1?h)8^ou1sqy_`M#xVmqd72PA zLN>`eWJeu4vyorh^r|AjI`LuF-QlBCEr@t*s-wbnQXA6w5_C1Pt3ed=?bEtq(xF z4p3LT>+zDig&PaK{^Vi2^q`%*1$;HVHqsim*sof=P+5ia`L^~vSS{1MScyWWf-FFndo_VM`OH3m?xoRgnoaHjT9#pry zs)ph`)^(}8#qgxO;UZ1Ejs?%W&{h+?0`|T0 z*fPYrO@&)m>Fiw7`0%dN1w6_Qmvb%0ttf5ZU2N7_C-m>kZ$H0*jilIirjo>U6E zVloFkrMU1rPx2Exgfo=9DeQ^7yd4U>i$h_&`=_?N)u((s@x7kBQqfdAD&R!Cm$*SZ z{5OH(cMLH+73SHzeQR7i{c?`GYjHF^<=`_rs%~jJXe5NY zHA|5^9q3p*c*s(`iZRZ-uZHG43A?4dyV4vyw~dcH4DbHC;^sZPn~MiL51P`ue1}H7 z5*k^&DVwJ|#<{q=o=(}jSH`qEgr7baBM z?r_?>O3(bfn`I!pZY+MfHyboOqu6!4Ue*zx1X4-8B%H@Qbql~dNa7*8E-vOgBeI&hzNy4K5?2(w395QL z60^}f)T{wK^l})x&|PypyZRkG&KK-EoG)cOIDiQ}FAh$;<|p(!r|kec+u9U7Tf{y( z#=r?YT*4(hp?Lzmkh-Qk6hmsfuZ5L4T7nEZM6_2u$Ah6f4&gLCX%3=1M~Q+v)SbaR zM!CAYVW|1JFnzH+FKE*|4XiZ00XOiwZWy>cPa{CQyI!|EV^|ZtW#NUphi-?wu#Lw& zYCsUZw9tn-+-q~ZuWv>?{{fpkLmeKxZnSQ@*`Fjl&^esEc}FQc|1=srUh1^GQZ`q- z7R~{>WMPv$^oxKzlk#u7IWw?45tNs^yUw6HIVn9nQ#J!S3l!SC%2k-U%Je$DuzANj z9ks1vWYaD2YJV687ai<)&jW=j{n z^S%_lHmL?Y9K2n;eQ2CJ#tOQ;z~&>ozzT%C^?PYNL*RzI#!CJ=cxODiiA>kLx3xw* zgaA-G`=D4n;to)}f(B+hnWw3{D`ql0C9?!QCdq}oHZ_VnJ-xBK^cqRJ{#%4R2HLT^ z4-%3(RsIIOP(tXtb4^`3mtYh;<||&k_Pw7xX~cFsW&?~oCR!ssdlV+Tv)TeYJAM^B zl#-viev}|Q_imrOezoSj+Z|FoZz?Uh98cRku#kv6Do9B@esN*F=c>^>zG>h*M0Nx` zxy9JLN6Qtx`GJi*d7uBg{(Jd6RWpLT=7*U)0E&yeP-})f0q^-d%89$YU3auQfjY80 
zSt~p}1fW$sq6aBG9ahUbC#$o&do?CJkR94PiH*6uUY0_4nzjp98#$EK1zi;dhmewBl~=R~Tyou#t8aCW`CNaqMVUST}E=0vPJLhMhxT=w<5 zZMzw}aK4T_vmrt}qSUTD*z`C(^lO?t2U&Ew_dGzopexS1P!Hie5NTqj3xL}DoI?VyS9F$)>UEdJAb(;*k+=b)2)KcO+w$%hY+om}@uo=2Lemo^TW;g{s zMh59SG-+SFgmW1@S8L@wn}!iPO+zTWLlT=fcrkiBee=FMs^EJ(eZ~*HyJ7}B@(aP9B*87ag4lgLmIy07D>=tJ_Uj6~P4St$xCFnvM(4OZ!iXq5Hz)YK*CNzAHNX)( zlI$A1nT?q{L{px-y&<{0!mD9CxaGXO-tu0&%N+?lqw0%1a3`QVAVBZD-LopZjix0$ zEa_=G)*S-6%2hc$*B-vRyq~?iY3_q;oS zNul>VvuC!v7Wo&wjOgDx+$X2HP#q_{2C|F1vw4)feAIHh6Y6@rB#gtnIdsdqQ>wYT zz^GX~$s1cda~R^hK+b8qF`p8>TF#I>buRh3X9W{InH^(0ZdX=4Hi#-b8M_NTY6*2a z6|~~J8`hmXrUe5#uUjoV;zxuyA~d`_Z>-xon9)DH$lQQCc2p+ zkO2Sy00000004kL00000000000000000000000000000000000000000000000000 z000000000000000000000000000000000000000000000004kL004kL0000000000 z00000000000000000000000000000000000000000000000000000000000000000 z000000000000000004kL004kL004kL00000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000004kL004kL004kL z004kL000000000000000000000000000000000000000000000000000000000000 z00000000000000000000004kL004kL004kL004kL004kL00000000000000000000 z0000000000000000000000000000000000000000000000000000000004kL004kL z004kL004kL004kL004kL000000000000000000000000000000000000000000000 z0000000000000000000000000004kL004kL004kL004kL004kL004kL004kL00000 z000000000000000000000000000000000000000000000000000000000000004kL z004kL004kL004kL004kL004kL004kL004kL000000000000000000000000000000 z000000000000000000000000000000004kL004kL004kL004kL004kL004kL004kL z004kL004kL0000000000000000000000000000000000000000000000000000000 z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL000000000000000 z00000000000000000000000000000000000004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL0000000000000000000000000000000000000000 z00000004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z0000000000000000000000000000000000000000004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL0000000000000000000000000 z0000000000004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL000000000000000000000000000000004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL0000000000 z000000000000000004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL00000000000000000000004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL000000000000000004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL0000000000004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL00000004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL5C8xG z00000004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL5C8xG00000004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL5C8xG00000004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kLkO2Sy00000004kL000000000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000004kL004kL00000 z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000004kL004kL004kL000000000000000000000000000000 z0000000000000000000000000000000000000000000000000000000004kL004kL z004kL004kL0000000000000000000000000000000000000000000000000000000 z0000000000000000000000000004kL004kL004kL004kL004kL000000000000000 z000000000000000000000000000000000000000000000000000000000000004kL 
z004kL004kL004kL004kL004kL0000000000000000000000000000000000000000 z000000000000000000000000000000004kL004kL004kL004kL004kL004kL004kL z00000000000000000000000000000000000000000000000000000000000000000 z004kL004kL004kL004kL004kL004kL004kL004kL0000000000000000000000000 z00000000000000000000000000000000000004kL004kL004kL004kL004kL004kL z004kL004kL004kL00000000000000000000000000000000000000000000000000 z00000004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL0000000000 z0000000000000000000000000000000000000000004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL00000000000000000000000000000000000 z0000000000004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL0000000000000000000000000000000000000000004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL00000000000000000000 z000000000000000004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL000000000000000000000000000000004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL00000 z00000000000000000000004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL00000000000000000000004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL000000000000000004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL0000000000004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL00000004kL004kL004kL004kL004kL004kL004kL z004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL z5C8xG000000000000000000000000000000000000000000000000000000000000 z00000000000000000000000000096100000k}@^CNqmL7KdA}3EC6r3E(i@gt+AcF zFhb@#PloNh;1318d%+64?)`*2Mu~Ae>sX|`2eI+IS#6lS$}|)_2kv&fM5O1uX7Opf zZ}k8@KX#M5@W%+eZ=oYRFukC>J%VpMAk8ehC$-Bw_mB&_VHp;@LNlYh*{C)=Yq2ss zDIm8y{|SaXDF`;bK2aw-h~tO6$HO7KIS7ipW==Xh5M*LJiT*x4+eP0zRW3z4L%ngl zr9Ij_)DCPr)0L4s^{@E63!CY>VUk!pokr8UDEFE>X1=LAsY4Mx5;8A5rjI+kyh!6a z;A9j$c_4JWw7BrR)4{5|ZMmkqM)tG1sy_%l!Z;zkQ$c<_AvW+kdP^WYr%fTf$9Ty* zC8%k=@JW%pyHO22ZVc?aJko@>5J;#zHQ1XxFX+*`OmQMT&x*G?OB-f8M}Is!%nWtA zNy*APZ)N#Bk`qQe)9Nz39pD+f{bw3H87o7)wyFobbzDfh^y{9y0^m-&OAop{sWIuh z3lLbm_`0;Y{nq-tT??MNUcM^3Rs|+KK~O)tCkyGk0g5U*y>(_hCZk`wc+OG0(YtOtLS(<5|sqKPo|7J6v_BI z7oE+!o`teJfKvv%;Nr5pybQoR6U+mu%DciuiQ~{K`lP$43k{<^> z0&*9;bl5t)Le*5fOTJ~iQ0H~LZsJZo^}EMByEHvKP*He2K zSsNKV>o+F64TO#J-)~~>wG7? 
zaKmxDu&a|i8&?uNa0r;Z;p-Q@AfJT0ve0EY^--ESW>GY~TT?PTxf%62EkT()>rhBM zMJM7rt(nKWw{NGsxR|rN9Njg%1R&zNwi}tehf-F%)*Gz6aUzktc@H)`R1JE%O4s5% zW-2PYP&))YA^|ZxFaQ7m000000000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z0000000RI300000;0@+HK=JB3jJ9w)$9wI(xsQ(=|{6w%i;g?CgVzEj*s{K|S;ion~uYUt;?H^8DiPxgsCJQL}> zlH-9pe=D858`#6Tw@rw9MZa{%L64DwvLUIQ?ekdM2T~7nNlt?K$O8B}wAWBxe*yJs} z$@CRD3DEGoGI|8OJ9>yb*|KQ8LYMEn``-JzVi74k#a7)rH9_CJ7u)K*pj90_K$gfm za4n%b${PSY4>#evmu{$r+w(WGp=5` z)*B{0&TBEc%TxTjP}Q)!S-_pUA~qyEZh`o`ciP9iyTXJ#9NvdJfy4$p1ZI%Db1JXA zxdxcLy1aqB$E*B2e0lr3Y`*ckC)#_xz2HSU3*jX?VRgW}ROtykaz3m)VmIDA2G1b8 zWl27|N5UdK;00K`gr5MqE0r|7?j+GXg1yqb#8WCgxfO~$@Q^J%;*6R*(54MM*;>84 zB(x;E^C=I!>(*(!fa`0yRhhaxq1+}r#^evZG_KV=kEJZUHQRJMxH`E!HU<*BpD%*E zoXMcP4i|Sk+?LV201AdY9!Sl+^XBNhAMB4je*l*|uTwX?yxc3ib3FvRa@@H(_|&jF z8pRAd8p4;n^fjeBxSqE>nJrj7ALRb;KwZnG3Ff-9SrFr@~^t|A^HmHld zxXJ`Q5}i{$RVyD$T&U z&e?iAh7`3tXLbBL8SF&6?5l;mVy;X)mp~4@G-_5nM#&4kc043Jyp;tz@8e0k?Qokr zbje1$W9gGT52gS;ArX%|cM}CX0#N)rhHXSV8|p(my{eo%n4Awi?@Eijr=A!*HjAG;@b>CF88b4y^0+5F*qeMj1jb-ndGb)t+sx0g#jNDmD?ON>H18h$cr%)I?2?d=zE*)UK@Y^7~-~?2= ztE(zK7Xrn+6+PcO@WX~Y3$9Z<{3}MhqVV-R`}m4H-HNKbr^OOI>;>z*K<6YqYv~8P z;#(IyGRLaBSdDAE*D-%QQ?M#M1?0uLMC*+_?j-NLL!AD+Mh?Nd22G~C(&0V4emi13 zPaDd-y+BU9pbJAhVq$^36B%DU&a1q<;fe{pr|rr+{lq}L6dr{qI}dka#6!P-`t2i1j94D+WVQi8OiZHKmcvLnYMsDY2iz} z5acGkNzza}RE`S0kXC@aTbQ!BW}dz~32L1@59uX5K~T>;>9i!gECG8w%M#nWd>GKX zA)6e$Po5||%o3Tri|akSC0T|%32Ilnvqa0eJHL@UZ==CEnDkFPI1SY={JbTW(yI*%b7mSX( zp5rb&oo9`_hFGS&VosVo%CaH6DH`6pMD<&|ha_!0bGgyH*E1eGq7ccvWAh z9&uv4(pDlnHS7L6Mf5?uppxReYhQ>w3o2|pWaEuIp~R9reQ2J%f;$5{#RWq=dmEX& z4%NB5qRS3EYOyyvSJY~}U8%D>L>{HQT^Ce4{3u|%^=HL9PgRe*0@J0uwU$1;_<5&1 zVz}+Re>Mxe%udg_y*j@;4Z|osbK+yXaQ@0Wz-fQHAz7)sfw$2+&yAuyJiB?j<=UOQ zgzA+$tB&IBRV@P<4R3Ac8SG46hD6=hyAIoS)9T zwW4;sD-01leP*V-OS?b2!ci2xOxWl;W0jS=);7|c4Tj}gPR1}B1Wp3L$ z;)OOmyWEMqqRyMVQv4}Avr0X@4zFLlDUXRf2X%_OW>C30M^eqZ zp<{MDnz)xdbq;bnwTrU3?LCCN1Q$a*h9?F*lo?Sx6`AcjuYr8L!NnoG2TL40rY!8d zZ#=R*Txu6R(iW_{_sPz@w;y^ux<`h*>v=mP81(J z*v&9JxG1|k?X&?pVz8V%vf&-Q?BC%!97fN&iClm>-7=y(+Yx2E5c{jVFzz9}ptnA| zK?xnbLuZA&-K#&lB8l_6i+%?@M5}|kb@uSQDpUYGP^{0o!X#+C#;`xUGw++bN1{_Z z$I~4<72wc3ITsT>6Be2}>kw1C{>H*Q?7C9BA_FbGE#|#DGyw-Zw3$RZQriZ-Pht~0 z(sGcz`An3%@(jDXS+y6uM{Bmc92rGCm&}$to9w_k{j|M14vbK|Rzs&eJZH7Ld&K!W z!4+=2vXcosAT%>PK*xzZxM*EGdBC+iKH#Fen_$1Z>hD9nfTRJv9Q%^IbERlIwVq=< zxiHE+0Ug1-k|?e_0hUd@5C8xG0000000000000000000000000000000000000000 z0000000000000000000000000000000000000000000005C8xG000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z0000000RI300000(#Xv`mDIjGhLy6sDUjkj53l39Ce4{ViZ|;#dj!0@=ocL zM`EWuqSHscWjVV#TV(@18mNxD57#)n^rR;{l*;TpAS)w0e-EQPcjbP(?S=%rf`oRw z?!_WKo5J-v&$R12Ybxuz0fjX^Im@0rrGARM@QNb43Xs`5GVwFJp5;5e`7=U2&_qHy zXUuQBq%i%lF-!}uj z4GWXJr=R~k*GG*!%YL6cS*M}99&0?is-ttfA?{(kq7Dl^QWX=ttv{cpkWc(XEh7N){_d;~M_eYRCL1UmiLi@@)$7$|5 znFJraEngr!Yajo{cV3*yzWg0NN zmuV6_)B_tlA)50$n+Pd9*!Lj3;I@4{h84a%7tbBMun60{#+Ge7$vX8r4sLb4qkx$_ z4kI+Y2i(XzV~Xy(^$WK<{16d5G*$DwV=J6H>j%I)$0b8Nx8Rw)c%<~arYzYz{~i!J zd;+vP)qmZ*E&y6Q;00K``5DK&qJx<`o$ryn8DCnxB4T>Hya!r3`8InzFr-yHL?*qv zJH$#ng&2>#q__*c`863l#$=8>ltsO~!0=$Z8yDw1=gKF&dvdkApZ5d3Ry7g5eu&Gv z zH?4BL5+kZS1Pk6g|Fe?3BGiyNS)UI)0r+yf6G^E%veA}005$%+0IRjU;bx(Eryx9x$Mh4JM8Ye6sR0MLS4-~97Er`R2VWm25&|@S-)hw8`~p2Y0_pqls&mS z8%=;cCN?F!eA1=6LxPe#b8F{2ZZW33sWh-V=32_TuVe|lXODwCWzFln6_&I+hT3#I 
z_mw`obzKs@-aucxM%RYCT?3iCbP6Oo3>{BAi4an}YE36P3Stwz>IBm~FS$-Teoc5h zM)q>NH{A3(Ph`%#?uUK62C0C&8MZS$%>VxvZ?GuSSI?R*2ezdZ@_F@OT9#a0gVUn1fI*kLt)a*}EUKJ5QseBqk~Flu4yv|1v~-ueR>&s36U9Y5f;2)qX$;)F zbw1j>^cvf{)3hr+RQZU!(C*2+FFU?G{E!2?20Oz!Kk|O^f%U&U&Egik1=r2IAcC{J7MqZ~|274@k3kT-A%r!&H7Zr-fC#y}vv>|d9>*_Amv zLKOr(p)t<9MSj&fm$ojv+19W;l5Xlewp9zgCY~g{&KPsNo}l|XxYgG?QXzf3w||m5 z?j^cAkIyE&-Q&GGh z5Ud8gqZetq8P9<{x6Xz(?E4{+Jd<%>>lh*jWSU6}sdE2hMzP|#zO}m0SFan-DAqd1d#Fh%Y_aNdt{;A?S z!EFY-2@vf(QdJwhv)@iTQ&RUkYbxhDn)DAn3+6Vw;*%4-egE3Ke5fitIRp?r7*5Z; z)a~cIY9#DD>u;|-&VSOopv79eaL$Un`J;5at4#qtR}qsuTf?qAP*y6vZrz$Y?noEB z5__Dx>Iql7vF{zb@<2X4nsO1nxnkqIfrT_ZtHXo5jVrx6qDcq6$Hw(McuDO%LSqiR z_vy4e*A$LCj+4H+*=b5VWeDdxFCL0KdB27{4ktpq)JcZC@S)hd3)7OkQZ(JXM;E5N zxn-9;*>(6k5IGDx`ID?YBM$`_xeb&Hjs3ko&psQdzJ(DFob1_o9l3}+v ztUL_8c`KAWAiS_V=jyCHH$M(LkH>pFHz#8}mLQh8UdgOF6UV8%=wJ!E7^4roL>!Sk z3lDHS4^@%8G`M^`8rBOvoOiammj~Xw=ex$dQ(*Wy8@`h~1>xblP^6o@o>1sK2xptR z0O6;+0D~qxymv&rvO(HBGGstJMjPBbT|G>^l)nKyfL5$L%q;>vMU)9Wyqfd81olz8 z$2omGkKaJNe6xZ)jQAZq9XAR*%oY#5k7PbOV%bYP$QX#c`%$Vq3YgD4Ifx{@8=;N7 z8wmb9AS<@KG0ufN3q$I>F4S5)&7Kjw;0-sOdeR)p2m=Vc5D%dDIuS$Tt)qFZTrRfem17~Qu zNiz$)V@mrwJ#d!1r#q=UwPW|Yl%#Mx1wW*_3WshzCGPjTE+YE7xO4$LH9EjK4cDT)%11l9EzqSr9voOa zc>NG;T9OCV|xj#ld=!TX&=ag?eA>q?JTrp8R3B8s)!Xewd zwdevn869OjhpK=&ZpGs~qTne!8L%w8%Z9N#i|GnH3!GQH*FOq9!Y5w5&?ZK_J+BA6 z*fF5GQhJoT72tHcx7-swjqD0NkVI*_vjhb_PoSebB9!;L(4C>X`_eAef?+XPuW9#4%t;7Dq``hktQ>d)Q0OEc;`v)n#A z&j=~J8|?VG6Fiq5yw6Io2dQfeTAK=@-Ac@2+~fvmzfoTv^e&(odSaaHiKgFfAIq zWonH)-tB_C)=&*S9o`?i267oXgVA_9e45}o4W1>uKHiQz{wsRCo2P8M#Jr@u#aY-q zg281xh$V!(WI(IDoSVKp5C8xG0000000000000000000000000000000000000000 z0000000000000000000000000000000000000000000005C8xG000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z000000096100000?;#w!D=QVfuLf+pjWnt}Ir6@|lxBWBLwEH&7iQ-?8*+O)S;7Lk zlUGMPwTN!KE0pa#(5!L1`BSet@fmSVG7i{{!KZ(DJL+z|Cx+D;v~(y z_|K<2W2ln6!y4|pWkuvXp{%sLGZUb^-Ts8Uo+T$coWM1_j+d%DM^Z(-tAWNm>Sf}* zQ%o(q?@F>fXM43e^hp%FlmYs?9W2{C4#0Cf{#PQr7-NP#>pX@zU5OSv#72+0^MGVL zYujAA#$+!%b$1TC^yPLuZCWzD{k%Y^;ww zegp`-X7>cW+_kbi9%{5a9gIUeKHs#w((^Svj)p0^M%jcsboh_Dree)J;f-cIo!HjA zif{qFxk~A}X)Y8zOW0!cl2nx-*1vTk{Or0PT6?8 zG`fep=B_uq@lXIe8NCZURsYbtqSUCn?3V00-jg9c_RX(60afQa3FtJvg0DI}BId-r z@-^+etE$?()82Tz*<6mhJGZC2X8TjS{c?r8si#pr4uvi}_l1_bJRkr&)2_ulEWa7N z(lcJW`(6>clGr@F*r=+!=^zBW{mPiUhh(a|L^r3rcrY2fxDO>gQ$YhgkeI~0D{kF9 zFlO<)c-*VJ8FW#+|F(`i6f%c9Cp%F*#l9H4Ill0_Jm4<8H<=*4cNTWKpfkWcXL=62 z&E^?74{JR<-20X~10wvq%RYp>V$?>wN28s*<{p(h!dm(|L=x0?n$Wf3mA0rfGvw{Z-+xdRA1O1nn8yCwWQMs?u1 z>NqvLytQvUrcg~gESAl@aAQe5H0%ewqzJS;MsS_G6d^>s`2w;$`S}jLy5FC=C%g2# zM?p2bjNAA;U7J0-raOf^OLEn_6%OCLf(~E24!@~8o>i+nwuhcNp?+z*SuoDKP<_d~ z?1}9>)t?JIUUAqwa_9*>UF$D87YTSgs=M90ZYUtUs&!{O9Q)n89&50?$J9eS_c%qn z+1dBHJ>amrJ`k(C_vHsY{IH+AtaaEsP3nTXmz!QZ3Hl~H{`Mif5Xud^32$|~5C8xG z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000KmY&$0000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000000000000000000000000000000000000000000000000 z00000000000000000000000005C8xG00000004kL004kL004kL004kL004kL004kL w004kL004kL004kL004kL004kL004kL004kL004kL004kL004kL0RRC1{~Twms{jB1 literal 0 KcmV+b0RR6000031 diff --git a/test/data_for_tests/embedding/small_gpt2/vocab.json b/test/data_for_tests/embedding/small_gpt2/vocab.json new file mode 100644 index 00000000..8f9feeda --- /dev/null +++ 
b/test/data_for_tests/embedding/small_gpt2/vocab.json @@ -0,0 +1 @@ +{"\u0120This": 0, "\u0120is": 1, "\u0120a": 2, "\u0120demo": 3, "\u0120sentence": 4, "\u0120another": 5, "\u0120this": 6, "\u0120text": 7, "a": 8, "\u0120model": 9, "\u0120voc": 10, "ab": 11, "<|endoftext|>": 12, "A": 13, "B": 14, "C": 15, "D": 16, "E": 17, "F": 18, "G": 19, "H": 20, "I": 21, "J": 22, "K": 23, "L": 24, "M": 25, "N": 26, "O": 27, "P": 28, "Q": 29, "R": 30, "S": 31, "T": 32, "U": 33, "V": 34, "W": 35, "X": 36, "Y": 37, "Z": 38, "b": 39, "c": 40, "d": 41, "e": 42, "f": 43, "g": 44, "h": 45, "i": 46, "j": 47, "k": 48, "l": 49, "m": 50, "n": 51, "o": 52, "p": 53, "q": 54, "r": 55, "s": 56, "t": 57, "u": 58, "v": 59, "w": 60, "x": 61, "y": 62, "z": 63} \ No newline at end of file diff --git a/test/data_for_tests/embedding/small_roberta/config.json b/test/data_for_tests/embedding/small_roberta/config.json new file mode 100644 index 00000000..4814927b --- /dev/null +++ b/test/data_for_tests/embedding/small_roberta/config.json @@ -0,0 +1 @@ +{"architectures": ["RobertaForMaskedLM"], "attention_probs_dropout_prob": 0.1, "finetuning_task": null, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 16, "initializer_range": 0.02, "intermediate_size": 20, "layer_norm_eps": 1e-05, "max_position_embeddings": 20, "num_attention_heads": 4, "num_hidden_layers": 2, "num_labels": 2, "output_attentions": false, "output_hidden_states": false, "torchscript": false, "type_vocab_size": 1, "vocab_size": 68} \ No newline at end of file diff --git a/test/data_for_tests/embedding/small_roberta/merges.txt b/test/data_for_tests/embedding/small_roberta/merges.txt new file mode 100644 index 00000000..2af8d178 --- /dev/null +++ b/test/data_for_tests/embedding/small_roberta/merges.txt @@ -0,0 +1,39 @@ +#version: tiny +a b +c e +e l +e m +e n +en ce +en t +h e +he r +i s +o c +o d +o t +ot her +x t +Ġ T +Ġ a +Ġ d +Ġ is +Ġ m +Ġ s +Ġ t +Ġ v +ĠT h +ĠTh is +Ġa n +Ġan other +Ġd em +Ġdem o +Ġm od +Ġmod el +Ġs ent +Ġsent ence +Ġt e +Ġt h +Ġte xt +Ġth is +Ġv oc diff --git a/test/data_for_tests/embedding/small_roberta/small_pytorch_model.bin b/test/data_for_tests/embedding/small_roberta/small_pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..73282346e7efbf48855b12c48dbce9191cbd36f5 GIT binary patch literal 29928 zcmVV?E0Zr)xSPcLG0BmV=bZli`Wo~3?VQz5(h*$~$004A(aAjX}X?kUIaRYsE1SnVq z0001UXm4_KaRo~PSOWk60BLS?aRy5SSOfq70Bmn=XK@Eh1a);TfC6JZ*pU3E?;$YX>4-}Uvgz)b!lv5Uvy<|b8m8Ab}|Za11Kn12LJ#7 zb98TVVP|D=1Y>k>a${%;Mr?0kbW?P1a$#p>3ULKk4*&oFF*GnXI506cI59UhGBYx9 z23P|C003ifb#VtxKm<;7aR^aM082zm5QcFHOAt!|hH(msXaFfvaSL>D3{r6oSS0`e z0A+1rWn^S&ZfA2YaBp*IbZKvHUuA7#Wn^S&ZfA2YcV%g3XmoK7XaguHXar~lSPuXI z05LQ$HaIXbH#jmfH!?9caSvz*O+W!oba4<-O8`q0OAv-}5lawD0fuoBiD&>RQgIV> zaTHQ<6<8+#003oeVr67xX>Mn8E_82eWo}<|d2nT4Wo=?*WMpY>XLBxhWoc(XLBx0VR>b8PH%E;E_Y>VXJ~YB9%ut7 zC};#|1y~ON001#GFg7?aF*i9iFgY+cGI1Yh2TKr6ba5b2O8`p{g>fNE0fliQiD&>R zQgI`6aU@c4C0H2%003oeVr67xX>Mn8E=*y0WpYk$a&0bRX<>75CTIgFC};#|1y~ON z001#GFg7?aF*i3kI5#mhGI1wp2TKr6ba5zAO8`p{g>flM0fliYiD&>RQgJJEaV%1C zEm$i6003ofV{c?-axQFPd1Z1gFfL(qbY*UIX>V>Wb7gF1E^&2ba(OOyWoc(saXCu?g>gEGXaFfvaXWNzJW_EzSSbJi0A+4tZ)9b1E^J|WWpXYsE@5RQgK6caYRyaMOY^Q003ofV{c?-axQFP zd1Z1gFfL(qbY*UIX>V>Wb7gF1E^B3ZE@Ek6b8$v!11KnH1ZV|V4*&oFF*GnXI506c zH8V3gH!(7CM`#C25KeS)NKs1wOAv)|NlO8RaY~7304Y*&OLTEeQgKaKD*ylhWo~0{ zWMy(LY+-q2axO3~VRUq5ZggpHZZ30WY-TQYVQh6}E_Y>VXJ~YBPG|!tC};#|1y~ON z001#GFg7?aF*h|cGdVaoIB`#C2TcG0PIPflQA+?z5K9n-aZyVUO96&)Qi*5)DN=D$ 
zba7NtaaC9;0001GZewp`WpXZTVR>b8E-)@(baZ8IbZKvHE^}pUW-fMNY;|QWVrgM> zaaL#pC@5$IXa!gg0000nG%z+eFflhaFg7+ZGB|NpXa`FWPIPftQA+?z5QTABO96#( zT8U@?DN=D;ba7l#aa~v~0001GZewp`WpXZTVR>b8E-)@(baZ8IbZKvHE^l>oaCLMp zWMyu1WiEGRX=iA3ab9QxC@5$IXa!gg0000nG%z+eFflhZGc+(VH!^WwXa`LI0Zw#r zU{Ol|OAt#ChH+s_5K94uabk&R04Y*&V{~z3QgLNiD*ylhWo~0{WMy(LY+-q2axO3~ zVRUq5ZggpHZZ275W@rN_C};#|1y~ON001#GFg7?aF*h|c zG&43iHgRWY2TKr6ba7}=O8`p{g>h+10fljDiD&>RQgLf^acoj?ZCEh?003ofV{c?- zaxQFPd1Z1gFfL(qbY*UIX>V>WZ*_EVb#yLFVR>b8PH%E;E_Y>VXJ~YBZfFB2C};#| z1y~ON001#GFg7?aF*h|cH8wXeG;wce2TKr6ba8M|O8`p{g>i990fljLiD&>RQgL&1 zadc8~byzO|003ofV{c?-axQFPd1Z1gFfL(qbY*UIX>V>WZ*_EVb#yLFVR>b8PH%E; zE@Ek6b8&WP11KnH1ZV|V4*&oFF*GnXI506cH8V9fI5;qIcW4Jo5KeS)cu`9LOAv)| zc}oF>ae9ep04Y*&dvtMpQgMA)DF6TfWo~0{WMy(LY+-q2axO3~X>N37a&2W~X<>9_ zE@WkHb7d}fWoc(*ZSSJ7g0A+4tZ)9b1E^J|WWpXYs zE@^IbWpZt0WNBe^WiDi8ZgXWWVrgM>afWCEC@5$IXa!gg0000nG%z+eFflhZGd40f zG&pgGXa`FaPIPgIQA+?z6oqk#O96#(iiv0dDN=EZba9MQagA6b0001GZewp`WpXZT zVR>b8E-)@{b#!obbS`9NZgXWWcV%g3XmoLoXaguHXar~lSPuXI05LQ$HaIXbH#IXh zGch(eagS&RO+W!oba9YTO8`p{OB9B2kxLXy0fupsiD&>RQgM@Xag75mS_VgC};#|1y~ON001#GFg7?a zF*h|cHZwRfG;x<`2TKr6ba9wbO8`p{g>jin0fljziD&>RQgNGfahy_domeLT003of zV{c?-axQFPd1Z1gFfMO(bZ~WaE=*y0WpYk$a&0bmWoc((^bWo~0{WMy(LY+-q2axO3~Z*_EVb#yLFVR>b8PH%E;E@Ek6b8)6<11KnH1ZV|V z4*&oFF*GnXI506cH8VCeGdMJHr)URD5KeS)s8LG*OAv)|sY?NcajJ=E04Y*&t8{U! zQgN+VD*ylhWo~0{WMy(LY+-q2axO70VRUq5ZggpHZZ30WY-TQTb!BpSE_Y>VXJ~YB zu4n@&C};#|1y~ON001#GFg7?aF*h|cHZ(UdG;yzJ2TcG0PIPgwQA+?z5K9n-aj{Df zO96&)vWaK_DN=E>baAv&akW?}0001GZewp`WpXZTVR>b8E-@})baZ8IbZKvHE^}pU zW-f7cWpa5gVrgM>akgj!C@5$IXa!gg0000nG%z+eFflhZGd46hI52UyXa`FWPIPg& zQA+?z5QTBMO96#(x`}82DN=E}baA{=alKe60001GZewp`WpXZTVR>b8E-@})baZ8I zbZKvHE^}pUW-e=Gc`kQlX=iA3alU8+C@5$IXa!gg0000nG%z+eFflhZGd49aH8yd- zXa`LI0Zw#rz)?#8OAt#ChH=455K94ual(mc04Y*&!*p@PQgOvtCjbBdWo~0{WMy(L zY+-q2axO70VRUq5ZggpHZZ30WY-TQNWqB@QX<>75#%Kd5C};#|1y~ON001#GFg7?a zF*h|cH!?FdGI7Uf2TKr6baBX0O8`p{g>lJC0flkOiD&>RQgO?4am-S2%~&e{003of zV{c?-axQFPd1Z1gF)m?rbY*UIX>V>Wb7gF1E_PvTb!9GhWoc(saoI}& zg>l-6XaFfvaocop+){DfSSRQgP#SapY2QV>WZ*_EVb#yLdWo~n2E@Ek6b8+Tq11KnH1ZV|V4*&oFF*GnXI506c zH8VIgIW#nJ=V%8@5KeS)=ut}mOAv)|=}Q5Haq5X^04Y*&>vVDKQgQ8AF#rGnWo~0{ zWMy(LY+-q2axO70VRUq5ZggpHZZ275_GkkrC};#|1y~ON001#GFg7?aF*h|cI5;*nHgWf82TKr6baD7mO8`p{ zg>m^y0flk;iD&>RQgQoqar{zo{a7gg003ofV{c?-axQFPd1Z1gF)nFtbY*gFWn^h# zbY(7NWo~n2E_Y>VXJ~YB{%8XzC};#|1y~ON001#GFg7?aF*h|cI5{vhFmeBA2Tec$ zPIPhr0RR9|O8`q0OAv-~0RaF2OAt!|hH?S{004<-04Y*(0|5X4baDg%002^Q1pxp6 zSSJ7g0A+4tZ)9b1E^J|WWpXYtE@^IbWpZt0WNBe^WiDi8ZgXWWVrgM>as~ka0B8d! 
zC};#|1y~ON001#GFg7?aF*h|cIW##rGI9q2003wQOB7CYatHwc08vW-OB9822>}2A zO96#)3IPBBiD&>RQgRCc004Ay3;_TDQgRIe003Ac0001GZewp`WpXZTVR>b8E-@}| zb#!obbS`9NZgXWWcV%g3XmoN80RRAK11KnH1ZV|V4*&oFF*GnXI506cH8VLiF*Z1I z4*>uGXa`L|0Zw#s5CH%HQA+?z5K9z>auERl081200furC0RRArXaFfvauWdn0CaK` z0RR9}auopp09YXa003ofV{c?-axQFPd1Z1gF)nX)bZ~WaE@WkHb7d}KX<>7676AYN zXaguHXar~lSPuXI05LQ$HaIXbH#IXkH8V6cau)#r0B8qG5KeS*7y$qPQA+?z5QTCX z0RR9?0fllJ0RRArXaFfvavK2v0CaL30RR9}avcEx09Ypg003ofV{c?-axQFPd1Z1g zF)nX)bZ~WaE=*y0WpYk$a&0bmWoc(sav=c#080Udav}i$0EuV-DN=GH0RRAW zawGu&08(-#0RRA4B>(^bWo~0{WMy(LY+-q2axO70Z*_EVb#yLFVR>b8PH%E;E@Ek6 zb8;pD003wMC@5$IXa!gg0000nG%z+eFflhZG%zsaw!1-080Udaw-7;0EuV-DN=GP0RRAWax4J=08(--0RRA46951JaBpvHWpXZL zWo~n2E_Y>VXJ~YCE&%`lXaguHXar~lSPuXI05LQ$HaIXbH#IacH#IXhaxVb@0B8qI z00B;Paxeh^08vW-OAt#ChH^0h002u6O96&*G64VpiD&>RQgSl^004AyGywnrQgSr` z003AK0001RZ*Od6axP?LZgXWWVrgM>ay9_~0B8d!C};#|1y~ON001#GFg7?aF*h|d zFgG_bGIBQo003wQOAtRQgS;1004Ay zJOKazQgS^3004D;ay|h709XkC003WYWprU=VRT_~KLG#$XaFfvazFt904P`h0001T zK>+{&eR4tp003AA0001XWpZb8E--Rb0RRAfa#aBU0BS=4002t? zb66Pw003ofV{c?-axQFPd1Z1gFfL(qbY*UIX>V?FRsjG2eR5X;003%30RR9?0drU$ z0001GZewp`WpXZTVR>b8E-)@(baZ8IbZKvHE^}pUW^z~o004b*Spfh5YC{14080UL zSR?=d0A+4tZ)9b1E^J|WWpXYsE@5b8E-)@(baZ8IbZKvHE^}pUW-fMN zY;|RFUI73AeR5v`003%30RR9?0drU;0001GZewp`WpXZTVR>b8E-)@(baZ8IbZKvH zE^}pUW-erMZ*XsQbaG$;004b*VF3UDYC{14080ULSReoZ0A+4tZ)9b1E^J|WWpXYs zE@5b8E-)@(baZ8IbZKvHE^l>oaCLMpOksIta!zk@ZE|J-004b*X8`~J zYC{14080ULSSSDh0A+4tZ)9b1E^J|WWpXYsE@5b8E-)@{b#!ob zbaHM1004b*Zvg-RYC{14080ULSR4QV0A+4tZ)9b1E^J|WWpXYsE^l>oaCLMpWMyu1 zWpZ!<004b*aRC4TYC{14080ULSReoZ0A+4tZ)9b1E^J|WWpXYsE^l>oaCLMpOksIt za!zk@ZE|t}004b*a{&MVYC{14080ULSRMcX0A+4tZ)9b1E^J|WWpXYsE^l>oaCLMp zWO8qCZ*_EXbO8VWeR6dH003%30RR9?0drUn0001GZewp`WpXZTVR>b8E-`X;0RRAf za(4j$0BS=4002t?b66Pw003ofV{c?-axQFPd1Z1gF)m?rbY*UIX>V?FcmV(aeR6pL z003%30RR9?0drU$0001GZewp`WpXZTVR>b8E-@})baZ8IbZKvHE^}pUW^#G~004b* zdjS9dYC{14080ULSR?=d0A+4tZ)9b1E^J|WWpXYtE@5b8E-@})baZ8I zbZKvHE^}pUW-fMNY;|RFfB^sieR6>T003%30RR9?0drU;0001GZewp`WpXZTVR>b8 zE-@})baZ8IbZKvHE^}pUW-erMZ*XsQbaH|L004b*g8={lYC{14080ULSReoZ0A+4t zZ)9b1E^J|WWpXYtE@5b8E-@})baZ8IbZKvHE^l>oaCLMpOksIta!zk@ zZE}bK004b*i2(orYC{14080ULSSSDh0A+4tZ)9b1E^J|WWpXYtE@5b8E-@}|b#!obbaIdZ004b*kpTbzYC{14080ULSR4QV0A+4tZ)9b1E^J|WWpXYt zE^l>oaCLMpWMyu1Wpa`M004b*lK}t#YC{14080ULSReoZ0A+4tZ)9b1E^J|WWpXYt zE^l>oaCLMpOksIta!zk@ZE}q<%YC{14080ULSRMcX0A+4tZ)9b1E^J|W zWpXYtE^l>oaCLMpWO8qCZ*_EXmH_|&eR7up003%30RR9?0drUe0001RZ*Od6a&njf z004b*nE?O*YC{14080ULSPTFF0B~<_Y-MsTWMyu1WpbJU004b*n*jg-YC{14080UL zSP=jK0B~<_Y-MsTVPkY@c42gBZ*Fp&0RRAfa-9JH0BS=4002t?b9HlKE`S1EaR4Y- z4*&oFF*GnXI506cH8U|eGB7Z40ayiO{>^>jk~LIhZ^i z8VEgCCFHvjfz~{aaf!S55)wUll1aSht_i$Zivhji@Yp-8r zStvUbT$#Mnb%8ru6cN3TTiClzn*zOtioLt{am_qlLUp>ri;p^ELH;@iog_Sqp4>Nd zJJ`G_gCx5PJ14y+C*V4TxW&8=R|h>rkFq?)NJ6~p#{oT92co}fsZy%M}ulB+y& zP`JAQPXxUNdU`v3{tZ0RFEqS%;F>yQ{=GZG58=Gz;!wO$k_SA+Njg1rNsqfY+OfPx zlC-;GiIlrj98Nv?3bH%rJlMN^F!4NCRE)e?qT9Uk+=M(6gDpF7kq)}NF;Kjjdq%t< zEUmnNwv#*pA9uT?s{y=tC)&L8tqMF~bXz-Rtv$PYVqCk)@dZ58Y8yNz3qQQH`Mx|( z$oe=&JgmGS){i`;7RkGkmPR}U;Xk}Nqi4GX%1k`BDHFYOh9kR*Q^GtM6tFx^Y^*z2 zjYd3V2ot@c=QF(5kd2qDh9pL=99e0?ufh{`HVb@TB^GQ;yb&}ywE)0 z2?x9+6d<}EO^v#XrJ20D+`hbLE74E#RTavpsJ6Sxd6qr0-8@{}Yc8NS1h!4Gr z&hWe7x|6(SKO#J1#R9w#@&G-V$?il^vJq2trxshiU&P!eB?V{{E58BcE`IVjlH<4i0wLj2`@bo zn0CEI;W53}Q?fh~(8N6BxCK1Vcz`@?5!5_B=K4H}#nHTK5SF~nX2Cj1w_3c@)W5uU z-srnqN|d{=9ag+RX(>Gi{)xP(+=4to5pKMHdjhF^ zvogHbuPRCza>0| zBc8lT=`g$_1cy1oNqxJ*bK|>~;}N`23Jg8mLxsE({93%3F-km_cWaDqvLU_gdr3Vt zcm=(3v7Nm4l>d_vW1p^!+=ERwX>R zU$47Dv9dfA-;+EdMDRL&+337Xd6>K~2&=r_ZzsHS8e+VZF~z(Q2M0TM$)G$~#mu~q 
zetNu;?900IJ&-&poe{iUe=NKrf@M6g`l38Wxd=SM=&rkGF%3P5w-8o#^-sR%tm#^}3kG8nxq2jjb8k4L;jR1Uo=6T~|&#jd;e7|=WEaEiPGf_%Ik zYLUDWr|7$vgTuTR2Ejbkof^79C!RdQJ)OHa;@Z670P#FFyPZ6^rs_Ny7u-BH05ClU zP|Uj)_Z7V%kWM^@sTn)yM-RO+aS%K;(9XQYP%*p#_7}XKK^{3t{olMLKWaOcbd04~R0aCmFJ_)^3y0W?%f}y;|<_0_o{)xO>ph7%=)3H3HHxs><*A%?dTQ55- zYu&m@{wTbcmm1^ z99KP!NB}$`nmfF6PQN-dfX+Jt2tmDG*Dk!hW`n%>^~}82CFMM!r2)NEuy(ox@`F34 z!8JLQca=H}+Go56Yj3=v$_u;90499zAIR;WC(9qc?iK&!lR@Kd~bh^f5QgSxzuz?8f0Ba*zm1x-8# zMmfFkYp^{0s)jswoyjZy&rjB#Sz)=M=m!;WZ9%;AjjB9%Y}~wHN9($s z$SS!KVUIk;U7)=8vlF{G6^Oh-UjaNU?e4r8FNC|<%7!~;x9vQ14oW=2R{K2pv;aJ+ z&96KS{gb=P@UOakW*EGO2a>$cyyHBHBr7{q6dS$Lm`%GmFA+U*Vtza!_su*rELl9@ zX9+!0J%HV-`tWih=b zf{MG1`>#Ai(Hp$AT|>Jvn>aiTjsUyoP{TUro2$IoQl7Y8LOwi;vaP!tEu1@){R}*^ z43E3T$R#|lAc{P$oFcs)p2@tysOr0&cQd=?iE6rOF)+O_DfhdnTsXYv&k?%-_Aa~^ zW~IE|5AD1DiCDabD|Ea$`F=bUtS38R9{{}tCc`{L8H_u|Ohvp42S7a~K`Oe&-R?V- z&)K}i>Z3bzr4_yZ$OOIVJpsJ1<>I?$TOvLCot?aE)U~_mnsPiQmmj=oV}rau>xDd= zfegGNOFg(D89*R6B98A17k=MKn&JDb^Lu|a4{}(#|umQcWfvr6F@D;pD zyBj=rfhxPEUw^#*)bhF;a%Q{$&*wasU=lmi-2y#NT~|C?tQ$RTO*K5Z;~P0wZihT& zJ|MjUUj;qz(Y8F4y9m61@EE)aS#7(5RY<)B&xkw+@Myhdifg=4M-#ls2LL@_f(1RG zBLO_tnTx!gTGBiq^TIo2{7gHbS}(m87mU2*4mQ2hwyrz_eRsV2qX#`NCF8r5nQJ`Z zsJOgdhyuO16q3AYqAb12I2Af?X{kGcKyWQB6Wuh%>NlL9=d7w)_UiAOxgvIaa%5*fWU!qz;VaUngH)`z@RcQ?EqFp<2# zBRjnTy&S!K*(f|YMRL3binl!cE~vX;L$16q(26{`2g^Kn1PDFlBVfGWKo~vU)B!v= zPBgsplpZ{IRtUXwNf15I)4#mQQG2@NJ`}ubZZf+&H7C5#7v4Oit=zg0*!4V_3&cCX zqprJ66feBN=3cx=wi>*PKw3QDYYn_!ikiGuJE^?5JP*B1p|iWG#JN1~pzFJ_5tKUN z(j2|ZVFf%pq8vTM#8tdkZ@s)H4Awi)iPt=j5DB~m&9uBPzWcmu938w}*cLl4O+!3y z;zT>9mA5+v#7eua4YNF_=%&29@>V)<2P{02pD#T9q>?(3uZuib?VUXPnCLs5{sg=8 zV!XQmzr4CMgO59Ip{qOR;cyB!EieZNM<}-DEYgaIU&3_i;TN3 zO2a%f5}LcJJte&53PU`{cuKrrKGnO&(uuqgz5~5S|3JKAiL5*uAN`D22MFPjS5D^CLZj?}j^%>sh=Cc(}W~ttY$!u9`dPe3HDq zRM5I-mqEKXA1FL{usJ&le~-HWkrupvMuWU#V)eWgm;St)$Vfc95%W86Et0%GA?7@F zQ%^i88GyYx6zjVbv9UZOWRW})9k;wWnngUBq2)YDsS><*1=PINjc7cC;$6JTzlOZ> zuClx}L4LgI;0HYZ3c9<(#|OOv5v4pbX(GMH+G@RhSW~>I4XV72emcB=qz*k@wShgQ zDhNFF8)7`9_e(qcvCKL^Uq!s*?UFhx06;vDLi4-<4(q$Cjz7Cln%}%{ z%ndz}+XX!{AecOo8;87cNE5xwHz_=Cq3SzXmj}B5a8|vy$p1VBc*#7F?`OR3YnHt5 zPO-fBQz1L6HIuyfkhi=F7_GZ0L2Eo&aI(C`$`HJV^Gv-J?FcNo4TyHzOJ_9iY3*a;-d5&&fOMj$l3f z4CXxJdT>0$wr{)Am0mpfU>7{zLrp!-OrX4*d)Pc2l6<>cWeL1n+zUK_^4L7?&SATq z-p;&9zEL`~DqcH-ZxTJ6-g`XNhkHHR4j(-H(rG;H2s6BB)`&cInhibhrnfwBdjdSA zZzw&c&q6$>&hNWVC?UM}-FiF#WQ#oa?yb9p$w54w%-=lg2F$z|p<}zEpY*!$+cv!B zn_oQCl-s;SC*Hh=Z<4#=@h`k&S^+&cG_EF!(LQ|LUo-~~PH!E zS2DZ_mKHo-?w33?YePJ3NdP;cqFFrtH8VUV)2F-*v%Wj2k^a0usmr{<1iL)!Bh9>2 zOFO&|Gda9QFCsm`Ft59U%-Oqq^b@?t`3F6Z_oX}|!3Vq#%`rQfl4U%JL}$FbOu0Pk zdpW#QCHJ~g64yL=2DLoLFbll#G=n@RSo1reVJ^HTB=9_Qa;!Y+ys12@68?;R3yc_K!S;O6j~u{jxmQ3obnS!ZthXI}1ErFQdFtOSU|r@5ekM!)LsBheJKb zIfA^+l$N}(pS8TIcq}_gp!m9gvy#02?&!RtcyBzCkK4S06iK|Roz1*?t)ROH)Tlcz z{qeeU^Nu`Z6^gr?tT{Zy4#K={Cj7h6%9y;d%$7Tn&B?o~YJ@yTFRDEAu6Mj2HHEyO zZMHlsU!pu>)(AaD+^{^pD^|PVyc0aY)pES9lngyIz>GZRCf_?S4{$t6n@Q8fLu8L5{o*nMXXY zO)|Wsd#gO_jJvxKe&@VdcL_W$k5auRL)JV~k*~WvvPV1hMYB9PD;K+ocgwrP^yWJ_ z9~eEV6T7?V%8@)|xL~|rw+1}$w(vZ7ckDaYu|~ZH8u2_b01`U(k()fHhTJ<400000 z0001H3^}|CjW@kAeQiA6n;*Rw97sD??hw6vPp3R=D{MW`MzlJ0)405O81y`qwVk}u zZ(cjMzQ;R3JchjFKh`=B00000000000DwOL0DwOL0DwOL0DwOL0DwOL0DwOL0DwOL m0DwOL0DwOL0DwOL0DwOL0DwOL0DwOL0DwOL0DwOL0DwPshAwyj literal 0 KcmV+b0RR6000031 diff --git a/test/data_for_tests/embedding/small_roberta/vocab.json b/test/data_for_tests/embedding/small_roberta/vocab.json new file mode 100644 index 00000000..376b658f --- /dev/null +++ b/test/data_for_tests/embedding/small_roberta/vocab.json @@ -0,0 +1 @@ +{"": 0, "": 1, "": 2, "": 3, "": 4, "A": 5, 
"B": 6, "C": 7, "D": 8, "E": 9, "F": 10, "G": 11, "H": 12, "I": 13, "J": 14, "K": 15, "L": 16, "M": 17, "N": 18, "O": 19, "P": 20, "Q": 21, "R": 22, "S": 23, "T": 24, "U": 25, "V": 26, "W": 27, "X": 28, "Y": 29, "Z": 30, "a": 31, "b": 32, "c": 33, "d": 34, "e": 35, "f": 36, "g": 37, "h": 38, "i": 39, "j": 40, "k": 41, "l": 42, "m": 43, "n": 44, "o": 45, "p": 46, "q": 47, "r": 48, "s": 49, "t": 50, "u": 51, "v": 52, "w": 53, "x": 54, "y": 55, "z": 56, "\u0120This": 57, "\u0120is": 58, "\u0120a": 59, "\u0120demo": 60, "\u0120sentence": 61, "\u0120another": 62, "\u0120this": 63, "\u0120text": 64, "\u0120model": 65, "\u0120voc": 66, "ab": 67} \ No newline at end of file diff --git a/test/embeddings/test_bert_embedding.py b/test/embeddings/test_bert_embedding.py index fe4702ab..1593c53f 100644 --- a/test/embeddings/test_bert_embedding.py +++ b/test/embeddings/test_bert_embedding.py @@ -3,6 +3,8 @@ from fastNLP import Vocabulary from fastNLP.embeddings import BertEmbedding, BertWordPieceEncoder import torch import os +from fastNLP import DataSet + @unittest.skipIf('TRAVIS' in os.environ, "Skip in travis") class TestDownload(unittest.TestCase): @@ -45,12 +47,83 @@ class TestBertEmbedding(unittest.TestCase): result = embed(words) self.assertEqual(result.size(), (1, 4, 16)) + # 自动截断而不报错 + embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1, + only_use_pretrain_bpe=True, auto_truncate=True) + words = torch.LongTensor([[2, 3, 4, 1]*10, + [2, 3]+[0]*38]) + result = embed(words) + self.assertEqual(result.size(), (2, 40, 16)) + + def test_bert_embedding_2(self): + # 测试only_use_pretrain_vocab与truncate_embed是否正常工作 + with open('test/data_for_tests/embedding/small_bert/vocab.txt', 'r', encoding='utf-8') as f: + num_word = len(f.readlines()) + Embedding = BertEmbedding + vocab = Vocabulary().add_word_lst("this is a texta and [SEP] NotInBERT".split()) + embed1 = Embedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + only_use_pretrain_bpe=True, truncate_embed=True, min_freq=1) + embed_bpe_vocab_size = len(vocab)-1 + 2 # 排除NotInBERT, 额外加##a, [CLS] + self.assertEqual(embed_bpe_vocab_size, len(embed1.model.tokenzier.vocab)) + + embed2 = Embedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + only_use_pretrain_bpe=True, truncate_embed=False, min_freq=1) + embed_bpe_vocab_size = num_word # 排除NotInBERT + self.assertEqual(embed_bpe_vocab_size, len(embed2.model.tokenzier.vocab)) + + embed3 = Embedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + only_use_pretrain_bpe=False, truncate_embed=True, min_freq=1) + embed_bpe_vocab_size = len(vocab)+2 # 新增##a, [CLS] + self.assertEqual(embed_bpe_vocab_size, len(embed3.model.tokenzier.vocab)) + + embed4 = Embedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + only_use_pretrain_bpe=False, truncate_embed=False, min_freq=1) + embed_bpe_vocab_size = num_word+1 # 新增##a + self.assertEqual(embed_bpe_vocab_size, len(embed4.model.tokenzier.vocab)) + + # 测试各种情况下以下tensor的值是相等的 + embed1.eval() + embed2.eval() + embed3.eval() + embed4.eval() + tensor = torch.LongTensor([[vocab.to_index(w) for w in 'this is a texta and'.split()]]) + t1 = embed1(tensor) + t2 = embed2(tensor) + t3 = embed3(tensor) + t4 = embed4(tensor) + + self.assertEqual((t1-t2).sum(), 0) + self.assertEqual((t1-t3).sum(), 0) + self.assertEqual((t1-t4).sum(), 0) + class TestBertWordPieceEncoder(unittest.TestCase): def test_bert_word_piece_encoder(self): embed = 
BertWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_bert', word_dropout=0.1) - from fastNLP import DataSet ds = DataSet({'words': ["this is a test . [SEP]".split()]}) embed.index_datasets(ds, field_name='words') self.assertTrue(ds.has_field('word_pieces')) result = embed(torch.LongTensor([[1,2,3,4]])) + + def test_bert_embed_eq_bert_piece_encoder(self): + ds = DataSet({'words': ["this is a texta model vocab".split(), 'this is'.split()]}) + encoder = BertWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_bert') + encoder.eval() + encoder.index_datasets(ds, field_name='words') + word_pieces = torch.LongTensor(ds['word_pieces'].get([0, 1])) + word_pieces_res = encoder(word_pieces) + + vocab = Vocabulary() + vocab.from_dataset(ds, field_name='words') + vocab.index_dataset(ds, field_name='words', new_field_name='words') + ds.set_input('words') + words = torch.LongTensor(ds['words'].get([0, 1])) + embed = BertEmbedding(vocab, model_dir_or_name='test/data_for_tests/embedding/small_bert', + pool_method='first', include_cls_sep=True, pooled_cls=False) + embed.eval() + words_res = embed(words) + + # 检查word piece什么的是正常work的 + self.assertEqual((word_pieces_res[0, :5]-words_res[0, :5]).sum(), 0) + self.assertEqual((word_pieces_res[0, 6:]-words_res[0, 5:]).sum(), 0) + self.assertEqual((word_pieces_res[1, :3]-words_res[1, :3]).sum(), 0) \ No newline at end of file diff --git a/test/embeddings/test_gpt2_embedding.py b/test/embeddings/test_gpt2_embedding.py new file mode 100644 index 00000000..01e00410 --- /dev/null +++ b/test/embeddings/test_gpt2_embedding.py @@ -0,0 +1,268 @@ + +import unittest +import torch +import os + +from fastNLP.modules.tokenizer.gpt2_tokenizer import GPT2Tokenizer +from fastNLP.embeddings import GPT2WordPieceEncoder, GPT2Embedding +from fastNLP import DataSet, Vocabulary + + +class TestGPT2Embedding(unittest.TestCase): + @unittest.skipIf('TRAVIS' in os.environ, "Skip in travis") + def test_download(self): + vocab = Vocabulary().add_word_lst("This is a test .".split()) + embed = GPT2Embedding(vocab, model_dir_or_name='en') + words = torch.LongTensor([[2, 3, 4, 0]]) + print(embed(words).size()) + + for pool_method in ['first', 'last', 'max', 'avg']: + embed = GPT2Embedding(vocab, model_dir_or_name='en', pool_method=pool_method) + print(embed(words).size()) + + def test_gpt2_embedding(self): + weight_path = 'test/data_for_tests/embedding/small_gpt2' + vocab = Vocabulary().add_word_lst("this is a texta sentence".split()) + embed = GPT2Embedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1) + requires_grad = embed.requires_grad + embed.requires_grad = not requires_grad + embed.train() + words = torch.LongTensor([[2, 3, 4, 0]]) + result = embed(words) + self.assertEqual(result.size(), (1, 4, 16)) + + embed = GPT2Embedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1, + only_use_pretrain_bpe=False, language_model=True) + embed.eval() + words = torch.LongTensor([[2, 3, 4, 0]]) + result = embed(words) + self.assertEqual(result.size(), (1, 4, 16)) + embed.get_lm_loss() + + vocab.add_word("NotInGpt2") + embed = GPT2Embedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1, + only_use_pretrain_bpe=False, auto_truncate=True, min_freq=1) + words = torch.LongTensor([[2, 3, 4, 0]*20]) + result = embed(words) + self.assertEqual(result.size(), (1, 80, 16)) + + def test_gpt2_ebembedding_2(self): + # 测试only_use_pretrain_vocab与truncate_embed是否正常工作 + Embedding = GPT2Embedding + weight_path = 
'test/data_for_tests/embedding/small_gpt2' + vocab = Vocabulary().add_word_lst("this is a texta and".split()) + embed1 = Embedding(vocab, model_dir_or_name=weight_path,layers=list(range(3)), + only_use_pretrain_bpe=True, truncate_embed=True, min_freq=1) + # embed_bpe_vocab_size = len(vocab)-1 + 2 # 排除NotInBERT, 额外加##a, [CLS] + # self.assertEqual(embed_bpe_vocab_size, len(embed1.model.tokenzier.vocab)) + + embed2 = Embedding(vocab, model_dir_or_name=weight_path, layers=list(range(3)), + only_use_pretrain_bpe=True, truncate_embed=False, min_freq=1) + # embed_bpe_vocab_size = num_word # 排除NotInBERT + # self.assertEqual(embed_bpe_vocab_size, len(embed2.model.tokenzier.vocab)) + + embed3 = Embedding(vocab, model_dir_or_name=weight_path, layers=list(range(3)), + only_use_pretrain_bpe=False, truncate_embed=True, min_freq=1) + # embed_bpe_vocab_size = len(vocab)+2 # 新增##a, [CLS] + # self.assertEqual(embed_bpe_vocab_size, len(embed3.model.tokenzier.vocab)) + + embed4 = Embedding(vocab, model_dir_or_name=weight_path, layers=list(range(3)), + only_use_pretrain_bpe=False, truncate_embed=False, min_freq=1) + # embed_bpe_vocab_size = num_word+1 # 新增##a + # self.assertEqual(embed_bpe_vocab_size, len(embed4.model.tokenzier.vocab)) + + # 测试各种情况下以下tensor的值是相等的 + embed1.eval() + embed2.eval() + embed3.eval() + embed4.eval() + tensor = torch.LongTensor([[vocab.to_index(w) for w in 'this is a texta and'.split()]]) + t1 = embed1(tensor) + t2 = embed2(tensor) + t3 = embed3(tensor) + t4 = embed4(tensor) + + self.assertEqual((t1-t2).sum(), 0) + self.assertEqual((t1-t3).sum(), 0) + self.assertEqual((t1-t4).sum(), 0) + + def test_gpt2_tokenizer(self): + from fastNLP.modules.tokenizer import GPT2Tokenizer + + tokenizer = GPT2Tokenizer.from_pretrained('test/data_for_tests/embedding/small_gpt2') + print(tokenizer.encode("this is a texta a sentence")) + print(tokenizer.encode('this is')) + + def test_gpt2_embed_eq_gpt2_piece_encoder(self): + # 主要检查一下embedding的结果与wordpieceencoder的结果是否一致 + weight_path = 'test/data_for_tests/embedding/small_gpt2' + ds = DataSet({'words': ["this is a texta a sentence".split(), 'this is'.split()]}) + encoder = GPT2WordPieceEncoder(model_dir_or_name=weight_path) + encoder.eval() + encoder.index_datasets(ds, field_name='words') + word_pieces = torch.LongTensor(ds['word_pieces'].get([0, 1])) + word_pieces_res = encoder(word_pieces) + + vocab = Vocabulary() + vocab.from_dataset(ds, field_name='words') + vocab.index_dataset(ds, field_name='words', new_field_name='words') + ds.set_input('words') + words = torch.LongTensor(ds['words'].get([0, 1])) + embed = GPT2Embedding(vocab, model_dir_or_name=weight_path, pool_method='first') + embed.eval() + words_res = embed(words) + + # 检查word piece什么的是正常work的 + self.assertEqual((word_pieces_res[0, :4]-words_res[0, :4]).sum(), 0) + self.assertEqual((word_pieces_res[0, 5:]-words_res[0, 4:]).sum(), 0) + self.assertEqual((word_pieces_res[1, :2]-words_res[1, :2]).sum(), 0) + + +class TestGPT2WordPieceEncoder(unittest.TestCase): + @unittest.skipIf(True, "Only for local debugging") + def test_eq_transformers(self): + # 测试能否正确得到类似于transformers的结果 + weight_path = '' + + # tokenizer = transformers.GPT2Tokenizer.from_pretrained(weight_path) + + ds = DataSet({'words': ["this this this a is texta model vocab".split(), 'this is'.split()]}) + + import transformers + input1 = ' '.join(ds[0]['words']) + input2 = ' '.join(ds[1]['words']) + tokenizer = transformers.GPT2Tokenizer.from_pretrained(weight_path) + idx_list1 = tokenizer.encode(input1) + idx_list2 = 
tokenizer.encode(input2) + + pad_value = tokenizer.encode('<|endoftext|>')[0] + tensor = torch.nn.utils.rnn.pad_sequence([torch.LongTensor(idx_list1), + torch.LongTensor(idx_list2)], + batch_first=True, + padding_value=pad_value) + gpt2 = transformers.GPT2Model.from_pretrained(weight_path, output_hidden_states=True) + gpt2.eval() + tensor = tensor + output, _, trans_hidden_states = gpt2(tensor, attention_mask=tensor.ne(pad_value)) + + encoder = GPT2WordPieceEncoder(model_dir_or_name=weight_path, layers=list(range(13))) + encoder.eval() + encoder.index_datasets(ds, field_name='words', add_endoftext=False) + word_pieces = torch.LongTensor(ds['word_pieces'].get([0, 1])) + + self.assertEqual(idx_list1, ds[0]['word_pieces']) + self.assertEqual(idx_list2, ds[1]['word_pieces']) + + word_pieces_res = encoder(word_pieces) + + self.assertEqual((torch.cat(trans_hidden_states, dim=-1)-word_pieces_res).sum(), 0) + + @unittest.skipIf(True, "Only for local usage") + def test_generate_small_gpt2(self): + # 因为GPT2使用的是GPT2的tokenizer,所以没办法直接生成权重,需要用点下面的方式 + weight_path = '' + tokenizer = GPT2Tokenizer.from_pretrained(weight_path) + + used_pairs = {} + used_vocab = {} + # 修改这里即可获得更多的sentence的数据 + sent1 = "This is a demo sentence" + sent2 = "another demo" + sent3 = 'this is a texta model vocab' + all_tokens = [] + + for sent in [sent1, sent2, sent3]: + tokens = [] + for word in sent.split(): + word = ' '+ word + token = "".join( + tokenizer.byte_encoder[b] for b in word.encode("utf-8") + ) + _token, _used_pairs = tokenizer.get_used_merge_pair_vocab(token) + tokens.extend(_token.split()) + used_pairs.update(_used_pairs) + all_tokens.extend(tokens) + token_ids = tokenizer.convert_tokens_to_ids(tokens) + used_vocab.update({t:i for t,i in zip(tokens, token_ids)}) + + print(used_pairs) + import json + with open('test/data_for_tests/embedding/small_gpt2/vocab.json', 'w') as f: + new_used_vocab = {} + for idx, key in enumerate(used_vocab.keys()): + new_used_vocab[key] = len(new_used_vocab) + new_used_vocab['<|endoftext|>'] = len(new_used_vocab) + for i in range(65, 91): + if chr(i) not in new_used_vocab: + new_used_vocab[chr(i)] = len(new_used_vocab) + for i in range(97, 123): + if chr(i) not in new_used_vocab: + new_used_vocab[chr(i)] = len(new_used_vocab) + + json.dump(new_used_vocab, f) + + with open('test/data_for_tests/embedding/small_gpt2/merges.txt', 'w') as f: + f.write('#version: small\n') + for k,v in sorted(sorted(used_pairs.items(), key=lambda kv:kv[1])): + f.write('{} {}\n'.format(k[0], k[1])) + + new_tokenizer = GPT2Tokenizer.from_pretrained('test/data_for_tests/embedding/small_gpt2') + new_all_tokens = [] + for sent in [sent1, sent2, sent3]: + tokens = new_tokenizer.tokenize(sent, add_prefix_space=True) + new_all_tokens.extend(tokens) + print(all_tokens, new_all_tokens) + + self.assertSequenceEqual(all_tokens, new_all_tokens) + config = { + "architectures": [ + "GPT2LMHeadModel" + ], + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "n_ctx": 20, + "n_embd": 16, + "n_head": 4, + "n_layer": 2, + "n_positions": 20, + "vocab_size": len(new_used_vocab) + } + with open('test/data_for_tests/embedding/small_gpt2/config.json', 'w') as f: + json.dump(config, f) + + # 生成更小的merges.txt与vocab.json, 方法是通过记录tokenizer中的值实现 + from fastNLP.modules.encoder.gpt2 import GPT2LMHeadModel, GPT2Config + + config = GPT2Config.from_pretrained('test/data_for_tests/embedding/small_gpt2') + + model = GPT2LMHeadModel(config) + torch.save(model.state_dict(), 
'test/data_for_tests/embedding/small_gpt2/small_pytorch_model.bin')
+        print(model(torch.LongTensor([[0,1,2,3]])))
+
+    def test_gpt2_word_piece_encoder(self):
+        # mainly checks that this runs without error
+        weight_path = 'test/data_for_tests/embedding/small_gpt2'
+        ds = DataSet({'words': ["this is a test sentence".split()]})
+        embed = GPT2WordPieceEncoder(model_dir_or_name=weight_path, word_dropout=0.1)
+        embed.index_datasets(ds, field_name='words')
+        self.assertTrue(ds.has_field('word_pieces'))
+        result = embed(torch.LongTensor([[1, 2, 3, 4]]))
+
+        embed = GPT2WordPieceEncoder(model_dir_or_name=weight_path, word_dropout=0.1,
+                                     language_model=True)
+        embed.index_datasets(ds, field_name='words')
+        self.assertTrue(ds.has_field('word_pieces'))
+        result = embed(torch.LongTensor([[1, 2, 3, 4]]))
+
+    def test_generate(self):
+        weight_path = 'test/data_for_tests/embedding/small_gpt2'
+
+        encoder = GPT2WordPieceEncoder(model_dir_or_name=weight_path, language_model=True)
+
+        # check that the different generation settings all run
+        print(encoder.generate_from_str('this', max_len=20, do_sample=False, num_beams=1, temperature=1, top_k=50, top_p=1.0,
+                                        repetition_penalty=1.0, length_penalty=1.0))
+        print(encoder.generate_from_str('this', max_len=20, do_sample=True, num_beams=3, temperature=1, top_k=50, top_p=1.0,
+                                        repetition_penalty=1.0, length_penalty=1.0))
+        print(encoder.generate_from_str('this', max_len=20, do_sample=True, num_beams=3, temperature=2, top_k=20, top_p=2.0,
+                                        repetition_penalty=2.0, length_penalty=1.5))
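For reference, a minimal usage sketch of the generation API exercised in test_generate above, assuming the small_gpt2 fixture shipped with this patch; the keyword arguments simply mirror the test and are not a complete list:

    from fastNLP.embeddings import GPT2WordPieceEncoder

    # language_model=True keeps the language-model head so that text generation is available
    encoder = GPT2WordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_gpt2',
                                   language_model=True)

    # greedy continuation of a prompt string; the tiny test weights only produce toy output
    print(encoder.generate_from_str('this is', max_len=20, do_sample=False, num_beams=1,
                                    temperature=1, top_k=50, top_p=1.0,
                                    repetition_penalty=1.0, length_penalty=1.0))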
diff --git a/test/embeddings/test_roberta_embedding.py b/test/embeddings/test_roberta_embedding.py
new file mode 100644
index 00000000..c2e80a8a
--- /dev/null
+++ b/test/embeddings/test_roberta_embedding.py
@@ -0,0 +1,252 @@
+
+import unittest
+
+import torch
+import os
+
+from fastNLP import DataSet, Vocabulary
+from fastNLP.embeddings.roberta_embedding import RobertaWordPieceEncoder, RobertaEmbedding
+
+
+class TestRobertWordPieceEncoder(unittest.TestCase):
+    @unittest.skipIf('TRAVIS' in os.environ, "Skip in travis")
+    def test_download(self):
+        vocab = Vocabulary().add_word_lst("This is a test .".split())
+        embed = RobertaEmbedding(vocab, model_dir_or_name='en')
+        words = torch.LongTensor([[2, 3, 4, 0]])
+        print(embed(words).size())
+
+        for pool_method in ['first', 'last', 'max', 'avg']:
+            for include_cls_sep in [True, False]:
+                embed = RobertaEmbedding(vocab, model_dir_or_name='en', pool_method=pool_method,
+                                         include_cls_sep=include_cls_sep)
+                print(embed(words).size())
+
+    def test_robert_word_piece_encoder(self):
+        # only needs to run without error
+        weight_path = 'test/data_for_tests/embedding/small_roberta'
+        encoder = RobertaWordPieceEncoder(model_dir_or_name=weight_path, word_dropout=0.1)
+        ds = DataSet({'words': ["this is a test . [SEP]".split()]})
+        encoder.index_datasets(ds, field_name='words')
+        self.assertTrue(ds.has_field('word_pieces'))
+        result = encoder(torch.LongTensor([[1,2,3,4]]))
+
+    def test_roberta_embed_eq_roberta_piece_encoder(self):
+        # mainly checks that the embedding output matches the word piece encoder output
+        weight_path = 'test/data_for_tests/embedding/small_roberta'
+        ds = DataSet({'words': ["this is a texta a sentence".split(), 'this is'.split()]})
+        encoder = RobertaWordPieceEncoder(model_dir_or_name=weight_path)
+        encoder.eval()
+        encoder.index_datasets(ds, field_name='words')
+        word_pieces = torch.LongTensor(ds['word_pieces'].get([0, 1]))
+        word_pieces_res = encoder(word_pieces)
+
+        vocab = Vocabulary()
+        vocab.from_dataset(ds, field_name='words')
+        vocab.index_dataset(ds, field_name='words', new_field_name='words')
+        ds.set_input('words')
+        words = torch.LongTensor(ds['words'].get([0, 1]))
+        embed = RobertaEmbedding(vocab, model_dir_or_name=weight_path,
+                                 pool_method='first', include_cls_sep=True, pooled_cls=False)
+        embed.eval()
+        words_res = embed(words)
+
+        # check that the word piece positions line up as expected
+        self.assertEqual((word_pieces_res[0, :5]-words_res[0, :5]).sum(), 0)
+        self.assertEqual((word_pieces_res[0, 6:]-words_res[0, 5:]).sum(), 0)
+        self.assertEqual((word_pieces_res[1, :3]-words_res[1, :3]).sum(), 0)
+
+    @unittest.skipIf(True, "Only for local debugging")
+    def test_eq_transformers(self):
+        weight_path = ''
+        ds = DataSet({'words': ["this is a texta model vocab".split(), 'this is'.split()]})
+        encoder = RobertaWordPieceEncoder(model_dir_or_name=weight_path)
+        encoder.eval()
+        encoder.index_datasets(ds, field_name='words')
+        word_pieces = torch.LongTensor(ds['word_pieces'].get([0, 1]))
+        word_pieces_res = encoder(word_pieces)
+
+        import transformers
+        input1 = ' '.join(ds[0]['words'])
+        input2 = ' '.join(ds[1]['words'])
+        tokenizer = transformers.RobertaTokenizer.from_pretrained(weight_path)
+        idx_list1 = tokenizer.encode(input1)
+        idx_list2 = tokenizer.encode(input2)
+        self.assertEqual(idx_list1, ds[0]['word_pieces'])
+        self.assertEqual(idx_list2, ds[1]['word_pieces'])
+
+        pad_value = tokenizer.encode('<pad>')[0]
+        tensor = torch.nn.utils.rnn.pad_sequence([torch.LongTensor(idx_list1),
+                                                  torch.LongTensor(idx_list2)],
+                                                 batch_first=True,
+                                                 padding_value=pad_value)
+        roberta = transformers.RobertaModel.from_pretrained(weight_path, output_hidden_states=True)
+        roberta.eval()
+        output, pooled_output, hidden_states = roberta(tensor, attention_mask=tensor.ne(pad_value))
+
+        self.assertEqual((output-word_pieces_res).sum(), 0)
+
+    @unittest.skipIf(True, "Only for local usage")
+    def test_generate_small_roberta(self):
+        """
+        Because RoBERTa reuses the GPT-2 tokenizer, the small test weights cannot be written out
+        directly; they have to be generated with the procedure below.
+
+        :return:
+        """
+        weight_path = ''
+        from fastNLP.modules.tokenizer import RobertaTokenizer
+        tokenizer = RobertaTokenizer.from_pretrained(weight_path)
+
+        used_pairs = {}
+        used_vocab = {}
+        # edit the sentences here to cover more data
+        sent1 = "This is a demo sentence"
+        sent2 = "another demo"
+        sent3 = 'this is a texta model vocab'
+        all_tokens = []
+
+        for sent in [sent1, sent2, sent3]:
+            tokens = []
+            for word in sent.split():
+                word = ' '+ word
+                token = "".join(
+                    tokenizer.byte_encoder[b] for b in word.encode("utf-8")
+                )
+                _token, _used_pairs = tokenizer.get_used_merge_pair_vocab(token)
+                tokens.extend(_token.split())
+                used_pairs.update(_used_pairs)
+            all_tokens.extend(tokens)
+            token_ids = tokenizer.convert_tokens_to_ids(tokens)
+            used_vocab.update({t:i for t,i in zip(tokens, token_ids)})
+
+        import json
+        with open('test/data_for_tests/embedding/small_roberta/vocab.json', 
'w') as f:
+            new_used_vocab = {}
+            for token in ['<s>', '<pad>', '</s>', '<unk>', '<mask>']:  # <pad> has to be index 1
+                new_used_vocab[token] = len(new_used_vocab)
+            for i in range(65, 91):
+                if chr(i) not in new_used_vocab:
+                    new_used_vocab[chr(i)] = len(new_used_vocab)
+            for i in range(97, 123):
+                if chr(i) not in new_used_vocab:
+                    new_used_vocab[chr(i)] = len(new_used_vocab)
+            for idx, key in enumerate(used_vocab.keys()):
+                if key not in new_used_vocab:
+                    new_used_vocab[key] = len(new_used_vocab)
+            json.dump(new_used_vocab, f)
+
+        with open('test/data_for_tests/embedding/small_roberta/merges.txt', 'w') as f:
+            f.write('#version: tiny\n')
+            for k,v in sorted(sorted(used_pairs.items(), key=lambda kv:kv[1])):
+                f.write('{} {}\n'.format(k[0], k[1]))
+
+        config = {
+            "architectures": [
+                "RobertaForMaskedLM"
+            ],
+            "attention_probs_dropout_prob": 0.1,
+            "finetuning_task": None,
+            "hidden_act": "gelu",
+            "hidden_dropout_prob": 0.1,
+            "hidden_size": 16,
+            "initializer_range": 0.02,
+            "intermediate_size": 20,
+            "layer_norm_eps": 1e-05,
+            "max_position_embeddings": 20,
+            "num_attention_heads": 4,
+            "num_hidden_layers": 2,
+            "num_labels": 2,
+            "output_attentions": False,
+            "output_hidden_states": False,
+            "torchscript": False,
+            "type_vocab_size": 1,
+            "vocab_size": len(new_used_vocab)
+        }
+        with open('test/data_for_tests/embedding/small_roberta/config.json', 'w') as f:
+            json.dump(config, f)
+
+        new_tokenizer = RobertaTokenizer.from_pretrained('test/data_for_tests/embedding/small_roberta')
+        new_all_tokens = []
+        for sent in [sent1, sent2, sent3]:
+            tokens = new_tokenizer.tokenize(sent, add_prefix_space=True)
+            new_all_tokens.extend(tokens)
+        print(all_tokens, new_all_tokens)
+
+        self.assertSequenceEqual(all_tokens, new_all_tokens)
+
+        # the smaller merges.txt and vocab.json above are produced by recording which entries the tokenizer actually uses
+        from fastNLP.modules.encoder.roberta import RobertaModel, BertConfig
+
+        config = BertConfig.from_json_file('test/data_for_tests/embedding/small_roberta/config.json')
+
+        model = RobertaModel(config)
+        torch.save(model.state_dict(), 'test/data_for_tests/embedding/small_roberta/small_pytorch_model.bin')
+        print(model(torch.LongTensor([[0,1,2,3]])))
+
+
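For reference, a minimal sketch of the word-piece workflow covered by the encoder tests above, assuming the small_roberta fixture from this patch (in practice a pretrained name such as 'en' would be passed instead):

    import torch
    from fastNLP import DataSet
    from fastNLP.embeddings.roberta_embedding import RobertaWordPieceEncoder

    ds = DataSet({'words': ["this is a demo sentence".split()]})

    encoder = RobertaWordPieceEncoder(model_dir_or_name='test/data_for_tests/embedding/small_roberta')
    # index_datasets adds a 'word_pieces' field holding the BPE indices of every instance
    encoder.index_datasets(ds, field_name='words')

    word_pieces = torch.LongTensor(ds['word_pieces'].get([0]))
    reps = encoder(word_pieces)  # (batch_size, num_word_pieces, hidden_size); hidden_size is 16 for this fixture
    print(reps.size())
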
+class TestRobertaEmbedding(unittest.TestCase):
+    def test_roberta_embedding_1(self):
+        weight_path = 'test/data_for_tests/embedding/small_roberta'
+        vocab = Vocabulary().add_word_lst("this is a test . [SEP] NotInRoberta".split())
+        embed = RobertaEmbedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1)
+        requires_grad = embed.requires_grad
+        embed.requires_grad = not requires_grad
+        embed.train()
+        words = torch.LongTensor([[2, 3, 4, 1]])
+        result = embed(words)
+        self.assertEqual(result.size(), (1, 4, 16))
+
+        embed = RobertaEmbedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1,
+                                 only_use_pretrain_bpe=True)
+        embed.eval()
+        words = torch.LongTensor([[2, 3, 4, 1]])
+        result = embed(words)
+        self.assertEqual(result.size(), (1, 4, 16))
+
+        # over-long input is truncated automatically instead of raising an error
+        embed = RobertaEmbedding(vocab, model_dir_or_name=weight_path, word_dropout=0.1,
+                                 only_use_pretrain_bpe=True, auto_truncate=True)
+        words = torch.LongTensor([[2, 3, 4, 1]*10,
+                                  [2, 3]+[0]*38])
+        result = embed(words)
+        self.assertEqual(result.size(), (2, 40, 16))
+
+    def test_roberta_ebembedding_2(self):
+        # check that only_use_pretrain_vocab and truncate_embed work as expected
+        Embedding = RobertaEmbedding
+        weight_path = 'test/data_for_tests/embedding/small_roberta'
+        vocab = Vocabulary().add_word_lst("this is a texta and".split())
+        embed1 = Embedding(vocab, model_dir_or_name=weight_path, layers=list(range(3)),
+                           only_use_pretrain_bpe=True, truncate_embed=True, min_freq=1)
+        # embed_bpe_vocab_size = len(vocab)-1 + 2  # NotInBERT is excluded; ##a and [CLS] are added
+        # self.assertEqual(embed_bpe_vocab_size, len(embed1.model.tokenzier.vocab))
+
+        embed2 = Embedding(vocab, model_dir_or_name=weight_path, layers=list(range(3)),
+                           only_use_pretrain_bpe=True, truncate_embed=False, min_freq=1)
+        # embed_bpe_vocab_size = num_word  # NotInBERT is excluded
+        # self.assertEqual(embed_bpe_vocab_size, len(embed2.model.tokenzier.vocab))
+
+        embed3 = Embedding(vocab, model_dir_or_name=weight_path, layers=list(range(3)),
+                           only_use_pretrain_bpe=False, truncate_embed=True, min_freq=1)
+        # embed_bpe_vocab_size = len(vocab)+2  # ##a and [CLS] are added
+        # self.assertEqual(embed_bpe_vocab_size, len(embed3.model.tokenzier.vocab))
+
+        embed4 = Embedding(vocab, model_dir_or_name=weight_path, layers=list(range(3)),
+                           only_use_pretrain_bpe=False, truncate_embed=False, min_freq=1)
+        # embed_bpe_vocab_size = num_word+1  # ##a is added
+        # self.assertEqual(embed_bpe_vocab_size, len(embed4.model.tokenzier.vocab))
+
+        # in every setting above the embeddings of the same input should be identical
+        embed1.eval()
+        embed2.eval()
+        embed3.eval()
+        embed4.eval()
+        tensor = torch.LongTensor([[vocab.to_index(w) for w in 'this is a texta and'.split()]])
+        t1 = embed1(tensor)
+        t2 = embed2(tensor)
+        t3 = embed3(tensor)
+        t4 = embed4(tensor)
+
+        self.assertEqual((t1-t2).sum(), 0)
+        self.assertEqual((t1-t3).sum(), 0)
+        self.assertEqual((t1-t4).sum(), 0)
diff --git a/test/modules/encoder/test_bert.py b/test/modules/encoder/test_bert.py
new file mode 100644
index 00000000..35802811
--- /dev/null
+++ b/test/modules/encoder/test_bert.py
@@ -0,0 +1,24 @@
+import unittest
+
+
+from fastNLP.modules import BertTokenizer
+
+
+class TestBertTokenizer(unittest.TestCase):
+    def test_run(self):
+        # test the two supported ways of calling encode (raw string or pre-split word list)
+        tokenizer = BertTokenizer.from_pretrained('test/data_for_tests/embedding/small_bert')
+
+        tokens1 = tokenizer.encode("This is a demo")
+        tokens2 = tokenizer.encode("This is a demo", add_special_tokens=False)
+        tokens3 = tokenizer.encode("This is a demo".split())
+        tokens4 = tokenizer.encode("This is a demo".split(), add_special_tokens=False)
+
+        self.assertEqual(len(tokens1)-2, len(tokens2))
+        self.assertEqual(len(tokens3)-2, len(tokens4))
+
+        self.assertEqual(tokens1[0], tokenizer.cls_index)
+        self.assertEqual(tokens1[-1], tokenizer.sep_index)
+
+
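For reference, a minimal usage sketch of the BertTokenizer behaviour asserted in the new test above, assuming the small_bert fixture from the test data; in practice a pretrained model name or directory would be passed to from_pretrained:

    from fastNLP.modules import BertTokenizer

    tokenizer = BertTokenizer.from_pretrained('test/data_for_tests/embedding/small_bert')

    # encode accepts either a raw string or a pre-tokenized list of words
    ids_from_str = tokenizer.encode("This is a demo")
    ids_from_list = tokenizer.encode("This is a demo".split())

    # the encoded sequence is wrapped in [CLS] ... [SEP]
    assert ids_from_str[0] == tokenizer.cls_index
    assert ids_from_str[-1] == tokenizer.sep_index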