From 2dee67129ad823a312c5aefe4f65e161cb0bd712 Mon Sep 17 00:00:00 2001 From: yh_cc Date: Sun, 12 Apr 2020 21:13:33 +0800 Subject: [PATCH] Solve the GPTTokenizer dependency problem --- fastNLP/modules/tokenizer/gpt2_tokenizer.py | 21 --------------------- requirements.txt | 3 ++- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/fastNLP/modules/tokenizer/gpt2_tokenizer.py b/fastNLP/modules/tokenizer/gpt2_tokenizer.py index fec8ef15..9cfa8f2c 100644 --- a/fastNLP/modules/tokenizer/gpt2_tokenizer.py +++ b/fastNLP/modules/tokenizer/gpt2_tokenizer.py @@ -71,24 +71,6 @@ VOCAB_FILES_NAMES = { } -PRETRAINED_VOCAB_FILES_MAP = { - "vocab_file": { - "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json", - "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-vocab.json", - "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-vocab.json", - "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-vocab.json", - "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-vocab.json", - }, - "merges_file": { - "gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt", - "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-merges.txt", - "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-merges.txt", - "gpt2-xl": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-xl-merges.txt", - "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-merges.txt", - }, -} - - PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { "en-small": 1024, 'en': 1024, @@ -128,9 +110,6 @@ class GPT2Tokenizer: the spaces at the beginning of a string: `tokenizer.decode(tokenizer.encode(" Hello")) = "Hello"` """ - vocab_files_names = VOCAB_FILES_NAMES - pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP - SPECIAL_TOKENS_ATTRIBUTES = [ "bos_token", "eos_token", diff --git a/requirements.txt b/requirements.txt index b07aed3f..242301be 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ nltk>=3.4.1 prettytable>=0.7.2 requests spacy -prettytable>=0.7.2 \ No newline at end of file +prettytable>=0.7.2 +regex!=2019.12.17 \ No newline at end of file