You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_bert.py 729 B

123456789101112131415161718192021222324
  1. import unittest
  2. from fastNLP.modules import BertTokenizer
  3. class TestBertTokenizer(unittest.TestCase):
  4. def test_run(self):
  5. # 测试支持的两种encode方式
  6. tokenizer = BertTokenizer.from_pretrained('test/data_for_tests/embedding/small_bert')
  7. tokens1 = tokenizer.encode("This is a demo")
  8. tokens2 = tokenizer.encode("This is a demo")
  9. tokens3 = tokenizer.encode("This is a demo".split())
  10. tokens4 = tokenizer.encode("This is a demo".split())
  11. self.assertEqual(len(tokens1)-2, len(tokens2))
  12. self.assertEqual(len(tokens3)-2, len(tokens4))
  13. self.assertEqual(tokens1[0], tokenizer.cls_index)
  14. self.assertEqual(tokens1[-1], tokenizer.sep_index)