You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_summary.py 2.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. # __author__="Danqing Wang"
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. # ==============================================================================
  17. import unittest
  18. import os
  19. from fastNLP.io import DataBundle
  20. from fastNLP.io.pipe.summarization import ExtCNNDMPipe
  21. class TestRunExtCNNDMPipe(unittest.TestCase):
  22. def test_load(self):
  23. data_dir = 'test/data_for_tests/io/cnndm'
  24. vocab_size = 100000
  25. VOCAL_FILE = 'test/data_for_tests/io/cnndm/vocab'
  26. sent_max_len = 100
  27. doc_max_timesteps = 50
  28. dbPipe = ExtCNNDMPipe(vocab_size=vocab_size,
  29. vocab_path=VOCAL_FILE,
  30. sent_max_len=sent_max_len,
  31. doc_max_timesteps=doc_max_timesteps)
  32. dbPipe2 = ExtCNNDMPipe(vocab_size=vocab_size,
  33. vocab_path=VOCAL_FILE,
  34. sent_max_len=sent_max_len,
  35. doc_max_timesteps=doc_max_timesteps,
  36. domain=True)
  37. db = dbPipe.process_from_file(data_dir)
  38. db2 = dbPipe2.process_from_file(data_dir)
  39. self.assertTrue(isinstance(db, DataBundle))
  40. self.assertTrue(isinstance(db2, DataBundle))
  41. dbPipe3 = ExtCNNDMPipe(vocab_size=vocab_size,
  42. sent_max_len=sent_max_len,
  43. doc_max_timesteps=doc_max_timesteps,
  44. domain=True)
  45. db3 = dbPipe3.process_from_file(data_dir)
  46. self.assertTrue(isinstance(db3, DataBundle))
  47. with self.assertRaises(RuntimeError):
  48. dbPipe4 = ExtCNNDMPipe(vocab_size=vocab_size,
  49. sent_max_len=sent_max_len,
  50. doc_max_timesteps=doc_max_timesteps)
  51. db4 = dbPipe4.process_from_file(os.path.join(data_dir, 'train.cnndm.jsonl'))
  52. dbPipe5 = ExtCNNDMPipe(vocab_size=vocab_size,
  53. vocab_path=VOCAL_FILE,
  54. sent_max_len=sent_max_len,
  55. doc_max_timesteps=doc_max_timesteps,)
  56. db5 = dbPipe5.process_from_file(os.path.join(data_dir, 'train.cnndm.jsonl'))
  57. self.assertIsInstance(db5, DataBundle)