You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_summary.py 4.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. # __author__="Danqing Wang"
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. # ==============================================================================
  17. import pytest
  18. import os
  19. import pytest
  20. from fastNLP.io import DataBundle
  21. from fastNLP.io.pipe.summarization import ExtCNNDMPipe
  class TestRunExtCNNDMPipe:
      """Smoke tests for ``ExtCNNDMPipe`` against the CNN/DM fixture data.

      Both tests run the same five-pipe scenario; ``test_load_proc``
      additionally passes ``num_proc=2`` to every pipe it constructs.
      """

      # Relative fixture paths: they resolve against the current working
      # directory of the test run.
      DATA_DIR = 'tests/data_for_tests/io/cnndm'
      VOCAB_FILE = 'tests/data_for_tests/io/cnndm/vocab'
      TRAIN_FILE = os.path.join(DATA_DIR, 'train.cnndm.jsonl')

      def _check_pipes(self, **extra):
          """Run the shared load scenario.

          Args:
              **extra: Keyword arguments forwarded unchanged to every
                  ``ExtCNNDMPipe`` constructed here (e.g. ``num_proc=2``).

          Raises:
              AssertionError: If a successful load does not return a
                  ``DataBundle``, or (via ``pytest.raises``) if the
                  expected ``RuntimeError`` is not raised.
          """
          settings = dict(vocab_size=100000, sent_max_len=100,
                          doc_max_timesteps=50, **extra)

          # Directory input with an explicit vocabulary, without and with
          # ``domain=True``. Both pipes are built before either is run.
          plain = ExtCNNDMPipe(vocab_path=self.VOCAB_FILE, **settings)
          with_domain = ExtCNNDMPipe(vocab_path=self.VOCAB_FILE, domain=True,
                                     **settings)
          plain_bundle = plain.process_from_file(self.DATA_DIR)
          domain_bundle = with_domain.process_from_file(self.DATA_DIR)
          assert isinstance(plain_bundle, DataBundle)
          assert isinstance(domain_bundle, DataBundle)

          # Directory input without ``vocab_path`` must still load
          # (presumably the vocabulary is picked up from the directory
          # itself -- confirm against ExtCNNDMPipe).
          no_vocab = ExtCNNDMPipe(domain=True, **settings)
          assert isinstance(no_vocab.process_from_file(self.DATA_DIR),
                            DataBundle)

          # Single-file input without ``vocab_path`` must fail. Construction
          # is deliberately inside the block, so the error may come from
          # either step.
          with pytest.raises(RuntimeError):
              failing = ExtCNNDMPipe(**settings)
              failing.process_from_file(self.TRAIN_FILE)

          # Single-file input with an explicit vocabulary must load.
          single_file = ExtCNNDMPipe(vocab_path=self.VOCAB_FILE, **settings)
          assert isinstance(single_file.process_from_file(self.TRAIN_FILE),
                            DataBundle)

      def test_load(self):
          """Run the scenario without passing ``num_proc`` to the pipes."""
          self._check_pipes()

      def test_load_proc(self):
          """Run the same scenario with ``num_proc=2`` on every pipe."""
          self._check_pipes(num_proc=2)