You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

test_fastNLP.py 3.4 kB

6 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. import sys
  2. sys.path.append("..")
  3. from fastNLP.fastnlp import FastNLP
  4. from fastNLP.fastnlp import interpret_word_seg_results, interpret_cws_pos_results
  # Hard-coded absolute directories handed to FastNLP(model_dir=...) by the demo
  # functions below; they must exist on the machine running this script.
  # Used by word_seg().
  5. PATH_TO_CWS_PICKLE_FILES = "/home/zyfeng/fastNLP/reproduction/chinese_word_segment/save/"
  # Used by pos_tag().
  6. PATH_TO_POS_TAG_PICKLE_FILES = "/home/zyfeng/data/crf_seg/"
  # Used by text_classify().
  7. PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES = "/home/zyfeng/data/text_classify/"
  def word_seg():
      """Run the Chinese word segmentation demo on three sample sentences.

      Creates a FastNLP instance over PATH_TO_CWS_PICKLE_FILES, loads
      "cws_basic_model" using the "POS_test" section of cws.cfg, prints the raw
      results, and then prints the interpreted segmentation of each sentence.
      """
      sentences = [
          "这是最好的基于深度学习的中文分词系统。",
          "大王叫我来巡山。",
          "我党多年来致力于改善人民生活水平。",
      ]

      segmenter = FastNLP(model_dir=PATH_TO_CWS_PICKLE_FILES)
      # NOTE(review): the section name says "POS_test" although this is the
      # word segmentation model -- confirm it matches cws.cfg.
      segmenter.load("cws_basic_model", config_file="cws.cfg", section_name="POS_test")

      predictions = segmenter.run(sentences)
      print(predictions)

      for sentence_result in predictions:
          # Each entry is indexed as (item[0], item[1]); split the two columns.
          pairs = list(sentence_result)
          chars = [pair[0] for pair in pairs]
          tags = [pair[1] for pair in pairs]
          print(interpret_word_seg_results(chars, tags))
  def text_class():
      """Run the text classification demo on a single sentence.

      Creates a FastNLP instance over "./data_for_tests/", loads
      "text_class_model", prints the result of running one sentence through
      it, and then prints a completion message.
      """
      classifier = FastNLP("./data_for_tests/")
      classifier.load("text_class_model")

      sentence = "这是最好的基于深度学习的中文分词系统。"
      print(classifier.run(sentence))
      print("FastNLP finished!")
  def test_word_seg_interpret():
      """Print interpret_word_seg_results() for one hand-labelled sentence.

      The sample is a fixed list of (character, tag) pairs; the characters and
      the tags are passed to the interpreter as two parallel lists.
      """
      tagged_sentence = [
          ('这', 'S'), ('是', 'S'), ('最', 'S'), ('好', 'S'), ('的', 'S'),
          ('基', 'B'), ('于', 'E'), ('深', 'B'), ('度', 'E'),
          ('学', 'B'), ('习', 'E'), ('的', 'S'), ('中', 'B'), ('文', 'E'),
          ('分', 'B'), ('词', 'E'), ('系', 'B'), ('统', 'E'),
          ('。', 'S'),
      ]

      chars, labels = [], []
      for char, label in tagged_sentence:
          chars.append(char)
          labels.append(label)

      print(interpret_word_seg_results(chars, labels))
  def test_interpret_cws_pos_results():
      """Print interpret_cws_pos_results() for one hand-labelled sentence.

      The sample is a fixed list of (character, tag) pairs whose tags have the
      form "<letter>-<suffix>" (for example 'B-nz'); the characters and the
      tags are passed to the interpreter as two parallel lists.
      """
      tagged_sentence = [
          ('这', 'S-r'), ('是', 'S-v'), ('最', 'S-d'), ('好', 'S-a'), ('的', 'S-u'),
          ('基', 'B-p'), ('于', 'E-p'), ('深', 'B-d'), ('度', 'E-d'),
          ('学', 'B-v'), ('习', 'E-v'), ('的', 'S-u'),
          ('中', 'B-nz'), ('文', 'E-nz'), ('分', 'B-vn'), ('词', 'E-vn'),
          ('系', 'B-n'), ('统', 'E-n'), ('。', 'S-w'),
      ]

      chars, labels = [], []
      for char, label in tagged_sentence:
          chars.append(char)
          labels.append(label)

      print(interpret_cws_pos_results(chars, labels))
  def pos_tag():
      """Run the POS tagging demo on three sample sentences.

      Creates a FastNLP instance over PATH_TO_POS_TAG_PICKLE_FILES, loads
      "pos_tag_model" using the "pos_tag_model" section of pos_tag.config, and
      prints the interpreted result of each sentence.
      """
      sentences = [
          "这是最好的基于深度学习的中文分词系统。",
          "大王叫我来巡山。",
          "我党多年来致力于改善人民生活水平。",
      ]

      tagger = FastNLP(model_dir=PATH_TO_POS_TAG_PICKLE_FILES)
      tagger.load("pos_tag_model", config_file="pos_tag.config", section_name="pos_tag_model")

      for sentence_result in tagger.run(sentences):
          # Each entry is indexed as (item[0], item[1]); split the two columns.
          pairs = list(sentence_result)
          chars = [pair[0] for pair in pairs]
          tags = [pair[1] for pair in pairs]
          print(interpret_cws_pos_results(chars, tags))
  def text_classify():
      """Run the text classification demo on three sample headlines.

      Creates a FastNLP instance over PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES,
      loads "text_classify_model" using the "model" section of
      text_classify.cfg, and prints the results for all headlines at once.
      """
      headlines = [
          "世界物联网大会明日在京召开龙头股启动在即",
          "乌鲁木齐市新增一处城市中心旅游目的地",
          "朱元璋的大明朝真的源于明教吗?——告诉你一个真实的“明教”",
      ]

      classifier = FastNLP(model_dir=PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES)
      classifier.load("text_classify_model", config_file="text_classify.cfg", section_name="model")

      print(classifier.run(headlines))
      # Output recorded by the original author:
      # ['finance', 'travel', 'history']
  # Script entry point: only the text classification demo runs; the other demo
  # functions in this file are not called from here.
  70. if __name__ == "__main__":
  71. text_classify()

一款轻量级的自然语言处理(NLP)工具包,目标是减少用户项目中的工程型代码,例如数据处理循环、训练循环、多卡运行等