You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

StanfordParser.java 1.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. package nlp.tool;
  2. import java.io.StringReader;
  3. import java.util.List;
  4. import edu.stanford.nlp.ling.CoreLabel;
  5. import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
  6. import edu.stanford.nlp.process.CoreLabelTokenFactory;
  7. import edu.stanford.nlp.process.PTBTokenizer;
  8. import edu.stanford.nlp.trees.GrammaticalStructure;
  9. import edu.stanford.nlp.trees.GrammaticalStructureFactory;
  10. import edu.stanford.nlp.trees.PennTreebankLanguagePack;
  11. import edu.stanford.nlp.trees.Tree;
  12. import edu.stanford.nlp.trees.TreebankLanguagePack;
  13. import edu.stanford.nlp.trees.TypedDependency;
  14. import edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure;
  15. public class StanfordParser {
  16. private LexicalizedParser lp;
  17. private ChineseGrammaticalStructure gs;
  18. // private TokenizerFactory<CoreLabel> tokenizerFactory;
  19. // private TreebankLanguagePack tlp;
  20. // private GrammaticalStructureFactory gsf;
  21. public StanfordParser() {
  22. // lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
  23. // tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
  24. // tlp = new PennTreebankLanguagePack();
  25. // gsf = tlp.grammaticalStructureFactory();
  26. lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz");
  27. }
  28. // public GrammaticalStructure getGrammaticalStructure (String sentence) {
  29. // List<CoreLabel> rawWords2 =
  30. // tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
  31. //
  32. // Tree parse = lp.apply(rawWords2);
  33. //
  34. // return gsf.newGrammaticalStructure(parse);
  35. // }
  36. public List<TypedDependency> getTypedDependencyList(List<CoreLabel> rawWords)
  37. {
  38. Tree parse = lp.apply(rawWords);
  39. gs = new ChineseGrammaticalStructure(parse);
  40. return gs.typedDependenciesCCprocessed();
  41. }
  42. }

The GAnswer system is a natural language QA system developed by the Institute of Computer Science & Technology Data Management Lab, Peking University, led by Prof. Zou Lei. GAnswer is able to translate natural language questions to query graphs containing semant