You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

GAnswer.java 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. package qa;
  2. import java.io.*;
  3. import java.net.Socket;
  4. import java.util.ArrayList;
  5. import java.util.Collections;
  6. import java.util.HashSet;
  7. import java.util.List;
  8. import jgsc.GstoreConnector;
  9. import log.QueryLogger;
  10. import nlp.ds.Sentence;
  11. import nlp.ds.Sentence.SentenceType;
  12. import qa.parsing.QuestionParsing;
  13. import qa.parsing.BuildQueryGraph;
  14. import rdf.Sparql;
  15. import utils.FileUtil;
  16. import addition.AddtionalFix;
  17. import qa.Globals;
  18. public class GAnswer {
  19. public static final int MAX_SPQ_NUM = 3;
  20. public static void init() {
  21. System.out.println("gAnswer2 init ...");
  22. Globals.init();
  23. System.out.println("gAnswer2 init ... ok!");
  24. }
  25. public QueryLogger getSparqlList(String input)
  26. {
  27. QueryLogger qlog = null;
  28. try
  29. {
  30. if (input.length() <= 5)
  31. return null;
  32. System.out.println("[Input:] "+input);
  33. // step 0: Node (entity & type & literal) Recognition
  34. long t0 = System.currentTimeMillis(), t, NRtime;
  35. Query query = new Query(input);
  36. qlog = new QueryLogger(query);
  37. ArrayList<Sparql> rankedSparqls = new ArrayList<Sparql>();
  38. NRtime = (int)(System.currentTimeMillis()-t0);
  39. System.out.println("step0 [Node Recognition] : "+ NRtime +"ms");
  40. // Try to solve each NR plan, and combine the ranked SPARQLs.
  41. // We only reserve LOG of BEST NR plan for convenience.
  42. for(int i=query.sList.size()-1; i>=0; i--)
  43. {
  44. Sentence possibleSentence = query.sList.get(i);
  45. qlog.reloadSentence(possibleSentence);
  46. // qlog.isMaltParserUsed = true;
  47. // LOG
  48. System.out.println("transQ: "+qlog.s.plainText);
  49. qlog.NRlog = query.preLog;
  50. qlog.SQGlog = "Id: "+query.queryId+"\nQuery: "+query.NLQuestion+"\n";
  51. qlog.SQGlog += qlog.NRlog;
  52. qlog.timeTable.put("step0", (int)NRtime);
  53. // step 1: question parsing (dependency tree, sentence type)
  54. t = System.currentTimeMillis();
  55. QuestionParsing step1 = new QuestionParsing();
  56. step1.process(qlog);
  57. qlog.timeTable.put("step1", (int)(System.currentTimeMillis()-t));
  58. // step 2: build query graph (structure construction, relation extraction, top-k join)
  59. t = System.currentTimeMillis();
  60. BuildQueryGraph step2 = new BuildQueryGraph();
  61. step2.process(qlog);
  62. // step2.processEXP(qlog);
  63. qlog.timeTable.put("step2", (int)(System.currentTimeMillis()-t));
  64. // step 3: some fix (such as "one-node" or "ask-one-triple") and aggregation
  65. t = System.currentTimeMillis();
  66. AddtionalFix step3 = new AddtionalFix();
  67. step3.process(qlog);
  68. // Collect SPARQLs.
  69. rankedSparqls.addAll(qlog.rankedSparqls);
  70. qlog.timeTable.put("step3", (int)(System.currentTimeMillis()-t));
  71. }
  72. // deduplicate in SPARQL
  73. for(Sparql spq: rankedSparqls)
  74. spq.deduplicate();
  75. // Sort (descending order).
  76. Collections.sort(rankedSparqls);
  77. qlog.rankedSparqls = rankedSparqls;
  78. System.out.println("number of rankedSparqls = " + qlog.rankedSparqls.size());
  79. // Detect question focus.
  80. for (int i=0; i<qlog.rankedSparqls.size(); i++)
  81. {
  82. // First detect by SPARQLs.
  83. Sparql spq = qlog.rankedSparqls.get(i);
  84. String questionFocus = QuestionParsing.detectQuestionFocus(spq);
  85. // If failed, use TARGET directly.
  86. if(questionFocus == null)
  87. questionFocus = "?"+qlog.target.originalForm;
  88. spq.questionFocus = questionFocus;
  89. }
  90. return qlog;
  91. }
  92. catch (Exception e) {
  93. e.printStackTrace();
  94. return qlog;
  95. }
  96. }
  97. public String getStdSparqlWoPrefix(QueryLogger qlog, Sparql curSpq)
  98. {
  99. if(qlog == null || curSpq == null)
  100. return null;
  101. String res = "";
  102. if (qlog.s.sentenceType==SentenceType.GeneralQuestion)
  103. res += "ask where";
  104. else
  105. {
  106. if(!curSpq.countTarget)
  107. res += ("select DISTINCT " + curSpq.questionFocus + " where");
  108. else
  109. res += ("select COUNT(DISTINCT " + curSpq.questionFocus + ") where");
  110. }
  111. res += "\n";
  112. res += curSpq.toStringForGStore();
  113. if(curSpq.moreThanStr != null)
  114. {
  115. res += curSpq.moreThanStr+"\n";
  116. }
  117. if(curSpq.mostStr != null)
  118. {
  119. res += curSpq.mostStr+"\n";
  120. }
  121. return res;
  122. }
  123. // Notice, this will change the original SPARQL.
  124. public Sparql getUntypedSparql (Sparql spq)
  125. {
  126. if(spq == null)
  127. return null;
  128. spq.removeAllTypeInfo();
  129. if (spq.tripleList.size() == 0) return null;
  130. return spq;
  131. }
  132. /**
  133. * Get answers from Virtuoso + DBpedia, this function require OLD version Virtuoso + Virtuoso Handler.
  134. * Virtuoso can solve "Aggregation"
  135. **/
  136. // public Matches getAnswerFromVirtuoso (QueryLogger qlog, Sparql spq)
  137. // {
  138. // Matches ret = new Matches();
  139. // try
  140. // {
  141. // Socket socket = new Socket(Globals.QueryEngineIP, 1112);
  142. // DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(socket.getOutputStream()));
  143. //
  144. // //formatting SPARQL & evaluate
  145. // String formatedSpq = spq.toStringForVirtuoso();
  146. // dos.writeUTF(formatedSpq);
  147. // dos.flush();
  148. // System.out.println("STD SPARQL:\n"+formatedSpq+"\n");
  149. //
  150. // ArrayList<String> rawLines = new ArrayList<String>();
  151. // DataInputStream dis = new DataInputStream(new BufferedInputStream(socket.getInputStream()));
  152. // while (true)
  153. // {
  154. // String line = dis.readUTF();
  155. // if (line.equals("[[finish]]")) break;
  156. // rawLines.add(line);
  157. // }
  158. //
  159. // // ASK query was translated to SELECT query, whose answer need translation.
  160. // // It is no need to translate, use "ASK WHERE" directly ! 2018-12-11
  161. // if(qlog.s.sentenceType == SentenceType.GeneralQuestion)
  162. // {
  163. // ret.answersNum = 1;
  164. // ret.answers = new String[1][1];
  165. // if(rawLines.size() == 0)
  166. // {
  167. // ret.answers[0][0] = "general:false";
  168. // }
  169. // else
  170. // {
  171. // ret.answers[0][0] = "general:true";
  172. // }
  173. // System.out.println("general question answer:" + ret.answers[0][0]);
  174. // dos.close();
  175. // dis.close();
  176. // socket.close();
  177. // return ret;
  178. // }
  179. //
  180. // //select but no results
  181. // if (rawLines.size() == 0)
  182. // {
  183. // ret.answersNum = 0;
  184. // dos.close();
  185. // dis.close();
  186. // socket.close();
  187. // return ret;
  188. // }
  189. //
  190. // int ansNum = rawLines.size();
  191. // int varNum = variables.size();
  192. // ArrayList<String> valist = new ArrayList<String>(variables);
  193. // ret.answers = new String[ansNum][varNum];
  194. //
  195. // System.out.println("ansNum=" + ansNum);
  196. // System.out.println("varNum=" + varNum);
  197. // for (int i=0;i<rawLines.size();i++)
  198. // {
  199. // String[] ansLineContents = rawLines.get(i).split("\t");
  200. // for (int j=0;j<varNum;j++)
  201. // {
  202. // ret.answers[i][j] = valist.get(j) + ":" + ansLineContents[j];
  203. // }
  204. // }
  205. //
  206. // dos.close();
  207. // dis.close();
  208. // socket.close();
  209. // }
  210. // catch (Exception e) {
  211. // e.printStackTrace();
  212. // }
  213. //
  214. // return ret;
  215. // }
  216. public Matches getAnswerFromGStore2 (Sparql spq)
  217. {
  218. // modified by Lin Yinnian using ghttp - 2018-9-28
  219. GstoreConnector gc = new GstoreConnector("172.31.222.90", 9001);
  220. String answer = gc.query("root", "123456", "dbpedia16", spq.toStringForGStore2());
  221. System.out.println(answer);
  222. String[] rawLines = answer.split("\n");
  223. Matches ret = new Matches();
  224. if (rawLines.length == 0 || rawLines[0].equals("[empty result]"))
  225. {
  226. ret.answersNum = 0;
  227. return ret;
  228. }
  229. int ansNum = rawLines.length-1;
  230. String[] varLineContents = rawLines[0].split("\t");
  231. int varNum = varLineContents.length;
  232. ret.answers = new String[ansNum][varNum];
  233. ret.answersNum = ansNum;
  234. System.out.println("ansNum=" + ansNum);
  235. System.out.println("varNum=" + varNum);
  236. System.out.println("rawLines.length=" + rawLines.length);
  237. for (int i=1;i<rawLines.length;i++)
  238. {
  239. // if one answer of rawAnswer contains '\n', it may leads error so we just return.
  240. if(i-1 >= ansNum)
  241. break;
  242. String[] ansLineContents = rawLines[i].split("\t");
  243. for (int j=0;j<varNum;j++)
  244. {
  245. ret.answers[i-1][j] = varLineContents[j] + ":" + ansLineContents[j];
  246. }
  247. }
  248. return ret;
  249. }
  250. public static void main (String[] args)
  251. {
  252. Globals.init();
  253. GAnswer ga = new GAnswer();
  254. int i =1;
  255. //file in/output
  256. List<String> inputList = FileUtil.readFile("E:/Linyinnian/qald6_special.txt");
  257. for(String input: inputList)
  258. {
  259. ArrayList<String> outputs = new ArrayList<String>();
  260. ArrayList<String> spqs = new ArrayList<String>();
  261. spqs.add("id:"+String.valueOf(i));
  262. i++;
  263. long parsing_st_time = System.currentTimeMillis();
  264. QueryLogger qlog = ga.getSparqlList(input);
  265. if(qlog == null || qlog.rankedSparqls == null)
  266. continue;
  267. long parsing_ed_time = System.currentTimeMillis();
  268. System.out.println("Question Understanding time: "+ (int)(parsing_ed_time - parsing_st_time)+ "ms");
  269. System.out.println("TripleCheck time: "+ qlog.timeTable.get("TripleCheck") + "ms");
  270. System.out.println("SparqlCheck time: "+ qlog.timeTable.get("SparqlCheck") + "ms");
  271. System.out.println("Ranked Sparqls: " + qlog.rankedSparqls.size());
  272. outputs.add(qlog.SQGlog);
  273. outputs.add(qlog.SQGlog + "Building HQG time: "+ (qlog.timeTable.get("step0")+qlog.timeTable.get("step1")+qlog.timeTable.get("step2")-qlog.timeTable.get("BQG_topkjoin")) + "ms");
  274. outputs.add("TopKjoin time: "+ qlog.timeTable.get("BQG_topkjoin") + "ms");
  275. outputs.add("Question Understanding time: "+ (int)(parsing_ed_time - parsing_st_time)+ "ms");
  276. long excuting_st_time = System.currentTimeMillis();
  277. Matches m = null;
  278. System.out.println("[RESULT]");
  279. ArrayList<String> lastSpqList = new ArrayList<String>();
  280. int idx;
  281. // Consider top-5 SPARQLs
  282. for(idx=1; idx<=Math.min(qlog.rankedSparqls.size(), 5); idx++)
  283. {
  284. Sparql curSpq = qlog.rankedSparqls.get(idx-1);
  285. String stdSPQwoPrefix = ga.getStdSparqlWoPrefix(qlog, curSpq);
  286. lastSpqList.add(stdSPQwoPrefix);
  287. System.out.println("[" + idx + "]" + "score=" + curSpq.score);
  288. System.out.println(stdSPQwoPrefix);
  289. // Print top-3 SPARQLs to file.
  290. if(idx <= MAX_SPQ_NUM)
  291. // spqs.add("[" + idx + "]" + "score=" + curSpq.score + "\n" + stdSPQwoPrefix);
  292. outputs.add("[" + idx + "]" + "score=" + curSpq.score + "\n" + stdSPQwoPrefix);
  293. // // Execute by Virtuoso or GStore when answers not found
  294. if(m == null || m.answers == null)
  295. {
  296. if (curSpq.tripleList.size()>0 && curSpq.questionFocus!=null)
  297. {
  298. // if(ga.isBGP(qlog, curSpq))
  299. m = ga.getAnswerFromGStore2(curSpq);
  300. // else
  301. // m = ga.getAnswerFromVirtuoso(qlog, curSpq);
  302. }
  303. if (m != null && m.answers != null)
  304. {
  305. // Found results using current SPQ, then we can break and print result.
  306. qlog.sparql = curSpq;
  307. qlog.match = m;
  308. qlog.reviseAnswers();
  309. System.out.println("Query Executing time: "+ (int)(System.currentTimeMillis() - excuting_st_time)+ "ms");
  310. }
  311. }
  312. }
  313. // Some TYPEs can be omitted, (such as <type> <yago:Wife>)
  314. if(!qlog.rankedSparqls.isEmpty())
  315. {
  316. Sparql untypedSparql = ga.getUntypedSparql(qlog.rankedSparqls.get(0));
  317. if(untypedSparql != null)
  318. {
  319. String stdSPQwoPrefix = ga.getStdSparqlWoPrefix(qlog, untypedSparql);
  320. if(!lastSpqList.contains(stdSPQwoPrefix))
  321. // spqs.add("[" + Math.min(MAX_SPQ_NUM+1, idx) + "]" + "score=" + 1000 + "\n" + stdSPQwoPrefix + "\n");
  322. outputs.add("[" + Math.min(MAX_SPQ_NUM+1, idx) + "]" + "score=" + 1000 + "\n" + stdSPQwoPrefix + "\n");
  323. }
  324. }
  325. outputs.add(qlog.match.toString());
  326. FileUtil.writeFile(outputs, "E:/Linyinnian/qald6_special_out.txt", true);
  327. }
  328. }
  329. }

The GAnswer system is a natural language QA system developed by the Data Management Lab of the Institute of Computer Science & Technology, Peking University, led by Prof. Zou Lei. GAnswer is able to translate natural language questions to query graphs containing semant