delete useless and experimental code; delete useless datapaths and comments
@@ -5,19 +5,13 @@ import java.util.HashMap;
 import paradict.PredicateIDAndSupport;
 import log.QueryLogger;
-//import nlp.ds.DependencyTree;
-//import nlp.ds.DependencyTreeNode;
 import nlp.ds.Word;
 import nlp.ds.Sentence.SentenceType;
 import qa.Globals;
-//import qa.extract.TypeRecognition;
-//import qa.mapping.SemanticItemMapping;
-//import rdf.EntityMapping;
 import rdf.SemanticUnit;
 import rdf.Sparql;
 import rdf.Sparql.QueryType;
 import rdf.Triple;
-//import fgmt.TypeFragment;
 public class AddtionalFix
@@ -4,7 +4,6 @@ import nlp.ds.DependencyTree;
 import nlp.ds.DependencyTreeNode;
 import nlp.ds.Word;
 import qa.Globals;
-import rdf.SemanticRelation;
 import rdf.Sparql;
 import rdf.Triple;
 import log.QueryLogger;
@@ -33,7 +32,6 @@ public class AggregationRecognition {
 		case 1:
 			return b;
 		case 2: // Words need to be translated into numbers
-			boolean flag1=true;
 			for(i=0;i<8;i++) // 20~99
 			{
 				for(j=0;j<10;j++)
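The nested loops kept above enumerate composite number words for 20~99 (a tens word combined with a units word). A minimal self-contained sketch of that translation, with hypothetical TENS/UNITS tables standing in for the class's actual arrays, which are outside this hunk:

public class NumberWords {
	// Hypothetical lookup tables; the real class keeps its own word arrays.
	static final String[] TENS  = {"twenty","thirty","forty","fifty","sixty","seventy","eighty","ninety"}; // i = 0..7
	static final String[] UNITS = {"","one","two","three","four","five","six","seven","eight","nine"};     // j = 0..9

	// Translate a composite number word for 20~99 into an int; -1 if no match.
	static int toNumber(String w) {
		for (int i = 0; i < 8; i++)        // tens: 20~90, as in the loop above
			for (int j = 0; j < 10; j++) { // units: 0~9
				String spaced = (TENS[i] + " " + UNITS[j]).trim();
				String joined = TENS[i] + UNITS[j];
				if (w.equalsIgnoreCase(spaced) || w.equalsIgnoreCase(joined))
					return (i + 2) * 10 + j;
			}
		return -1;
	}

	public static void main(String[] args) {
		System.out.println(toNumber("twenty five")); // 25
		System.out.println(toNumber("ninety"));      // 90
	}
}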
@@ -37,7 +37,6 @@ public class GinfoHandler extends AbstractHandler{
 		JSONObject infoobj = new JSONObject();
 		infoobj.put("version", Globals.Version);
-		infoobj.put("dictionary", Globals.DictionaryPath);
 		infoobj.put("dataset", Globals.Dataset);
 		infoobj.put("GDB system", Globals.GDBsystem);
@@ -254,17 +254,6 @@ public class EntityFragment extends Fragment {
 				}
 			}
 		}
-		//TODO: fix data for DBpedia 2014 (should be eliminated when update dataset)
-		if(eid==2640237) //Barack_Obama
-		{
-			inEdges.add(8432); //spouse
-			outEdges.add(8432);
-			ArrayList<Integer> outEdgeList = new ArrayList<Integer>();
-			outEdgeList.add(8432);
-			inEntMap.put(4953443, outEdgeList);
-			outEntMap.put(4953443, outEdgeList);
-		}
 	}
 	@Override
@@ -2,9 +2,7 @@ package jgsc;
 import java.io.*;
 import java.net.*;
-import java.lang.*;
 import java.net.URLEncoder;
-import java.net.URLDecoder;
 import java.io.UnsupportedEncodingException;
 import java.util.List;
 import java.util.Map;
@@ -53,39 +51,25 @@ public class GstoreConnector {
 			String urlNameString = url + "/" + param;
 			System.out.println("request: "+urlNameString);
 			URL realUrl = new URL(urlNameString);
-			// open the connection to the URL
 			URLConnection connection = realUrl.openConnection();
-			// set general request properties
 			connection.setRequestProperty("accept", "*/*");
 			connection.setRequestProperty("connection", "Keep-Alive");
 			//set agent to avoid: speed limited by server if server think the client not a browser
 			connection.setRequestProperty("user-agent",
 					"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
-			// establish the actual connection
 			connection.connect();
 			long t0 = System.currentTimeMillis(); //ms
-			// get all the response header fields
 			Map<String, List<String>> map = connection.getHeaderFields();
-			// iterate over all the response header fields
-			//for (String key : map.keySet()) {
-			//	System.out.println(key + "--->" + map.get(key));
-			//}
 			long t1 = System.currentTimeMillis(); //ms
-			//System.out.println("Time to get header: "+(t1 - t0)+" ms");
-			//System.out.println("============================================");
-			// define a BufferedReader to read the response from the URL
 			in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8"));
 			String line;
 			while ((line = in.readLine()) != null) {
 				//PERFORMANCE: this can be very costly if result is very large, because many temporary Strings are produced
 				//In this case, just print the line directly will be much faster
 				result.append(line+"\n");
-				//System.out.println("get data size: " + line.length());
-				//System.out.println(line);
 			}
 			long t2 = System.currentTimeMillis(); //ms
@@ -94,7 +78,6 @@ public class GstoreConnector {
 			System.out.println("error in get request: " + e);
 			e.printStackTrace();
 		}
-		// use a finally block to close the input stream
 		finally {
 			try {
 				if (in != null) {
@@ -132,29 +115,20 @@ public class GstoreConnector {
 			String urlNameString = url + "/" + param;
 			System.out.println("request: "+urlNameString);
 			URL realUrl = new URL(urlNameString);
-			// open the connection to the URL
 			URLConnection connection = realUrl.openConnection();
-			// set general request properties
 			connection.setRequestProperty("accept", "*/*");
 			connection.setRequestProperty("connection", "Keep-Alive");
 			//set agent to avoid: speed limited by server if server think the client not a browser
 			connection.setRequestProperty("user-agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
-			// establish the actual connection
 			connection.connect();
 			long t0 = System.currentTimeMillis(); //ms
-			// get all the response header fields
 			Map<String, List<String>> map = connection.getHeaderFields();
-			// iterate over all the response header fields
-			//for (String key : map.keySet()) {
-			//	System.out.println(key + "--->" + map.get(key));
-			//}
 			long t1 = System.currentTimeMillis(); // ms
 			//System.out.println("Time to get header: "+(t1 - t0)+" ms");
-			// define a BufferedReader to read the response from the URL
 			in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8"));
 			char chars[] = new char[2048];
 			int b;
@@ -170,7 +144,6 @@ public class GstoreConnector {
 			//System.out.println("error in get request: " + e);
 			e.printStackTrace();
 		}
-		// use a finally block to close the input stream
 		finally {
 			try {
 				if (in != null) {
@@ -477,13 +450,6 @@ public class GstoreConnector {
 		//System.out.println(flag);
 		String answer = gc.query("root", "123456", "dbpedia16", sparql);
 		System.out.println(answer);
-		//To count the time cost
-		//long startTime=System.nanoTime(); //ns
-		//long startTime=System.currentTimeMillis(); //ms
-		//doSomeThing(); //the code segment being timed
-		//long endTime=System.currentTimeMillis(); //get the end time
-		//System.out.println("running time: "+(end-start)+"ms");
 	}
 }
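The main() in this hunk is already a usage example of the connector; condensed to its essentials it is the following pattern. Endpoint, credentials, dataset name, and the SPARQL string are illustrative placeholders taken from values visible in this diff, not guaranteed defaults:

import jgsc.GstoreConnector;

public class GstoreQueryDemo {
	public static void main(String[] args) {
		// host/port and "root"/"123456"/"dbpedia16" mirror the diff above; adjust to your deployment
		GstoreConnector gc = new GstoreConnector("127.0.0.1", 9001);
		String sparql = "select ?x where { <Barack_Obama> <spouse> ?x . } LIMIT 100";
		String answer = gc.query("root", "123456", "dbpedia16", sparql);
		for (String row : answer.split("\n")) // one answer row per line
			System.out.println(row);
	}
}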
@@ -3,7 +3,6 @@ package lcn;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-//import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.Date;
@@ -15,20 +14,7 @@ import org.apache.lucene.index.IndexWriter;
 import qa.Globals;
-//import qa.Globals;
-/**
- * The basic indexing unit in Lucene is the document; its fields can be added as needed.
- *
- * A Document is one record, representing one entry -- like a row in a database table; it is the entry the inverted index is built over.
- * e.g., to search the files on your own computer, you create fields (columns, as in a database), combine the fields into documents, and these finally become a number of index files.
- * This "document" is a different concept from a file-system document.
- *
- * StandardAnalyzer is Lucene's built-in "standard analyzer"; it can:
- * 1. tokenize the original sentence on whitespace
- * 2. convert all uppercase letters to lowercase
- * 3. drop useless words such as "is", "the", "are", and remove all punctuation
- */
 public class BuildIndexForEntityFragments{
 	public void indexforentity() throws Exception
 	{
@@ -1,11 +1,8 @@
 package lcn;
-//import java.io.IOException;
-//import java.util.ArrayList;
 import java.util.ArrayList;
 import java.util.Scanner;
-import fgmt.EntityFragment;
 import qa.Globals;
 import qa.mapping.EntityFragmentDict;
@@ -1,10 +1,5 @@
 package log;
-//import java.io.File;
-//import java.io.FileNotFoundException;
-//import java.io.FileOutputStream;
-//import java.io.OutputStreamWriter;
-//import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -12,7 +7,6 @@ import java.util.HashSet;
 import javax.servlet.http.HttpServletRequest;
-//import qa.Globals;
 import qa.Matches;
 import qa.Query;
 import rdf.EntityMapping;
@@ -47,7 +47,6 @@ public class CoreNLP {
 		// these are all the sentences in this document
 		// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
-		// ���������
 		List<CoreMap> sentences = document.get(SentencesAnnotation.class);
 		int count = 0;
@@ -1,10 +1,7 @@
 package qa;
-import java.io.*;
-import java.net.Socket;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.List;
 import jgsc.GstoreConnector;
@@ -73,7 +70,6 @@ public class GAnswer {
 		t = System.currentTimeMillis();
 		BuildQueryGraph step2 = new BuildQueryGraph();
 		step2.process(qlog);
-		// step2.processEXP(qlog);
 		qlog.timeTable.put("step2", (int)(System.currentTimeMillis()-t));
 		// step 3: some fix (such as "one-node" or "ask-one-triple") and aggregation
@@ -156,97 +152,10 @@ public class GAnswer {
 		return spq;
 	}
-	/**
-	 * Get answers from Virtuoso + DBpedia, this function require OLD version Virtuoso + Virtuoso Handler.
-	 * Virtuoso can solve "Aggregation"
-	 **/
-//	public Matches getAnswerFromVirtuoso (QueryLogger qlog, Sparql spq)
-//	{
-//		Matches ret = new Matches();
-//		try
-//		{
-//			Socket socket = new Socket(Globals.QueryEngineIP, 1112);
-//			DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(socket.getOutputStream()));
-//
-//			//formatting SPARQL & evaluate
-//			String formatedSpq = spq.toStringForVirtuoso();
-//			dos.writeUTF(formatedSpq);
-//			dos.flush();
-//			System.out.println("STD SPARQL:\n"+formatedSpq+"\n");
-//
-//			ArrayList<String> rawLines = new ArrayList<String>();
-//			DataInputStream dis = new DataInputStream(new BufferedInputStream(socket.getInputStream()));
-//			while (true)
-//			{
-//				String line = dis.readUTF();
-//				if (line.equals("[[finish]]")) break;
-//				rawLines.add(line);
-//			}
-//
-//			// ASK query was translated to SELECT query, whose answer need translation.
-//			// It is no need to translate, use "ASK WHERE" directly ! 2018-12-11
-//			if(qlog.s.sentenceType == SentenceType.GeneralQuestion)
-//			{
-//				ret.answersNum = 1;
-//				ret.answers = new String[1][1];
-//				if(rawLines.size() == 0)
-//				{
-//					ret.answers[0][0] = "general:false";
-//				}
-//				else
-//				{
-//					ret.answers[0][0] = "general:true";
-//				}
-//				System.out.println("general question answer:" + ret.answers[0][0]);
-//				dos.close();
-//				dis.close();
-//				socket.close();
-//				return ret;
-//			}
-//
-//			//select but no results
-//			if (rawLines.size() == 0)
-//			{
-//				ret.answersNum = 0;
-//				dos.close();
-//				dis.close();
-//				socket.close();
-//				return ret;
-//			}
-//
-//			int ansNum = rawLines.size();
-//			int varNum = variables.size();
-//			ArrayList<String> valist = new ArrayList<String>(variables);
-//			ret.answers = new String[ansNum][varNum];
-//
-//			System.out.println("ansNum=" + ansNum);
-//			System.out.println("varNum=" + varNum);
-//			for (int i=0;i<rawLines.size();i++)
-//			{
-//				String[] ansLineContents = rawLines.get(i).split("\t");
-//				for (int j=0;j<varNum;j++)
-//				{
-//					ret.answers[i][j] = valist.get(j) + ":" + ansLineContents[j];
-//				}
-//			}
-//
-//			dos.close();
-//			dis.close();
-//			socket.close();
-//		}
-//		catch (Exception e) {
-//			e.printStackTrace();
-//		}
-//
-//		return ret;
-//	}
 	public Matches getAnswerFromGStore2 (Sparql spq)
 	{
 		// modified by Lin Yinnian using ghttp - 2018-9-28
-		GstoreConnector gc = new GstoreConnector("172.31.222.90", 9001);
+		GstoreConnector gc = new GstoreConnector(Globals.QueryEngineIP, Globals.QueryEnginePort);
 		String answer = gc.query("root", "123456", "dbpedia16", spq.toStringForGStore2());
 		System.out.println(answer);
 		String[] rawLines = answer.split("\n");
@@ -338,14 +247,11 @@ public class GAnswer {
 			// // Execute by Virtuoso or GStore when answers not found
 			if(m == null || m.answers == null)
 			{
-				if (curSpq.tripleList.size()>0 && curSpq.questionFocus!=null)
+				if(curSpq.tripleList.size()>0 && curSpq.questionFocus!=null)
 				{
-//					if(ga.isBGP(qlog, curSpq))
-						m = ga.getAnswerFromGStore2(curSpq);
-//					else
-//						m = ga.getAnswerFromVirtuoso(qlog, curSpq);
+					m = ga.getAnswerFromGStore2(curSpq);
 				}
-				if (m != null && m.answers != null)
+				if(m != null && m.answers != null)
 				{
 					// Found results using current SPQ, then we can break and print result.
 					qlog.sparql = curSpq;
@@ -28,7 +28,6 @@ public class Globals {
 	public static DBpediaLookup dblk;
 	public static int MaxAnswerNum = 100;
 	public static String Dataset = "dbpedia 2016";
-	public static String DictionaryPath = "default";
 	public static String Version = "0.1.2";
 	public static String GDBsystem = "gStore v0.7.2";
@@ -38,24 +37,14 @@ public class Globals {
 	 * 2. super SQG, allow CIRCLE and WRONG edge. The structure is decided by DS tree, and can be changed in query evaluation(TOP-K match) stage.
 	 * */
 	public static int evaluationMethod = 2;
-	public static boolean isRunAsWebServer = false;	// Run Local: false; Run Server: true
-	public static String runningBenchmark = "QALD";	// WQ:WebQuestions; WQSP:WebQuestionsSP; CQ:ComplexQuestions
-							// using different method and Freebase Version (in Virtuoso.java)
-	public static boolean usingOperationCondition = false;	// only for EXP: try state transition operations only when condition are satisfied.
-	public static String localPath = "/media/wip/husen/NBgAnswer/";
-	public static String QueryEngineIP = "127.0.0.1";	// Notice, PORT number is in the evaluation function.
+	public static String localPath = "./././";
+	public static String QueryEngineIP = "172.31.222.90";	// Notice, PORT number is in the evaluation function.
+	public static int QueryEnginePort = 9001;
 	public static void init ()
 	{
 		System.out.println("====== gAnswer2.0 over DBpedia ======");
-		if(isRunAsWebServer == false)
-		{
-			localPath = "D:/husen/gAnswer/";
-			QueryEngineIP = "172.31.222.72";
-		}
 		long t1, t2, t3, t4, t5, t6, t7, t8, t9;
@@ -1,21 +1,14 @@
 package qa.extract;
 import java.io.BufferedReader;
-//import java.io.File;
-//import java.io.FileInputStream;
-//import java.io.FileNotFoundException;
-//import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
-//import java.io.OutputStreamWriter;
-//import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
-import lcn.EntityFragmentFields;
 import fgmt.EntityFragment;
 import nlp.ds.Word;
 import qa.Globals;
@@ -38,7 +31,6 @@ public class EntityRecognition {
 	double TypeAcceptedScore = 0.5;
 	double AcceptedDiffScore = 1;
-	public HashMap<String, String> m2e = null;
 	public ArrayList<MergedWord> mWordList = null;
 	public ArrayList<String> stopEntList = null;
 	public ArrayList<String> badTagListForEntAndType = null;
@@ -65,12 +57,6 @@ public class EntityRecognition {
 		badTagListForEntAndType.add("VBP");
 		badTagListForEntAndType.add("POS");
-		// !Handwriting entity linking; (lower case)
-		m2e = new HashMap<String, String>();
-		m2e.put("bipolar_syndrome", "Bipolar_disorder");
-		m2e.put("battle_in_1836_in_san_antonio", "Battle_of_San_Jacinto");
-		m2e.put("federal_minister_of_finance_in_germany", "Federal_Ministry_of_Finance_(Germany)");
 		// Additional fix for CATEGORY (in DBpedia)
 		af = new AddtionalFix();
 		tr = new TypeRecognition();
@@ -156,7 +142,7 @@ public class EntityRecognition {
 			allCnt++;
 			/*
-			 * Filters to save time and drop some bad cases.
+			 * Filters to speed up and drop some bad cases.
 			 */
 			boolean entOmit = false, typeOmit = false;
 			int prep_cnt=0;
@@ -446,8 +432,8 @@ public class EntityRecognition {
 			if(likelyEnt.equals(lowerOriginalWord))
 				score *= len;
 			// !Award: COVER (eg, Robert Kennedy: [Robert] [Kennedy] [Robert Kennedy])
-			//e.g., for Social_Democratic_Party any combination of these three words is an ent, giving too many plans; "link or not link" matters more than "which conflicting ent to pick" (and real errors are mostly link-or-not errors), so the covered smaller ents are dropped directly
-			//e.g., for Abraham_Lincoln the "no-link" plan recognizes two separate nodes whose final score beats the correct answer, so such words are made mandatory
+			//e.g, Social_Democratic_Party -> all ents -> drop the overlapped smaller ones
+			//e.g, Abraham_Lincoln -> select the whole word
 			if(len>1)
 			{
 				boolean[] flag = new boolean[words.length+1];
@@ -473,8 +459,6 @@ public class EntityRecognition {
 				// WHOLE match || HIGH match & HIGH upper || WHOLE upper
 				if(hitCnt == len || ((double)hitCnt/(double)len > 0.6 && (double)UpperWordCnt/(double)len > 0.6) || UpperWordCnt == len || len>=4)
 				{
-					//If a comma appears in the middle, the words on both sides must appear in the mapped entity.
-					//e.g., Melbourne_,_Florida: "Melbourne, Florida" must be selected, while California_,_USA: "Malibu, California" is not necessarily correct.
 					boolean commaTotalRight = true;
 					if(originalWord.contains(","))
 					{
@@ -741,19 +725,10 @@ public class EntityRecognition {
 		String n = entity;
 		ArrayList<EntityMapping> ret= new ArrayList<EntityMapping>();
-		//1. Handwriting
-		if(m2e.containsKey(entity))
-		{
-			String eName = m2e.get(entity);
-			EntityMapping em = new EntityMapping(EntityFragmentFields.entityName2Id.get(eName), eName, 1000);
-			ret.add(em);
-			return ret;	//handwriting is always correct
-		}
-		//2. Lucene index
+		//1. Lucene index
 		ret.addAll(EntityFragment.getEntityMappingList(n));
-		//3. DBpedia Lookup (some cases)
+		//2. DBpedia Lookup (some cases)
 		if (useDblk)
 		{
 			ret.addAll(Globals.dblk.getEntityMappings(n, null));
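With the handwritten table gone, candidate generation is a plain two-stage union: the local Lucene index first, then optionally the online lookup. The pattern in isolation, with simplified types since EntityMapping's real fields are not shown in this diff:

import java.util.ArrayList;
import java.util.List;

public class CandidateUnion {
	interface Source { List<String> lookup(String mention); } // simplified: the real code returns EntityMapping objects

	// Pool candidates from the index first, then the online service; callers re-rank the union by score.
	static List<String> getCandidates(String mention, boolean useOnline, Source index, Source online) {
		List<String> ret = new ArrayList<String>(index.lookup(mention)); // 1. Lucene index
		if (useOnline)
			ret.addAll(online.lookup(mention));                          // 2. DBpedia Lookup
		return ret;
	}
}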
@@ -880,36 +855,7 @@ public class EntityRecognition {
 				er.process(question);
 			}
-//			File inputFile = new File("D:\\husen\\gAnswer\\data\\test\\test_in.txt");
-//			File outputFile = new File("D:\\husen\\gAnswer\\data\\test\\test_out.txt");
-//			BufferedReader fr = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile),"utf-8"));
-//			OutputStreamWriter fw = new OutputStreamWriter(new FileOutputStream(outputFile,true),"utf-8");
-//
-//			String input;
-//			while((input=fr.readLine())!=null)
-//			{
-//				String[] strArray = input.split("\t");
-//				String id = "";
-//				String question = strArray[0];
-//				if(strArray.length>1)
-//				{
-//					question = strArray[1];
-//					id = strArray[0];
-//				}
-//				//Notice "?" may leads lucene/dbpedia lookup error
-//				if(question.length()>1 && question.charAt(question.length()-1)=='.' || question.charAt(question.length()-1)=='?')
-//					question = question.substring(0,question.length()-1);
-//				if(question.isEmpty())
-//					continue;
-//				er.process(question);
-//				fw.write("Id: "+id+"\nQuery: "+question+"\n");
-//				fw.write(er.preLog+"\n");
-//			}
-//
-//			fr.close();
-//			fw.close();
 		} catch (IOException e) {
 			e.printStackTrace();
 		}
@@ -1,7 +1,6 @@
 package qa.extract;
 import java.io.BufferedReader;
-//import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -26,7 +25,6 @@ public class ExtractImplicitRelation {
 	static final int SamplingNumber = 100;	// the maximum sampling number in calculation
 	static final int k = 3;	// select top-k when many suitable relations; select top-k entities for a word
-	public HashMap<String, Integer> implicitEntRel = new HashMap<String, Integer>();
 	/*
 	 * Implicit Relations:
@@ -39,9 +37,6 @@ public class ExtractImplicitRelation {
 	 * */
 	public ExtractImplicitRelation()
 	{
-		//orignal word to lower case
-		implicitEntRel.put("american", Globals.pd.predicate_2_id.get("country"));
-		implicitEntRel.put("united_states", Globals.pd.predicate_2_id.get("country"));
 	}
 	// Notice, it is usually UNNECESSARY for two constant, so we unimplemented this function.
@@ -96,14 +91,6 @@ public class ExtractImplicitRelation {
 				String eName = word.emList.get(i).entityName;
 				irList = getPrefferdPidListBetween_Entity_TypeVariable(eId, tId);
-				// !Handwriting implicit relations
-				if(irList != null && implicitEntRel.containsKey(word.originalForm.toLowerCase()))
-				{
-					int pId = implicitEntRel.get(word.originalForm.toLowerCase());
-					ImplicitRelation ir = new ImplicitRelation(tId, eId, pId, 1000);
-					irList.add(0, ir);
-				}
 				if(irList!=null && irList.size()>0)
 				{
 					ImplicitRelation ir = irList.get(0);
@@ -10,7 +10,6 @@ import java.util.Queue;
 import log.QueryLogger;
 import nlp.ds.DependencyTree;
 import nlp.ds.DependencyTreeNode;
-//import nlp.ds.Word;
 import paradict.ParaphraseDictionary;
 import qa.Globals;
 import rdf.SimpleRelation;
@@ -8,7 +8,6 @@ import java.util.HashMap;
 import nlp.ds.Word;
 import nlp.tool.StopWordsList;
-//import fgmt.RelationFragment;
 import fgmt.TypeFragment;
 import lcn.SearchInTypeShortName;
 import log.QueryLogger;
@@ -44,7 +43,6 @@ public class TypeRecognition {
 	{
 		extendTypeMap = new HashMap<String, String>();
 		extendVariableMap = new HashMap<String, Triple>();
-		Triple triple = null;
 		//!Handwriting for convenience | TODO: approximate/semantic match of type
 		extendTypeMap.put("NonprofitOrganizations", "dbo:Non-ProfitOrganisation");
@@ -55,19 +53,6 @@ public class TypeRecognition {
 		extendTypeMap.put("USStates", "yago:StatesOfTheUnitedStates");
 		extendTypeMap.put("Europe", "yago:EuropeanCountries");
 		extendTypeMap.put("Africa", "yago:AfricanCountries");
-		//!The following IDs are based on DBpedia 2014.
-		//!extend variable (embedded triples) | eg, [?E|surfers]-?uri dbo:occupation res:Surfing | canadians -> <?canadian> <birthPlace> <Canada>
-		//1) <?canadians> <birthPlace> <Canada> | [country people] <birthPlace|1639> [country]
-		triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 1639, 2112902, "Canada", null, 100);
-		extendVariableMap.put("canadian", triple);
-		triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 1639, 883747, "Germany", null, 100);
-		extendVariableMap.put("german", triple);
-		//2) ?bandleader <occupation|6690> <Bandleader>
-		triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 6690, 5436853, "Bandleader", null, 100);
-		extendVariableMap.put("bandleader", triple);
-		triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 6690, 5436854, "Surfing>", null, 100);
-		extendVariableMap.put("surfer", triple);
 	}
 	public static void recognizeExtendVariable(Word w)
@@ -1,7 +1,6 @@
 package qa.mapping;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -19,8 +19,7 @@ import rdf.EntityMapping;
 public class DBpediaLookup {
 	//There are two websites of the DBpediaLookup online service.
 	//public static final String baseURL = "http://en.wikipedia.org/w/api.php?action=opensearch&format=xml&limit=10&search=";
-	//public static final String baseURL = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString=";
-	public static final String baseURL = "http://172.31.222.72:1234/api/search/KeywordSearch?MaxHits=5&QueryString=";
+	public static final String baseURL = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString=";
 	public HttpClient ctripHttpClient = null;
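The restored baseURL is a plain HTTP GET endpoint: the mention is URL-encoded, appended to the URL, and the service answers with XML. A standalone sketch of such a call using only java.net (the class itself goes through its HttpClient field instead; the keyword is an illustrative example):

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLEncoder;

public class LookupDemo {
	public static void main(String[] args) throws Exception {
		String baseURL = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString=";
		URL url = new URL(baseURL + URLEncoder.encode("Barack Obama", "utf-8"));
		BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream(), "utf-8"));
		String line;
		while ((line = in.readLine()) != null)
			System.out.println(line); // XML listing up to MaxHits candidate entities
		in.close();
	}
}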
@@ -2,8 +2,6 @@ package qa.mapping;
 import java.util.HashMap;
-//import lcn.EntityFragmentFields;
-//import qa.Globals;
 import fgmt.EntityFragment;
 public class EntityFragmentDict {
@@ -1,11 +1,9 @@
 package qa.parsing;
-//import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedList;
-import java.util.PriorityQueue;
 import java.util.Queue;
 import fgmt.EntityFragment;
@@ -17,16 +15,15 @@ import qa.Globals;
 import qa.extract.*;
 import qa.mapping.SemanticItemMapping;
 import rdf.PredicateMapping;
-import rdf.SemanticQueryGraph;
 import rdf.Triple;
 import rdf.SemanticRelation;
 import rdf.SimpleRelation;
 import rdf.SemanticUnit;
-//import paradict.ParaphraseDictionary;
-/*
- * The core class to build query graph, i.e, to generate SPARQL queries.
- * */
+/**
+ * Core class to build query graph, i.e, to generate SPARQL queries.
+ * @author husen
+ */
 public class BuildQueryGraph
 {
 	public ArrayList<SemanticUnit> semanticUnitList = new ArrayList<SemanticUnit>();
@@ -48,7 +45,8 @@ public class BuildQueryGraph
 		whList.add("how");
 		whList.add("where");
-		// Bad words for NODE. (base form)
+		// Bad words for NODE. (base form)
+		// We will train a node recognition model to replace such heuristic rules further.
 		stopNodeList.add("list");
 		stopNodeList.add("give");
 		stopNodeList.add("show");
@@ -92,7 +90,7 @@ public class BuildQueryGraph
 		{
 			stopNodeList.add("area");
 		}
-		//how much is the total population of european union?
+		//how much is the total population of european union?
 		if(qStr.contains("how much"))
 		{
 			stopNodeList.add("population");
@@ -202,7 +200,7 @@ public class BuildQueryGraph
 				continue;
 			}
-			//Notice, the following codes guarantee all possible edges (allow CIRCLE).
+			//Notice, the following code guarantees all possible edges (allow CIRCLE).
 			//Otherwise, NO CIRCLE, and the structure may be different by changing target.
 			if(Globals.evaluationMethod > 1)
 			{
@@ -279,209 +277,6 @@ public class BuildQueryGraph
 		return semanticUnitList;
 	}
-	/*
-	 * For experiment.
-	 */
-	public ArrayList<SemanticUnit> getNodeList(QueryLogger qlog, DependencyTree ds)
-	{
-		semanticUnitList = new ArrayList<SemanticUnit>();
-		// For ComplexQuestions or WebQuestions, only consider wh-word and at most two entities.
-		if(Globals.runningBenchmark.equals("CQ") || Globals.runningBenchmark.equals("WQ"))
-		{
-//			DependencyTreeNode target = ds.nodesList.get(0);
-//			if(Globals.runningBenchmark.equals("CQ"))
-//				target = detectTargetForCQ(ds, qlog);
-//			qlog.target = target.word;
-//			qlog.SQGlog += "++++ Target detect: "+target+"\n";
-//
-//			detectTopicConstraint(qlog);
-//			semanticUnitList.add(new SemanticUnit(qlog.target, false)); //Set variable to object
-//			if(topicEnt != null)
-//			{
-//				semanticUnitList.add(new SemanticUnit(topicEnt, true)); //Set entity to subject
-//			}
-//			if(constraintEnt != null)
-//			{
-//				semanticUnitList.add(new SemanticUnit(constraintEnt, true)); //Set entity to subject
-//			}
-		}
-		// For general cases (e.g, QALD), consider internal variables.
-		else
-		{
-			for(DependencyTreeNode dtn: ds.nodesList)
-			{
-				if(isNodeWoCorefRe(dtn)) // ! Omit the coreference resolution rules !
-				{
-					semanticUnitList.add(new SemanticUnit(dtn.word, true)); //No prefer subject (default is true)
-				}
-			}
-		}
-		return semanticUnitList;
-	}
-	/*
-	 * (For Experiment) Build query graph using STATE TRANSITION method based on 4 operations (with 4 conditions).
-	 * 1. Condition for Connect operation: do and must do | no other nodes on simple path in DS tree.
-	 * 2. Condition for Merge operation: do and must do | heuristic rules of CoReference Resolution.
-	 * 3. Condition for Fold operation: do or not do | no matches of low confidence of an edge.
-	 * 4. Condition for Expand operation: do and must do | has corresponding information.
-	 * */
-	public ArrayList<SemanticUnit> processEXP(QueryLogger qlog)
-	{
-		//0) Fix stop words
-		DependencyTree ds = qlog.s.dependencyTreeStanford;
-		if(qlog.isMaltParserUsed)
-			ds = qlog.s.dependencyTreeMalt;
-		fixStopWord(qlog, ds);
-		//1) Detect Modifier/Modified
-		//rely on sentence (rather than dependency tree)
-		//with some ADJUSTMENT (eg, ent+noun(noType&&noEnt) -> noun.omitNode=TRUE)
-		for(Word word: qlog.s.words)
-			getTheModifiedWordBySentence(qlog.s, word); //Find continuous modifier
-		for(Word word: qlog.s.words)
-			getDiscreteModifiedWordBySentence(qlog.s, word); //Find discrete modifier
-		for(Word word: qlog.s.words)
-			if(word.modifiedWord == null) //Other words modify themselves. NOTICE: only can be called after detecting all modifier.
-				word.modifiedWord = word;
-		//print log
-		for(Word word: qlog.s.words)
-		{
-			if(word.modifiedWord != null && word.modifiedWord != word)
-			{
-				modifierList.add(word);
-				qlog.SQGlog += "++++ Modify detect: "+word+" --> " + word.modifiedWord + "\n";
-			}
-		}
-		//2) Detect target & 3) Coreference resolution
-		DependencyTreeNode target = detectTarget(ds,qlog);
-		qlog.SQGlog += "++++ Target detect: "+target+"\n";
-		if(target == null)
-			return null;
-		qlog.target = target.word;
-		// !target can NOT be entity. (except general question)| which [city] has most people?
-		if(qlog.s.sentenceType != SentenceType.GeneralQuestion && target.word.emList!=null)
-		{
-			//Counter example:Give me all Seven_Wonders_of_the_Ancient_World | (in fact, it not ENT, but CATEGORY, ?x subject Seve...)
-			target.word.mayEnt = false;
-			target.word.emList.clear();
-		}
-		try
-		{
-			// step1: get node list
-			semanticUnitList = getNodeList(qlog, ds);
-			if(semanticUnitList == null || semanticUnitList.isEmpty())
-			{
-				qlog.SQGlog += "ERROR: no nodes found.";
-				return null;
-			}
-			// step2: extract all potential relations
-			long t = System.currentTimeMillis();
-			System.out.println("Potential Relation Extraction start ...");
-			extractPotentialSemanticRelations(semanticUnitList, qlog);
-			qlog.timeTable.put("BQG_relation", (int)(System.currentTimeMillis()-t));
-			// setp3: build query graph structure by 4 operations
-			t = System.currentTimeMillis();
-			SemanticQueryGraph bestSQG = null;
-			if(Globals.usingOperationCondition)
-			{
-				//TODO: use operation condition
-			}
-			else
-			{
-				// for experiment, do not use conditions.
-				PriorityQueue<SemanticQueryGraph> QGs = new PriorityQueue<SemanticQueryGraph>();
-				HashSet<SemanticQueryGraph> visited = new HashSet<>();
-				//Initial state: all nodes isolated.
-				SemanticQueryGraph head = new SemanticQueryGraph(semanticUnitList);
-				QGs.add(head);
-				while(!QGs.isEmpty())
-				{
-					head = QGs.poll();
-					visited.add(head);
-					//Judge: is it a final state?
-					if(head.isFinalState())
-					{
-						bestSQG = head;
-						break; // now we just find the top-1 SQG
-					}
-					//SQG generation
-					//Connect (enumerate)
-					for(SemanticUnit u: head.semanticUnitList)
-						for(SemanticUnit v: head.semanticUnitList)
-							if(!u.equals(v) && !u.neighborUnitList.contains(v) && !v.neighborUnitList.contains(u))
-							{
-								SemanticQueryGraph tail = new SemanticQueryGraph(head);
-								tail.connect(u, v);
-								if(!QGs.contains(tail) && !visited.contains(tail))
-								{
-									tail.calculateScore(qlog.potentialSemanticRelations);
-									QGs.add(tail);
-								}
-							}
-					//Merge (coref resolution)
-					if(head.semanticUnitList.size() > 2)
-						for(SemanticUnit u: head.semanticUnitList)
-							for(SemanticUnit v: head.semanticUnitList)
-								if(!u.equals(v) && (!u.neighborUnitList.contains(v) && !v.neighborUnitList.contains(u)) || (u.neighborUnitList.contains(v) && v.neighborUnitList.contains(u)))
-								{
-									SemanticQueryGraph tail = new SemanticQueryGraph(head);
-									tail.merge(u, v);
-									if(!QGs.contains(tail) && !visited.contains(tail))
-									{
-										tail.calculateScore(qlog.potentialSemanticRelations);
-										QGs.add(tail);
-									}
-								}
-				}
-			}
-			qlog.timeTable.put("BQG_structure", (int)(System.currentTimeMillis()-t));
-			//Relation Extraction by potentialSR
-			qlog.semanticUnitList = new ArrayList<SemanticUnit>();
-			qlog.semanticRelations = bestSQG.semanticRelations;
-			semanticUnitList = bestSQG.semanticUnitList;
-			matchRelation(semanticUnitList, qlog);
-			//Prepare for item mapping
-			TypeRecognition.AddTypesOfWhwords(qlog.semanticRelations); // Type supplementary
-			TypeRecognition.constantVariableRecognition(qlog.semanticRelations, qlog); // Constant or Variable, embedded triples
-			//(just for display)
-			recordOriginalTriples(semanticUnitList, qlog);
-			//step3: item mapping & top-k join
-			t = System.currentTimeMillis();
-			SemanticItemMapping step5 = new SemanticItemMapping();
-			step5.process(qlog, qlog.semanticRelations); //top-k join (generate SPARQL queries), disambiguation
-			qlog.timeTable.put("BQG_topkjoin", (int)(System.currentTimeMillis()-t));
-			//step6: implicit relation [modify word]
-			t = System.currentTimeMillis();
-			ExtractImplicitRelation step6 = new ExtractImplicitRelation();
-			step6.supplementTriplesByModifyWord(qlog);
-			qlog.timeTable.put("BQG_implicit", (int)(System.currentTimeMillis()-t));
-		} catch (Exception e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
-		}
-		return semanticUnitList;
-	}
 	public void extractPotentialSemanticRelations(ArrayList<SemanticUnit> semanticUnitList, QueryLogger qlog)
 	{
 		ExtractRelation er = new ExtractRelation();
@@ -771,35 +566,6 @@ public class BuildQueryGraph
 		return false;
 	}
-	/*
-	 * Judge nodes strictly.
-	 * For EXP, do not use COREF resolution rules.
-	 * */
-	public boolean isNodeWoCorefRe(DependencyTreeNode cur)
-	{
-		if(stopNodeList.contains(cur.word.baseForm))
-			return false;
-		if(cur.word.omitNode)
-			return false;
-		// Modifier can NOT be node (They may be added in query graph in the end) e.g., Queen Elizabeth II, Queen(modifier)
-		if(modifierList.contains(cur.word))
-			return false;
-		// NOUN
-		if(cur.word.posTag.startsWith("N"))
-			return true;
-		// Wh-word
-		if(whList.contains(cur.word.baseForm))
-			return true;
-		if(cur.word.mayEnt || cur.word.mayType || cur.word.mayCategory)
-			return true;
-		return false;
-	}
 	public DependencyTreeNode detectTarget(DependencyTree ds, QueryLogger qlog)
 	{
 		visited.clear();
@@ -1,7 +1,5 @@
 package qa.parsing;
-import org.maltparser.core.exception.MaltChainedException;
 import log.QueryLogger;
 import nlp.ds.DependencyTree;
 import nlp.ds.DependencyTreeNode;
@@ -4,7 +4,6 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
-import qa.Globals;
 import nlp.ds.Word;
 public class SemanticQueryGraph implements Comparable<SemanticQueryGraph>
@@ -65,7 +65,7 @@ public class SimpleRelation {
 		}
 		sumSelectivity = matchingScore*sumSelectivity*pidsup.support;
 		int pid = pidsup.predicateID;
-		if (Globals.pd.dbo_predicate_id.contains(pid)) sumSelectivity *= 1.5; //prefer predicates in dbo //pid may not be in dbo?
+		if (Globals.pd.dbo_predicate_id.contains(pid)) sumSelectivity *= 1.5;
 		if (!pasList.containsKey(pid))
 			pasList.put(pid, sumSelectivity);
@@ -2,13 +2,9 @@ package rdf;
 import java.util.ArrayList;
 import java.util.Collections;
-//import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
-import log.QueryLogger;
-import nlp.ds.Sentence;
-import nlp.ds.Sentence.SentenceType;
 import qa.Globals;
 public class Sparql implements Comparable<Sparql>
@@ -137,21 +133,21 @@ public class Sparql implements Comparable<Sparql>
 		}
 		// part2: triples
-		ret += " where\n{\n";
+		ret += " where { ";
 		for(Triple t : tripleList)
 		{
 			if (!t.object.equals("literal_HRZ")) {	// need not display literal
 				ret += t.toStringForGStore();
-				ret += " .\n";
+				ret += ". ";
 			}
 		}
-		ret += "}\n";
+		ret += "} ";
 		// part3: order by / group by ...
 		if(moreThanStr != null)
-			ret += moreThanStr+"\n";
+			ret += moreThanStr+" ";
 		if(mostStr != null)
-			ret += mostStr+"\n";
+			ret += mostStr+" ";
 		// part4: limit
 		if(queryType != QueryType.Ask && (mostStr == null || !mostStr.contains("LIMIT")))
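With these replacements toStringForGStore2() emits the whole query on a single line, which is easier to pass through gStore's HTTP interface than a multi-line string. For a hypothetical two-triple query (illustrative names; the select clause and the LIMIT value come from parts 1 and 4, which are outside this hunk) the serialized form now reads:

select ?x where { <Berlin> <country> ?x. ?x <type> <Country>. } LIMIT 100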
@@ -159,64 +155,6 @@ public class Sparql implements Comparable<Sparql>
 		return ret;
 	}
-	//Use to execute (select all variables; format 'aggregation' and 'ask')
-	public String toStringForVirtuoso()
-	{
-		String ret = "";
-		HashSet<String> variables = new HashSet<String>();
-		// prefix
-		if (queryType==QueryType.Ask)
-			ret += "ask where";
-		else if(countTarget)
-			ret += ("select COUNT(DISTINCT " + questionFocus + ") where");
-		else
-		{
-			// AGG: select question focus
-			if(moreThanStr != null || mostStr != null)
-				ret += ("select DISTINCT " + questionFocus + " where");
-			// BGP: select all variables
-			else
-			{
-				for (Triple t: tripleList)
-				{
-					if (!t.isSubjConstant()) variables.add(t.subject.replaceAll(" ", "_"));
-					if (!t.isObjConstant()) variables.add(t.object.replaceAll(" ", "_"));
-				}
-				ret += "select ";
-				for (String v : variables)
-					ret += v + " ";
-				ret += "where";
-			}
-		}
-		ret += "\n{\n";
-		if(variables.size() == 0)
-			variables.add(questionFocus);
-		// triples
-		for (Triple t : tripleList)
-		{
-			if (!t.object.equals("literal_HRZ")) {
-				ret += t.toStringForGStore();
-				ret += " .\n";
-			}
-		}
-		ret += "}\n";
-		// suffix
-		if(moreThanStr != null)
-		{
-			ret += moreThanStr+"\n";
-		}
-		if(mostStr != null)
-		{
-			ret += mostStr+"\n";
-		}
-		return ret;
-	}
 	public int getVariableNumber()
 	{
@@ -258,7 +196,6 @@ public class Sparql implements Comparable<Sparql>
 	public boolean equals(Object spq)
 	{
 		Sparql tempSparql= (Sparql) spq;
-		String s1 = this.toStringForGStore2(), s2 = tempSparql.toStringForGStore2();
 		if(this.toStringForGStore2().equals(tempSparql.toStringForGStore2()))
 			return true;
 		else
@@ -1,114 +0,0 @@
-package utils;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.net.URL;
-import java.net.URLConnection;
-import java.util.List;
-import java.util.Map;
-public class HttpRequest
-{
-	public static String sendGet(String url, String param) {
-		String result = "";
-		BufferedReader in = null;
-		try {
-			String urlNameString = url + "?" + param;
-			URL realUrl = new URL(urlNameString);
-			URLConnection connection = realUrl.openConnection();
-			connection.setRequestProperty("accept", "*/*");
-			connection.setRequestProperty("connection", "Keep-Alive");
-			connection.setRequestProperty("user-agent",
-					"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
-			connection.connect();
-			Map<String, List<String>> map = connection.getHeaderFields();
-			for (String key : map.keySet()) {
-				System.out.println(key + "--->" + map.get(key));
-			}
-			in = new BufferedReader(new InputStreamReader(
-					connection.getInputStream()));
-			String line;
-			while ((line = in.readLine()) != null) {
-				result += line;
-			}
-		} catch (Exception e) {
-			System.out.println("Error when sending GET request: " + e);
-			e.printStackTrace();
-		}
-		finally {
-			try {
-				if (in != null) {
-					in.close();
-				}
-			} catch (Exception e2) {
-				e2.printStackTrace();
-			}
-		}
-		return result;
-	}
-	public static String sendPost(String url, String param) {
-		PrintWriter out = null;
-		BufferedReader in = null;
-		String result = "";
-		try {
-			URL realUrl = new URL(url);
-			URLConnection conn = realUrl.openConnection();
-			conn.setRequestProperty("accept", "*/*");
-			conn.setRequestProperty("connection", "Keep-Alive");
-			conn.setRequestProperty("user-agent",
-					"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
-			conn.setDoOutput(true);
-			conn.setDoInput(true);
-			out = new PrintWriter(conn.getOutputStream());
-			out.print(param);
-			out.flush();
-			in = new BufferedReader(
-					new InputStreamReader(conn.getInputStream()));
-			String line;
-			while ((line = in.readLine()) != null) {
-				result += line;
-			}
-		} catch (Exception e) {
-			System.out.println("Error when sending POST request: "+e);
-			e.printStackTrace();
-		}
-		finally{
-			try{
-				if(out!=null){
-					out.close();
-				}
-				if(in!=null){
-					in.close();
-				}
-			}
-			catch(IOException ex){
-				ex.printStackTrace();
-			}
-		}
-		return result;
-	}
-	public static String getPostData(InputStream in, int size, String charset) {
-		if (in != null && size > 0) {
-			byte[] buf = new byte[size];
-			try {
-				in.read(buf);
-				if (charset == null || charset.length() == 0)
-					return new String(buf);
-				else {
-					return new String(buf, charset);
-				}
-			} catch (IOException e) {
-				e.printStackTrace();
-			}
-		}
-		return null;
-	}
-}