From d145aa5e66bcd6ea0e14506840596e5b747310f5 Mon Sep 17 00:00:00 2001 From: knightmarehs Date: Thu, 27 Dec 2018 23:31:26 +0800 Subject: [PATCH] delete some code/paths/comments delete useless and experimental code; delete useless datapaths and comments --- src/addition/AddtionalFix.java | 6 - src/addition/AggregationRecognition.java | 2 - src/application/GinfoHandler.java | 1 - src/fgmt/EntityFragment.java | 11 -- src/jgsc/GstoreConnector.java | 34 ---- src/lcn/BuildIndexForEntityFragments.java | 14 -- src/lcn/Main.java | 3 - src/log/QueryLogger.java | 6 - src/nlp/tool/CoreNLP.java | 1 - src/qa/GAnswer.java | 102 +----------- src/qa/Globals.java | 17 +- src/qa/extract/EntityRecognition.java | 66 +------- src/qa/extract/ExtractImplicitRelation.java | 13 -- src/qa/extract/ExtractRelation.java | 1 - src/qa/extract/TypeRecognition.java | 15 -- src/qa/mapping/CompatibilityChecker.java | 1 - src/qa/mapping/DBpediaLookup.java | 3 +- src/qa/mapping/EntityFragmentDict.java | 2 - src/qa/parsing/BuildQueryGraph.java | 250 +--------------------------- src/qa/parsing/QuestionParsing.java | 2 - src/rdf/SemanticQueryGraph.java | 1 - src/rdf/SimpleRelation.java | 2 +- src/rdf/Sparql.java | 73 +------- src/utils/HttpRequest.java | 114 ------------- 24 files changed, 28 insertions(+), 712 deletions(-) delete mode 100644 src/utils/HttpRequest.java diff --git a/src/addition/AddtionalFix.java b/src/addition/AddtionalFix.java index e7235cb..7c82982 100644 --- a/src/addition/AddtionalFix.java +++ b/src/addition/AddtionalFix.java @@ -5,19 +5,13 @@ import java.util.HashMap; import paradict.PredicateIDAndSupport; import log.QueryLogger; -//import nlp.ds.DependencyTree; -//import nlp.ds.DependencyTreeNode; import nlp.ds.Word; import nlp.ds.Sentence.SentenceType; import qa.Globals; -//import qa.extract.TypeRecognition; -//import qa.mapping.SemanticItemMapping; -//import rdf.EntityMapping; import rdf.SemanticUnit; import rdf.Sparql; import rdf.Sparql.QueryType; import rdf.Triple; -//import fgmt.TypeFragment; public class AddtionalFix diff --git a/src/addition/AggregationRecognition.java b/src/addition/AggregationRecognition.java index bb3d92e..07ab422 100644 --- a/src/addition/AggregationRecognition.java +++ b/src/addition/AggregationRecognition.java @@ -4,7 +4,6 @@ import nlp.ds.DependencyTree; import nlp.ds.DependencyTreeNode; import nlp.ds.Word; import qa.Globals; -import rdf.SemanticRelation; import rdf.Sparql; import rdf.Triple; import log.QueryLogger; @@ -33,7 +32,6 @@ public class AggregationRecognition { case 1: return b; case 2: // Words need to be translated into numbers - boolean flag1=true; for(i=0;i<8;i++) // 20~99 { for(j=0;j<10;j++) diff --git a/src/application/GinfoHandler.java b/src/application/GinfoHandler.java index c61d00d..ac4e36b 100644 --- a/src/application/GinfoHandler.java +++ b/src/application/GinfoHandler.java @@ -37,7 +37,6 @@ public class GinfoHandler extends AbstractHandler{ JSONObject infoobj = new JSONObject(); infoobj.put("version", Globals.Version); - infoobj.put("dictionary", Globals.DictionaryPath); infoobj.put("dataset", Globals.Dataset); infoobj.put("GDB system", Globals.GDBsystem); diff --git a/src/fgmt/EntityFragment.java b/src/fgmt/EntityFragment.java index 3175c1e..41997a4 100644 --- a/src/fgmt/EntityFragment.java +++ b/src/fgmt/EntityFragment.java @@ -254,17 +254,6 @@ public class EntityFragment extends Fragment { } } } - - //TODO: fix data for DBpedia 2014 (should be eliminated when update dataset) - if(eid==2640237) //Barack_Obama - { - inEdges.add(8432); //spouse - outEdges.add(8432); - ArrayList outEdgeList = new ArrayList(); - outEdgeList.add(8432); - inEntMap.put(4953443, outEdgeList); - outEntMap.put(4953443, outEdgeList); - } } @Override diff --git a/src/jgsc/GstoreConnector.java b/src/jgsc/GstoreConnector.java index 960fd07..5b50c97 100644 --- a/src/jgsc/GstoreConnector.java +++ b/src/jgsc/GstoreConnector.java @@ -2,9 +2,7 @@ package jgsc; import java.io.*; import java.net.*; -import java.lang.*; import java.net.URLEncoder; -import java.net.URLDecoder; import java.io.UnsupportedEncodingException; import java.util.List; import java.util.Map; @@ -53,39 +51,25 @@ public class GstoreConnector { String urlNameString = url + "/" + param; System.out.println("request: "+urlNameString); URL realUrl = new URL(urlNameString); - // 閹垫挸绱戦崪瀛禦L娑斿妫块惃鍕箾閹猴拷 URLConnection connection = realUrl.openConnection(); - // 鐠佸墽鐤嗛柅姘辨暏閻ㄥ嫯顕Ч鍌氱潣閹拷 connection.setRequestProperty("accept", "*/*"); connection.setRequestProperty("connection", "Keep-Alive"); //set agent to avoid: speed limited by server if server think the client not a browser connection.setRequestProperty("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)"); - // 瀵よ櫣鐝涚�圭偤妾惃鍕箾閹猴拷 connection.connect(); long t0 = System.currentTimeMillis(); //ms - // 閼惧嘲褰囬幍锟介張澶婃惙鎼存柨銇旂�涙顔� Map> map = connection.getHeaderFields(); - // 闁秴宸婚幍锟介張澶屾畱閸濆秴绨叉径鏉戠摟濞堬拷 - //for (String key : map.keySet()) { - // System.out.println(key + "--->" + map.get(key)); - //} long t1 = System.currentTimeMillis(); //ms - //System.out.println("Time to get header: "+(t1 - t0)+" ms"); - //System.out.println("============================================"); - - // 鐎规矮绠� BufferedReader鏉堟挸鍙嗗ù浣规降鐠囪褰嘦RL閻ㄥ嫬鎼锋惔锟� in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8")); String line; while ((line = in.readLine()) != null) { //PERFORMANCE: this can be very costly if result is very large, because many temporary Strings are produced //In this case, just print the line directly will be much faster result.append(line+"\n"); - //System.out.println("get data size: " + line.length()); - //System.out.println(line); } long t2 = System.currentTimeMillis(); //ms @@ -94,7 +78,6 @@ public class GstoreConnector { System.out.println("error in get request: " + e); e.printStackTrace(); } - // 娴h法鏁inally閸ф娼甸崗鎶芥4鏉堟挸鍙嗗ù锟� finally { try { if (in != null) { @@ -132,29 +115,20 @@ public class GstoreConnector { String urlNameString = url + "/" + param; System.out.println("request: "+urlNameString); URL realUrl = new URL(urlNameString); - // 閹垫挸绱戦崪瀛禦L娑斿妫块惃鍕箾閹猴拷 URLConnection connection = realUrl.openConnection(); - // 鐠佸墽鐤嗛柅姘辨暏閻ㄥ嫯顕Ч鍌氱潣閹拷 connection.setRequestProperty("accept", "*/*"); connection.setRequestProperty("connection", "Keep-Alive"); //set agent to avoid: speed limited by server if server think the client not a browser connection.setRequestProperty("user-agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)"); - // 瀵よ櫣鐝涚�圭偤妾惃鍕箾閹猴拷 connection.connect(); long t0 = System.currentTimeMillis(); //ms - // 閼惧嘲褰囬幍锟介張澶婃惙鎼存柨銇旂�涙顔� Map> map = connection.getHeaderFields(); - // 闁秴宸婚幍锟介張澶屾畱閸濆秴绨叉径鏉戠摟濞堬拷 - //for (String key : map.keySet()) { - // System.out.println(key + "--->" + map.get(key)); - //} long t1 = System.currentTimeMillis(); // ms //System.out.println("Time to get header: "+(t1 - t0)+" ms"); - // 鐎规矮绠� BufferedReader鏉堟挸鍙嗗ù浣规降鐠囪褰嘦RL閻ㄥ嫬鎼锋惔锟� in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8")); char chars[] = new char[2048]; int b; @@ -170,7 +144,6 @@ public class GstoreConnector { //System.out.println("error in get request: " + e); e.printStackTrace(); } - // 娴h法鏁inally閸ф娼甸崗鎶芥4鏉堟挸鍙嗗ù锟� finally { try { if (in != null) { @@ -477,13 +450,6 @@ public class GstoreConnector { //System.out.println(flag); String answer = gc.query("root", "123456", "dbpedia16", sparql); System.out.println(answer); - - //To count the time cost - //long startTime=System.nanoTime(); //ns - //long startTime=System.currentTimeMillis(); //ms - //doSomeThing(); //濞村鐦惃鍕敩閻焦顔� - //long endTime=System.currentTimeMillis(); //閼惧嘲褰囩紒鎾存将閺冨爼妫� - //System.out.println("缁嬪绨潻鎰攽閺冨爼妫块敍锟� "+(end-start)+"ms"); } } diff --git a/src/lcn/BuildIndexForEntityFragments.java b/src/lcn/BuildIndexForEntityFragments.java index e96e71e..0a2ec16 100644 --- a/src/lcn/BuildIndexForEntityFragments.java +++ b/src/lcn/BuildIndexForEntityFragments.java @@ -3,7 +3,6 @@ package lcn; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; -//import java.io.IOException; import java.io.InputStreamReader; import java.util.Date; @@ -15,20 +14,7 @@ import org.apache.lucene.index.IndexWriter; import qa.Globals; -//import qa.Globals; -/** - * Lucene建立索引的基本单元是document,同时其中的域filed可以根据需要自己添加 - * - * Document是一个记录,用来表示一个条目,相当于数据库中的一行记录,就是搜索建立的倒排索引的条目。 - * eg:你要搜索自己电脑上的文件,这个时候就可以创建field(字段,相关于数据库中的列。 然后用field组合成document,最后会变成若干文件。 - * 这个document和文件系统document不是一个概念。 - * - * StandardAnalyzer是lucene中内置的"标准分析器",可以做如下功能: - * 1、对原有句子按照空格进行了分词 - * 2、所有的大写字母都可以能转换为小写的字母 - * 3、可以去掉一些没有用处的单词,例如"is","the","are"等单词,也删除了所有的标点 - */ public class BuildIndexForEntityFragments{ public void indexforentity() throws Exception { diff --git a/src/lcn/Main.java b/src/lcn/Main.java index 2b5850b..9a5b24a 100644 --- a/src/lcn/Main.java +++ b/src/lcn/Main.java @@ -1,11 +1,8 @@ package lcn; -//import java.io.IOException; -//import java.util.ArrayList; import java.util.ArrayList; import java.util.Scanner; -import fgmt.EntityFragment; import qa.Globals; import qa.mapping.EntityFragmentDict; diff --git a/src/log/QueryLogger.java b/src/log/QueryLogger.java index 901ff7b..454c37d 100644 --- a/src/log/QueryLogger.java +++ b/src/log/QueryLogger.java @@ -1,10 +1,5 @@ package log; -//import java.io.File; -//import java.io.FileNotFoundException; -//import java.io.FileOutputStream; -//import java.io.OutputStreamWriter; -//import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -12,7 +7,6 @@ import java.util.HashSet; import javax.servlet.http.HttpServletRequest; -//import qa.Globals; import qa.Matches; import qa.Query; import rdf.EntityMapping; diff --git a/src/nlp/tool/CoreNLP.java b/src/nlp/tool/CoreNLP.java index ae8b355..3905fda 100644 --- a/src/nlp/tool/CoreNLP.java +++ b/src/nlp/tool/CoreNLP.java @@ -47,7 +47,6 @@ public class CoreNLP { // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types - // �������о��� List sentences = document.get(SentencesAnnotation.class); int count = 0; diff --git a/src/qa/GAnswer.java b/src/qa/GAnswer.java index 1f25e63..564b7b9 100644 --- a/src/qa/GAnswer.java +++ b/src/qa/GAnswer.java @@ -1,10 +1,7 @@ package qa; -import java.io.*; -import java.net.Socket; import java.util.ArrayList; import java.util.Collections; -import java.util.HashSet; import java.util.List; import jgsc.GstoreConnector; @@ -73,7 +70,6 @@ public class GAnswer { t = System.currentTimeMillis(); BuildQueryGraph step2 = new BuildQueryGraph(); step2.process(qlog); -// step2.processEXP(qlog); qlog.timeTable.put("step2", (int)(System.currentTimeMillis()-t)); // step 3: some fix (such as "one-node" or "ask-one-triple") and aggregation @@ -156,97 +152,10 @@ public class GAnswer { return spq; } - - - /** - * Get answers from Virtuoso + DBpedia, this function require OLD version Virtuoso + Virtuoso Handler. - * Virtuoso can solve "Aggregation" - **/ -// public Matches getAnswerFromVirtuoso (QueryLogger qlog, Sparql spq) -// { -// Matches ret = new Matches(); -// try -// { -// Socket socket = new Socket(Globals.QueryEngineIP, 1112); -// DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(socket.getOutputStream())); -// -// //formatting SPARQL & evaluate -// String formatedSpq = spq.toStringForVirtuoso(); -// dos.writeUTF(formatedSpq); -// dos.flush(); -// System.out.println("STD SPARQL:\n"+formatedSpq+"\n"); -// -// ArrayList rawLines = new ArrayList(); -// DataInputStream dis = new DataInputStream(new BufferedInputStream(socket.getInputStream())); -// while (true) -// { -// String line = dis.readUTF(); -// if (line.equals("[[finish]]")) break; -// rawLines.add(line); -// } -// -// // ASK query was translated to SELECT query, whose answer need translation. -// // It is no need to translate, use "ASK WHERE" directly ! 2018-12-11 -// if(qlog.s.sentenceType == SentenceType.GeneralQuestion) -// { -// ret.answersNum = 1; -// ret.answers = new String[1][1]; -// if(rawLines.size() == 0) -// { -// ret.answers[0][0] = "general:false"; -// } -// else -// { -// ret.answers[0][0] = "general:true"; -// } -// System.out.println("general question answer:" + ret.answers[0][0]); -// dos.close(); -// dis.close(); -// socket.close(); -// return ret; -// } -// -// //select but no results -// if (rawLines.size() == 0) -// { -// ret.answersNum = 0; -// dos.close(); -// dis.close(); -// socket.close(); -// return ret; -// } -// -// int ansNum = rawLines.size(); -// int varNum = variables.size(); -// ArrayList valist = new ArrayList(variables); -// ret.answers = new String[ansNum][varNum]; -// -// System.out.println("ansNum=" + ansNum); -// System.out.println("varNum=" + varNum); -// for (int i=0;i0 && curSpq.questionFocus!=null) + if(curSpq.tripleList.size()>0 && curSpq.questionFocus!=null) { -// if(ga.isBGP(qlog, curSpq)) - m = ga.getAnswerFromGStore2(curSpq); -// else -// m = ga.getAnswerFromVirtuoso(qlog, curSpq); + m = ga.getAnswerFromGStore2(curSpq); } - if (m != null && m.answers != null) + if(m != null && m.answers != null) { // Found results using current SPQ, then we can break and print result. qlog.sparql = curSpq; diff --git a/src/qa/Globals.java b/src/qa/Globals.java index 70dfdea..dbca9bc 100644 --- a/src/qa/Globals.java +++ b/src/qa/Globals.java @@ -28,7 +28,6 @@ public class Globals { public static DBpediaLookup dblk; public static int MaxAnswerNum = 100; public static String Dataset = "dbpedia 2016"; - public static String DictionaryPath = "default"; public static String Version = "0.1.2"; public static String GDBsystem = "gStore v0.7.2"; @@ -38,24 +37,14 @@ public class Globals { * 2. super SQG, allow CIRCLE and WRONG edge. The structure is decided by DS tree, and can be changed in query evaluation(TOP-K match) stage. * */ public static int evaluationMethod = 2; - public static boolean isRunAsWebServer = false; // Run Local: false; Run Server: true - public static String runningBenchmark = "QALD"; // WQ:WebQuestions; WQSP:WebQuestionsSP; CQ:ComplexQuestions - // using different method and Freebase Version (in Virtuoso.java) - public static boolean usingOperationCondition = false; // only for EXP: try state transition operations only when condition are satisfied. - - public static String localPath = "/media/wip/husen/NBgAnswer/"; - public static String QueryEngineIP = "127.0.0.1"; // Notice, PORT number is in the evaluation function. + public static String localPath = "./././"; + public static String QueryEngineIP = "172.31.222.90"; // Notice, PORT number is in the evaluation function. + public static int QueryEnginePort = 9001; public static void init () { System.out.println("====== gAnswer2.0 over DBpedia ======"); - - if(isRunAsWebServer == false) - { - localPath = "D:/husen/gAnswer/"; - QueryEngineIP = "172.31.222.72"; - } long t1, t2, t3, t4, t5, t6, t7, t8, t9; diff --git a/src/qa/extract/EntityRecognition.java b/src/qa/extract/EntityRecognition.java index 0901d06..bad4ac9 100644 --- a/src/qa/extract/EntityRecognition.java +++ b/src/qa/extract/EntityRecognition.java @@ -1,21 +1,14 @@ package qa.extract; import java.io.BufferedReader; -//import java.io.File; -//import java.io.FileInputStream; -//import java.io.FileNotFoundException; -//import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; -//import java.io.OutputStreamWriter; -//import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; -import lcn.EntityFragmentFields; import fgmt.EntityFragment; import nlp.ds.Word; import qa.Globals; @@ -38,7 +31,6 @@ public class EntityRecognition { double TypeAcceptedScore = 0.5; double AcceptedDiffScore = 1; - public HashMap m2e = null; public ArrayList mWordList = null; public ArrayList stopEntList = null; public ArrayList badTagListForEntAndType = null; @@ -65,12 +57,6 @@ public class EntityRecognition { badTagListForEntAndType.add("VBP"); badTagListForEntAndType.add("POS"); - // !Handwriting entity linking; (lower case) - m2e = new HashMap(); - m2e.put("bipolar_syndrome", "Bipolar_disorder"); - m2e.put("battle_in_1836_in_san_antonio", "Battle_of_San_Jacinto"); - m2e.put("federal_minister_of_finance_in_germany", "Federal_Ministry_of_Finance_(Germany)"); - // Additional fix for CATEGORY (in DBpedia) af = new AddtionalFix(); tr = new TypeRecognition(); @@ -156,7 +142,7 @@ public class EntityRecognition { allCnt++; /* - * Filters to save time and drop some bad cases. + * Filters to speed up and drop some bad cases. */ boolean entOmit = false, typeOmit = false; int prep_cnt=0; @@ -446,8 +432,8 @@ public class EntityRecognition { if(likelyEnt.equals(lowerOriginalWord)) score *= len; // !Award: COVER (eg, Robert Kennedy: [Robert] [Kennedy] [Robert Kennedy]) - //像Social_Democratic_Party,这三个word任意组合都是ent,导致方案太多;相比较“冲突选哪个”,“连or不应该连”显得更重要(而且实际错误多为连或不连的错误),所以这里直接抛弃被覆盖的小ent - //像Abraham_Lincoln,在“不连接”的方案中,会把他们识别成两个node,最后得分超过了正确答案的得分;故对于这种词设置为必选 + //e.g, Social_Democratic_Party -> all ents -> drop the overlapped smaller ones + //e.g, Abraham_Lincoln -> select the whole word if(len>1) { boolean[] flag = new boolean[words.length+1]; @@ -473,8 +459,6 @@ public class EntityRecognition { // WHOLE match || HIGH match & HIGH upper || WHOLE upper if(hitCnt == len || ((double)hitCnt/(double)len > 0.6 && (double)UpperWordCnt/(double)len > 0.6) || UpperWordCnt == len || len>=4) { - //如中间有逗号,则要求两边的词都在mapping的entity中出现 - //例如 Melbourne_,_Florida: Melbourne, Florida 是必须选的,而 California_,_USA: Malibu, California,认为不一定正确 boolean commaTotalRight = true; if(originalWord.contains(",")) { @@ -741,19 +725,10 @@ public class EntityRecognition { String n = entity; ArrayList ret= new ArrayList(); - //1. Handwriting - if(m2e.containsKey(entity)) - { - String eName = m2e.get(entity); - EntityMapping em = new EntityMapping(EntityFragmentFields.entityName2Id.get(eName), eName, 1000); - ret.add(em); - return ret; //handwriting is always correct - } - - //2. Lucene index + //1. Lucene index ret.addAll(EntityFragment.getEntityMappingList(n)); - //3. DBpedia Lookup (some cases) + //2. DBpedia Lookup (some cases) if (useDblk) { ret.addAll(Globals.dblk.getEntityMappings(n, null)); @@ -880,36 +855,7 @@ public class EntityRecognition { er.process(question); } - -// File inputFile = new File("D:\\husen\\gAnswer\\data\\test\\test_in.txt"); -// File outputFile = new File("D:\\husen\\gAnswer\\data\\test\\test_out.txt"); -// BufferedReader fr = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile),"utf-8")); -// OutputStreamWriter fw = new OutputStreamWriter(new FileOutputStream(outputFile,true),"utf-8"); -// -// String input; -// while((input=fr.readLine())!=null) -// { -// String[] strArray = input.split("\t"); -// String id = ""; -// String question = strArray[0]; -// if(strArray.length>1) -// { -// question = strArray[1]; -// id = strArray[0]; -// } -// //Notice "?" may leads lucene/dbpedia lookup error -// if(question.length()>1 && question.charAt(question.length()-1)=='.' || question.charAt(question.length()-1)=='?') -// question = question.substring(0,question.length()-1); -// if(question.isEmpty()) -// continue; -// er.process(question); -// fw.write("Id: "+id+"\nQuery: "+question+"\n"); -// fw.write(er.preLog+"\n"); -// } -// -// fr.close(); -// fw.close(); - + } catch (IOException e) { e.printStackTrace(); } diff --git a/src/qa/extract/ExtractImplicitRelation.java b/src/qa/extract/ExtractImplicitRelation.java index 598788a..80a4900 100644 --- a/src/qa/extract/ExtractImplicitRelation.java +++ b/src/qa/extract/ExtractImplicitRelation.java @@ -1,7 +1,6 @@ package qa.extract; import java.io.BufferedReader; -//import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collections; @@ -26,7 +25,6 @@ public class ExtractImplicitRelation { static final int SamplingNumber = 100; // the maximum sampling number in calculation static final int k = 3; // select top-k when many suitable relations; select top-k entities for a word - public HashMap implicitEntRel = new HashMap(); /* * Implicit Relations: @@ -39,9 +37,6 @@ public class ExtractImplicitRelation { * */ public ExtractImplicitRelation() { - //orignal word to lower case - implicitEntRel.put("american", Globals.pd.predicate_2_id.get("country")); - implicitEntRel.put("united_states", Globals.pd.predicate_2_id.get("country")); } // Notice, it is usually UNNECESSARY for two constant, so we unimplemented this function. @@ -96,14 +91,6 @@ public class ExtractImplicitRelation { String eName = word.emList.get(i).entityName; irList = getPrefferdPidListBetween_Entity_TypeVariable(eId, tId); - // !Handwriting implicit relations - if(irList != null && implicitEntRel.containsKey(word.originalForm.toLowerCase())) - { - int pId = implicitEntRel.get(word.originalForm.toLowerCase()); - ImplicitRelation ir = new ImplicitRelation(tId, eId, pId, 1000); - irList.add(0, ir); - } - if(irList!=null && irList.size()>0) { ImplicitRelation ir = irList.get(0); diff --git a/src/qa/extract/ExtractRelation.java b/src/qa/extract/ExtractRelation.java index a04f3d3..fda99ae 100644 --- a/src/qa/extract/ExtractRelation.java +++ b/src/qa/extract/ExtractRelation.java @@ -10,7 +10,6 @@ import java.util.Queue; import log.QueryLogger; import nlp.ds.DependencyTree; import nlp.ds.DependencyTreeNode; -//import nlp.ds.Word; import paradict.ParaphraseDictionary; import qa.Globals; import rdf.SimpleRelation; diff --git a/src/qa/extract/TypeRecognition.java b/src/qa/extract/TypeRecognition.java index 18f4496..b9bc1bb 100644 --- a/src/qa/extract/TypeRecognition.java +++ b/src/qa/extract/TypeRecognition.java @@ -8,7 +8,6 @@ import java.util.HashMap; import nlp.ds.Word; import nlp.tool.StopWordsList; -//import fgmt.RelationFragment; import fgmt.TypeFragment; import lcn.SearchInTypeShortName; import log.QueryLogger; @@ -44,7 +43,6 @@ public class TypeRecognition { { extendTypeMap = new HashMap(); extendVariableMap = new HashMap(); - Triple triple = null; //!Handwriting for convenience | TODO: approximate/semantic match of type extendTypeMap.put("NonprofitOrganizations", "dbo:Non-ProfitOrganisation"); @@ -55,19 +53,6 @@ public class TypeRecognition { extendTypeMap.put("USStates", "yago:StatesOfTheUnitedStates"); extendTypeMap.put("Europe", "yago:EuropeanCountries"); extendTypeMap.put("Africa", "yago:AfricanCountries"); - - //!The following IDs are based on DBpedia 2014. - //!extend variable (embedded triples) | eg, [?E|surfers]-?uri dbo:occupation res:Surfing | canadians�� - //1) | [country people] [country] - triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 1639, 2112902, "Canada", null, 100); - extendVariableMap.put("canadian", triple); - triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 1639, 883747, "Germany", null, 100); - extendVariableMap.put("german", triple); - //2) ?bandleader - triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 6690, 5436853, "Bandleader", null, 100); - extendVariableMap.put("bandleader", triple); - triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 6690, 5436854, "Surfing>", null, 100); - extendVariableMap.put("surfer", triple); } public static void recognizeExtendVariable(Word w) diff --git a/src/qa/mapping/CompatibilityChecker.java b/src/qa/mapping/CompatibilityChecker.java index ef4d974..c873554 100644 --- a/src/qa/mapping/CompatibilityChecker.java +++ b/src/qa/mapping/CompatibilityChecker.java @@ -1,7 +1,6 @@ package qa.mapping; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; diff --git a/src/qa/mapping/DBpediaLookup.java b/src/qa/mapping/DBpediaLookup.java index 54e027b..bc5225f 100644 --- a/src/qa/mapping/DBpediaLookup.java +++ b/src/qa/mapping/DBpediaLookup.java @@ -19,8 +19,7 @@ import rdf.EntityMapping; public class DBpediaLookup { //There are two websites of the DBpediaLookup online service. //public static final String baseURL = "http://en.wikipedia.org/w/api.php?action=opensearch&format=xml&limit=10&search="; - //public static final String baseURL = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString="; - public static final String baseURL = "http://172.31.222.72:1234/api/search/KeywordSearch?MaxHits=5&QueryString="; + public static final String baseURL = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString="; public HttpClient ctripHttpClient = null; diff --git a/src/qa/mapping/EntityFragmentDict.java b/src/qa/mapping/EntityFragmentDict.java index 302912f..c27649d 100644 --- a/src/qa/mapping/EntityFragmentDict.java +++ b/src/qa/mapping/EntityFragmentDict.java @@ -2,8 +2,6 @@ package qa.mapping; import java.util.HashMap; -//import lcn.EntityFragmentFields; -//import qa.Globals; import fgmt.EntityFragment; public class EntityFragmentDict { diff --git a/src/qa/parsing/BuildQueryGraph.java b/src/qa/parsing/BuildQueryGraph.java index c3cf7f7..f4758f1 100644 --- a/src/qa/parsing/BuildQueryGraph.java +++ b/src/qa/parsing/BuildQueryGraph.java @@ -1,11 +1,9 @@ package qa.parsing; -//import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; -import java.util.PriorityQueue; import java.util.Queue; import fgmt.EntityFragment; @@ -17,16 +15,15 @@ import qa.Globals; import qa.extract.*; import qa.mapping.SemanticItemMapping; import rdf.PredicateMapping; -import rdf.SemanticQueryGraph; import rdf.Triple; import rdf.SemanticRelation; import rdf.SimpleRelation; import rdf.SemanticUnit; -//import paradict.ParaphraseDictionary; -/* - * The core class to build query graph, i.e, to generate SPARQL queries. - * */ +/** + * Core class to build query graph, i.e, to generate SPARQL queries. + * @author husen + */ public class BuildQueryGraph { public ArrayList semanticUnitList = new ArrayList(); @@ -48,7 +45,8 @@ public class BuildQueryGraph whList.add("how"); whList.add("where"); - // Bad words for NODE. (base form) + // Bad words for NODE. (base form) + // We will train a node recognition model to replace such heuristic rules further. stopNodeList.add("list"); stopNodeList.add("give"); stopNodeList.add("show"); @@ -92,7 +90,7 @@ public class BuildQueryGraph { stopNodeList.add("area"); } - //how much is the total population of european union? + //how much is the total population of european union? if(qStr.contains("how much")) { stopNodeList.add("population"); @@ -202,7 +200,7 @@ public class BuildQueryGraph continue; } - //Notice, the following codes guarantee all possible edges (allow CIRCLE). + //Notice, the following code guarantee all possible edges (allow CIRCLE). //Otherwise, NO CIRCLE, and the structure may be different by changing target. if(Globals.evaluationMethod > 1) { @@ -279,209 +277,6 @@ public class BuildQueryGraph return semanticUnitList; } - /* - * For experiment. - */ - public ArrayList getNodeList(QueryLogger qlog, DependencyTree ds) - { - semanticUnitList = new ArrayList(); - - // For ComplexQuestions or WebQuestions, only consider wh-word and at most two entities. - if(Globals.runningBenchmark.equals("CQ") || Globals.runningBenchmark.equals("WQ")) - { -// DependencyTreeNode target = ds.nodesList.get(0); -// if(Globals.runningBenchmark.equals("CQ")) -// target = detectTargetForCQ(ds, qlog); -// qlog.target = target.word; -// qlog.SQGlog += "++++ Target detect: "+target+"\n"; -// -// detectTopicConstraint(qlog); -// semanticUnitList.add(new SemanticUnit(qlog.target, false)); //Set variable to object -// if(topicEnt != null) -// { -// semanticUnitList.add(new SemanticUnit(topicEnt, true)); //Set entity to subject -// } -// if(constraintEnt != null) -// { -// semanticUnitList.add(new SemanticUnit(constraintEnt, true)); //Set entity to subject -// } - } - // For general cases (e.g, QALD), consider internal variables. - else - { - for(DependencyTreeNode dtn: ds.nodesList) - { - if(isNodeWoCorefRe(dtn)) // ! Omit the coreference resolution rules ! - { - semanticUnitList.add(new SemanticUnit(dtn.word, true)); //No prefer subject (default is true) - } - } - } - return semanticUnitList; - } - - /* - * (For Experiment) Build query graph using STATE TRANSITION method based on 4 operations (with 4 conditions). - * 1. Condition for Connect operation: do and must do | no other nodes on simple path in DS tree. - * 2. Condition for Merge operation: do and must do | heuristic rules of CoReference Resolution. - * 3. Condition for Fold operation: do or not do | no matches of low confidence of an edge. - * 4. Condition for Expand operation: do and must do | has corresponding information. - * */ - public ArrayList processEXP(QueryLogger qlog) - { - //0) Fix stop words - DependencyTree ds = qlog.s.dependencyTreeStanford; - if(qlog.isMaltParserUsed) - ds = qlog.s.dependencyTreeMalt; - fixStopWord(qlog, ds); - - //1) Detect Modifier/Modified - //rely on sentence (rather than dependency tree) - //with some ADJUSTMENT (eg, ent+noun(noType&&noEnt) -> noun.omitNode=TRUE) - for(Word word: qlog.s.words) - getTheModifiedWordBySentence(qlog.s, word); //Find continuous modifier - for(Word word: qlog.s.words) - getDiscreteModifiedWordBySentence(qlog.s, word); //Find discrete modifier - for(Word word: qlog.s.words) - if(word.modifiedWord == null) //Other words modify themselves. NOTICE: only can be called after detecting all modifier. - word.modifiedWord = word; - - //print log - for(Word word: qlog.s.words) - { - if(word.modifiedWord != null && word.modifiedWord != word) - { - modifierList.add(word); - qlog.SQGlog += "++++ Modify detect: "+word+" --> " + word.modifiedWord + "\n"; - } - } - - //2) Detect target & 3) Coreference resolution - DependencyTreeNode target = detectTarget(ds,qlog); - qlog.SQGlog += "++++ Target detect: "+target+"\n"; - - if(target == null) - return null; - - qlog.target = target.word; - // !target can NOT be entity. (except general question)| which [city] has most people? - if(qlog.s.sentenceType != SentenceType.GeneralQuestion && target.word.emList!=null) - { - //Counter example:Give me all Seven_Wonders_of_the_Ancient_World | (in fact, it not ENT, but CATEGORY, ?x subject Seve...) - target.word.mayEnt = false; - target.word.emList.clear(); - } - - try - { - // step1: get node list - semanticUnitList = getNodeList(qlog, ds); - if(semanticUnitList == null || semanticUnitList.isEmpty()) - { - qlog.SQGlog += "ERROR: no nodes found."; - return null; - } - - // step2: extract all potential relations - long t = System.currentTimeMillis(); - System.out.println("Potential Relation Extraction start ..."); - extractPotentialSemanticRelations(semanticUnitList, qlog); - qlog.timeTable.put("BQG_relation", (int)(System.currentTimeMillis()-t)); - - // setp3: build query graph structure by 4 operations - t = System.currentTimeMillis(); - SemanticQueryGraph bestSQG = null; - if(Globals.usingOperationCondition) - { - //TODO: use operation condition - } - else - { - // for experiment, do not use conditions. - PriorityQueue QGs = new PriorityQueue(); - HashSet visited = new HashSet<>(); - //Initial state: all nodes isolated. - SemanticQueryGraph head = new SemanticQueryGraph(semanticUnitList); - QGs.add(head); - - while(!QGs.isEmpty()) - { - head = QGs.poll(); - visited.add(head); - - //Judge: is it a final state? - if(head.isFinalState()) - { - bestSQG = head; - break; // now we just find the top-1 SQG - } - - //SQG generation - //Connect (enumerate) - for(SemanticUnit u: head.semanticUnitList) - for(SemanticUnit v: head.semanticUnitList) - if(!u.equals(v) && !u.neighborUnitList.contains(v) && !v.neighborUnitList.contains(u)) - { - SemanticQueryGraph tail = new SemanticQueryGraph(head); - tail.connect(u, v); - if(!QGs.contains(tail) && !visited.contains(tail)) - { - tail.calculateScore(qlog.potentialSemanticRelations); - QGs.add(tail); - } - } - - //Merge (coref resolution) - if(head.semanticUnitList.size() > 2) - for(SemanticUnit u: head.semanticUnitList) - for(SemanticUnit v: head.semanticUnitList) - if(!u.equals(v) && (!u.neighborUnitList.contains(v) && !v.neighborUnitList.contains(u)) || (u.neighborUnitList.contains(v) && v.neighborUnitList.contains(u))) - { - SemanticQueryGraph tail = new SemanticQueryGraph(head); - tail.merge(u, v); - if(!QGs.contains(tail) && !visited.contains(tail)) - { - tail.calculateScore(qlog.potentialSemanticRelations); - QGs.add(tail); - } - } - } - } - qlog.timeTable.put("BQG_structure", (int)(System.currentTimeMillis()-t)); - - //Relation Extraction by potentialSR - qlog.semanticUnitList = new ArrayList(); - qlog.semanticRelations = bestSQG.semanticRelations; - semanticUnitList = bestSQG.semanticUnitList; - matchRelation(semanticUnitList, qlog); - - //Prepare for item mapping - TypeRecognition.AddTypesOfWhwords(qlog.semanticRelations); // Type supplementary - TypeRecognition.constantVariableRecognition(qlog.semanticRelations, qlog); // Constant or Variable, embedded triples - - //(just for display) - recordOriginalTriples(semanticUnitList, qlog); - - //step3: item mapping & top-k join - t = System.currentTimeMillis(); - SemanticItemMapping step5 = new SemanticItemMapping(); - step5.process(qlog, qlog.semanticRelations); //top-k join (generate SPARQL queries), disambiguation - qlog.timeTable.put("BQG_topkjoin", (int)(System.currentTimeMillis()-t)); - - //step6: implicit relation [modify word] - t = System.currentTimeMillis(); - ExtractImplicitRelation step6 = new ExtractImplicitRelation(); - step6.supplementTriplesByModifyWord(qlog); - qlog.timeTable.put("BQG_implicit", (int)(System.currentTimeMillis()-t)); - - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - return semanticUnitList; - } - public void extractPotentialSemanticRelations(ArrayList semanticUnitList, QueryLogger qlog) { ExtractRelation er = new ExtractRelation(); @@ -771,35 +566,6 @@ public class BuildQueryGraph return false; } - /* - * Judge nodes strictly. - * For EXP, do not use COREF resolution rules. - * */ - public boolean isNodeWoCorefRe(DependencyTreeNode cur) - { - if(stopNodeList.contains(cur.word.baseForm)) - return false; - - if(cur.word.omitNode) - return false; - - // Modifier can NOT be node (They may be added in query graph in the end) e.g., Queen Elizabeth II,Queen(modifier) - if(modifierList.contains(cur.word)) - return false; - - // NOUN - if(cur.word.posTag.startsWith("N")) - return true; - - // Wh-word - if(whList.contains(cur.word.baseForm)) - return true; - - if(cur.word.mayEnt || cur.word.mayType || cur.word.mayCategory) - return true; - return false; - } - public DependencyTreeNode detectTarget(DependencyTree ds, QueryLogger qlog) { visited.clear(); diff --git a/src/qa/parsing/QuestionParsing.java b/src/qa/parsing/QuestionParsing.java index ddc0cfd..d9b86b1 100644 --- a/src/qa/parsing/QuestionParsing.java +++ b/src/qa/parsing/QuestionParsing.java @@ -1,7 +1,5 @@ package qa.parsing; -import org.maltparser.core.exception.MaltChainedException; - import log.QueryLogger; import nlp.ds.DependencyTree; import nlp.ds.DependencyTreeNode; diff --git a/src/rdf/SemanticQueryGraph.java b/src/rdf/SemanticQueryGraph.java index de95df1..9ec2f76 100644 --- a/src/rdf/SemanticQueryGraph.java +++ b/src/rdf/SemanticQueryGraph.java @@ -4,7 +4,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; -import qa.Globals; import nlp.ds.Word; public class SemanticQueryGraph implements Comparable diff --git a/src/rdf/SimpleRelation.java b/src/rdf/SimpleRelation.java index a3b5334..98a79dd 100644 --- a/src/rdf/SimpleRelation.java +++ b/src/rdf/SimpleRelation.java @@ -65,7 +65,7 @@ public class SimpleRelation { } sumSelectivity = matchingScore*sumSelectivity*pidsup.support; int pid = pidsup.predicateID; - if (Globals.pd.dbo_predicate_id.contains(pid)) sumSelectivity *= 1.5; //����dbo�е�predicate //pid ���ܲ��� dbo �У� + if (Globals.pd.dbo_predicate_id.contains(pid)) sumSelectivity *= 1.5; if (!pasList.containsKey(pid)) pasList.put(pid, sumSelectivity); diff --git a/src/rdf/Sparql.java b/src/rdf/Sparql.java index a139e8e..ab84429 100644 --- a/src/rdf/Sparql.java +++ b/src/rdf/Sparql.java @@ -2,13 +2,9 @@ package rdf; import java.util.ArrayList; import java.util.Collections; -//import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import log.QueryLogger; -import nlp.ds.Sentence; -import nlp.ds.Sentence.SentenceType; import qa.Globals; public class Sparql implements Comparable @@ -137,21 +133,21 @@ public class Sparql implements Comparable } // part2: triples - ret += " where\n{\n"; + ret += " where { "; for(Triple t : tripleList) { if (!t.object.equals("literal_HRZ")) { // need not display literal ret += t.toStringForGStore(); - ret += " .\n"; + ret += ". "; } } - ret += "}\n"; + ret += "} "; // part3: order by / group by ... if(moreThanStr != null) - ret += moreThanStr+"\n"; + ret += moreThanStr+" "; if(mostStr != null) - ret += mostStr+"\n"; + ret += mostStr+" "; // part4: limit if(queryType != QueryType.Ask && (mostStr == null || !mostStr.contains("LIMIT"))) @@ -159,64 +155,6 @@ public class Sparql implements Comparable return ret; } - - //Use to execute (select all variables; format 'aggregation' and 'ask') - public String toStringForVirtuoso() - { - String ret = ""; - HashSet variables = new HashSet(); - - // prefix - if (queryType==QueryType.Ask) - ret += "ask where"; - else if(countTarget) - ret += ("select COUNT(DISTINCT " + questionFocus + ") where"); - else - { - // AGG: select question focus - if(moreThanStr != null || mostStr != null) - ret += ("select DISTINCT " + questionFocus + " where"); - // BGP: select all variables - else - { - for (Triple t: tripleList) - { - if (!t.isSubjConstant()) variables.add(t.subject.replaceAll(" ", "_")); - if (!t.isObjConstant()) variables.add(t.object.replaceAll(" ", "_")); - } - - ret += "select "; - for (String v : variables) - ret += v + " "; - ret += "where"; - } - } - ret += "\n{\n"; - if(variables.size() == 0) - variables.add(questionFocus); - - // triples - for (Triple t : tripleList) - { - if (!t.object.equals("literal_HRZ")) { - ret += t.toStringForGStore(); - ret += " .\n"; - } - } - ret += "}\n"; - - // suffix - if(moreThanStr != null) - { - ret += moreThanStr+"\n"; - } - if(mostStr != null) - { - ret += mostStr+"\n"; - } - - return ret; - } public int getVariableNumber() { @@ -258,7 +196,6 @@ public class Sparql implements Comparable public boolean equals(Object spq) { Sparql tempSparql= (Sparql) spq; - String s1 = this.toStringForGStore2(), s2 = tempSparql.toStringForGStore2(); if(this.toStringForGStore2().equals(tempSparql.toStringForGStore2())) return true; else diff --git a/src/utils/HttpRequest.java b/src/utils/HttpRequest.java deleted file mode 100644 index 454e300..0000000 --- a/src/utils/HttpRequest.java +++ /dev/null @@ -1,114 +0,0 @@ -package utils; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.PrintWriter; -import java.net.URL; -import java.net.URLConnection; -import java.util.List; -import java.util.Map; - -public class HttpRequest -{ - public static String sendGet(String url, String param) { - String result = ""; - BufferedReader in = null; - try { - String urlNameString = url + "?" + param; - URL realUrl = new URL(urlNameString); - - URLConnection connection = realUrl.openConnection(); - connection.setRequestProperty("accept", "*/*"); - connection.setRequestProperty("connection", "Keep-Alive"); - connection.setRequestProperty("user-agent", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)"); - - connection.connect(); - Map> map = connection.getHeaderFields(); - for (String key : map.keySet()) { - System.out.println(key + "--->" + map.get(key)); - } - in = new BufferedReader(new InputStreamReader( - connection.getInputStream())); - String line; - while ((line = in.readLine()) != null) { - result += line; - } - } catch (Exception e) { - System.out.println("Error when sending GET request: " + e); - e.printStackTrace(); - } - finally { - try { - if (in != null) { - in.close(); - } - } catch (Exception e2) { - e2.printStackTrace(); - } - } - return result; - } - - public static String sendPost(String url, String param) { - PrintWriter out = null; - BufferedReader in = null; - String result = ""; - try { - URL realUrl = new URL(url); - URLConnection conn = realUrl.openConnection(); - conn.setRequestProperty("accept", "*/*"); - conn.setRequestProperty("connection", "Keep-Alive"); - conn.setRequestProperty("user-agent", - "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)"); - conn.setDoOutput(true); - conn.setDoInput(true); - out = new PrintWriter(conn.getOutputStream()); - out.print(param); - out.flush(); - in = new BufferedReader( - new InputStreamReader(conn.getInputStream())); - String line; - while ((line = in.readLine()) != null) { - result += line; - } - } catch (Exception e) { - System.out.println("Error when sending POST request: "+e); - e.printStackTrace(); - } - finally{ - try{ - if(out!=null){ - out.close(); - } - if(in!=null){ - in.close(); - } - } - catch(IOException ex){ - ex.printStackTrace(); - } - } - return result; - } - - - public static String getPostData(InputStream in, int size, String charset) { - if (in != null && size > 0) { - byte[] buf = new byte[size]; - try { - in.read(buf); - if (charset == null || charset.length() == 0) - return new String(buf); - else { - return new String(buf, charset); - } - } catch (IOException e) { - e.printStackTrace(); - } - } - return null; - } -} \ No newline at end of file