
delete some code/paths/comments

delete useless and experimental code;
delete useless datapaths and comments
tags/v0.1.0
knightmarehs, 6 years ago
commit d145aa5e66
24 changed files with 28 additions and 712 deletions
  1. +0 -6 src/addition/AddtionalFix.java
  2. +0 -2 src/addition/AggregationRecognition.java
  3. +0 -1 src/application/GinfoHandler.java
  4. +0 -11 src/fgmt/EntityFragment.java
  5. +0 -34 src/jgsc/GstoreConnector.java
  6. +0 -14 src/lcn/BuildIndexForEntityFragments.java
  7. +0 -3 src/lcn/Main.java
  8. +0 -6 src/log/QueryLogger.java
  9. +0 -1 src/nlp/tool/CoreNLP.java
  10. +4 -98 src/qa/GAnswer.java
  11. +3 -14 src/qa/Globals.java
  12. +6 -60 src/qa/extract/EntityRecognition.java
  13. +0 -13 src/qa/extract/ExtractImplicitRelation.java
  14. +0 -1 src/qa/extract/ExtractRelation.java
  15. +0 -15 src/qa/extract/TypeRecognition.java
  16. +0 -1 src/qa/mapping/CompatibilityChecker.java
  17. +1 -2 src/qa/mapping/DBpediaLookup.java
  18. +0 -2 src/qa/mapping/EntityFragmentDict.java
  19. +8 -242 src/qa/parsing/BuildQueryGraph.java
  20. +0 -2 src/qa/parsing/QuestionParsing.java
  21. +0 -1 src/rdf/SemanticQueryGraph.java
  22. +1 -1 src/rdf/SimpleRelation.java
  23. +5 -68 src/rdf/Sparql.java
  24. +0 -114 src/utils/HttpRequest.java

+0 -6 src/addition/AddtionalFix.java

@@ -5,19 +5,13 @@ import java.util.HashMap;

import paradict.PredicateIDAndSupport;
import log.QueryLogger;
//import nlp.ds.DependencyTree;
//import nlp.ds.DependencyTreeNode;
import nlp.ds.Word;
import nlp.ds.Sentence.SentenceType;
import qa.Globals;
//import qa.extract.TypeRecognition;
//import qa.mapping.SemanticItemMapping;
//import rdf.EntityMapping;
import rdf.SemanticUnit;
import rdf.Sparql;
import rdf.Sparql.QueryType;
import rdf.Triple;
//import fgmt.TypeFragment;


public class AddtionalFix


+0 -2 src/addition/AggregationRecognition.java

@@ -4,7 +4,6 @@ import nlp.ds.DependencyTree;
import nlp.ds.DependencyTreeNode;
import nlp.ds.Word;
import qa.Globals;
import rdf.SemanticRelation;
import rdf.Sparql;
import rdf.Triple;
import log.QueryLogger;
@@ -33,7 +32,6 @@ public class AggregationRecognition {
case 1:
return b;
case 2: // Words need to be translated into numbers
boolean flag1=true;
for(i=0;i<8;i++) // 20~99
{
for(j=0;j<10;j++)


+0 -1 src/application/GinfoHandler.java

@@ -37,7 +37,6 @@ public class GinfoHandler extends AbstractHandler{
JSONObject infoobj = new JSONObject();
infoobj.put("version", Globals.Version);
infoobj.put("dictionary", Globals.DictionaryPath);
infoobj.put("dataset", Globals.Dataset);
infoobj.put("GDB system", Globals.GDBsystem);



+0 -11 src/fgmt/EntityFragment.java

@@ -254,17 +254,6 @@ public class EntityFragment extends Fragment {
}
}
}
//TODO: fix data for DBpedia 2014 (should be eliminated when the dataset is updated)
if(eid==2640237) //Barack_Obama
{
inEdges.add(8432); //spouse
outEdges.add(8432);
ArrayList<Integer> outEdgeList = new ArrayList<Integer>();
outEdgeList.add(8432);
inEntMap.put(4953443, outEdgeList);
outEntMap.put(4953443, outEdgeList);
}
}
@Override


+0 -34 src/jgsc/GstoreConnector.java

@@ -2,9 +2,7 @@ package jgsc;

import java.io.*;
import java.net.*;
import java.lang.*;
import java.net.URLEncoder;
import java.net.URLDecoder;
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.Map;
@@ -53,39 +51,25 @@ public class GstoreConnector {
String urlNameString = url + "/" + param;
System.out.println("request: "+urlNameString);
URL realUrl = new URL(urlNameString);
// open a connection to the URL
URLConnection connection = realUrl.openConnection();
// set common request properties
connection.setRequestProperty("accept", "*/*");
connection.setRequestProperty("connection", "Keep-Alive");
//set user-agent to avoid being speed-limited by servers that think the client is not a browser
connection.setRequestProperty("user-agent",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
// establish the actual connection
connection.connect();

long t0 = System.currentTimeMillis(); //ms

// get all response header fields
Map<String, List<String>> map = connection.getHeaderFields();
// 闁秴宸婚幍锟介張澶屾畱閸濆秴绨叉径鏉戠摟濞堬拷
//for (String key : map.keySet()) {
// System.out.println(key + "--->" + map.get(key));
//}

long t1 = System.currentTimeMillis(); //ms
//System.out.println("Time to get header: "+(t1 - t0)+" ms");
//System.out.println("============================================");

// create a BufferedReader to read the URL's response
in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8"));
String line;
while ((line = in.readLine()) != null) {
//PERFORMANCE: this can be very costly if the result is very large, because many temporary Strings are produced
//In that case, just printing the line directly would be much faster
result.append(line+"\n");
//System.out.println("get data size: " + line.length());
//System.out.println(line);
}

long t2 = System.currentTimeMillis(); //ms
@@ -94,7 +78,6 @@ public class GstoreConnector {
System.out.println("error in get request: " + e);
e.printStackTrace();
}
// use a finally block to close the input stream
finally {
try {
if (in != null) {
@@ -132,29 +115,20 @@ public class GstoreConnector {
String urlNameString = url + "/" + param;
System.out.println("request: "+urlNameString);
URL realUrl = new URL(urlNameString);
// open a connection to the URL
URLConnection connection = realUrl.openConnection();
// set common request properties
connection.setRequestProperty("accept", "*/*");
connection.setRequestProperty("connection", "Keep-Alive");
//set user-agent to avoid being speed-limited by servers that think the client is not a browser
connection.setRequestProperty("user-agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
// establish the actual connection
connection.connect();
long t0 = System.currentTimeMillis(); //ms
// get all response header fields
Map<String, List<String>> map = connection.getHeaderFields();
// 闁秴宸婚幍锟介張澶屾畱閸濆秴绨叉径鏉戠摟濞堬拷
//for (String key : map.keySet()) {
// System.out.println(key + "--->" + map.get(key));
//}

long t1 = System.currentTimeMillis(); // ms
//System.out.println("Time to get header: "+(t1 - t0)+" ms");

// create a BufferedReader to read the URL's response
in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8"));
char chars[] = new char[2048];
int b;
@@ -170,7 +144,6 @@ public class GstoreConnector {
//System.out.println("error in get request: " + e);
e.printStackTrace();
}
// use a finally block to close the input stream
finally {
try {
if (in != null) {
@@ -477,13 +450,6 @@ public class GstoreConnector {
//System.out.println(flag);
String answer = gc.query("root", "123456", "dbpedia16", sparql);
System.out.println(answer);

//To count the time cost
//long startTime=System.nanoTime(); //ns
//long startTime=System.currentTimeMillis(); //ms
//doSomeThing(); //the code being timed
//long endTime=System.currentTimeMillis(); //get the end time
//System.out.println("running time: "+(end-start)+"ms");
}
}


+0 -14 src/lcn/BuildIndexForEntityFragments.java

@@ -3,7 +3,6 @@ package lcn;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
//import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

@@ -15,20 +14,7 @@ import org.apache.lucene.index.IndexWriter;

import qa.Globals;

//import qa.Globals;

/**
* The basic indexing unit in Lucene is the Document; fields within it can be added as needed.
*
* A Document is a record representing one entry, like a row in a database table; it is the unit over which the inverted index is built.
* E.g., to search files on your own computer, you create fields (analogous to database columns), combine the fields into a Document, and the index is finally written out as several files.
* This Document is not the same concept as a file-system document.
*
* StandardAnalyzer is Lucene's built-in "standard analyzer". It:
* 1. tokenizes the original sentence on whitespace;
* 2. converts all uppercase letters to lowercase;
* 3. drops useless stop words such as "is", "the", "are", and removes all punctuation.
*/
public class BuildIndexForEntityFragments{
public void indexforentity() throws Exception
{
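
Aside: the removed Javadoc above describes standard Lucene usage. Below is a minimal sketch of that pattern, assuming Lucene 3.x-era APIs; the index path and field names are illustrative, not the project's actual ones.

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class LuceneIndexSketch {
    public static void main(String[] args) throws Exception {
        // StandardAnalyzer tokenizes, lowercases, and drops stop words ("is", "the", "are", ...)
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriter writer = new IndexWriter(
                FSDirectory.open(new File("/tmp/entity-fragment-index")),
                new IndexWriterConfig(Version.LUCENE_36, analyzer));
        // one Document per entity: like a row in a database, it is one entry of the inverted index
        Document doc = new Document();
        doc.add(new Field("EntityName", "Barack_Obama", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("EntityFragment", "spouse|birthPlace", Field.Store.YES, Field.Index.NO));
        writer.addDocument(doc);
        writer.close(); // flushes the index to disk as several files
    }
}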


+0 -3 src/lcn/Main.java

@@ -1,11 +1,8 @@
package lcn;

//import java.io.IOException;
//import java.util.ArrayList;
import java.util.ArrayList;
import java.util.Scanner;

import fgmt.EntityFragment;
import qa.Globals;
import qa.mapping.EntityFragmentDict;



+0 -6 src/log/QueryLogger.java

@@ -1,10 +1,5 @@
package log;

//import java.io.File;
//import java.io.FileNotFoundException;
//import java.io.FileOutputStream;
//import java.io.OutputStreamWriter;
//import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -12,7 +7,6 @@ import java.util.HashSet;

import javax.servlet.http.HttpServletRequest;

//import qa.Globals;
import qa.Matches;
import qa.Query;
import rdf.EntityMapping;


+0 -1 src/nlp/tool/CoreNLP.java

@@ -47,7 +47,6 @@ public class CoreNLP {

// these are all the sentences in this document
// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
// get all the sentences
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
int count = 0;


+4 -98 src/qa/GAnswer.java

@@ -1,10 +1,7 @@
package qa;

import java.io.*;
import java.net.Socket;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;

import jgsc.GstoreConnector;
@@ -73,7 +70,6 @@ public class GAnswer {
t = System.currentTimeMillis();
BuildQueryGraph step2 = new BuildQueryGraph();
step2.process(qlog);
// step2.processEXP(qlog);
qlog.timeTable.put("step2", (int)(System.currentTimeMillis()-t));
// step 3: some fix (such as "one-node" or "ask-one-triple") and aggregation
@@ -156,97 +152,10 @@ public class GAnswer {
return spq;
}

/**
* Get answers from Virtuoso + DBpedia; this function requires an OLD version of Virtuoso plus the Virtuoso handler.
* Virtuoso can handle "Aggregation".
**/
// public Matches getAnswerFromVirtuoso (QueryLogger qlog, Sparql spq)
// {
// Matches ret = new Matches();
// try
// {
// Socket socket = new Socket(Globals.QueryEngineIP, 1112);
// DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(socket.getOutputStream()));
//
// //formatting SPARQL & evaluate
// String formatedSpq = spq.toStringForVirtuoso();
// dos.writeUTF(formatedSpq);
// dos.flush();
// System.out.println("STD SPARQL:\n"+formatedSpq+"\n");
//
// ArrayList<String> rawLines = new ArrayList<String>();
// DataInputStream dis = new DataInputStream(new BufferedInputStream(socket.getInputStream()));
// while (true)
// {
// String line = dis.readUTF();
// if (line.equals("[[finish]]")) break;
// rawLines.add(line);
// }
//
// // ASK queries were translated to SELECT queries, whose answers then need translating back.
// // There is no need to translate; use "ASK WHERE" directly! 2018-12-11
// if(qlog.s.sentenceType == SentenceType.GeneralQuestion)
// {
// ret.answersNum = 1;
// ret.answers = new String[1][1];
// if(rawLines.size() == 0)
// {
// ret.answers[0][0] = "general:false";
// }
// else
// {
// ret.answers[0][0] = "general:true";
// }
// System.out.println("general question answer:" + ret.answers[0][0]);
// dos.close();
// dis.close();
// socket.close();
// return ret;
// }
//
// //select but no results
// if (rawLines.size() == 0)
// {
// ret.answersNum = 0;
// dos.close();
// dis.close();
// socket.close();
// return ret;
// }
//
// int ansNum = rawLines.size();
// int varNum = variables.size();
// ArrayList<String> valist = new ArrayList<String>(variables);
// ret.answers = new String[ansNum][varNum];
//
// System.out.println("ansNum=" + ansNum);
// System.out.println("varNum=" + varNum);
// for (int i=0;i<rawLines.size();i++)
// {
// String[] ansLineContents = rawLines.get(i).split("\t");
// for (int j=0;j<varNum;j++)
// {
// ret.answers[i][j] = valist.get(j) + ":" + ansLineContents[j];
// }
// }
//
// dos.close();
// dis.close();
// socket.close();
// }
// catch (Exception e) {
// e.printStackTrace();
// }
//
// return ret;
// }
public Matches getAnswerFromGStore2 (Sparql spq)
{
// modified by Lin Yinnian using ghttp - 2018-9-28
GstoreConnector gc = new GstoreConnector("172.31.222.90", 9001);
GstoreConnector gc = new GstoreConnector(Globals.QueryEngineIP, Globals.QueryEnginePort);
String answer = gc.query("root", "123456", "dbpedia16", spq.toStringForGStore2());
System.out.println(answer);
String[] rawLines = answer.split("\n");
@@ -338,14 +247,11 @@ public class GAnswer {
// // Execute by Virtuoso or GStore when answers not found
if(m == null || m.answers == null)
{
if (curSpq.tripleList.size()>0 && curSpq.questionFocus!=null)
if(curSpq.tripleList.size()>0 && curSpq.questionFocus!=null)
{
// if(ga.isBGP(qlog, curSpq))
m = ga.getAnswerFromGStore2(curSpq);
// else
// m = ga.getAnswerFromVirtuoso(qlog, curSpq);
m = ga.getAnswerFromGStore2(curSpq);
}
if (m != null && m.answers != null)
if(m != null && m.answers != null)
{
// Found results using current SPQ, then we can break and print result.
qlog.sparql = curSpq;


+3 -14 src/qa/Globals.java

@@ -28,7 +28,6 @@ public class Globals {
public static DBpediaLookup dblk;
public static int MaxAnswerNum = 100;
public static String Dataset = "dbpedia 2016";
public static String DictionaryPath = "default";
public static String Version = "0.1.2";
public static String GDBsystem = "gStore v0.7.2";
@@ -38,24 +37,14 @@ public class Globals {
* 2. super SQG, allows CIRCLE and WRONG edges. The structure is decided by the DS tree and can be changed in the query evaluation (TOP-K match) stage.
* */
public static int evaluationMethod = 2;
public static boolean isRunAsWebServer = false; // Run Local: false; Run Server: true
public static String runningBenchmark = "QALD"; // WQ:WebQuestions; WQSP:WebQuestionsSP; CQ:ComplexQuestions
// using different method and Freebase Version (in Virtuoso.java)
public static boolean usingOperationCondition = false; // only for EXP: try state transition operations only when the conditions are satisfied.

public static String localPath = "/media/wip/husen/NBgAnswer/";
public static String QueryEngineIP = "127.0.0.1"; // Notice, PORT number is in the evaluation function.
public static String localPath = "./././";
public static String QueryEngineIP = "172.31.222.90"; // Notice, PORT number is in the evaluation function.
public static int QueryEnginePort = 9001;
public static void init ()
{
System.out.println("====== gAnswer2.0 over DBpedia ======");
if(isRunAsWebServer == false)
{
localPath = "D:/husen/gAnswer/";
QueryEngineIP = "172.31.222.72";
}

long t1, t2, t3, t4, t5, t6, t7, t8, t9;


+6 -60 src/qa/extract/EntityRecognition.java

@@ -1,21 +1,14 @@
package qa.extract;

import java.io.BufferedReader;
//import java.io.File;
//import java.io.FileInputStream;
//import java.io.FileNotFoundException;
//import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
//import java.io.OutputStreamWriter;
//import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;

import lcn.EntityFragmentFields;
import fgmt.EntityFragment;
import nlp.ds.Word;
import qa.Globals;
@@ -38,7 +31,6 @@ public class EntityRecognition {
double TypeAcceptedScore = 0.5;
double AcceptedDiffScore = 1;
public HashMap<String, String> m2e = null;
public ArrayList<MergedWord> mWordList = null;
public ArrayList<String> stopEntList = null;
public ArrayList<String> badTagListForEntAndType = null;
@@ -65,12 +57,6 @@ public class EntityRecognition {
badTagListForEntAndType.add("VBP");
badTagListForEntAndType.add("POS");
// !Handwriting entity linking; (lower case)
m2e = new HashMap<String, String>();
m2e.put("bipolar_syndrome", "Bipolar_disorder");
m2e.put("battle_in_1836_in_san_antonio", "Battle_of_San_Jacinto");
m2e.put("federal_minister_of_finance_in_germany", "Federal_Ministry_of_Finance_(Germany)");
// Additional fix for CATEGORY (in DBpedia)
af = new AddtionalFix();
tr = new TypeRecognition();
@@ -156,7 +142,7 @@ public class EntityRecognition {
allCnt++;
/*
* Filters to save time and drop some bad cases.
* Filters to speed up and drop some bad cases.
*/
boolean entOmit = false, typeOmit = false;
int prep_cnt=0;
@@ -446,8 +432,8 @@ public class EntityRecognition {
if(likelyEnt.equals(lowerOriginalWord))
score *= len;
// !Award: COVER (eg, Robert Kennedy: [Robert] [Kennedy] [Robert Kennedy])
//e.g., Social_Democratic_Party: any combination of these words is an entity, giving too many candidate plans; compared with "which conflicting entity to pick", "to link or not to link" matters more (most real errors are link/no-link errors), so the covered smaller entities are simply dropped here
//e.g., Abraham_Lincoln: under the "no-link" plan it is recognized as two nodes whose final score beats the correct answer's, so such words are made mandatory
//e.g, Social_Democratic_Party -> all ents -> drop the overlapped smaller ones
//e.g, Abraham_Lincoln -> select the whole word
if(len>1)
{
boolean[] flag = new boolean[words.length+1];
@@ -473,8 +459,6 @@ public class EntityRecognition {
// WHOLE match || HIGH match & HIGH upper || WHOLE upper
if(hitCnt == len || ((double)hitCnt/(double)len > 0.6 && (double)UpperWordCnt/(double)len > 0.6) || UpperWordCnt == len || len>=4)
{
//If the phrase contains a comma, require the words on both sides to appear in the mapped entity
//e.g., Melbourne_,_Florida: "Melbourne, Florida" must be selected, while California_,_USA: "Malibu, California" is not necessarily correct
boolean commaTotalRight = true;
if(originalWord.contains(","))
{
@@ -741,19 +725,10 @@ public class EntityRecognition {
String n = entity;
ArrayList<EntityMapping> ret= new ArrayList<EntityMapping>();
//1. Handwriting
if(m2e.containsKey(entity))
{
String eName = m2e.get(entity);
EntityMapping em = new EntityMapping(EntityFragmentFields.entityName2Id.get(eName), eName, 1000);
ret.add(em);
return ret; //handwriting is always correct
}
//2. Lucene index
//1. Lucene index
ret.addAll(EntityFragment.getEntityMappingList(n));
//3. DBpedia Lookup (some cases)
//2. DBpedia Lookup (some cases)
if (useDblk)
{
ret.addAll(Globals.dblk.getEntityMappings(n, null));
@@ -880,36 +855,7 @@ public class EntityRecognition {
er.process(question);
}
// File inputFile = new File("D:\\husen\\gAnswer\\data\\test\\test_in.txt");
// File outputFile = new File("D:\\husen\\gAnswer\\data\\test\\test_out.txt");
// BufferedReader fr = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile),"utf-8"));
// OutputStreamWriter fw = new OutputStreamWriter(new FileOutputStream(outputFile,true),"utf-8");
//
// String input;
// while((input=fr.readLine())!=null)
// {
// String[] strArray = input.split("\t");
// String id = "";
// String question = strArray[0];
// if(strArray.length>1)
// {
// question = strArray[1];
// id = strArray[0];
// }
// //Notice: "?" may lead to lucene/dbpedia lookup errors
// if(question.length()>1 && question.charAt(question.length()-1)=='.' || question.charAt(question.length()-1)=='?')
// question = question.substring(0,question.length()-1);
// if(question.isEmpty())
// continue;
// er.process(question);
// fw.write("Id: "+id+"\nQuery: "+question+"\n");
// fw.write(er.preLog+"\n");
// }
//
// fr.close();
// fw.close();
} catch (IOException e) {
e.printStackTrace();
}


+0 -13 src/qa/extract/ExtractImplicitRelation.java

@@ -1,7 +1,6 @@
package qa.extract;

import java.io.BufferedReader;
//import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
@@ -26,7 +25,6 @@ public class ExtractImplicitRelation {
static final int SamplingNumber = 100; // the maximum sampling number in calculation
static final int k = 3; // select top-k when many suitable relations; select top-k entities for a word
public HashMap<String, Integer> implicitEntRel = new HashMap<String, Integer>();
/*
* Implicit Relations:
@@ -39,9 +37,6 @@ public class ExtractImplicitRelation {
* */
public ExtractImplicitRelation()
{
//original word to lower case
implicitEntRel.put("american", Globals.pd.predicate_2_id.get("country"));
implicitEntRel.put("united_states", Globals.pd.predicate_2_id.get("country"));
}
// Notice: it is usually UNNECESSARY for two constants, so we did not implement this function.
@@ -96,14 +91,6 @@ public class ExtractImplicitRelation {
String eName = word.emList.get(i).entityName;
irList = getPrefferdPidListBetween_Entity_TypeVariable(eId, tId);
// !Handwriting implicit relations
if(irList != null && implicitEntRel.containsKey(word.originalForm.toLowerCase()))
{
int pId = implicitEntRel.get(word.originalForm.toLowerCase());
ImplicitRelation ir = new ImplicitRelation(tId, eId, pId, 1000);
irList.add(0, ir);
}
if(irList!=null && irList.size()>0)
{
ImplicitRelation ir = irList.get(0);


+0 -1 src/qa/extract/ExtractRelation.java

@@ -10,7 +10,6 @@ import java.util.Queue;
import log.QueryLogger;
import nlp.ds.DependencyTree;
import nlp.ds.DependencyTreeNode;
//import nlp.ds.Word;
import paradict.ParaphraseDictionary;
import qa.Globals;
import rdf.SimpleRelation;


+0 -15 src/qa/extract/TypeRecognition.java

@@ -8,7 +8,6 @@ import java.util.HashMap;

import nlp.ds.Word;
import nlp.tool.StopWordsList;
//import fgmt.RelationFragment;
import fgmt.TypeFragment;
import lcn.SearchInTypeShortName;
import log.QueryLogger;
@@ -44,7 +43,6 @@ public class TypeRecognition {
{
extendTypeMap = new HashMap<String, String>();
extendVariableMap = new HashMap<String, Triple>();
Triple triple = null;
//!Handwriting for convenience | TODO: approximate/semantic match of type
extendTypeMap.put("NonprofitOrganizations", "dbo:Non-ProfitOrganisation");
@@ -55,19 +53,6 @@ public class TypeRecognition {
extendTypeMap.put("USStates", "yago:StatesOfTheUnitedStates");
extendTypeMap.put("Europe", "yago:EuropeanCountries");
extendTypeMap.put("Africa", "yago:AfricanCountries");
//!The following IDs are based on DBpedia 2014.
//!extend variable (embedded triples) | eg, [?E|surfers]-?uri dbo:occupation res:Surfing | canadians -> <?canadian> <birthPlace> <Canada>
//1) <?canadians> <birthPlace> <Canada> | [country people] <birthPlace|1639> [country]
triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 1639, 2112902, "Canada", null, 100);
extendVariableMap.put("canadian", triple);
triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 1639, 883747, "Germany", null, 100);
extendVariableMap.put("german", triple);
//2) ?bandleader <occupation|6690> <Bandleader>
triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 6690, 5436853, "Bandleader", null, 100);
extendVariableMap.put("bandleader", triple);
triple = new Triple(Triple.VAR_ROLE_ID, Triple.VAR_NAME, 6690, 5436854, "Surfing>", null, 100);
extendVariableMap.put("surfer", triple);
}
public static void recognizeExtendVariable(Word w)


+0 -1 src/qa/mapping/CompatibilityChecker.java

@@ -1,7 +1,6 @@
package qa.mapping;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;


+1 -2 src/qa/mapping/DBpediaLookup.java

@@ -19,8 +19,7 @@ import rdf.EntityMapping;
public class DBpediaLookup {
//There are two websites of the DBpediaLookup online service.
//public static final String baseURL = "http://en.wikipedia.org/w/api.php?action=opensearch&format=xml&limit=10&search=";
//public static final String baseURL = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString=";
public static final String baseURL = "http://172.31.222.72:1234/api/search/KeywordSearch?MaxHits=5&QueryString=";
public static final String baseURL = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString=";
public HttpClient ctripHttpClient = null;
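
For reference, a hand-rolled call to the retained lookup endpoint could look like the sketch below, using plain java.net only; the query string is illustrative, and the online service must be reachable.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLEncoder;

public class DBpediaLookupSketch {
    public static void main(String[] args) throws Exception {
        String base = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString=";
        URL url = new URL(base + URLEncoder.encode("Barack Obama", "utf-8"));
        BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream(), "utf-8"));
        String line;
        while ((line = in.readLine()) != null)
            System.out.println(line); // the service answers with XML result records
        in.close();
    }
}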


+0 -2 src/qa/mapping/EntityFragmentDict.java

@@ -2,8 +2,6 @@ package qa.mapping;

import java.util.HashMap;

//import lcn.EntityFragmentFields;
//import qa.Globals;
import fgmt.EntityFragment;

public class EntityFragmentDict {


+8 -242 src/qa/parsing/BuildQueryGraph.java

@@ -1,11 +1,9 @@
package qa.parsing;

//import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.PriorityQueue;
import java.util.Queue;

import fgmt.EntityFragment;
@@ -17,16 +15,15 @@ import qa.Globals;
import qa.extract.*;
import qa.mapping.SemanticItemMapping;
import rdf.PredicateMapping;
import rdf.SemanticQueryGraph;
import rdf.Triple;
import rdf.SemanticRelation;
import rdf.SimpleRelation;
import rdf.SemanticUnit;
//import paradict.ParaphraseDictionary;

/*
* The core class to build query graph, i.e, to generate SPARQL queries.
* */
/**
* Core class to build query graph, i.e, to generate SPARQL queries.
* @author husen
*/
public class BuildQueryGraph
{
public ArrayList<SemanticUnit> semanticUnitList = new ArrayList<SemanticUnit>();
@@ -48,7 +45,8 @@ public class BuildQueryGraph
whList.add("how");
whList.add("where");
// Bad words for NODE. (base form)
// Bad words for NODE. (base form)
// We will train a node recognition model to replace such heuristic rules in the future.
stopNodeList.add("list");
stopNodeList.add("give");
stopNodeList.add("show");
@@ -92,7 +90,7 @@ public class BuildQueryGraph
{
stopNodeList.add("area");
}
//how much is the total population of european union?
//how much is the total population of european union?
if(qStr.contains("how much"))
{
stopNodeList.add("population");
@@ -202,7 +200,7 @@ public class BuildQueryGraph
continue;
}
//Notice, the following codes guarantee all possible edges (allow CIRCLE).
//Notice, the following code guarantee all possible edges (allow CIRCLE).
//Otherwise, NO CIRCLE, and the structure may be different by changing target.
if(Globals.evaluationMethod > 1)
{
@@ -279,209 +277,6 @@ public class BuildQueryGraph
return semanticUnitList;
}
/*
* For experiment.
*/
public ArrayList<SemanticUnit> getNodeList(QueryLogger qlog, DependencyTree ds)
{
semanticUnitList = new ArrayList<SemanticUnit>();
// For ComplexQuestions or WebQuestions, only consider wh-word and at most two entities.
if(Globals.runningBenchmark.equals("CQ") || Globals.runningBenchmark.equals("WQ"))
{
// DependencyTreeNode target = ds.nodesList.get(0);
// if(Globals.runningBenchmark.equals("CQ"))
// target = detectTargetForCQ(ds, qlog);
// qlog.target = target.word;
// qlog.SQGlog += "++++ Target detect: "+target+"\n";
//
// detectTopicConstraint(qlog);
// semanticUnitList.add(new SemanticUnit(qlog.target, false)); //Set variable to object
// if(topicEnt != null)
// {
// semanticUnitList.add(new SemanticUnit(topicEnt, true)); //Set entity to subject
// }
// if(constraintEnt != null)
// {
// semanticUnitList.add(new SemanticUnit(constraintEnt, true)); //Set entity to subject
// }
}
// For general cases (e.g, QALD), consider internal variables.
else
{
for(DependencyTreeNode dtn: ds.nodesList)
{
if(isNodeWoCorefRe(dtn)) // ! Omit the coreference resolution rules !
{
semanticUnitList.add(new SemanticUnit(dtn.word, true)); //No preferred subject (default is true)
}
}
}
return semanticUnitList;
}
/*
* (For Experiment) Build query graph using STATE TRANSITION method based on 4 operations (with 4 conditions).
* 1. Condition for Connect operation: do and must do | no other nodes on simple path in DS tree.
* 2. Condition for Merge operation: do and must do | heuristic rules of CoReference Resolution.
* 3. Condition for Fold operation: do or not do | no matches of low confidence of an edge.
* 4. Condition for Expand operation: do and must do | has corresponding information.
* */
public ArrayList<SemanticUnit> processEXP(QueryLogger qlog)
{
//0) Fix stop words
DependencyTree ds = qlog.s.dependencyTreeStanford;
if(qlog.isMaltParserUsed)
ds = qlog.s.dependencyTreeMalt;
fixStopWord(qlog, ds);
//1) Detect Modifier/Modified
//rely on sentence (rather than dependency tree)
//with some ADJUSTMENT (eg, ent+noun(noType&&noEnt) -> noun.omitNode=TRUE)
for(Word word: qlog.s.words)
getTheModifiedWordBySentence(qlog.s, word); //Find continuous modifier
for(Word word: qlog.s.words)
getDiscreteModifiedWordBySentence(qlog.s, word); //Find discrete modifier
for(Word word: qlog.s.words)
if(word.modifiedWord == null) //Other words modify themselves. NOTICE: can only be called after all modifiers are detected.
word.modifiedWord = word;
//print log
for(Word word: qlog.s.words)
{
if(word.modifiedWord != null && word.modifiedWord != word)
{
modifierList.add(word);
qlog.SQGlog += "++++ Modify detect: "+word+" --> " + word.modifiedWord + "\n";
}
}
//2) Detect target & 3) Coreference resolution
DependencyTreeNode target = detectTarget(ds,qlog);
qlog.SQGlog += "++++ Target detect: "+target+"\n";
if(target == null)
return null;
qlog.target = target.word;
// !target can NOT be entity. (except general question)| which [city] has most people?
if(qlog.s.sentenceType != SentenceType.GeneralQuestion && target.word.emList!=null)
{
//Counter example: Give me all Seven_Wonders_of_the_Ancient_World | (in fact, it is not ENT but CATEGORY: ?x subject Seve...)
target.word.mayEnt = false;
target.word.emList.clear();
}
try
{
// step1: get node list
semanticUnitList = getNodeList(qlog, ds);
if(semanticUnitList == null || semanticUnitList.isEmpty())
{
qlog.SQGlog += "ERROR: no nodes found.";
return null;
}
// step2: extract all potential relations
long t = System.currentTimeMillis();
System.out.println("Potential Relation Extraction start ...");
extractPotentialSemanticRelations(semanticUnitList, qlog);
qlog.timeTable.put("BQG_relation", (int)(System.currentTimeMillis()-t));
// step3: build query graph structure by 4 operations
t = System.currentTimeMillis();
SemanticQueryGraph bestSQG = null;
if(Globals.usingOperationCondition)
{
//TODO: use operation condition
}
else
{
// for experiment, do not use conditions.
PriorityQueue<SemanticQueryGraph> QGs = new PriorityQueue<SemanticQueryGraph>();
HashSet<SemanticQueryGraph> visited = new HashSet<>();
//Initial state: all nodes isolated.
SemanticQueryGraph head = new SemanticQueryGraph(semanticUnitList);
QGs.add(head);
while(!QGs.isEmpty())
{
head = QGs.poll();
visited.add(head);
//Judge: is it a final state?
if(head.isFinalState())
{
bestSQG = head;
break; // now we just find the top-1 SQG
}
//SQG generation
//Connect (enumerate)
for(SemanticUnit u: head.semanticUnitList)
for(SemanticUnit v: head.semanticUnitList)
if(!u.equals(v) && !u.neighborUnitList.contains(v) && !v.neighborUnitList.contains(u))
{
SemanticQueryGraph tail = new SemanticQueryGraph(head);
tail.connect(u, v);
if(!QGs.contains(tail) && !visited.contains(tail))
{
tail.calculateScore(qlog.potentialSemanticRelations);
QGs.add(tail);
}
}
//Merge (coref resolution)
if(head.semanticUnitList.size() > 2)
for(SemanticUnit u: head.semanticUnitList)
for(SemanticUnit v: head.semanticUnitList)
if(!u.equals(v) && (!u.neighborUnitList.contains(v) && !v.neighborUnitList.contains(u)) || (u.neighborUnitList.contains(v) && v.neighborUnitList.contains(u)))
{
SemanticQueryGraph tail = new SemanticQueryGraph(head);
tail.merge(u, v);
if(!QGs.contains(tail) && !visited.contains(tail))
{
tail.calculateScore(qlog.potentialSemanticRelations);
QGs.add(tail);
}
}
}
}
qlog.timeTable.put("BQG_structure", (int)(System.currentTimeMillis()-t));
//Relation Extraction by potentialSR
qlog.semanticUnitList = new ArrayList<SemanticUnit>();
qlog.semanticRelations = bestSQG.semanticRelations;
semanticUnitList = bestSQG.semanticUnitList;
matchRelation(semanticUnitList, qlog);
//Prepare for item mapping
TypeRecognition.AddTypesOfWhwords(qlog.semanticRelations); // Type supplementary
TypeRecognition.constantVariableRecognition(qlog.semanticRelations, qlog); // Constant or Variable, embedded triples
//(just for display)
recordOriginalTriples(semanticUnitList, qlog);
//step3: item mapping & top-k join
t = System.currentTimeMillis();
SemanticItemMapping step5 = new SemanticItemMapping();
step5.process(qlog, qlog.semanticRelations); //top-k join (generate SPARQL queries), disambiguation
qlog.timeTable.put("BQG_topkjoin", (int)(System.currentTimeMillis()-t));
//step6: implicit relation [modify word]
t = System.currentTimeMillis();
ExtractImplicitRelation step6 = new ExtractImplicitRelation();
step6.supplementTriplesByModifyWord(qlog);
qlog.timeTable.put("BQG_implicit", (int)(System.currentTimeMillis()-t));
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return semanticUnitList;
}
public void extractPotentialSemanticRelations(ArrayList<SemanticUnit> semanticUnitList, QueryLogger qlog)
{
ExtractRelation er = new ExtractRelation();
@@ -771,35 +566,6 @@ public class BuildQueryGraph
return false;
}
/*
* Judge nodes strictly.
* For EXP, do not use COREF resolution rules.
* */
public boolean isNodeWoCorefRe(DependencyTreeNode cur)
{
if(stopNodeList.contains(cur.word.baseForm))
return false;
if(cur.word.omitNode)
return false;
// Modifier can NOT be node (they may be added to the query graph at the end) e.g., Queen Elizabeth II, Queen (modifier)
if(modifierList.contains(cur.word))
return false;
// NOUN
if(cur.word.posTag.startsWith("N"))
return true;

// Wh-word
if(whList.contains(cur.word.baseForm))
return true;
if(cur.word.mayEnt || cur.word.mayType || cur.word.mayCategory)
return true;
return false;
}
public DependencyTreeNode detectTarget(DependencyTree ds, QueryLogger qlog)
{
visited.clear();


+0 -2 src/qa/parsing/QuestionParsing.java

@@ -1,7 +1,5 @@
package qa.parsing;

import org.maltparser.core.exception.MaltChainedException;

import log.QueryLogger;
import nlp.ds.DependencyTree;
import nlp.ds.DependencyTreeNode;


+0 -1 src/rdf/SemanticQueryGraph.java

@@ -4,7 +4,6 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;

import qa.Globals;
import nlp.ds.Word;

public class SemanticQueryGraph implements Comparable<SemanticQueryGraph>


+1 -1 src/rdf/SimpleRelation.java

@@ -65,7 +65,7 @@ public class SimpleRelation {
}
sumSelectivity = matchingScore*sumSelectivity*pidsup.support;
int pid = pidsup.predicateID;
if (Globals.pd.dbo_predicate_id.contains(pid)) sumSelectivity *= 1.5; //boost predicates in dbo //pid may not be in dbo?
if (Globals.pd.dbo_predicate_id.contains(pid)) sumSelectivity *= 1.5;
if (!pasList.containsKey(pid))
pasList.put(pid, sumSelectivity);


+5 -68 src/rdf/Sparql.java

@@ -2,13 +2,9 @@ package rdf;

import java.util.ArrayList;
import java.util.Collections;
//import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;

import log.QueryLogger;
import nlp.ds.Sentence;
import nlp.ds.Sentence.SentenceType;
import qa.Globals;

public class Sparql implements Comparable<Sparql>
@@ -137,21 +133,21 @@ public class Sparql implements Comparable<Sparql>
}
// part2: triples
ret += " where\n{\n";
ret += " where { ";
for(Triple t : tripleList)
{
if (!t.object.equals("literal_HRZ")) { // need not display literal
ret += t.toStringForGStore();
ret += " .\n";
ret += ". ";
}
}
ret += "}\n";
ret += "} ";
// part3: order by / group by ...
if(moreThanStr != null)
ret += moreThanStr+"\n";
ret += moreThanStr+" ";
if(mostStr != null)
ret += mostStr+"\n";
ret += mostStr+" ";
// part4: limit
if(queryType != QueryType.Ask && (mostStr == null || !mostStr.contains("LIMIT")))
@@ -159,64 +155,6 @@ public class Sparql implements Comparable<Sparql>
return ret;
}
//Used to execute (select all variables; format 'aggregation' and 'ask')
public String toStringForVirtuoso()
{
String ret = "";
HashSet<String> variables = new HashSet<String>();
// prefix
if (queryType==QueryType.Ask)
ret += "ask where";
else if(countTarget)
ret += ("select COUNT(DISTINCT " + questionFocus + ") where");
else
{
// AGG: select question focus
if(moreThanStr != null || mostStr != null)
ret += ("select DISTINCT " + questionFocus + " where");
// BGP: select all variables
else
{
for (Triple t: tripleList)
{
if (!t.isSubjConstant()) variables.add(t.subject.replaceAll(" ", "_"));
if (!t.isObjConstant()) variables.add(t.object.replaceAll(" ", "_"));
}
ret += "select ";
for (String v : variables)
ret += v + " ";
ret += "where";
}
}
ret += "\n{\n";
if(variables.size() == 0)
variables.add(questionFocus);
// triples
for (Triple t : tripleList)
{
if (!t.object.equals("literal_HRZ")) {
ret += t.toStringForGStore();
ret += " .\n";
}
}
ret += "}\n";
// suffix
if(moreThanStr != null)
{
ret += moreThanStr+"\n";
}
if(mostStr != null)
{
ret += mostStr+"\n";
}
return ret;
}
public int getVariableNumber()
{
@@ -258,7 +196,6 @@ public class Sparql implements Comparable<Sparql>
public boolean equals(Object spq)
{
Sparql tempSparql= (Sparql) spq;
String s1 = this.toStringForGStore2(), s2 = tempSparql.toStringForGStore2();
if(this.toStringForGStore2().equals(tempSparql.toStringForGStore2()))
return true;
else


+0 -114 src/utils/HttpRequest.java

@@ -1,114 +0,0 @@
package utils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import java.util.Map;

public class HttpRequest
{
public static String sendGet(String url, String param) {
String result = "";
BufferedReader in = null;
try {
String urlNameString = url + "?" + param;
URL realUrl = new URL(urlNameString);
URLConnection connection = realUrl.openConnection();
connection.setRequestProperty("accept", "*/*");
connection.setRequestProperty("connection", "Keep-Alive");
connection.setRequestProperty("user-agent",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
connection.connect();
Map<String, List<String>> map = connection.getHeaderFields();
for (String key : map.keySet()) {
System.out.println(key + "--->" + map.get(key));
}
in = new BufferedReader(new InputStreamReader(
connection.getInputStream()));
String line;
while ((line = in.readLine()) != null) {
result += line;
}
} catch (Exception e) {
System.out.println("Error when sending GET request: " + e);
e.printStackTrace();
}
finally {
try {
if (in != null) {
in.close();
}
} catch (Exception e2) {
e2.printStackTrace();
}
}
return result;
}

public static String sendPost(String url, String param) {
PrintWriter out = null;
BufferedReader in = null;
String result = "";
try {
URL realUrl = new URL(url);
URLConnection conn = realUrl.openConnection();
conn.setRequestProperty("accept", "*/*");
conn.setRequestProperty("connection", "Keep-Alive");
conn.setRequestProperty("user-agent",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
conn.setDoOutput(true);
conn.setDoInput(true);
out = new PrintWriter(conn.getOutputStream());
out.print(param);
out.flush();
in = new BufferedReader(
new InputStreamReader(conn.getInputStream()));
String line;
while ((line = in.readLine()) != null) {
result += line;
}
} catch (Exception e) {
System.out.println("Error when sending POST request: "+e);
e.printStackTrace();
}
finally{
try{
if(out!=null){
out.close();
}
if(in!=null){
in.close();
}
}
catch(IOException ex){
ex.printStackTrace();
}
}
return result;
}


public static String getPostData(InputStream in, int size, String charset) {
if (in != null && size > 0) {
byte[] buf = new byte[size];
try {
in.read(buf);
if (charset == null || charset.length() == 0)
return new String(buf);
else {
return new String(buf, charset);
}
} catch (IOException e) {
e.printStackTrace();
}
}
return null;
}
}
