
First commit to support Chinese QA

This is version 0.5; it can handle Chinese QA now, but a lot of work remains to improve it further.
pkubase
knightmarehs committed 6 years ago
commit f1ef5b06cc
29 changed files with 990 additions and 2537 deletions
  1. +7 -86    src/addition/AddtionalFix.java
  2. +4 -2     src/fgmt/RelationFragment.java
  3. +4 -41    src/fgmt/TypeFragment.java
  4. +0 -119   src/lcn/BuildIndexForEntityFragments.java
  5. +0 -107   src/lcn/BuildIndexForTypeShortName.java
  6. +44 -3    src/lcn/EntityFragmentFields.java
  7. +1 -5     src/log/QueryLogger.java
  8. +14 -181  src/nlp/ds/DependencyTree.java
  9. +49 -25   src/nlp/ds/Sentence.java
  10. +0 -201  src/nlp/tool/CoreNLP.java
  11. +1 -4    src/nlp/tool/Main.java
  12. +0 -70   src/nlp/tool/MaltParser.java
  13. +0 -73   src/nlp/tool/MaltParserCon.java
  14. +0 -53   src/nlp/tool/NERecognizer.java
  15. +28 -28  src/nlp/tool/StanfordParser.java
  16. +46 -109 src/paradict/ParaphraseDictionary.java
  17. +18 -12  src/qa/GAnswer.java
  18. +12 -36  src/qa/Globals.java
  19. +32 -35  src/qa/Query.java
  20. +0 -864  src/qa/extract/EntityRecognition.java
  21. +566 -0  src/qa/extract/EntityRecognitionCh.java
  22. +1 -2    src/qa/extract/ExtractImplicitRelation.java
  23. +0 -2    src/qa/extract/ExtractRelation.java
  24. +39 -47  src/qa/extract/TypeRecognition.java
  25. +0 -163  src/qa/mapping/DBpediaLookup.java
  26. +112 -197 src/qa/parsing/BuildQueryGraph.java
  27. +11 -30  src/qa/parsing/QuestionParsing.java
  28. +0 -41   src/rdf/MergedWord.java
  29. +1 -1    src/rdf/SimpleRelation.java

+7 -86  src/addition/AddtionalFix.java

@@ -20,16 +20,10 @@ public class AddtionalFix
public AddtionalFix()
{
// Some category mappings for DBpedia, try automatic linking methods later. | base form
pattern2category.put("gangster_from_the_prohibition_era", "Prohibition-era_gangsters");
pattern2category.put("seven_wonder_of_the_ancient_world", "Seven_Wonders_of_the_Ancient_World");
pattern2category.put("three_ship_use_by_columbus", "Christopher_Columbus");
pattern2category.put("13_british_colony", "Thirteen_Colonies");
}
public void process(QueryLogger qlog)
{
fixCategory(qlog);
oneTriple(qlog);
oneNode(qlog);
@@ -48,45 +42,10 @@ public class AddtionalFix
spq.queryType = QueryType.Ask;
}
public void fixCategory(QueryLogger qlog)
{
if(qlog == null || qlog.semanticUnitList == null)
return;
String var = null, category = null;
for(SemanticUnit su: qlog.semanticUnitList)
{
if(su.centerWord.mayCategory)
{
var = "?"+su.centerWord.originalForm;
category = su.centerWord.category;
}
}
if(category != null && var != null)
for(Sparql spq: qlog.rankedSparqls)
{
boolean occured = false;
for(Triple tri: spq.tripleList)
{
if(tri.subject.equals(var))
{
occured = true;
break;
}
}
String oName = category;
String pName = "subject";
int pid = Globals.pd.predicate_2_id.get(pName);
Triple triple = new Triple(Triple.VAR_ROLE_ID, var, pid, Triple.CAT_ROLE_ID, oName, null, 100);
spq.addTriple(triple);
}
}
/* recognize one-Node query
* Two cases:1、Special question|Imperative sentence 2、General question
* 1-1:how many [], highest [] ... | For single variable, add constraint (aggregation)
* 1-2: What is backgammon? | What is a bipolar syndrome? | Search an entity (return itself or its type/description ...)
* 1-2: 谁是狄仁杰? | What is a bipolar syndrome? | Search an entity (return itself or its type/description ...)
* 1-3: Give me all Seven Wonders of the Ancient World. | Notice, "Seven Wonders of the Ancient World" should be recognized as ENT before. (in fact it is CATEGORY in DBpedia)
* 2-1: Are there any [castles_in_the_United_States](yago:type)
* 2-2:Was Sigmund Freud married? | Lack of variable node.
@@ -101,7 +60,7 @@ public class AddtionalFix
Word[] words = qlog.s.words;
if(qlog.s.sentenceType != SentenceType.GeneralQuestion)
{
//1-1: how many [type] are there | List all [type]
//1-1: 有多少[type] | 列出所有[type]
if(target.mayType && target.tmList != null)
{
String subName = "?"+target.originalForm;
@@ -111,10 +70,10 @@ public class AddtionalFix
sparql.addTriple(triple);
qlog.rankedSparqls.add(sparql);
}
//1-2: What is [ent]?
else if(target.mayEnt && target.emList != null)
{
if(words.length >= 3 && words[0].baseForm.equals("what") && words[1].baseForm.equals("be"))
//1-2: 什么是[ent]
if(words.length >= 3 && (words[0].baseForm.equals("什么") || words[0].baseForm.equals("谁")) && words[1].baseForm.equals("是"))
{
int eid = target.emList.get(0).entityID;
String subName = target.emList.get(0).entityName;
@@ -123,24 +82,14 @@ public class AddtionalFix
sparql.addTriple(triple);
qlog.rankedSparqls.add(sparql);
}
}
//1-3: Give me all Seven Wonders of the Ancient World.
else if(target.mayCategory && target.category != null)
{
String oName = target.category;
String pName = "subject";
int pid = Globals.pd.predicate_2_id.get(pName);
Triple triple = new Triple(Triple.VAR_ROLE_ID, "?"+target.originalForm, pid, Triple.CAT_ROLE_ID, oName, null, 100);
Sparql sparql = new Sparql();
sparql.addTriple(triple);
qlog.rankedSparqls.add(sparql);
//1-3: [ent] with other relations
}
}
else
else
{
if(target.mayEnt && target.emList != null)
{
//2-2:Was Sigmund Freud married?
//2-2:[ent]结婚了吗?
String relMention = "";
for(Word word: words)
if(word != target && !word.baseForm.equals(".") && !word.baseForm.equals("?"))
@@ -162,34 +111,6 @@ public class AddtionalFix
sparql.addTriple(triple);
qlog.rankedSparqls.add(sparql);
}
//2-3:Are penguins endangered?
else
{
if(target.position < words.length && pattern2category.containsKey(words[target.position].baseForm))
{
String oName = pattern2category.get(words[target.position].baseForm);
String pName = "subject";
int pid = Globals.pd.predicate_2_id.get(pName);
int eid = target.emList.get(0).entityID;
String subName = target.emList.get(0).entityName;
Triple triple = new Triple(eid, subName, pid, Triple.CAT_ROLE_ID, oName, null, 100);
Sparql sparql = new Sparql();
sparql.addTriple(triple);
qlog.rankedSparqls.add(sparql);
}
}
}
//2-1: Are there any [castles_in_the_United_States](yago:type)
else if(target.mayType && target.tmList != null)
{
String typeName = target.tmList.get(0).typeName;
String subName = "?" + target.originalForm;
//System.out.println("typeName="+typeName+" subName="+subName);
Triple triple = new Triple(Triple.VAR_ROLE_ID, subName, Globals.pd.typePredicateID, Triple.TYPE_ROLE_ID, typeName, null, 100);
Sparql sparql = new Sparql();
sparql.addTriple(triple);
qlog.rankedSparqls.add(sparql);
}
}
}
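For orientation, the reworked 1-2 branch above now fires on Chinese copula questions such as 「谁是狄仁杰？」 instead of "what is ...". A minimal standalone sketch of that trigger check, using hypothetical token values rather than the project's Word objects:

// Sketch: the Chinese 1-2 trigger condition, over hypothetical base forms of a segmented question.
public class OneNodeCheckSketch {
    public static void main(String[] args) {
        String[] baseForms = {"谁", "是", "狄仁杰", "？"};   // segmented "谁是狄仁杰？"
        boolean oneNodeEntityQuestion =
                baseForms.length >= 3
                && (baseForms[0].equals("什么") || baseForms[0].equals("谁"))
                && baseForms[1].equals("是");
        // When true, AddtionalFix builds a one-triple SPARQL around the matched entity.
        System.out.println(oneNodeEntityQuestion);   // true
    }
}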


+4 -2  src/fgmt/RelationFragment.java

@@ -46,7 +46,9 @@ public class RelationFragment extends Fragment
public static void load() throws Exception
{
String filename = Globals.localPath + "data/DBpedia2016/fragments/predicate_RDF_fragment/predicate_fragment.txt";
System.out.println("Loading relation IDs and Fragments ...");
String filename = Globals.localPath + "data/pkubase/fragments/pkubase_predicate_fragment.txt";
List<String> inputs = FileUtil.readFile(filename);
relFragments = new HashMap<Integer, ArrayList<RelationFragment>>();
literalRelationSet = new HashSet<Integer>();
@@ -72,7 +74,7 @@ public class RelationFragment extends Fragment
public static void loadId() throws IOException
{
String filename = Globals.localPath + "data/DBpedia2016/fragments/id_mappings/16predicate_id.txt";
String filename = Globals.localPath + "data/pkubase/fragments/id_mappings/pkubase_predicate_id.txt";
List<String> inputs = FileUtil.readFile(filename);
relationShortName2IdList = new HashMap<String, ArrayList<Integer>>();
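
As the loadPredicateId change in ParaphraseDictionary.java further down shows, entries in pkubase_predicate_id.txt are tab-separated and the predicate may be wrapped in angle brackets. A tiny sketch of parsing one such line, with a hypothetical entry:

// Sketch: parse a single (hypothetical) line of pkubase_predicate_id.txt.
public class PredicateIdLineSketch {
    public static void main(String[] args) {
        String line = "<毕业院校>\t42";
        String[] parts = line.split("\t");
        String predicate = parts[0];
        if (predicate.startsWith("<") && predicate.endsWith(">"))
            predicate = predicate.substring(1, predicate.length() - 1);
        int id = Integer.parseInt(parts[1]);
        System.out.println(predicate + " -> " + id);   // 毕业院校 -> 42
    }
}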



+4 -41  src/fgmt/TypeFragment.java

@@ -19,8 +19,6 @@ public class TypeFragment extends Fragment {
public static HashMap<Integer, String> typeId2ShortName = null;
public static final int NO_RELATION = -24232;
public static HashSet<String> yagoTypeList = null;
public HashSet<Integer> inEdges = new HashSet<Integer>();
public HashSet<Integer> outEdges = new HashSet<Integer>();
public HashSet<Integer> entSet = new HashSet<Integer>();
@@ -33,26 +31,6 @@ public class TypeFragment extends Fragment {
* 4, others: peace、vice
*/
public static ArrayList<String> stopYagoTypeList = null;
static void loadStopYagoTypeList()
{
stopYagoTypeList = new ArrayList<String>();
stopYagoTypeList.add("Amazon");
stopYagoTypeList.add("Earth");
stopYagoTypeList.add("TheHungerGames");
stopYagoTypeList.add("SparklingWine");
stopYagoTypeList.add("Type");
stopYagoTypeList.add("Flow");
stopYagoTypeList.add("Owner");
stopYagoTypeList.add("Series");
stopYagoTypeList.add("Shot");
stopYagoTypeList.add("Part");
stopYagoTypeList.add("Care");
stopYagoTypeList.add("Peace");
stopYagoTypeList.add("Vice");
stopYagoTypeList.add("Dodo");
stopYagoTypeList.add("CzechFilms");
stopYagoTypeList.add("ChineseFilms");
}
public TypeFragment(String fgmt, int fid)
{
@@ -100,7 +78,7 @@ public class TypeFragment extends Fragment {
public static void load() throws Exception
{
String filename = Globals.localPath+"data/DBpedia2016/fragments/class_RDF_fragment/16type_fragment.txt";
String filename = Globals.localPath+"data/pkubase/fragments/pkubase_type_fragment.txt";
File file = new File(filename);
InputStreamReader in = new InputStreamReader(new FileInputStream(file),"utf-8");
@@ -128,14 +106,13 @@ public class TypeFragment extends Fragment {
// can fix some data there
// load Type Id
loadId();
System.out.println("Load "+typeId2ShortName.size()+" basic types and "+yagoTypeList.size()+" yago types.");
System.out.println("Load "+typeId2ShortName.size()+" basic types.");
}
public static void loadId() throws IOException
{
String filename = Globals.localPath+"data/DBpedia2016/fragments/id_mappings/16basic_types_id.txt";
String yagoFileName = Globals.localPath+"data/DBpedia2016/fragments/id_mappings/16yago_types_list.txt";

String filename = Globals.localPath+"data/pkubase/fragments/id_mappings/pkubase_type_id.txt";
File file = new File(filename);
InputStreamReader in = new InputStreamReader(new FileInputStream(file),"utf-8");
BufferedReader br = new BufferedReader(in);
@@ -161,19 +138,5 @@ public class TypeFragment extends Fragment {
typeId2ShortName.put(RelationFragment.literalTypeId, "literal_HRZ");
br.close();
//load YAGO types
in = new InputStreamReader(new FileInputStream(yagoFileName),"utf-8");
br = new BufferedReader(in);
yagoTypeList = new HashSet<String>();
while((line = br.readLine())!=null)
{
String[] lines = line.split("\t");
String typeName = lines[0];
yagoTypeList.add(typeName);
}
loadStopYagoTypeList();
yagoTypeList.removeAll(stopYagoTypeList);
}
}

+0 -119  src/lcn/BuildIndexForEntityFragments.java

@@ -1,119 +0,0 @@
package lcn;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

import qa.Globals;


public class BuildIndexForEntityFragments{
public void indexforentity() throws Exception
{
if(EntityFragmentFields.entityId2Name == null)
EntityFragmentFields.load();
long startTime = new Date().getTime();
//Try update KB index to DBpedia2015. by husen 2016-04-08
//Try update KB index to DBpedia2016. by husen 2018-8-22
File indexDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/entity_fragment_index");
File sourceDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/fragments/entity_RDF_fragment/16entity_fragment.txt");
Analyzer luceneAnalyzer_en = new StandardAnalyzer();
IndexWriter indexWriter_en = new IndexWriter(indexDir_en, luceneAnalyzer_en,true);
int mergeFactor = 100000; //default 10
int maxBufferedDoc = 1000; //default 10
int maxMergeDoc = Integer.MAX_VALUE; //INF
//indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor;
indexWriter_en.setMergeFactor(mergeFactor);
indexWriter_en.setMaxBufferedDocs(maxBufferedDoc);
indexWriter_en.setMaxMergeDocs(maxMergeDoc);
FileInputStream file = new FileInputStream(sourceDir_en);
InputStreamReader in = new InputStreamReader(file,"UTF-8");
BufferedReader br = new BufferedReader(in);
int count = 0;
while(true)
{
String _line = br.readLine();
{
if(_line == null) break;
}
count++;
if(count % 100000 == 0)
System.out.println(count);
String line = _line;
String temp[] = line.split("\t");
if(temp.length != 2)
continue;
else
{
int entity_id = Integer.parseInt(temp[0]);
if(!EntityFragmentFields.entityId2Name.containsKey(entity_id))
continue;
String entity_name = EntityFragmentFields.entityId2Name.get(entity_id);
String entity_fragment = temp[1];
entity_name = entity_name.replace("____", " ");
entity_name = entity_name.replace("__", " ");
entity_name = entity_name.replace("_", " ");
Document document = new Document();
Field EntityName = new Field("EntityName", entity_name, Field.Store.YES,
Field.Index.TOKENIZED,
Field.TermVector.WITH_POSITIONS_OFFSETS);
Field EntityId = new Field("EntityId", String.valueOf(entity_id),
Field.Store.YES, Field.Index.NO);
Field EntityFragment = new Field("EntityFragment", entity_fragment,
Field.Store.YES, Field.Index.NO);
document.add(EntityName);
document.add(EntityId);
document.add(EntityFragment);
indexWriter_en.addDocument(document);
}
}
indexWriter_en.optimize();
indexWriter_en.close();
br.close();

// input the time of Build index
long endTime = new Date().getTime();
System.out.println("entity_name index has build ->" + count + " " + "Time:" + (endTime - startTime));
}
public static void main(String[] args)
{
BuildIndexForEntityFragments bef = new BuildIndexForEntityFragments();
try
{
Globals.localPath="D:/husen/gAnswer/";
bef.indexforentity();
}
catch (Exception e)
{
e.printStackTrace();
}
}
}



+0 -107  src/lcn/BuildIndexForTypeShortName.java

@@ -1,107 +0,0 @@
package lcn;

import java.io.File;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

import qa.Globals;
import fgmt.TypeFragment;

public class BuildIndexForTypeShortName {
public static void buildIndex(HashMap<String, ArrayList<Integer>> typeShortName2IdList) throws Exception
{
long startTime = new Date().getTime();
File indexDir_li = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/type_fragment_index");
Analyzer luceneAnalyzer_li = new StandardAnalyzer();
IndexWriter indexWriter_li = new IndexWriter(indexDir_li, luceneAnalyzer_li,true);
int mergeFactor = 100000;
int maxBufferedDoc = 1000;
int maxMergeDoc = Integer.MAX_VALUE;
//indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor;
indexWriter_li.setMergeFactor(mergeFactor);
indexWriter_li.setMaxBufferedDocs(maxBufferedDoc);
indexWriter_li.setMaxMergeDocs(maxMergeDoc);
int count = 0;
Iterator<String> it = typeShortName2IdList.keySet().iterator();
while (it.hasNext())
{
String sn = it.next();
if (sn.length() == 0) {
continue;
}
count ++;
StringBuilder splittedSn = new StringBuilder("");
if(sn.contains("_"))
{
String nsn = sn.replace("_", " ");
splittedSn.append(nsn.toLowerCase());
}
else
{
int last = 0, i = 0;
for(i = 0; i < sn.length(); i ++)
{
// if it were not a small letter, then break it.
if(!(sn.charAt(i)>='a' && sn.charAt(i)<='z'))
{
splittedSn.append(sn.substring(last, i).toLowerCase());
splittedSn.append(' ');
last = i;
}
}
splittedSn.append(sn.substring(last, i).toLowerCase());
while(splittedSn.charAt(0) == ' ') {
splittedSn.deleteCharAt(0);
}
}
System.out.println("SplitttedType: "+splittedSn);
Document document = new Document();

Field SplittedTypeShortName = new Field("SplittedTypeShortName", splittedSn.toString(),
Field.Store.YES,
Field.Index.TOKENIZED,
Field.TermVector.WITH_POSITIONS_OFFSETS);
Field TypeShortName = new Field("TypeShortName", sn,
Field.Store.YES, Field.Index.NO);
document.add(SplittedTypeShortName);
document.add(TypeShortName);
indexWriter_li.addDocument(document);
}
indexWriter_li.optimize();
indexWriter_li.close();

// input the time of Build index
long endTime = new Date().getTime();
System.out.println("TypeShortName index has build ->" + count + " " + "Time:" + (endTime - startTime));
}
public static void main (String[] args) {
try {
Globals.localPath="D:/husen/gAnswer/";
TypeFragment.load();
BuildIndexForTypeShortName.buildIndex(TypeFragment.typeShortName2IdList);
} catch (Exception e) {
e.printStackTrace();
}
}

}

+44 -3  src/lcn/EntityFragmentFields.java

@@ -5,9 +5,13 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import fgmt.EntityFragment;
import qa.Globals;
import utils.FileUtil;

public class EntityFragmentFields {
@@ -18,8 +22,8 @@ public class EntityFragmentFields {
public static void load() throws IOException
{
String filename = Globals.localPath+"data/DBpedia2016/fragments/id_mappings/16entity_id.txt";
String fragmentFileName = Globals.localPath+"data/DBpedia2016/fragments/entity_RDF_fragment/16entity_fragment.txt";
String filename = Globals.localPath+"data/pkubase/fragments/id_mappings/pkubase_entity_id.txt";
String fragmentFileName = Globals.localPath+"data/pkubase/fragments/pkubase_entity_fragment.txt";
File file = new File(filename);
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf-8"));

@@ -35,7 +39,7 @@ public class EntityFragmentFields {
while((line = br.readLine()) != null)
{
String[] lines = line.split("\t");
String entName = lines[0].substring(1, lines[0].length()-1);
String entName = lines[0].trim().substring(1, lines[0].length()-1);
entityName2Id.put(entName, Integer.parseInt(lines[1]));
entityId2Name.put(Integer.parseInt(lines[1]), entName);
@@ -61,4 +65,41 @@ public class EntityFragmentFields {
br.close();
}
public static void genmini()
{
String filename = Globals.localPath+"data/pkubase/fragments/id_mappings/pkuentity_id.txt";
String fragmentFileName = Globals.localPath+"data/pkubase/fragments/pkubase_entity_fragment_mini.txt";
List<String> fragments = FileUtil.readFile(fragmentFileName);
ArrayList<Integer> eids = new ArrayList<Integer>();
for(String fragment: fragments)
{
int eid = Integer.parseInt(fragment.split("\t")[0]);
String fgmt = fragment.split("\t")[1];
EntityFragment ef = new EntityFragment(eid, fgmt);
eids.add(eid);
for(int ent: ef.inEntMap.keySet())
{
eids.add(ent);
}
for(int ent: ef.outEntMap.keySet())
{
eids.add(ent);
}
}
System.out.println(eids.size());
System.out.println("Loading entity id ...");
List<String> data = FileUtil.readFile(filename);
for(String line: data)
{
String[] lines = line.split("\t");
int eid = Integer.parseInt(lines[1]);
if(eids.contains(eid))
System.out.println(line);
}
}
public static void main(String[] args) {
EntityFragmentFields.genmini();
}
}

+1 -5  src/log/QueryLogger.java

@@ -12,7 +12,6 @@ import qa.Query;
import rdf.EntityMapping;
import rdf.SemanticRelation;
import rdf.Sparql;
import rdf.MergedWord;
import rdf.SemanticUnit;
import qa.Answer;
import nlp.ds.Sentence;
@@ -30,10 +29,8 @@ public class QueryLogger {
public boolean MODE_debug = false;
public boolean MODE_log = true;
public boolean MODE_fragment = true;
public boolean isMaltParserUsed = true; // Notice, we utilize Malt Parser as default parser, which is different from the older version. TODO: some coref rules need changed to fit Malt Parser.
public boolean isMaltParserUsed = false; // MaltParser is deprecated.
public HashMap<String, Integer> timeTable = null;
public ArrayList<MergedWord> mWordList = null;
public ArrayList<SemanticUnit> semanticUnitList = null;
public HashMap<Integer, SemanticRelation> semanticRelations = null;
public HashMap<Integer, SemanticRelation> potentialSemanticRelations = null;
@@ -48,7 +45,6 @@ public class QueryLogger {
{
timeTable = new HashMap<String, Integer>();
rankedSparqls = new ArrayList<Sparql>();
mWordList = query.mWordList;
}
public void reloadSentence(Sentence sentence)


+14 -181  src/nlp/ds/DependencyTree.java

@@ -6,75 +6,37 @@ import java.util.HashMap;
import java.util.List;
import java.util.Stack;

import nlp.tool.CoreNLP;
import nlp.tool.MaltParser;
import nlp.tool.StanfordParser;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.core.syntaxgraph.node.DependencyNode;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.trees.semgraph.SemanticGraph;

public class DependencyTree {
public DependencyTreeNode root = null;
public ArrayList<DependencyTreeNode> nodesList = null;
public SemanticGraph dependencies = null; // Method 1: CoreNLP (discarded)
public GrammaticalStructure gs = null; // Method 2: Stanford Parser
public DependencyStructure maltGraph = null; // Method 3: MaltParser
// public GrammaticalStructure gs = null; // Method 2: Stanford Parser
public HashMap<String, ArrayList<DependencyTreeNode>> wordBaseFormIndex = null;
public DependencyTree (Sentence sentence, CoreNLP coreNLPparser) {
SemanticGraph dependencies = coreNLPparser.getBasicDependencies(sentence.plainText);
this.dependencies = dependencies;
Stack<IndexedWord> stack = new Stack<IndexedWord>();
IndexedWord iwRoot = dependencies.getFirstRoot();
HashMap<IndexedWord, DependencyTreeNode> map = new HashMap<IndexedWord, DependencyTreeNode>();
nodesList = new ArrayList<DependencyTreeNode>();

stack.push(iwRoot);
root = this.setRoot(sentence.getWordByIndex(iwRoot.index()));
map.put(iwRoot, root);

while (!stack.empty())
{
IndexedWord curIWNode = stack.pop();
DependencyTreeNode curDTNode = map.get(curIWNode);
for (IndexedWord iwChild : dependencies.getChildList(curIWNode)) {
Word w = sentence.getWordByIndex(iwChild.index());
DependencyTreeNode newDTNode = this.insert(
curDTNode,
w,
dependencies.reln(curIWNode, iwChild).getShortName());
map.put(iwChild, newDTNode);
stack.push(iwChild);
}
curDTNode.sortChildrenList();
nodesList.add(curDTNode);
}
}
public DependencyTree (Sentence sentence, StanfordParser stanfordParser) {
this.gs = stanfordParser.getGrammaticalStructure(sentence.plainText);
HashMap<Integer, DependencyTreeNode> map = new HashMap<Integer, DependencyTreeNode>();
nodesList = new ArrayList<DependencyTreeNode>();
List<TypedDependency> tdl = gs.typedDependencies(false);
// String[] sent = { "这", "是", "一个", "简单", "的", "句子", "。" };
String[] sent = sentence.getWordsArr();
List<CoreLabel> rawWords = SentenceUtils.toCoreLabelList(sent);
List<TypedDependency> tdl = stanfordParser.getTypedDependencyList(rawWords);
// 1. generate all nodes.
for (TypedDependency td : tdl) {
// gov
if (!map.containsKey(td.gov().index()) && !td.reln().getShortName().equals("root")) {
Word w = sentence.getWordByIndex(td.gov().index());
w.posTag = td.gov().tag(); // POS TAG
DependencyTreeNode newNode = new DependencyTreeNode(w);
map.put(td.gov().index(), newNode);
nodesList.add(newNode);
@@ -82,6 +44,7 @@ public class DependencyTree {
// dep
if (!map.containsKey(td.dep().index())) {
Word w = sentence.getWordByIndex(td.dep().index());
w.posTag = td.dep().tag(); // POS TAG
DependencyTreeNode newNode = new DependencyTreeNode(w);
map.put(td.dep().index(), newNode);
nodesList.add(newNode);
@@ -118,139 +81,9 @@ public class DependencyTree {
}
}
Collections.sort(nodesList, new DependencyTreeNodeComparator());
for (DependencyTreeNode dtn : nodesList) {
dtn.linkNN(this);
}
}
public DependencyTree (Sentence sentence, MaltParser maltParser)throws MaltChainedException {
try {
// the tokens are parsed in the following line
DependencyStructure graph = maltParser.getDependencyStructure(sentence);
this.maltGraph = graph;
//System.out.println(graph);
HashMap<Integer, DependencyTreeNode> map = new HashMap<Integer, DependencyTreeNode>();
ArrayList<DependencyTreeNode> list = new ArrayList<DependencyTreeNode>();
Stack<DependencyNode> stack = new Stack<DependencyNode>();
DependencyNode nroot = graph.getDependencyRoot();
stack.add(nroot);
// 1. generate all nodes.
while (!stack.isEmpty()) {
DependencyNode n = stack.pop();
DependencyNode sib = n.getRightmostDependent();
int key = n.getIndex();
//System.out.println("[current node][key="+key+"] "+n+" <"+n.getHeadEdge()+">");
boolean flag = true;
while (sib != null) {
flag = false;
stack.push(sib);
sib = sib.getLeftSibling();
}
if (flag) {
sib = n.getLeftmostDependent();
while (sib != null) {
stack.push(sib);
sib = sib.getRightSibling();
}
}
if (n.hasHead() && !map.containsKey(key)) {
//String snode = n.toString();
String sedge = n.getHeadEdge().toString();
//System.out.println("[" + snode + "] <" + sedge + ">");

/*int position = 0;
String wordOriginal = null;
String wordBase;
String postag = null;*/
String dep = null;
int idx1, idx2;
/*// position
idx1 = snode.indexOf("ID:")+3;
idx2 = snode.indexOf(' ', idx1);
position = Integer.parseInt(snode.substring(idx1, idx2));
// word
idx1 = snode.indexOf("FORM:", idx2)+5;
idx2 = snode.indexOf(' ', idx1);
wordOriginal = snode.substring(idx1, idx2);
wordBase = Globals.coreNLP.getBaseFormOfPattern(wordOriginal.toLowerCase());
// postag
idx1 = snode.indexOf("POSTAG:", idx2)+7;
idx2 = snode.indexOf(' ', idx1);
postag = snode.substring(idx1, idx2);*/
// dep
idx1 = sedge.lastIndexOf(':')+1;
idx2 = sedge.lastIndexOf(' ');
dep = sedge.substring(idx1, idx2);
if (dep.equals("null")) {
dep = null;
}
else if (dep.equals("punct")) {// No consider about punctuation
continue;
}
DependencyTreeNode newNode = new DependencyTreeNode(sentence.getWordByIndex(key));
newNode.dep_father2child = dep;
map.put(key, newNode);
list.add(newNode);
}
}
// 2. add edges
for (Integer k : map.keySet()) {
DependencyNode n = graph.getDependencyNode(k);
DependencyTreeNode dtn = map.get(k);
if (dtn.dep_father2child == null) {
this.setRoot(dtn);
this.root.levelInTree = 0;
this.root.dep_father2child = "root";
}
else {
DependencyTreeNode father = map.get(n.getHead().getIndex());
DependencyTreeNode child = map.get(n.getIndex());
child.father = father;
father.childrenList.add(child);
}
}
// Fix the tree for some cases.
if(list.size() > 11)
{
DependencyTreeNode dt1 = list.get(11), dt2 = list.get(5);
if(dt1!=null && dt2!=null && dt1.word.baseForm.equals("star") && dt1.father.word.baseForm.equals("be"))
{
if (dt2.word.baseForm.equals("film") || dt2.word.baseForm.equals("movie"))
{
dt1.father.childrenList.remove(dt1);
dt1.father = dt2;
dt2.childrenList.add(dt1);
}
}
}
// add levelInTree, sort childrenList & nodesList
for (DependencyTreeNode dtn : list) {
if (dtn.father != null) {
dtn.levelInTree = dtn.father.levelInTree + 1;
dtn.sortChildrenList();
}
}
nodesList = list;
Collections.sort(nodesList, new DependencyTreeNodeComparator());
for (DependencyTreeNode dtn : nodesList) {
dtn.linkNN(this);
}
} catch (MaltChainedException e) {
//e.printStackTrace();
//System.err.println("MaltParser exception: " + e.getMessage());
throw e;
}
// for (DependencyTreeNode dtn : nodesList) {
// dtn.linkNN(this);
// }
}
public DependencyTreeNode setRoot(Word w) {


+49 -25  src/nlp/ds/Sentence.java

@@ -2,10 +2,10 @@ package nlp.ds;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import qa.Globals;
import qa.Query;
import rdf.MergedWord;

public class Sentence {
public String plainText = null;
@@ -18,40 +18,64 @@ public class Sentence {
public enum SentenceType {SpecialQuestion,GeneralQuestion,ImperativeSentence}
public SentenceType sentenceType = SentenceType.SpecialQuestion;
public Sentence (String s)
// public Sentence (String s)
// {
// plainText = s;
// words = Globals.coreNLP.getTaggedWords(plainText);
// map = new HashMap<String, Word>();
// for (Word w : words)
// map.put(w.key, w);
// }
// for tokenized sentence
public Sentence (List<Word> wordList, String s)
{
plainText = s;
words = Globals.coreNLP.getTaggedWords(plainText);
words = new Word[wordList.size()];
for(int i=0; i<wordList.size(); i++)
words[i] = wordList.get(i);
map = new HashMap<String, Word>();
for (Word w : words)
map.put(w.key, w);
}
public Sentence (Query query, String s)
{
plainText = s;
words = Globals.coreNLP.getTaggedWords(plainText);
// inherit NodeRecognition's information
for(Word word: words)
// public Sentence (Query query, String s)
// {
// plainText = s;
// words = Globals.coreNLP.getTaggedWords(plainText);
// // inherit NodeRecognition's information
// for(Word word: words)
// {
// for(MergedWord mWord: query.mWordList)
// {
// if(word.originalForm.equals(mWord.name))
// {
// word.mayLiteral = mWord.mayLiteral;
// word.mayEnt = mWord.mayEnt;
// word.mayType = mWord.mayType;
// word.mayCategory = mWord.mayCategory;
// word.tmList = mWord.tmList;
// word.emList = mWord.emList;
// word.category = mWord.category;
// }
// }
// }
// map = new HashMap<String, Word>();
// for (Word w : words)
// map.put(w.key, w);
// }

public String[] getWordsArr() {
String[] wordArr = new String[words.length];
int cnt = 0;
for(Word w: words)
{
for(MergedWord mWord: query.mWordList)
{
if(word.originalForm.equals(mWord.name))
{
word.mayLiteral = mWord.mayLiteral;
word.mayEnt = mWord.mayEnt;
word.mayType = mWord.mayType;
word.mayCategory = mWord.mayCategory;
word.tmList = mWord.tmList;
word.emList = mWord.emList;
word.category = mWord.category;
}
}
wordArr[cnt++] = w.originalForm;
}
map = new HashMap<String, Word>();
for (Word w : words)
map.put(w.key, w);
return wordArr;
}
public ArrayList<Word> getWordsByString (String w) {
ArrayList<Word> ret = new ArrayList<Word>();
for (Word wo: words) {


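The new Sentence(List<Word>, String) constructor above expects an already-segmented question, since whitespace tokenization does not apply to Chinese. A rough sketch of feeding it jieba output, assuming the Word(baseForm, originalForm, posTag, position) constructor seen elsewhere in this codebase and the same jieba segmenter library that EntityRecognitionCh uses; the POS tag is left null here and filled in later during parsing:

// Sketch (assumptions as noted above): build a tokenized Sentence from jieba segmentation.
import java.util.ArrayList;
import java.util.List;
import com.huaban.analysis.jieba.JiebaSegmenter;
import com.huaban.analysis.jieba.JiebaSegmenter.SegMode;
import com.huaban.analysis.jieba.SegToken;
import nlp.ds.Sentence;
import nlp.ds.Word;

public class SentenceBuildSketch {
    public static void main(String[] args) {
        String question = "谁是狄仁杰？";
        JiebaSegmenter segmenter = new JiebaSegmenter();
        List<Word> wordList = new ArrayList<Word>();
        int position = 1;                                      // Word positions are 1-based
        for (SegToken token : segmenter.process(question, SegMode.SEARCH)) {
            // baseForm == originalForm for Chinese; the parser assigns POS tags later
            wordList.add(new Word(token.word, token.word, null, position++));
        }
        Sentence sentence = new Sentence(wordList, question);
        System.out.println(String.join(" ", sentence.getWordsArr()));
    }
}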
+0 -201  src/nlp/tool/CoreNLP.java

@@ -1,201 +0,0 @@
package nlp.tool;

import java.util.List;
import java.util.Properties;

import nlp.ds.Word;
import edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation;
import edu.stanford.nlp.trees.semgraph.SemanticGraph;
import edu.stanford.nlp.trees.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation;
import edu.stanford.nlp.util.CoreMap;

public class CoreNLP {

// CoreNLP can also recognize TIME and NUMBER (see SUTime)
private StanfordCoreNLP pipeline_lemma;
public CoreNLP () {
// creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
/*Properties props_all = new Properties();
props_all.put("annotators", "tokenize, ssplit, pos, lemma, parse"); // full list: "tokenize, ssplit, pos, lemma, ner, parse, dcoref"
pipeline_all = new StanfordCoreNLP(props_all);*/

Properties props_lemma = new Properties();
props_lemma.put("annotators", "tokenize, ssplit, pos, lemma");
pipeline_lemma = new StanfordCoreNLP(props_lemma);

}
// For more efficient usage, refer to "http://www.jarvana.com/jarvana/view/edu/stanford/nlp/stanford-corenlp/1.2.0/stanford-corenlp-1.2.0-javadoc.jar!/edu/stanford/nlp/process/Morphology.html"
public String getBaseFormOfPattern (String text) {
String ret = new String("");
// create an empty Annotation just with the given text
Annotation document = new Annotation(text);
// run all Annotators on this text
pipeline_lemma.annotate(document);


// these are all the sentences in this document
// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
int count = 0;
for(CoreMap sentence: sentences) {
// traversing the words in the current sentence
// a CoreLabel is a CoreMap with additional token-specific methods
for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
// this is the base form (lemma) of the token
String lemma = token.getString(LemmaAnnotation.class);
ret += lemma;
ret += " ";
}
count ++;
if (count % 100 == 0) {
System.out.println(count);
}
}
return ret.substring(0, ret.length()-1);
}
public SemanticGraph getBasicDependencies (String s) {
// create an empty Annotation just with the given text
Annotation document = new Annotation(s);
// run all Annotators on this text
pipeline_lemma.annotate(document);
// these are all the sentences in this document
// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
for(CoreMap sentence: sentences) {
// this is the Stanford dependency graph of the current sentence
SemanticGraph dependencies = sentence.get(BasicDependenciesAnnotation.class);
return dependencies;
}
return null;
}

public Tree getParseTree (String text) {
// create an empty Annotation just with the given text
Annotation document = new Annotation(text);
// run all Annotators on this text
pipeline_lemma.annotate(document);
// these are all the sentences in this document
// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
for(CoreMap sentence: sentences) {
// this is the parse tree of the current sentence
return sentence.get(TreeAnnotation.class);
}
return null;
}
/**
* How to use:
* for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
* // this is the text of the token
* String word = token.get(TextAnnotation.class);
* // this is the POS tag of the token
* String pos = token.get(PartOfSpeechAnnotation.class);
* }
* @param s
* @return
*/
public CoreMap getPOS (String s) {
// create an empty Annotation just with the given text
Annotation document = new Annotation(s);
// run all Annotators on this text
pipeline_lemma.annotate(document);
// these are all the sentences in this document
// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
for(CoreMap sentence: sentences) {
// this is the sentence with POS Tags
return sentence;
}
return null;
}
public Word[] getTaggedWords (String sentence) {
CoreMap taggedSentence = getPOS(sentence);
Word[] ret = new Word[taggedSentence.get(TokensAnnotation.class).size()];
int count = 0;
for (CoreLabel token : taggedSentence.get(TokensAnnotation.class)) {
// this is the text of the token
String word = token.get(TextAnnotation.class);
// this is the POS tag of the token
String pos = token.get(PartOfSpeechAnnotation.class);
//System.out.println(word+"["+pos+"]");
ret[count] = new Word(getBaseFormOfPattern(word.toLowerCase()), word, pos, count+1);
count ++;
}
return ret;
}
/*public void demo () {
// creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
Properties props = new Properties();
props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
// read some text in the text variable
String text = ... // Add your text here!
// create an empty Annotation just with the given text
Annotation document = new Annotation(text);
// run all Annotators on this text
pipeline.annotate(document);
// these are all the sentences in this document
// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
for(CoreMap sentence: sentences) {
// traversing the words in the current sentence
// a CoreLabel is a CoreMap with additional token-specific methods
for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
// this is the text of the token
String word = token.get(TextAnnotation.class);
// this is the POS tag of the token
String pos = token.get(PartOfSpeechAnnotation.class);
// this is the NER label of the token
String ne = token.get(NamedEntityTagAnnotation.class);
}

// this is the parse tree of the current sentence
Tree tree = sentence.get(TreeAnnotation.class);

// this is the Stanford dependency graph of the current sentence
SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
}

// This is the coreference link graph
// Each chain stores a set of mentions that link to each other,
// along with a method for getting the most representative mention
// Both sentence and token offsets start at 1!
Map<Integer, CorefChain> graph =
document.get(CorefChainAnnotation.class);
}*/
}

+1 -4  src/nlp/tool/Main.java

@@ -21,13 +21,10 @@ public class Main {
break;
try {
long t1 = System.currentTimeMillis();
Sentence s = new Sentence(question);
Sentence s = null;
DependencyTree dt = new DependencyTree(s, Globals.stanfordParser);
System.out.println("====StanfordDependencies====");
System.out.println(dt);
DependencyTree dt2 = new DependencyTree(s, Globals.maltParser);
System.out.println("====MaltDependencies====");
System.out.println(dt2);
long t2 = System.currentTimeMillis();
System.out.println("time=" + (t2-t1) + "ms");
} catch (Exception e) {


+0 -70  src/nlp/tool/MaltParser.java

@@ -1,70 +0,0 @@
package nlp.tool;


import nlp.ds.Sentence;
import nlp.ds.Word;

import org.maltparser.MaltParserService;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.syntaxgraph.DependencyStructure;

import qa.Globals;

public class MaltParser {
private MaltParserService service = null;
public MaltParser() {
try
{
System.out.print("Loading MaltParser ...");
service = new MaltParserService();
// Inititalize the parser model 'model0' and sets the working directory to '.' and sets the logging file to 'parser.log'
//service.initializeParserModel("-c engmalt.linear-1.7 -m parse -w . -lfi parser.log");
service.initializeParserModel("-c engmalt.linear-1.7 -m parse -w "+Globals.localPath+"lib/maltparser-1.9.1 -lfi parser.log");
firstParse();
System.out.println("ok!");
} catch (MaltChainedException e) {
e.printStackTrace();
System.err.println("MaltParser exception: " + e.getMessage());
}
}
private void firstParse() {
String[] tokens = new String[12];
tokens[0] = "1\tIn\t_\tIN\tIN\t_";
tokens[1] = "2\twhich\t_\tWDT\tWDT\t_";
tokens[2] = "3\tmovies\t_\tNNS\tNNS\t_";
tokens[3] = "4\tdirected\t_\tVBN\tVBN\t_";
tokens[4] = "5\tby\t_\tIN\tIN\t_";
tokens[5] = "6\tGarry\t_\tNNP\tNNP\t_";
tokens[6] = "7\tMarshall\t_\tNNP\tNNP\t_";
tokens[7] = "8\twas\t_\tVBD\tVBD\t_";
tokens[8] = "9\tJulia\t_\tNNP\tNNP\t_";
tokens[9] = "10\tRoberts\t_\tNNP\tNNP\t_";
tokens[10] = "11\tstarring\t_\tVBG\tVBG\t_";
tokens[11] = "12\t?\t_\t.\t.\t_";
try {
service.parse(tokens);
} catch (MaltChainedException e) {
e.printStackTrace();
}
}
public DependencyStructure getDependencyStructure (Sentence sentence) {
try {
return service.parse(getTaggedTokens(sentence));
} catch (MaltChainedException e) {
e.printStackTrace();
}
return null;
}
private String[] getTaggedTokens (Sentence sentence) {
String[] ret = new String[sentence.words.length];
int count = 0;
for (Word w : sentence.words) {
ret[count] = new String(""+w.position+"\t"+w.originalForm+"\t_\t"+w.posTag+"\t"+w.posTag+"\t_");
count ++;
}
return ret;
}
}

+0 -73  src/nlp/tool/MaltParserCon.java

@@ -1,73 +0,0 @@
package nlp.tool;

import java.io.File;
import java.net.URL;

import nlp.ds.Sentence;
import nlp.ds.Word;

import org.maltparser.concurrent.ConcurrentMaltParserModel;
import org.maltparser.concurrent.ConcurrentMaltParserService;
import org.maltparser.concurrent.graph.ConcurrentDependencyGraph;
import org.maltparser.core.exception.MaltChainedException;
//import org.maltparser.core.syntaxgraph.DependencyStructure;


public class MaltParserCon {
private ConcurrentMaltParserModel model = null;
public ConcurrentDependencyGraph outputGraph = null;
public MaltParserCon(){
try{
System.out.println("Loading Maltparser...\n");
URL ModelURL = new File("output/engmalt.linear-1.7.mco").toURI().toURL();
model = ConcurrentMaltParserService.initializeParserModel(ModelURL);
firstTest();
System.out.println("ok!\n");
}catch(Exception e){
e.printStackTrace();
System.err.println("MaltParser exception: " + e.getMessage());
}
}
private void firstTest(){
String[] tokens = new String[12];
tokens[0] = "1\tIn\t_\tIN\tIN\t_";
tokens[1] = "2\twhich\t_\tWDT\tWDT\t_";
tokens[2] = "3\tmovies\t_\tNNS\tNNS\t_";
tokens[3] = "4\tdirected\t_\tVBN\tVBN\t_";
tokens[4] = "5\tby\t_\tIN\tIN\t_";
tokens[5] = "6\tGarry\t_\tNNP\tNNP\t_";
tokens[6] = "7\tMarshall\t_\tNNP\tNNP\t_";
tokens[7] = "8\twas\t_\tVBD\tVBD\t_";
tokens[8] = "9\tJulia\t_\tNNP\tNNP\t_";
tokens[9] = "10\tRoberts\t_\tNNP\tNNP\t_";
tokens[10] = "11\tstarring\t_\tVBG\tVBG\t_";
tokens[11] = "12\t?\t_\t.\t.\t_";
try {
outputGraph = model.parse(tokens);
} catch (Exception e) {
e.printStackTrace();
}
System.out.println(outputGraph);
}
public ConcurrentDependencyGraph getDependencyStructure (Sentence sentence) {
try {
return model.parse(getTaggedTokens(sentence));
} catch (MaltChainedException e) {
e.printStackTrace();
}
return null;
}
private String[] getTaggedTokens (Sentence sentence) {
String[] ret = new String[sentence.words.length];
int count = 0;
for (Word w : sentence.words) {
ret[count] = new String(""+w.position+"\t"+w.originalForm+"\t_\t"+w.posTag+"\t"+w.posTag+"\t_");
count ++;
}
return ret;
}
}

+0 -53  src/nlp/tool/NERecognizer.java

@@ -1,53 +0,0 @@
package nlp.tool;

import java.util.List;

import qa.Globals;

import nlp.ds.Sentence;
import nlp.ds.Word;

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.PositionAnnotation;
import edu.stanford.nlp.ling.CoreLabel;

public class NERecognizer {
static String serializedClassifier;
static AbstractSequenceClassifier<CoreLabel> classifier;
//public static String localPath="E:\\Hanshuo\\gAnswer\\";
public NERecognizer() {
serializedClassifier = Globals.localPath+"lib/stanford-ner-2012-11-11/classifiers/english.all.3class.distsim.crf.ser.gz";
classifier = CRFClassifier.getClassifierNoExceptions(serializedClassifier);
}
/*public NERecognizer(String basePath, boolean flag) {
serializedClassifier = "WEB-INF\\lib\\stanford-ner-2012-11-11\\stanford-ner-2012-11-11\\classifiers\\english.all.3class.distsim.crf.ser.gz";
}*/
public void recognize(Sentence sentence) {
List<CoreLabel> lcl = classifier.classify(sentence.plainText).get(0);
for (CoreLabel cl : lcl) {
int position = Integer.parseInt(cl.get(PositionAnnotation.class))+1;
Word w = sentence.getWordByIndex(position);
String ner = cl.get(AnswerAnnotation.class);
if (ner.equals("O")) w.ner = null;
else w.ner = ner;
}
}
public static void main(String[] args) {
System.out.println("Test NER");
Globals.init();
Sentence s = new Sentence("I go to school at Stanford University, which is located in California.");//"Which states of Germany are governed by the Social Democratic Party?"
Globals.nerRecognizer.recognize(s);
for (Word word : s.words) {
System.out.print(word + " ");
System.out.println("ner=" + word.ner);
}
}
}

+28 -28  src/nlp/tool/StanfordParser.java

@@ -4,7 +4,6 @@ import java.io.StringReader;
import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
@@ -13,39 +12,40 @@ import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure;

public class StanfordParser {
private LexicalizedParser lp;
private TokenizerFactory<CoreLabel> tokenizerFactory;
private TreebankLanguagePack tlp;
private GrammaticalStructureFactory gsf;
private ChineseGrammaticalStructure gs;
// private TokenizerFactory<CoreLabel> tokenizerFactory;
// private TreebankLanguagePack tlp;
// private GrammaticalStructureFactory gsf;
public StanfordParser() {
lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
tlp = new PennTreebankLanguagePack();
gsf = tlp.grammaticalStructureFactory();
// lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
// tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
// tlp = new PennTreebankLanguagePack();
// gsf = tlp.grammaticalStructureFactory();
lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz");
}
public GrammaticalStructure getGrammaticalStructure (String sentence) {
List<CoreLabel> rawWords2 =
tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
// Converts a Sentence/List/String into a Tree.
// In all circumstances, the input will be treated as a single sentence to be parsed.
Tree parse = lp.apply(rawWords2);

return gsf.newGrammaticalStructure(parse);
/*List<TypedDependency> tdl = gs.typedDependencies(false);
for (TypedDependency td : tdl) {
System.out.println(td.reln().getShortName()+"("+td.gov()+","+td.dep()+")");
System.out.println("gov="+td.gov()
+"\tgov.index="
+td.gov().index()
+"\tgov.value="
+td.gov().value()
+"\tgov.pos="
+((TreeGraphNode)td.gov().parent()).value());
}*/
//System.out.println(tdl);
// public GrammaticalStructure getGrammaticalStructure (String sentence) {
// List<CoreLabel> rawWords2 =
// tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
//
// Tree parse = lp.apply(rawWords2);
//
// return gsf.newGrammaticalStructure(parse);
// }
public List<TypedDependency> getTypedDependencyList(List<CoreLabel> rawWords)
{
Tree parse = lp.apply(rawWords);
gs = new ChineseGrammaticalStructure(parse);
return gs.typedDependenciesCCprocessed();
}
}
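
The parser above now loads the Chinese PCFG model and derives dependencies through ChineseGrammaticalStructure rather than the English grammaticalStructureFactory path. A minimal sketch of the same call sequence directly against the Stanford API, reusing the pre-segmented example sentence from the DependencyTree diff:

// Sketch: pre-segmented Chinese tokens -> typed dependencies via chinesePCFG.
import java.util.List;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalStructure;

public class ChineseDepSketch {
    public static void main(String[] args) {
        LexicalizedParser lp =
                LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz");
        String[] tokens = {"这", "是", "一个", "简单", "的", "句子", "。"};   // already segmented
        List<CoreLabel> rawWords = SentenceUtils.toCoreLabelList(tokens);
        Tree parse = lp.apply(rawWords);
        ChineseGrammaticalStructure gs = new ChineseGrammaticalStructure(parse);
        for (TypedDependency td : gs.typedDependenciesCCprocessed()) {
            // one line per relation: shortName(governor, dependent)
            System.out.println(td.reln().getShortName() + "(" + td.gov() + ", " + td.dep() + ")");
        }
    }
}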

+46 -109  src/paradict/ParaphraseDictionary.java

@@ -10,19 +10,17 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

import com.huaban.analysis.jieba.SegToken;
import com.huaban.analysis.jieba.JiebaSegmenter.SegMode;



import nlp.tool.CoreNLP;
import qa.Globals;
import qa.extract.EntityRecognitionCh;

public class ParaphraseDictionary {
public static String localDataPath;
public static String dbpedia_relation_paraphrases_baseform_withScore;
public static String dbpedia_relation_paraphrases_baseform_withScore_rerank;
public static String dbpedia_relation_paraphrases_handwrite;
public static String dbpedia_predicate_id;
public static String relation_paraphrases_path;
public static String predicate_id_path;
public static String dbpedia_dbo_predicate;

public HashMap<String, Integer> predicate_2_id = null;
@@ -41,24 +39,14 @@ public class ParaphraseDictionary {
public int paraphrasedPredCount = 0;
public int lineCount = 0;
/**
* constructor
* @param parser
* @param ner
*/
public ParaphraseDictionary () {
String fixedPath = Globals.localPath;
String fixedPath = Globals.localPath+"data/pkubase/";

System.out.println(System.getProperty("user.dir"));
localDataPath = fixedPath + "data/DBpedia2016/parapharse/";
dbpedia_relation_paraphrases_baseform_withScore_rerank = localDataPath + "dbpedia-relation-paraphrases-withScore-baseform-merge-sorted-rerank-slct.txt";
dbpedia_relation_paraphrases_handwrite = localDataPath + "dbpedia-relation-paraphrase-handwrite.txt";
dbpedia_predicate_id = localDataPath + "16predicate_id.txt";
dbpedia_dbo_predicate = localDataPath + "16dbo_predicates.txt";
relation_paraphrases_path = fixedPath + "paraphrase/pkubase-paraphrase.txt";
predicate_id_path = fixedPath + "fragments/id_mappings/pkubase_predicate_id.txt";
bannedTypes = new HashSet<String>();
bannedTypes.add("Mayor");
relns_subject = new HashSet<String>();
relns_subject.add("subj");
@@ -76,25 +64,16 @@ public class ParaphraseDictionary {
relns_object.add("obj");
relns_object.add("pobj");
prepositions = new HashSet<String>();
prepositions.add("in");//in at on with to from before after of for
prepositions.add("at");
prepositions.add("on");
prepositions.add("with");
prepositions.add("to");
prepositions.add("from");
prepositions.add("before");
prepositions.add("after");
prepositions.add("of");
prepositions.add("for");
prepositions.add("as");
prepositions = new HashSet<String>(); //TODO: safe delete

try {
loadPredicateId();
loadDboPredicate();
loadParaDict();
addPredicateAsNLPattern();
addHandwriteAsNLPattern();
// loadDboPredicate();
// loadParaDict();
buildInvertedIndex();
typePredicateID = predicate_2_id.get("type");
typePredicateID = predicate_2_id.get("类型");
} catch (Exception e) {
e.printStackTrace();
}
@@ -108,8 +87,7 @@ public class ParaphraseDictionary {
predicate_2_id = new HashMap<String, Integer>();
id_2_predicate = new HashMap<Integer, String>();
String input_filename = dbpedia_predicate_id;
File file = new File(input_filename);
File file = new File(predicate_id_path);
InputStreamReader in = null;
BufferedReader br = null;
try{
@@ -118,6 +96,8 @@ public class ParaphraseDictionary {
String line = null;
while ((line = br.readLine())!= null) {
String[] lines = line.split("\t");
if(lines[0].startsWith("<") && lines[0].endsWith(">"))
lines[0] = lines[0].substring(1, lines[0].length()-1);
predicate_2_id.put(lines[0], Integer.parseInt(lines[1]));
id_2_predicate.put(Integer.parseInt(lines[1]), lines[0]);
}
@@ -192,13 +172,10 @@ public class ParaphraseDictionary {
InputStreamReader in = null;
BufferedReader br = null;
try{
String inputFileName = dbpedia_relation_paraphrases_baseform_withScore_rerank;
File file = new File(inputFileName);
in = new InputStreamReader(new FileInputStream(file), "utf-8");
in = new InputStreamReader(new FileInputStream(new File(relation_paraphrases_path)), "utf-8");
br = new BufferedReader(in);
String line = null;
int lineCount = 0;
//line = br.readLine();//read the first line which indicates the format
while ((line = br.readLine()) != null)
{
if (line.startsWith("#")) continue;
@@ -259,72 +236,23 @@ public class ParaphraseDictionary {
* A set of very important NL patterns are the predicates themselves!
*/
public void addPredicateAsNLPattern () {
if(nlPattern_2_predicateList == null)
nlPattern_2_predicateList = new HashMap<String, ArrayList<PredicateIDAndSupport>>();
final int support = 200;
int predicate_id;
for (String p : predicate_2_id.keySet())
{
// TODO: Omitting some bad relations (should be discarded in future)
if(p.equals("state") || p.equals("states"))
continue;
predicate_id = predicate_2_id.get(p);
StringBuilder pattern = new StringBuilder("");
// Work/runtime 11,SpaceStation/volume 68 and some predicates have prefix (DBpedia 2015), discard the prefix when generating pattern
if(p.contains("/"))

// TODO: segmentation: 1) tokenize 2) single ch-word
String patternString = "";
List<SegToken> q=EntityRecognitionCh.segmenter.process(p, SegMode.SEARCH);
for (SegToken t:q)
{
if(p.charAt(0)>='A' && p.charAt(0)<='Z')
p = p.substring(p.indexOf("/")+1);
//gameW/l 1974
else
p = p.replace("/", "");
}
int last = 0, i = 0;
for(i = 0; i < p.length(); i ++) {
// if it were not a small letter, then break it.
if(!(p.charAt(i)>='a' && p.charAt(i)<='z')) {
pattern.append(p.substring(last, i).toLowerCase());
pattern.append(" ");
last = i;
}
patternString += t.word + " ";
}
pattern.append(p.substring(last, i).toLowerCase());
for (i = 3; i < pattern.length(); i ++) {
// the blank between two digits should be deleted.
if (pattern.charAt(i)>='0' && pattern.charAt(i)<='9'
&& pattern.charAt(i-1)==' '
&& pattern.charAt(i-2)>='0' && pattern.charAt(i-2)<='9') {
pattern.deleteCharAt(i-1);
}
// the blank between I and D should be deleted.
else if (pattern.charAt(i)=='d'
&& pattern.charAt(i-1)==' '
&& pattern.charAt(i-2)=='i'
&& pattern.charAt(i-3)==' ') {
pattern.deleteCharAt(i-1);
}
// the blank between D and B should be deleted.
else if (pattern.charAt(i)=='b'
&& pattern.charAt(i-1)==' '
&& pattern.charAt(i-2)=='d'
&& pattern.charAt(i-3)==' ') {
pattern.deleteCharAt(i-1);
}
}
// pattern -> base form
/*String[] ptns = pattern.toString().split(" ");
pattern = new StringBuilder("");
for (String s : ptns) {
pattern.append(Globals.coreNLPparser.getBaseFormOfPattern(s));
pattern.append(" ");
}
pattern.deleteCharAt(pattern.length()-1);
String patternString = pattern.toString();*/
// Special case cannot use base form, eg, foundingYear //TODO: maybe Porter's Algorithm
String patternString = Globals.coreNLP.getBaseFormOfPattern(pattern.toString());
patternString = patternString.trim();
//System.out.println(p + "-->" + patternString);
if (!nlPattern_2_predicateList.containsKey(patternString)) {
@@ -340,30 +268,39 @@ public class ParaphraseDictionary {
}
public void addHandwriteAsNLPattern() throws IOException {
String inputFileName = dbpedia_relation_paraphrases_handwrite;
InputStreamReader in = null;
BufferedReader br = null;
try{
File file = new File(inputFileName);
in = new InputStreamReader(new FileInputStream(file), "utf-8");
in = new InputStreamReader(new FileInputStream(new File(relation_paraphrases_path)), "utf-8");
br = new BufferedReader(in);
String line = null;
//int lineCount = 0;
//line = br.readLine();//read the first line which indicates the format
while ((line = br.readLine()) != null) {
if (line.startsWith("#") || line.isEmpty()) continue;
//lineCount ++;
String[] content = line.split("\t");
if(!predicate_2_id.containsKey(content[0]))
continue;
int predicateID = predicate_2_id.get(content[0]);
String nlPattern = content[1].toLowerCase();
String nlPattern = content[1];
int support = Integer.parseInt(content[2]);
// Need Segmentation
if(!nlPattern.contains(" "))
{
String patternString = "";
List<SegToken> q=EntityRecognitionCh.segmenter.process(nlPattern, SegMode.SEARCH);
for (SegToken t:q)
{
patternString += t.word + " ";
}
patternString = patternString.trim();
nlPattern = patternString;
}
if (!nlPattern_2_predicateList.containsKey(nlPattern)) {
nlPattern_2_predicateList.put(nlPattern, new ArrayList<PredicateIDAndSupport>());
}
@@ -434,7 +371,7 @@ public class ParaphraseDictionary {
}
public static void main (String[] args) {
Globals.coreNLP = new CoreNLP();
// Globals.coreNLP = new CoreNLP();
Globals.pd = new ParaphraseDictionary();
//Globals.pd.showNLPatterns();
}


+18 -12  src/qa/GAnswer.java

@@ -32,8 +32,8 @@ public class GAnswer {
QueryLogger qlog = null;
try
{
if (input.length() <= 5)
return null;
// if (input.length() <= 5)
// return null;
System.out.println("[Input:] "+input);
@@ -47,17 +47,17 @@ public class GAnswer {
// Try to solve each NR plan, and combine the ranked SPARQLs.
// We only reserve LOG of BEST NR plan for convenience.
// Now only 1 plan
for(int i=query.sList.size()-1; i>=0; i--)
{
Sentence possibleSentence = query.sList.get(i);
qlog.reloadSentence(possibleSentence);
// qlog.isMaltParserUsed = true;
// LOG
System.out.println("transQ: "+qlog.s.plainText);
qlog.NRlog = query.preLog;
// qlog.NRlog = query.preLog;
qlog.SQGlog = "Id: "+query.queryId+"\nQuery: "+query.NLQuestion+"\n";
qlog.SQGlog += qlog.NRlog;
// qlog.SQGlog += qlog.NRlog;
qlog.timeTable.put("step0", (int)NRtime);
// step 1: question parsing (dependency tree, sentence type)
@@ -91,7 +91,7 @@ public class GAnswer {
qlog.rankedSparqls = rankedSparqls;
System.out.println("number of rankedSparqls = " + qlog.rankedSparqls.size());
// Detect question focus.
// Detect question focus. TODO: in which cases the question focus != target?
for (int i=0; i<qlog.rankedSparqls.size(); i++)
{
// First detect by SPARQLs.
@@ -156,7 +156,7 @@ public class GAnswer {
{
// modified by Lin Yinnian using ghttp - 2018-9-28
GstoreConnector gc = new GstoreConnector(Globals.QueryEngineIP, Globals.QueryEnginePort);
String answer = gc.query("root", "123456", "dbpedia16", spq.toStringForGStore2());
String answer = gc.query("endpoint", "123", "pkubase", spq.toStringForGStore2());
System.out.println(answer);
String[] rawLines = answer.split("\n");
@@ -199,9 +199,13 @@ public class GAnswer {
int i =1;
//file in/output
List<String> inputList = FileUtil.readFile("E:/Linyinnian/qald6_special.txt");
List<String> inputList = FileUtil.readFile("data/test/mini-ccks.txt");
for(String input: inputList)
{
if (input.length()<2 || input.charAt(0)!='q') continue;
System.out.println("----------------------------------------");
System.out.println(input);
ArrayList<String> outputs = new ArrayList<String>();
ArrayList<String> spqs = new ArrayList<String>();
spqs.add("id:"+String.valueOf(i));
@@ -220,9 +224,9 @@ public class GAnswer {
System.out.println("Ranked Sparqls: " + qlog.rankedSparqls.size());
outputs.add(qlog.SQGlog);
outputs.add(qlog.SQGlog + "Building HQG time: "+ (qlog.timeTable.get("step0")+qlog.timeTable.get("step1")+qlog.timeTable.get("step2")-qlog.timeTable.get("BQG_topkjoin")) + "ms");
outputs.add("TopKjoin time: "+ qlog.timeTable.get("BQG_topkjoin") + "ms");
outputs.add("Question Understanding time: "+ (int)(parsing_ed_time - parsing_st_time)+ "ms");
// outputs.add(qlog.SQGlog + "Building HQG time: "+ (qlog.timeTable.get("step0")+qlog.timeTable.get("step1")+qlog.timeTable.get("step2")-qlog.timeTable.get("BQG_topkjoin")) + "ms");
// outputs.add("TopKjoin time: "+ qlog.timeTable.get("BQG_topkjoin") + "ms");
// outputs.add("Question Understanding time: "+ (int)(parsing_ed_time - parsing_st_time)+ "ms");
long excuting_st_time = System.currentTimeMillis();
Matches m = null;
@@ -274,8 +278,10 @@ public class GAnswer {
outputs.add("[" + Math.min(MAX_SPQ_NUM+1, idx) + "]" + "score=" + 1000 + "\n" + stdSPQwoPrefix + "\n");
}
}
else
outputs.add("");
FileUtil.writeFile(outputs, "E:/Linyinnian/qald6_special_out.txt", true);
FileUtil.writeFile(outputs, "data/test/mini-ccks.out", true);
}
}
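
For reference, a minimal sketch of the new query path shown above: GAnswer now connects to the pkubase gStore endpoint and consumes the answer line by line. The constructor arguments, credentials, database name, and query(...) call mirror the hunk above; the SPARQL string is illustrative only, and the GstoreConnector package name is an assumption since its import is not shown in this diff.

import jgsc.GstoreConnector; // package name assumed; the import is not part of this diff

public class PkubaseQuerySketch {
    public static void main(String[] args) {
        // Same endpoint, credentials and database name as in GAnswer.java above.
        GstoreConnector gc = new GstoreConnector("pkubase.gstore-pku.com", 80);
        String sparql = "select ?x where { <大兴安岭> <终点> ?x . }"; // illustrative query only
        String answer = gc.query("endpoint", "123", "pkubase", sparql);
        for (String line : answer.split("\n")) {             // raw result lines, split as in GAnswer.java
            System.out.println(line);
        }
    }
}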


+ 12
- 36
src/qa/Globals.java

@@ -8,26 +8,18 @@ import lcn.EntityFragmentFields;
import fgmt.RelationFragment;
import fgmt.TypeFragment;
import paradict.ParaphraseDictionary;
import qa.mapping.DBpediaLookup;
import nlp.tool.NERecognizer;
import nlp.tool.CoreNLP;
import nlp.tool.MaltParser;
import nlp.tool.StanfordParser;
import nlp.tool.StopWordsList;

public class Globals {
// nlp tools
public static CoreNLP coreNLP;
public static StanfordParser stanfordParser;
public static StopWordsList stopWordsList;
public static MaltParser maltParser;
public static NERecognizer nerRecognizer;
// relation paraphrase dictionary
public static ParaphraseDictionary pd;
// entity linking system
public static DBpediaLookup dblk;
public static int MaxAnswerNum = 100;
public static String Dataset = "dbpedia 2016";
public static String Dataset = "pkubase";
public static String Version = "0.1.2";
public static String GDBsystem = "gStore v0.7.2";
@@ -39,34 +31,25 @@ public class Globals {
public static int evaluationMethod = 2;
public static String localPath = "./././";
public static String QueryEngineIP = "dbpedia16.gstore-pku.com"; // Notice, PORT number is in the evaluation function.
public static String QueryEngineIP = "pkubase.gstore-pku.com"; // Notice, PORT number is in the evaluation function.
public static int QueryEnginePort = 80;
public static void init ()
{
System.out.println("====== gAnswer2.0 over DBpedia ======");
System.out.println("====== gAnswer2.0 over Pkubase ======");

long t1, t2, t3, t4, t5, t6, t7, t8, t9;
t1 = System.currentTimeMillis();
coreNLP = new CoreNLP();
t2 = System.currentTimeMillis();
stanfordParser = new StanfordParser();
t3 = System.currentTimeMillis();
maltParser = new MaltParser();
t4 = System.currentTimeMillis();
nerRecognizer = new NERecognizer();
t5 = System.currentTimeMillis();
t2 = System.currentTimeMillis();
stopWordsList = new StopWordsList();
t6 = System.currentTimeMillis();
t3 = System.currentTimeMillis();
pd = new ParaphraseDictionary();
t7 = System.currentTimeMillis();
t4 = System.currentTimeMillis();
try
{
EntityFragmentFields.load();
@@ -78,20 +61,13 @@ public class Globals {
e1.printStackTrace();
}
t8 = System.currentTimeMillis();
dblk = new DBpediaLookup();
t9 = System.currentTimeMillis();
t5 = System.currentTimeMillis();
System.out.println("======Initialization======");
System.out.println("CoreNLP(Lemma): " + (t2-t1) + "ms.");
System.out.println("StanfordParser: " + (t3-t2) + "ms.");
System.out.println("MaltParser: " + (t4-t3) + "ms.");
System.out.println("NERecognizer: " + (t5-t4) + "ms.");
System.out.println("StopWordsList: " + (t6-t5) + "ms.");
System.out.println("ParaphraseDict & posTagPattern: " + (t7-t6) + "ms.");
System.out.println("GraphFragments: " + (t8-t7) + "ms.");
System.out.println("DBpediaLookup: " + (t9-t8) + "ms.");
System.out.println("* Total *: " + (t9-t1) + "ms.");
System.out.println("StanfordParser: " + (t2-t1) + "ms.");
System.out.println("StopWordsList: " + (t3-t2) + "ms.");
System.out.println("ParaphraseDict: " + (t4-t3) + "ms.");
System.out.println("GraphFragments: " + (t5-t4) + "ms.");
System.out.println("* Total *: " + (t5-t1) + "ms.");
System.out.println("==========================");
}



+ 32
- 35
src/qa/Query.java

@@ -1,10 +1,11 @@
package qa;

import java.util.ArrayList;
import java.util.List;

import nlp.ds.Sentence;
import qa.extract.EntityRecognition;
import rdf.MergedWord;
import nlp.ds.Word;
import qa.extract.EntityRecognitionCh;

/**
* 1. preprocessing of question
@@ -21,7 +22,7 @@ public class Query
public String queryId = null;
public String preLog = "";
public ArrayList<MergedWord> mWordList = null;
public List<Word> words = null;
public Query(){}
public Query(String _question)
@@ -32,15 +33,17 @@ public class Query
TransferedQuestion = getTransferedQuestion(NLQuestion);
// step1. NODE Recognition
MergedQuestionList = getMergedQuestionList(TransferedQuestion);
// MergedQuestionList = getMergedQuestionList(TransferedQuestion);
words = EntityRecognitionCh.parseSentAndRecogEnt(TransferedQuestion);
// build Sentence
sList = new ArrayList<Sentence>();
for(String mergedQuestion: MergedQuestionList)
{
Sentence sentence = new Sentence(this, mergedQuestion);
sList.add(sentence);
}
sList.add(new Sentence(words, TransferedQuestion)); // TODO: TransferedQuestion or _question
// for(String mergedQuestion: MergedQuestionList)
// {
// Sentence sentence = new Sentence(this, mergedQuestion);
// sList.add(sentence);
// }
}
public boolean isDigit(char ch)
@@ -66,6 +69,14 @@ public class Query
*/
public String getTransferedQuestion(String question)
{
//discard ? ! .
if(question.endsWith("?") || question.endsWith("。") || question.endsWith("!"))
question = question.substring(0, question.length()-1);
//discard 《》 because the Stanford parser does NOT recognize them. TODO: why?
question = question.replace("《", "").replace("》", "");
question = question.replace("“", "").replace("”", ""); // now just discard "" because they confuse the parser.
//rule1: discard ".", because "." and "_" will be disconnected by the parser. Discard a trailing "'", which may pollute NER
question = question.replace("' ", " ");
String [] words = question.split(" ");
@@ -84,45 +95,31 @@ public class Query
ret = ret.substring(0,ret.length()-1);
ret = ret.replace("-", " ");
ret = ret.replace("in america", "in United States");
//rule2: as well as -> and
ret = ret.replace("as well as", "and");
//rule3: movie -> film
ret = ret.replace(" movie", " film");
ret = ret.replace(" movies", " films");

return ret;
}
/**
* Recognize entity & type & literal in KB and replace " " in Phrases with "_"
* @param question
* @return merged question list
*/
public ArrayList<String> getMergedQuestionList(String question)
{
ArrayList<String> mergedQuestionList = null;
//entity & type recognize
EntityRecognition er = new EntityRecognition();
mergedQuestionList = er.process(question);
preLog = er.preLog;
mWordList = er.mWordList;

return mergedQuestionList;
}
public String removeQueryId(String question)
{
String ret = question;
// case 1: 1\t
int st = question.indexOf("\t");
if(st!=-1 && question.length()>1 && question.charAt(0)>='0' && question.charAt(0)<='9')
if(st!=-1 && question.length()>4 && isDigit(question.charAt(0)))
{
queryId = question.substring(0,st);
ret = question.substring(st+1);
System.out.println("Extract QueryId :"+queryId);
}
// case 2: q1: | 1:
st = question.indexOf(":");
if(st!=-1 && st<6 && question.length()>4 && (isDigit(question.charAt(0)) ||question.startsWith("q")))
{
queryId = question.substring(0,st).replace("q", "");
ret = question.substring(st+1);
System.out.println("Extract QueryId :"+queryId);
}
return ret;
}
}
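
For reference, a simplified restatement of the two id formats removeQueryId now accepts, with a worked example; the question text is illustrative and not taken from the commit.

public class RemoveQueryIdSketch {
    // Case 1: "23\t<question>"  -- leading number followed by a tab.
    // Case 2: "q23:<question>" or "23:<question>" -- short id before a colon.
    static String strip(String question) {
        int st = question.indexOf("\t");
        if (st != -1 && question.length() > 4 && Character.isDigit(question.charAt(0)))
            return question.substring(st + 1);
        st = question.indexOf(":");
        if (st != -1 && st < 6 && question.length() > 4
                && (Character.isDigit(question.charAt(0)) || question.startsWith("q")))
            return question.substring(st + 1);
        return question;
    }

    public static void main(String[] args) {
        System.out.println(strip("q23:武汉大学的校长是谁"));   // -> 武汉大学的校长是谁
        System.out.println(strip("23\t武汉大学的校长是谁"));   // -> 武汉大学的校长是谁
    }
}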

+ 0
- 864
src/qa/extract/EntityRecognition.java

@@ -1,864 +0,0 @@
package qa.extract;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;

import fgmt.EntityFragment;
import nlp.ds.Word;
import qa.Globals;
import rdf.EntityMapping;
import rdf.NodeSelectedWithScore;
import rdf.TypeMapping;
import rdf.MergedWord;
import utils.FileUtil;
import addition.*;

/**
* Core class of Node Recognition
* @author husen
*/
public class EntityRecognition {
public String preLog = "";
public String stopEntFilePath = Globals.localPath + "data/DBpedia2016/parapharse/stopEntDict.txt";
double EntAcceptedScore = 26;
double TypeAcceptedScore = 0.5;
double AcceptedDiffScore = 1;
public ArrayList<MergedWord> mWordList = null;
public ArrayList<String> stopEntList = null;
public ArrayList<String> badTagListForEntAndType = null;
ArrayList<ArrayList<Integer>> selectedList = null;
TypeRecognition tr = null;
AddtionalFix af = null;
public EntityRecognition()
{
// LOG
preLog = "";
loadStopEntityDict();
// Bad posTag for entity
badTagListForEntAndType = new ArrayList<String>();
badTagListForEntAndType.add("RBS");
badTagListForEntAndType.add("JJS");
badTagListForEntAndType.add("W");
badTagListForEntAndType.add(".");
badTagListForEntAndType.add("VBD");
badTagListForEntAndType.add("VBN");
badTagListForEntAndType.add("VBZ");
badTagListForEntAndType.add("VBP");
badTagListForEntAndType.add("POS");
// Additional fix for CATEGORY (in DBpedia)
af = new AddtionalFix();
tr = new TypeRecognition();
System.out.println("EntityRecognizer Initial : ok!");
}
public void loadStopEntityDict()
{
stopEntList = new ArrayList<String>();
try
{
List<String> inputs = FileUtil.readFile(stopEntFilePath);
for(String line: inputs)
{
if(line.startsWith("#"))
continue;
stopEntList.add(line);
}
}
catch (Exception e) {
e.printStackTrace();
}
}
public ArrayList<String> process(String question)
{
ArrayList<String> fixedQuestionList = new ArrayList<String>();
ArrayList<Integer> literalList = new ArrayList<Integer>();
HashMap<Integer, Double> entityScores = new HashMap<Integer, Double>();
HashMap<Integer, Integer> entityMappings = new HashMap<Integer, Integer>();
HashMap<Integer, Double> typeScores = new HashMap<Integer, Double>();
HashMap<Integer, String> typeMappings = new HashMap<Integer, String>();
HashMap<Integer, Double> mappingScores = new HashMap<Integer, Double>();
ArrayList<Integer> mustSelectedList = new ArrayList<Integer>();
System.out.println("--------- entity/type recognition start ---------");
Word[] words = Globals.coreNLP.getTaggedWords(question);
mWordList = new ArrayList<MergedWord>();
long t1 = System.currentTimeMillis();
int checkEntCnt = 0, checkTypeCnt = 0, hitEntCnt = 0, hitTypeCnt = 0, allCnt = 0;
boolean needRemoveCommas = false;
// Check entity & type
// Notice, ascending order by length
StringBuilder tmpOW = new StringBuilder();
StringBuilder tmpBW = new StringBuilder();
for(int len=1; len<=words.length; len++)
{
for(int st=0,ed=st+len; ed<=words.length; st++,ed++)
{
String originalWord = "", baseWord = "", allUpperWord = "";
//String[] posTagArr = new String[len];
for(int j=st; j<ed; j++)
{
//posTagArr[j-st] = words[j].posTag;
//originalWord += words[j].originalForm;
//baseWord += words[j].baseForm;
tmpOW.append(words[j].originalForm);
tmpBW.append(words[j].baseForm);
String tmp = words[j].originalForm;
if(tmp.length()>0 && tmp.charAt(0) >='a' && tmp.charAt(0)<='z')
{
String pre = tmp.substring(0,1).toUpperCase();
tmp = pre + tmp.substring(1);
}
allUpperWord += tmp;
if(j < ed-1)
{
//originalWord += "_";
//baseWord += "_";
tmpOW.append("_");
tmpBW.append("_");
}
}
originalWord = tmpOW.toString();
baseWord=tmpBW.toString();
tmpOW.setLength(0);
tmpBW.setLength(0);
allCnt++;
/*
* Filters to speed up and drop some bad cases.
*/
boolean entOmit = false, typeOmit = false;
int prep_cnt=0;
// Upper words can pass filter. eg: "Melbourne , Florida"
int UpperWordCnt = 0;
for(int i=st;i<ed;i++)
if((words[i].originalForm.charAt(0)>='A' && words[i].originalForm.charAt(0)<='Z')
|| ((words[i].posTag.equals(",") || words[i].originalForm.equals("'")) && i>st && i<ed-1))
UpperWordCnt++;
// Filters
if(UpperWordCnt<len || st==0)
{
if(st==0)
{
if(!words[st].posTag.startsWith("DT") && !words[st].posTag.startsWith("N"))
{
entOmit = true;
typeOmit = true;
}
}
else if(st>0)
{
Word formerWord = words[st-1];
//as princess
if(formerWord.baseForm.equals("as"))
entOmit = true;
//how many dogs?
if(formerWord.baseForm.equals("many"))
entOmit = true;
//obama's daughter ; your height | len=1 to avoid: Asimov's Foundation series
if(len == 1 && (formerWord.posTag.startsWith("POS") || formerWord.posTag.startsWith("PRP")))
entOmit = true;
//the father of you
if(ed<words.length)
{
Word nextWord = words[ed];
if(formerWord.posTag.equals("DT") && nextWord.posTag.equals("IN"))
entOmit = true;
}
//the area code of ; the official language of
boolean flag1=false, flag2=false;
for(int i=0;i<=st;i++)
if(words[i].posTag.equals("DT"))
flag1 = true;
for(int i=ed-1;i<words.length;i++)
if(words[i].posTag.equals("IN"))
flag2 = true;
if(flag1 && flag2)
entOmit = true;
}
if(ed < words.length)
{
Word nextWord = words[ed];
// (lowerCase)+(UpperCase)
if(nextWord.originalForm.charAt(0)>='A' && nextWord.originalForm.charAt(0)<='Z')
entOmit = true;
}
for(int i=st;i<ed;i++)
{
if(words[i].posTag.startsWith("I"))
prep_cnt++;
for(String badTag: badTagListForEntAndType)
{
if(words[i].posTag.startsWith(badTag))
{
entOmit = true;
typeOmit = true;
break;
}
}
if(words[i].posTag.startsWith("P") && (i!=ed-1 || len==1)){
entOmit = true;
typeOmit = true;
}
// First word
if(i==st)
{
if(words[i].posTag.startsWith("I") || words[i].posTag.startsWith("EX") || words[i].posTag.startsWith("TO"))
{
entOmit = true;
typeOmit = true;
}
if(words[i].posTag.startsWith("D") && len==2){
entOmit = true;
typeOmit = true;
}
if(words[i].baseForm.startsWith("list") || words[i].baseForm.startsWith("many"))
{
entOmit = true;
typeOmit = true;
}
if(words[i].baseForm.equals("and"))
{
entOmit = true;
typeOmit = true;
}
}
// Last word.
if(i==ed-1)
{
if(words[i].posTag.startsWith("I") || words[i].posTag.startsWith("D") || words[i].posTag.startsWith("TO"))
{
entOmit = true;
typeOmit = true;
}
if(words[i].baseForm.equals("and"))
{
entOmit = true;
typeOmit = true;
}
}
// Single word.
if(len==1)
{
//TODO: Omit general noun. eg: father, book ...
if(!words[i].posTag.startsWith("N"))
{
entOmit = true;
typeOmit = true;
}
}
}
// Too many preposition.
if(prep_cnt >= 3)
{
entOmit = true;
typeOmit = true;
}
}
/*
* Filter done.
*/
// Search category | highest priority
String category = null;
if(af.pattern2category.containsKey(baseWord))
{
typeOmit = true;
entOmit = true;
category = af.pattern2category.get(baseWord);
}
// Search type
int hitMethod = 0; // 1=dbo(baseWord), 2=dbo(originalWord), 3=yago|extend()
ArrayList<TypeMapping> tmList = new ArrayList<TypeMapping>();
if(!typeOmit)
{
System.out.println("Type Check: "+originalWord);
//checkTypeCnt++;
//search standard type
tmList = tr.getTypeIDsAndNamesByStr(baseWord);
if(tmList == null || tmList.size() == 0)
{
tmList = tr.getTypeIDsAndNamesByStr(originalWord);
if(tmList != null && tmList.size()>0)
hitMethod = 2;
}
else
hitMethod = 1;
//Search extend type (YAGO type)
if(tmList == null || tmList.size() == 0)
{
tmList = tr.getExtendTypeByStr(allUpperWord);
if(tmList != null && tmList.size() > 0)
{
preLog += "++++ Extend Type detect: "+baseWord+": "+" prefferd relaiton:"+tmList.get(0).prefferdRelation+"\n";
hitMethod = 3;
}
}
}
// Search entity
ArrayList<EntityMapping> emList = new ArrayList<EntityMapping>();
if(!entOmit && !stopEntList.contains(baseWord))
{
System.out.println("Ent Check: "+originalWord);
checkEntCnt++;
// Notice, the second parameter is whether use DBpedia Lookup.
emList = getEntityIDsAndNamesByStr(originalWord, (UpperWordCnt>=len-1 || len==1),len);
if(emList == null || emList.size() == 0)
{
emList = getEntityIDsAndNamesByStr(baseWord, (UpperWordCnt>=len-1 || len==1), len);
}
if(emList!=null && emList.size()>10)
{
ArrayList<EntityMapping> tmpList = new ArrayList<EntityMapping>();
for(int i=0;i<10;i++)
{
tmpList.add(emList.get(i));
}
emList = tmpList;
}
}
MergedWord mWord = new MergedWord(st,ed,originalWord);
// Add category
if(category != null)
{
mWord.mayCategory = true;
mWord.category = category;
int key = st*(words.length+1) + ed;
mustSelectedList.add(key);
}
// Add literal
if(len==1 && checkLiteralWord(words[st]))
{
mWord.mayLiteral = true;
int key = st*(words.length+1) + ed;
literalList.add(key);
}
// Add type mappings
if(tmList!=null && tmList.size()>0)
{
// Drop by score threshold
if(tmList.get(0).score < TypeAcceptedScore)
typeOmit = true;

// Only allow EXACT MATCH when method=1|2
// TODO: consider approximate match and taxonomy. eg, actor->person
String likelyType = tmList.get(0).typeName.toLowerCase();
String candidateBase = baseWord.replace("_", ""), candidateOriginal = originalWord.replace("_", "").toLowerCase();
if(!candidateBase.equals(likelyType) && hitMethod == 1)
typeOmit = true;
if(!candidateOriginal.equals(likelyType) && hitMethod == 2)
typeOmit = true;
if(!typeOmit)
{
mWord.mayType = true;
mWord.tmList = tmList;
int key = st*(words.length+1) + ed;
typeMappings.put(key, tmList.get(0).typeName);
typeScores.put(key, tmList.get(0).score);
}
}
// Add entity mappings
if(emList!=null && emList.size()>0)
{
// Drop by score threshold
if(emList.get(0).score < EntAcceptedScore)
entOmit = true;
// Drop: the [German Shepherd] dog
else if(len > 2)
{
for(int key: entityMappings.keySet())
{
//int te=key%(words.length+1);
int ts=key/(words.length+1);
if(ts == st+1 && ts <= ed)
{
//DT in lowercase (allow uppercase, such as: [The Pillars of the Earth])
if(words[st].posTag.startsWith("DT") && !(words[st].originalForm.charAt(0)>='A'&&words[st].originalForm.charAt(0)<='Z'))
{
entOmit = true;
}
}
}
}
// Record info in merged word
if(!entOmit)
{
mWord.mayEnt = true;
mWord.emList = emList;
// use to remove duplicate and select
int key = st*(words.length+1) + ed;
entityMappings.put(key, emList.get(0).entityID);
// fix entity score | conflict resolution
double score = emList.get(0).score;
String likelyEnt = emList.get(0).entityName.toLowerCase().replace(" ", "_");
String lowerOriginalWord = originalWord.toLowerCase();
// !Award: whole match
if(likelyEnt.equals(lowerOriginalWord))
score *= len;
// !Award: COVER (eg, Robert Kennedy: [Robert] [Kennedy] [Robert Kennedy])
//e.g, Social_Democratic_Party -> all ents -> drop the overlapped smaller ones
//e.g, Abraham_Lincoln -> select the whole word
if(len>1)
{
boolean[] flag = new boolean[words.length+1];
ArrayList<Integer> needlessEntList = new ArrayList<Integer>();
double tmpScore=0;
for(int preKey: entityMappings.keySet())
{
if(preKey == key)
continue;
int te=preKey%(words.length+1),ts=preKey/(words.length+1);
for(int i=ts;i<te;i++)
flag[i] = true;
if(st<=ts && ed>= te)
{
needlessEntList.add(preKey);
tmpScore += entityScores.get(preKey);
}
}
int hitCnt = 0;
for(int i=st;i<ed;i++)
if(flag[i])
hitCnt++;
// WHOLE match || HIGH match & HIGH upper || WHOLE upper
if(hitCnt == len || ((double)hitCnt/(double)len > 0.6 && (double)UpperWordCnt/(double)len > 0.6) || UpperWordCnt == len || len>=4)
{
boolean commaTotalRight = true;
if(originalWord.contains(","))
{
String candidateCompactString = originalWord.replace(",","").replace("_", "").toLowerCase();
String likelyCompactEnt = likelyEnt.replace(",","").replace("_", "");
if(!candidateCompactString.equals(likelyCompactEnt))
commaTotalRight = false;
else
{
mWord.name = mWord.name.replace("_,_","_");
needRemoveCommas = true;
}
}
if(commaTotalRight)
{
mustSelectedList.add(key);
if(tmpScore>score)
score = tmpScore+1;
for(int preKey: needlessEntList)
{
entityMappings.remove(preKey);
mustSelectedList.remove(Integer.valueOf(preKey));
}
}
}
}
//NOTICE: score in mWord have no changes. we only change the score in entityScores.
entityScores.put(key,score);
}
}
if(mWord.mayCategory || mWord.mayEnt || mWord.mayType || mWord.mayLiteral)
mWordList.add(mWord);
}
}
/* Print all candidates (use fixed score).*/
System.out.println("------- Result ------");
for(MergedWord mWord: mWordList)
{
int key = mWord.st * (words.length+1) + mWord.ed;
if(mWord.mayCategory)
{
System.out.println("Detect category mapping: "+mWord.name+": "+ mWord.category +" score: 100.0");
preLog += "++++ Category detect: "+mWord.name+": "+mWord.category+" score: 100.0\n";
}
if(mWord.mayEnt)
{
System.out.println("Detect entity mapping: "+mWord.name+": [");
for(EntityMapping em: mWord.emList)
System.out.print(em.entityName + ", ");
System.out.println("]");
preLog += "++++ Entity detect: "+mWord.name+": "+mWord.emList.get(0).entityName+" score:"+entityScores.get(key)+"\n";
hitEntCnt++;
}
if(mWord.mayType)
{
System.out.println("Detect type mapping: "+mWord.name+": [");
for(TypeMapping tm: mWord.tmList)
System.out.print(tm.typeName + ", ");
System.out.println("]");
preLog += "++++ Type detect: "+mWord.name+": "+mWord.tmList.get(0).typeName +" score:"+typeScores.get(key)+"\n";
hitTypeCnt++;
}
if(mWord.mayLiteral)
{
System.out.println("Detect literal: "+mWord.name);
preLog += "++++ Literal detect: "+mWord.name+"\n";
}
}
/*
* Sort by score and remove duplicate.
* eg, <"video_game" "ent:Video game" "50.0"> <"a_video_game" "ent:Video game" "45.0">.
* Notice, reserve all information in mWordList.
*/
// one ENT maps different mergedWord in query, reserve the higher score.
ByValueComparator bvc = new ByValueComparator(entityScores,words.length+1);
List<Integer> keys = new ArrayList<Integer>(entityMappings.keySet());
Collections.sort(keys, bvc);
for(Integer key : keys)
{
if(!mappingScores.containsKey(entityMappings.get(key)))
mappingScores.put(entityMappings.get(key), entityScores.get(key));
else
entityMappings.remove(key);
}
selectedList = new ArrayList<ArrayList<Integer>>();
ArrayList<Integer> selected = new ArrayList<Integer>();
// Some phrases must be selected.
selected.addAll(mustSelectedList);
for(Integer key: typeMappings.keySet())
{
// !type(len>1) (Omit len=1 because: [Brooklyn Bridge] is a entity.
int ed = key%(words.length+1), st = key/(words.length+1);
if(st+1 < ed)
{
boolean beCovered = false;
//Entity cover type, eg:[prime_minister of Spain]
for(int preKey: entityMappings.keySet())
{
int te=preKey%(words.length+1),ts=preKey/(words.length+1);
//Entiy should longer than type
if(ts <= st && te >= ed && ed-st < te-ts)
{
beCovered = true;
}
}
if(!beCovered)
selected.add(key);
}
}
// Conflict resolution
ArrayList<Integer> noConflictSelected = new ArrayList<Integer>();
//select longer one when conflict
boolean[] flag = new boolean[words.length];
ByLenComparator blc = new ByLenComparator(words.length+1);
Collections.sort(selected,blc);
for(Integer key : selected)
{
int ed = key%(words.length+1), st = (key-ed)/(words.length+1);
boolean omit = false;
for(int i=st;i<ed;i++)
{
if(flag[i])
{
omit = true;
break;
}
}
if(omit)
continue;
for(int i=st;i<ed;i++)
flag[i]=true;
noConflictSelected.add(key);
}
// Scoring and ranking --> top-k decision
dfs(keys,0,noConflictSelected,words.length+1);
ArrayList<NodeSelectedWithScore> nodeSelectedWithScoreList = new ArrayList<NodeSelectedWithScore>();
for(ArrayList<Integer> select: selectedList)
{
double score = 0;
for(Integer key: select)
{
if(entityScores.containsKey(key))
score += entityScores.get(key);
if(typeScores.containsKey(key))
score += typeScores.get(key);
}
NodeSelectedWithScore tmp = new NodeSelectedWithScore(select, score);
nodeSelectedWithScoreList.add(tmp);
}
Collections.sort(nodeSelectedWithScoreList);
// Replace
int cnt = 0;
for(int k=0; k<nodeSelectedWithScoreList.size(); k++)
{
if(k >= nodeSelectedWithScoreList.size())
break;
selected = nodeSelectedWithScoreList.get(k).selected;
Collections.sort(selected);
int j = 0;
String res = question;
if(selected.size()>0)
{
res = words[0].originalForm;
int tmp = selected.get(j++), st = tmp/(words.length+1), ed = tmp%(words.length+1);
for(int i=1;i<words.length;i++)
{
if(i>st && i<ed)
{
res = res+"_"+words[i].originalForm;
}
else
{
res = res+" "+words[i].originalForm;
}
if(i >= ed && j<selected.size())
{
tmp = selected.get(j++);
st = tmp/(words.length+1);
ed = tmp%(words.length+1);
}
}
}
else
{
res = words[0].originalForm;
for(int i=1;i<words.length;i++)
{
res = res+" "+words[i].originalForm;
}
}
boolean ok = true;
for(String str: fixedQuestionList)
if(str.equals(res))
ok = false;
if(!ok)
continue;
if(needRemoveCommas)
res = res.replace("_,_","_");
System.out.println("Merged: "+res);
preLog += "plan "+cnt+": "+res+"\n";
fixedQuestionList.add(res);
cnt++;
if(cnt >= 3) // top-3
break;
}
long t2 = System.currentTimeMillis();
// preLog += "Total hit/check/all ent num: "+hitEntCnt+" / "+checkEntCnt+" / "+allCnt+"\n";
// preLog += "Total hit/check/all type num: "+hitTypeCnt+" / "+checkTypeCnt+" / "+allCnt+"\n";
preLog += "Node Recognition time: "+ (t2-t1) + "ms\n";
System.out.println("Total check time: "+ (t2-t1) + "ms");
System.out.println("--------- pre entity/type recognition end ---------");
return fixedQuestionList;
}
public void dfs(List<Integer> keys,int dep,ArrayList<Integer> selected,int size)
{
if(dep == keys.size())
{
ArrayList<Integer> tmpList = (ArrayList<Integer>) selected.clone();
selectedList.add(tmpList);
}
else
{
//off: dep-th mWord
dfs(keys,dep+1,selected,size);
//on: no conflict
boolean conflict = false;
for(int preKey: selected)
{
int curKey = keys.get(dep);
int preEd = preKey%size, preSt = (preKey-preEd)/size;
int curEd = curKey%size, curSt = (curKey-curEd)/size;
if(!(preSt<preEd && preEd<=curSt && curSt<curEd) && !(curSt<curEd && curEd<=preSt && preSt<preEd))
conflict = true;
}
if(!conflict)
{
selected.add(keys.get(dep));
dfs(keys,dep+1,selected,size);
selected.remove(keys.get(dep));
}
}
}
public ArrayList<EntityMapping> getEntityIDsAndNamesByStr(String entity, boolean useDblk, int len)
{
String n = entity;
ArrayList<EntityMapping> ret= new ArrayList<EntityMapping>();
//1. Lucene index
ret.addAll(EntityFragment.getEntityMappingList(n));
//2. DBpedia Lookup (some cases)
if (useDblk)
{
ret.addAll(Globals.dblk.getEntityMappings(n, null));
}
Collections.sort(ret);
if (ret.size() > 0) return ret;
else return null;
}
public int preferDBpediaLookupOrLucene(String entityName)
{
int cntUpperCase = 0;
int cntSpace = 0;
int cntPoint = 0;
int length = entityName.length();
for (int i=0; i<length; i++)
{
char c = entityName.charAt(i);
if (c==' ')
cntSpace++;
else if (c=='.')
cntPoint++;
else if (c>='A' && c<='Z')
cntUpperCase++;
}
if ((cntUpperCase>0 || cntPoint>0) && cntSpace<3)
return 1;
if (cntUpperCase == length)
return 1;
return 0;
}
static class ByValueComparator implements Comparator<Integer> {
HashMap<Integer, Double> base_map;
int base_size;
double eps = 1e-8;
int dblcmp(double a,double b)
{
if(a+eps < b)
return -1;
return b+eps<a ? 1:0;
}
public ByValueComparator(HashMap<Integer, Double> base_map, Integer size) {
this.base_map = base_map;
this.base_size = size;
}
public int compare(Integer arg0, Integer arg1) {
if (!base_map.containsKey(arg0) || !base_map.containsKey(arg1)) {
return 0;
}
if (dblcmp(base_map.get(arg0),base_map.get(arg1))<0) {
return 1;
}
else if (dblcmp(base_map.get(arg0),base_map.get(arg1))==0)
{
int len0 = (arg0%base_size)-arg0/base_size , len1 = (arg1%base_size)-arg1/base_size;
if (len0 < len1) {
return 1;
} else if (len0 == len1) {
return 0;
} else {
return -1;
}
}
else {
return -1;
}
}
}
static class ByLenComparator implements Comparator<Integer> {
int base_size;
public ByLenComparator(int size) {
this.base_size = size;
}
public int compare(Integer arg0, Integer arg1) {
int len0 = (arg0%base_size)-arg0/base_size , len1 = (arg1%base_size)-arg1/base_size;
if (len0 < len1) {
return 1;
} else if (len0 == len1) {
return 0;
} else {
return -1;
}
}
}
public boolean isDigit(char ch)
{
if(ch>='0' && ch<='9')
return true;
return false;
}
//TODO: other literal words.
public boolean checkLiteralWord(Word word)
{
boolean ok = false;
if(word.posTag.equals("CD"))
ok = true;
return ok;
}
public static void main (String[] args)
{
Globals.init();
EntityRecognition er = new EntityRecognition();
try
{
BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
while (true)
{
System.out.println("Please input the question: ");
String question = br.readLine();
er.process(question);
}
} catch (IOException e) {
e.printStackTrace();
}
}

}

+ 566
- 0
src/qa/extract/EntityRecognitionCh.java

@@ -0,0 +1,566 @@
package qa.extract;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;

import lcn.EntityFragmentFields;

import com.huaban.analysis.jieba.JiebaSegmenter;
import com.huaban.analysis.jieba.JiebaSegmenter.SegMode;
import com.huaban.analysis.jieba.SegToken;

import edu.stanford.nlp.util.Pair;
import fgmt.TypeFragment;
import qa.Query;
import rdf.EntityMapping;
import rdf.TypeMapping;
import nlp.ds.*;
import utils.FileUtil;

final class MODNUM
{
public static int prime=9999991;
}
//TODO: replace with nlp.ds.Word
class Word
{
//type:0=normal word 1=entity 2=literal(string)
String word;
int type;
int pos=0;
List<String> entList=null;
Word(String w)
{
word=w;
type=0;
}
Word(String w,int i)
{
word=w;
type=i;
}
Word(String w,int i, int j)
{
word=w;
type=i;
pos=j;
}
Word(String w,int i, int j,List<String> l)
{
word=w;
type=i;
pos=j;
entList=l;
}
}

class Ent
{
public final int mod=MODNUM.prime;
public String entity_name,mention;
public int no;
public long hashe,hashm;
public Ent(String load)
{
int indexOf9=load.indexOf(9);
if (indexOf9>=0)
{
mention=load.substring(0, indexOf9);
String tmp=load.substring(indexOf9+1);
int t9=tmp.indexOf(9);
if (t9>=0)
{
entity_name=tmp.substring(0, t9);
String numberStr=tmp.substring(t9+1);
try
{
no=Integer.valueOf(numberStr);
}catch(Exception e){no=-1;};
}
else entity_name=tmp;
hashe=calHash(entity_name);
}
else
{
mention=load;
hashe=-1;
}
hashm=calHash(mention);
}
public long calHash(String p)
{
long x=0;
if (p==null || p.length()==0) return 0;
for (int i=0;i<p.length();i++)
{
x=x*65536+(long)(int)p.charAt(i);
x=x%mod;
}
return x;
}
@Override
public int hashCode()
{
return (int)hashm;
}
public Ent(){};
}

public class EntityRecognitionCh {
public static HashMap<String, List<String>> entMap,nentMap;
public static JiebaSegmenter segmenter = new JiebaSegmenter();
public final static int MaxEnt=20;
static
{
long t0 = System.currentTimeMillis();
List<String> nent = FileUtil.readFile("data/pkubase/paraphrase/ccksminutf.txt");
List<String> mention2ent = FileUtil.readFile("data/pkubase/paraphrase/mini-mention2ent.txt");

entMap=new HashMap<>();
nentMap=new HashMap<>();

System.out.println("Mention2Ent size: " + mention2ent.size());
for (String input:mention2ent)
{
Ent q=new Ent(input);
if (entMap.containsKey(q.mention))
entMap.get(q.mention).add(q.entity_name);
else
{
List<String> l=new ArrayList<>();
l.add(q.entity_name);
entMap.put(q.mention, l);
}
}
// In the non-entity list, 'mention' is a word that is NOT an entity; 'entity_name' stores its frequency
for (String input:nent)
{
Ent q=new Ent(input);
if (nentMap.containsKey(q.mention))
nentMap.get(q.mention).add(q.entity_name);
else
{
List<String> l=new ArrayList<>();
l.add(q.entity_name);
nentMap.put(q.mention, l);
}
}
long t1 = System.currentTimeMillis();
System.out.println("Read Mention2Ent used "+(t1-t0)+"ms");
}
public static boolean isAllNumber(String q)
{
boolean ret=true;
for (int i=0;i<q.length();i++)
{
if (q.charAt(i)<48 || q.charAt(i)>57) return false;
}
return ret;
}
public static String longestFirst2(String Question)
{
String ret="";
String input=Question.replace('{',' ').replace('}',' ');
int len=input.length();
int[][] ex=new int[len+3][];
Ent[][] entx=new Ent[len+3][];
for (int i=0;i<len+2;i++) ex[i]=new int[len+3];
for (int i=0;i<len+2;i++) entx[i]=new Ent[len+3];
for (int l=1;l<=len;l++)
{
int pos=0;
for (int j=l-1;j<len;j++)
{
String searchstr=input.substring(j-l+1,j+1);
List<String> rstlist=entMap.get(searchstr);

if (rstlist!=null && rstlist.size()>0)
{
++pos;
ex[l][pos]=j;
entx[l][pos]=new Ent(searchstr);
}
}
ex[l][0]=pos;
}
int covered[]=new int[len+3];
for (int l=len;l>=1;l--)
{
for (int p=1;p<=ex[l][0];p++)
{
int flag=1;
for (int k=ex[l][p];k>=ex[l][p]-l+1;k--) if (covered[k]>0) flag=0;
if (flag==1)
{
//flag bits: 1 = occupied, 2 = phrase start, 4 = phrase end, 8 = other (do not bracket)
int FLAG=0;
List<String> nlist=nentMap.get(entx[l][p].mention);
if (nlist!=null && nlist.size()>0) FLAG=8;
if (isAllNumber(entx[l][p].mention)) FLAG=8;
covered[ex[l][p]]|=4;
covered[ex[l][p]-l+1]|=2;
for (int k=ex[l][p];k>=ex[l][p]-l+1;k--)
{
covered[k]|=1|FLAG;
}
}
}
}
for (int i=0;i<len;i++)
{
if ((covered[i]&2)!=0 && (covered[i]&8)==0) ret=ret+"{";
ret=ret+Question.charAt(i);
if ((covered[i]&4)!=0 && (covered[i]&8)==0) ret=ret+"}";
}
//System.out.println("Longest First: "+ret);
//System.out.println("Time: "+(t1-t0)+"ms");
return ret;
}
//1->①
public static String intToCircle(int i)
{
if (0>i || i>20) return null;
String ret="";
ret=ret+(char)(9311+i);
return ret;
}
//①->1
public static int circleToInt(String i)
{
int ret=i.charAt(0)-9311;
if (0<ret&& ret<20) return ret;
else return -1;
}
public static Pair<String,List<Word>> processedString(String s)
{
List<Word> ret=new ArrayList<>();
String sentence = "";
int flag=0;
String word="";
for (int i=0;i<s.length();i++)
{
if (s.charAt(i)=='{')
{
flag=1;
continue;
}
if (s.charAt(i)=='}')
{
if (word.length()<=2)
{
sentence+=word;
word="";
flag=0;
continue;
}
int FLAG=-1;
for (Word j:ret)
if (word.equals(j.word))
FLAG=j.pos;
if (FLAG==-1)
{
flag=0;
ret.add(new Word(word,1,ret.size()+1));
word="";
sentence+=intToCircle(ret.size());
continue;
}
else
{
flag=0;
word="";
sentence+=intToCircle(FLAG);
continue;
}
}
if (flag==0) sentence+=s.charAt(i);
if (flag==1) word=word+s.charAt(i);
}
return new Pair<String,List<Word>>(sentence,ret);
}
public static String reprocess(List<Word> d, List<SegToken> list)
{
String ret="";
int used[]=new int[list.size()+1];
int isValid[]=new int[list.size()+1];
for (int i=0;i<list.size();i++) isValid[i]=0;
for(int len=4;len>=1;len--)
{
for (int i=0;i<list.size()-len+1;i++)
{
String tmp="";
int flag=1;
for (int j=i;j<i+len;j++)
{
tmp=tmp+list.get(j).word;
if (tmp.length()>4) flag=0;
if (circleToInt(list.get(j).word)>=0) flag=0;
if (used[j]==1) flag=0;
}
if (flag==0) continue;
List<String> rstlist=entMap.get(tmp);
List<String> nlist=nentMap.get(tmp);
if (nlist!=null && nlist.size()>0)
{
for (int j=i;j<i+len;j++)
{
used[j]=1;
}
}
if (rstlist!=null && rstlist.size()>0 && (nlist==null||nlist.size()==0))
{
for (int j=i;j<i+len;j++) used[j]=1;
int pos=-1;
for (Word k:d) if (tmp.equals(k.word))
{
pos=k.pos;break;
}
if (pos>0)
{
isValid[i]=pos;
for (int j=i+1;j<i+len;j++)isValid[j]=-1;
}
else
{
d.add(new Word(tmp,1,d.size()+1));
isValid[i]=d.size();
for (int j=i+1;j<i+len;j++)isValid[j]=-1;
}
}

}
}
for (int i=0;i<list.size();i++)
{
if (isValid[i]==0)
{
ret=ret+list.get(i).word;
}
if (isValid[i]>0)
{
ret=ret+intToCircle(isValid[i]);
}
}
return ret;
}
public static String removeQueryId2(String question)
{
String ret = question;
int st = question.indexOf(":");
if(st!=-1 && st<6 && question.length()>4 && ((question.charAt(0)>='0' && question.charAt(0)<='9') ||question.charAt(0)=='q'))
{
ret = question.substring(st+1);
}
return ret;
}
public static String thirdprocess(String sentence,List<Word> d)
{
String temp="",rets2="";
int insyh=0;
int count=0;
List<Integer> lst=new ArrayList<>();
String syh="";
for (int i=0;i<sentence.length();i++)
{
if (circleToInt(""+sentence.charAt(i))!=-1)
{
count++;
}
else
{
if (count>=3)
{
String newent="";
for (int j=i-count;j<i;j++)
{
newent+=d.get(circleToInt(""+sentence.charAt(j))-1).word;
}
temp+=intToCircle(d.size());
d.add(new Word(newent,2,d.size()+1));
}
else
for (int j=i-count;j<i;j++)
{
temp+=sentence.charAt(j);
}
temp+=sentence.charAt(i);
count=0;
}
}
for (int i=0;i<temp.length();i++)
{
if (temp.charAt(i)=='"'&&insyh==0 || temp.charAt(i)=='“')
{
insyh=1;
syh="";
rets2+=temp.charAt(i);
}
else if (temp.charAt(i)=='"'&&insyh==1 || temp.charAt(i)=='”')
{
insyh=0;
if (lst.size()>=1)
{
String rp="";
for (int j=0;j<syh.length();j++)
{
int q=circleToInt(""+syh.charAt(j));
if (q==-1)
rp+=syh.charAt(j);
else
{
rp+=d.get(q-1).word;
//ret[q]="";
}
}
d.add(new Word(rp,2,d.size()+1));
rets2+=intToCircle(d.size())+temp.charAt(i);
}
else
{
rets2+=syh+temp.charAt(i);
}
}
else if (insyh==1)
{
if (circleToInt(""+temp.charAt(i))!=-1)
lst.add(circleToInt(""+temp.charAt(i)));
syh+=temp.charAt(i);
}
else
rets2+=temp.charAt(i);
}
return rets2;
}
public static Pair<String,List<Word>> parse(String input, JiebaSegmenter segmenter)
{
// input=removeQueryId2(input); // The query id has already been removed by the caller.
String newinput=longestFirst2 (input);

Pair<String,List<Word>> d=null,r=new Pair<String,List<Word>>();
r.second=new ArrayList<>();
try {
d=processedString(newinput);
} catch (Exception e) {
System.out.println(e);
}
if (d!=null)
{
//System.out.println(d.first);
List<SegToken> q=segmenter.process(d.first, SegMode.SEARCH);
String secondstr="";
for (SegToken t:q)
{
secondstr=secondstr+t.word+",";
}
//System.out.println("First process: "+secondstr);

String finalstring="";
String stickstr=reprocess(d.second,q);
String thirdstr=thirdprocess(stickstr,d.second);
List<SegToken> q2=segmenter.process(thirdstr, SegMode.SEARCH);
for (SegToken t:q2)
{
finalstring=finalstring+t.word+",";
int p=circleToInt(""+t.word.charAt(0));
if (p!=-1)
{
Word ds=d.second.get(p-1);
r.second.add(new Word(ds.word,ds.type,ds.pos,entMap.get(ds.word)));
}
else
{
r.second.add(new Word(t.word,0,-1));
}
}
System.out.println("Result: "+finalstring);
r.first=thirdstr;
return r;
}
else return null;
}
public static List<nlp.ds.Word> parseSentAndRecogEnt(String sent)
{
Pair<String, List<Word>> result = parse(sent, segmenter);
if(result == null)
return null;
List<nlp.ds.Word> words = new ArrayList<nlp.ds.Word>();
int position = 1;
for(Word ow: result.second)
{
// Note: jieba's POS tagging is deprecated, so we use the Stanford parser to obtain POS tags later.
nlp.ds.Word word = new nlp.ds.Word(ow.word, ow.word, null, position++);
words.add(word);
if(ow.type == 1 && ow.entList != null)
{
// For now, just handle TYPE here in a simple way.
if(TypeFragment.typeShortName2IdList.containsKey(ow.word))
{
word.mayType = true;
word.tmList.add(new TypeMapping(TypeFragment.typeShortName2IdList.get(ow.word).get(0), ow.word, 100.0));
}
word.mayEnt = true;
word.emList = new ArrayList<EntityMapping>();
double score = 100;
for(String ent: ow.entList)
{
if(EntityFragmentFields.entityName2Id.containsKey(ent))
{
//TODO: consider more suitable entity score
int eid = EntityFragmentFields.entityName2Id.get(ent);
// String fstr = EntityFragmentFields.entityFragmentString.get(eid);
// System.out.println(eid+"\t"+fstr);
word.emList.add(new EntityMapping(eid, ent, score));
score -= 10;
}
}
}
else if(ow.type == 2)
word.mayLiteral = true;
// TODO: consider TYPE
}
return words;
}
public static void main(String[] args) throws IOException {
EntityFragmentFields.load();
List<String> inputList = FileUtil.readFile("data/test/mini-ccks.txt");
for(String input: inputList)
{
if (input.length()<2 || input.charAt(0)!='q') continue;
System.out.println("----------------------------------------");
System.out.println(input);
EntityRecognitionCh.parseSentAndRecogEnt(input);
}

}

}
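
The core trick in EntityRecognitionCh above: every mention found by longestFirst2 is bracketed, then replaced by a single circled-number character (① = U+2460) so that jieba treats the whole mention as one indivisible token, and the placeholder is mapped back to its mention afterwards. A minimal round-trip sketch of that encoding follows; the example question and mention are illustrative assumptions, not pkubase data.

import java.util.ArrayList;
import java.util.List;

public class CircledPlaceholderSketch {
    // Mirrors intToCircle/circleToInt above: 1 -> '①' (9311 + 1 == U+2460).
    static String intToCircle(int i) { return String.valueOf((char) (9311 + i)); }
    static int circleToInt(char c)   { int r = c - 9311; return (r > 0 && r < 20) ? r : -1; }

    public static void main(String[] args) {
        String question = "{大兴安岭}的终点是哪里";            // braces mark a mention, as produced by longestFirst2
        List<String> mentions = new ArrayList<>();
        StringBuilder masked = new StringBuilder(), mention = new StringBuilder();
        boolean inBrace = false;
        for (char c : question.toCharArray()) {
            if (c == '{') { inBrace = true; continue; }
            if (c == '}') {                                   // close a mention: emit one placeholder character
                mentions.add(mention.toString());
                masked.append(intToCircle(mentions.size()));
                mention.setLength(0);
                inBrace = false;
                continue;
            }
            if (inBrace) mention.append(c); else masked.append(c);
        }
        System.out.println(masked);                           // ①的终点是哪里 -- safe to segment with jieba
        for (char c : masked.toString().toCharArray()) {      // map placeholders back to mentions
            int p = circleToInt(c);
            if (p != -1) System.out.println(c + " -> " + mentions.get(p - 1));
        }
    }
}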


+ 1
- 2
src/qa/extract/ExtractImplicitRelation.java

@@ -19,7 +19,6 @@ import log.QueryLogger;
import fgmt.EntityFragment;
import fgmt.TypeFragment;
import nlp.ds.Word;
import nlp.tool.CoreNLP;

public class ExtractImplicitRelation {
@@ -374,7 +373,7 @@ public class ExtractImplicitRelation {
public static void main(String[] args) throws Exception {
Globals.coreNLP = new CoreNLP();
// Globals.coreNLP = new CoreNLP();
Globals.pd = new ParaphraseDictionary();
try
{


+ 0
- 2
src/qa/extract/ExtractRelation.java

@@ -28,8 +28,6 @@ public class ExtractRelation {
public ArrayList<SimpleRelation> findRelationsBetweenTwoUnit(SemanticUnit su1, SemanticUnit su2, QueryLogger qlog)
{
DependencyTree T = qlog.s.dependencyTreeStanford;
if(qlog.isMaltParserUsed)
T = qlog.s.dependencyTreeMalt;
DependencyTreeNode n1 = T.getNodeByIndex(su1.centerWord.position), n2 = T.getNodeByIndex(su2.centerWord.position);
ArrayList<DependencyTreeNode> shortestPath = T.getShortestNodePathBetween(n1,n2);


+ 39
- 47
src/qa/extract/TypeRecognition.java

@@ -90,15 +90,7 @@ public class TypeRecognition {
if(allUpperFormWord.length() > 1 && allUpperFormWord.substring(1).equals(allUpperFormWord.substring(1).toLowerCase()))
return null;
//search in YAGO type
if(TypeFragment.yagoTypeList.contains(allUpperFormWord))
{
//YAGO prefix
String typeName = "yago:"+allUpperFormWord;
TypeMapping tm = new TypeMapping(-1,typeName,Globals.pd.typePredicateID,1);
tmList.add(tm);
}
else if(extendTypeMap.containsKey(allUpperFormWord))
if(extendTypeMap.containsKey(allUpperFormWord))
{
String typeName = extendTypeMap.get(allUpperFormWord);
TypeMapping tm = new TypeMapping(-1,typeName,Globals.pd.typePredicateID,1);
@@ -251,22 +243,22 @@ public class TypeRecognition {
}
}
// type
else if(sr.arg1Word.mayType)
else if(sr.arg1Word.mayType) //TODO: type
{
//rule in/of [type] -> constant |eg, How many [countries] are there in [exT:Europe] -> ?uri rdf:type yago:EuropeanCountries
if(arg1WordPos >= 2 && (words[arg1WordPos-1].baseForm.equals("in") || words[arg1WordPos-1].baseForm.equals("of"))
&& !words[arg1WordPos-2].posTag.startsWith("V"))
{
sr.isArg1Constant = true;
double largerScore = 1000;
if(sr.predicateMappings!=null && sr.predicateMappings.size()>0)
largerScore = sr.predicateMappings.get(0).score * 2;
PredicateMapping nPredicate = new PredicateMapping(Globals.pd.typePredicateID, largerScore, "[type]");
sr.predicateMappings.add(0,nPredicate);
//constant type should be object
sr.preferredSubj = sr.arg2Word;
}
// if(arg1WordPos >= 2 && (words[arg1WordPos-1].baseForm.equals("in") || words[arg1WordPos-1].baseForm.equals("of"))
// && !words[arg1WordPos-2].posTag.startsWith("V"))
// {
// sr.isArg1Constant = true;
// double largerScore = 1000;
// if(sr.predicateMappings!=null && sr.predicateMappings.size()>0)
// largerScore = sr.predicateMappings.get(0).score * 2;
// PredicateMapping nPredicate = new PredicateMapping(Globals.pd.typePredicateID, largerScore, "[type]");
// sr.predicateMappings.add(0,nPredicate);
//
// //constant type should be object
// sr.preferredSubj = sr.arg2Word;
// }
}
//ent: constant
else if(sr.arg1Word.mayEnt)
@@ -297,37 +289,37 @@ public class TypeRecognition {
else if(sr.arg2Word.mayType)
{
//rule in/of [type] -> constant |eg, How many [countries] are there in [exT:Europe] -> ?uri rdf:type yago:EuropeanCountries
if(arg2WordPos >= 2 && (words[arg2WordPos-1].baseForm.equals("in") || words[arg2WordPos-1].baseForm.equals("of"))
&& !words[arg2WordPos-2].posTag.startsWith("V") )
{
sr.isArg2Constant = true;
double largerScore = 1000;
if(sr.predicateMappings!=null && sr.predicateMappings.size()>0)
largerScore = sr.predicateMappings.get(0).score * 2;
PredicateMapping nPredicate = new PredicateMapping(Globals.pd.typePredicateID, largerScore, "[type]");
sr.predicateMappings.add(0,nPredicate);
sr.preferredSubj = sr.arg1Word;
}
// if(arg2WordPos >= 2 && (words[arg2WordPos-1].baseForm.equals("in") || words[arg2WordPos-1].baseForm.equals("of"))
// && !words[arg2WordPos-2].posTag.startsWith("V") )
// {
// sr.isArg2Constant = true;
// double largerScore = 1000;
// if(sr.predicateMappings!=null && sr.predicateMappings.size()>0)
// largerScore = sr.predicateMappings.get(0).score * 2;
// PredicateMapping nPredicate = new PredicateMapping(Globals.pd.typePredicateID, largerScore, "[type]");
// sr.predicateMappings.add(0,nPredicate);
//
// sr.preferredSubj = sr.arg1Word;
// }
//rule: Be ... a type?
if(words[0].baseForm.equals("be") && arg2WordPos >=3 && words[arg2WordPos-1].baseForm.equals("a"))
{
sr.isArg2Constant = true;
double largerScore = 1000;
if(sr.predicateMappings!=null && sr.predicateMappings.size()>0)
largerScore = sr.predicateMappings.get(0).score * 2;
PredicateMapping nPredicate = new PredicateMapping(Globals.pd.typePredicateID, largerScore, "[type]");
sr.predicateMappings.add(0,nPredicate);
sr.preferredSubj = sr.arg1Word;
}
// if(words[0].baseForm.equals("be") && arg2WordPos >=3 && words[arg2WordPos-1].baseForm.equals("a"))
// {
// sr.isArg2Constant = true;
// double largerScore = 1000;
// if(sr.predicateMappings!=null && sr.predicateMappings.size()>0)
// largerScore = sr.predicateMappings.get(0).score * 2;
// PredicateMapping nPredicate = new PredicateMapping(Globals.pd.typePredicateID, largerScore, "[type]");
// sr.predicateMappings.add(0,nPredicate);
//
// sr.preferredSubj = sr.arg1Word;
// }
}
else if(sr.arg2Word.mayEnt)
{
sr.isArg2Constant = true;
}
if(sr.arg1Word != sr.preferredSubj)
if(sr.arg2Word == sr.preferredSubj)
sr.swapArg1Arg2();
}
}


+ 0
- 163
src/qa/mapping/DBpediaLookup.java

@@ -1,163 +0,0 @@
package qa.mapping;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;

import lcn.EntityFragmentFields;
import log.QueryLogger;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;

import fgmt.EntityFragment;
import rdf.EntityMapping;

public class DBpediaLookup {
//There are two websites of the DBpediaLookup online service.
//public static final String baseURL = "http://en.wikipedia.org/w/api.php?action=opensearch&format=xml&limit=10&search=";
public static final String baseURL = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString=";
public HttpClient ctripHttpClient = null;
//public static final String begin = "<Text xml:space=\"preserve\">";
//public static final String begin = "<Result>\n <Label>";
public static final String begin = "<Result>\n <Label>";
public static final int begin_length = begin.length();
//public static final String end = "</Text>";
public static final String end = "</Label>";
public static final int end_length = end.length();
public static HashMap<String, String>entMentionDict = null; // TODO: base on redirect data & wikipedia click data to build mention2ent's dictionary, now just manually
public DBpediaLookup()
{
ctripHttpClient = new HttpClient();
ctripHttpClient.setTimeout(3000);
entMentionDict = new HashMap<String, String>();
entMentionDict.put("Prince_Charles", "Charles,_Prince_of_Wales");
}
public ArrayList<EntityMapping> getEntityMappings(String searchString, QueryLogger qlog)
{
ArrayList<String> slist = new ArrayList<String>();
if(entMentionDict.containsKey(searchString))
slist.add(entMentionDict.get(searchString));
else
slist = lookForEntityNames(searchString, qlog);
if (slist.size() == 0 && searchString.contains(". "))
slist.addAll(lookForEntityNames(searchString.replaceAll(". ", "."), qlog));
ArrayList<EntityMapping> emlist = new ArrayList<EntityMapping>();
// Now string use "_" as delimiter (original)
String[] sa = searchString.split("_");
int UpperCnt = 0;
for(String str: sa)
{
if( (str.charAt(0)>='A'&&str.charAt(0)<='Z') || (str.charAt(0)>='0'&&str.charAt(0)<='9') )
UpperCnt ++;
}
System.out.print("DBpediaLookup find: " + slist + ", ");
int count = 40;
for (String s : slist)
{
//consider ABBR only when all UPPER; drop when too long edit distance
if(UpperCnt < sa.length && EntityFragment.calEditDistance(s, searchString.replace("_", ""))>searchString.length()/2)
continue;
int eid = -1;
s = s.replace(" ", "_");
if(EntityFragmentFields.entityName2Id.containsKey(s))
{
eid = EntityFragmentFields.entityName2Id.get(s);
emlist.add(new EntityMapping(eid, s, count));
count -=2 ;
}
else
{
System.out.print("Drop "+s+" because it not in Entity Dictionary. ");
}
}
System.out.println("DBpediaLookup select: " + emlist);
return emlist;
}
public ArrayList<String> lookForEntityNames (String searchString, QueryLogger qlog) {
// URL transition: " " -> %20
GetMethod getMethod = new GetMethod((baseURL+searchString).replaceAll(" ", "%20"));
ArrayList<String> ret = new ArrayList<String>();
int statusCode;
try {
statusCode = ctripHttpClient.executeMethod(getMethod);
} catch (HttpException e) {
e.printStackTrace();
return ret;
} catch (IOException e) {
e.printStackTrace();
return ret;
}
if (statusCode!=200) return null;
String response = getMethod.getResponseBodyAsString();
if (qlog != null && qlog.MODE_debug) {
System.out.println("searchString=" + searchString);
System.out.println("statusCode=" + statusCode);
System.out.println("response=" + getMethod.getResponseBodyAsString());
}
getMethod.releaseConnection();
//System.out.println(response);
if (response == null || response.isEmpty())
return ret;
int idx1 = response.indexOf(begin);
while (idx1 != -1) {
int idx2 = response.indexOf(end, idx1+begin_length);
String ss = response.substring(idx1+begin_length, idx2);
ret.add(ss);
//System.out.println(ss);
idx1 = response.indexOf(begin, idx2 + end_length);
}

return ret;
}
public static void main(String argv[]){
DBpediaLookup dbplook = new DBpediaLookup();
BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
try {
while (true) {
System.out.println("Test DBpediaLookup.");
System.out.print("Please input the search string: ");
String searchString = br.readLine();
try {
long t1 = System.currentTimeMillis();
ArrayList<String> res = dbplook.lookForEntityNames(searchString, null);
long t2 = System.currentTimeMillis();
System.out.println(res);
System.out.println("time=" + (t2-t1) + "ms");
} catch (Exception e) {
e.printStackTrace();
}
}
} catch (IOException e) {
e.printStackTrace();
}

return;
}
}

+ 112
- 197
src/qa/parsing/BuildQueryGraph.java

@@ -37,84 +37,19 @@ public class BuildQueryGraph
public BuildQueryGraph()
{
whList.add("what");
whList.add("which");
whList.add("who");
whList.add("whom");
whList.add("when");
whList.add("how");
whList.add("where");
whList.add("什么");
whList.add("什么时候");
whList.add("哪些");
whList.add("哪里");
whList.add("谁");
// Bad words for NODE. (base form)
// We will train a node recognition model to replace such heuristic rules further.
stopNodeList.add("list");
stopNodeList.add("give");
stopNodeList.add("show");
stopNodeList.add("star");
stopNodeList.add("theme");
stopNodeList.add("world");
stopNodeList.add("independence");
stopNodeList.add("office");
stopNodeList.add("year");
stopNodeList.add("work");
}
public void fixStopWord(QueryLogger qlog, DependencyTree ds)
{
String qStr = qlog.s.plainText.toLowerCase();
//... [which]
for(int i=2;i<qlog.s.words.length;i++)
if(qlog.s.words[i].baseForm.equals("which"))
stopNodeList.add(qlog.s.words[i].baseForm);
//take [place]
if(qStr.contains("take place") || qStr.contains("took place"))
stopNodeList.add("place");
//(When was Alberta admitted) as [province]
if(qStr.contains("as province"))
stopNodeList.add("province");
//what form of government is found in ...
if(qStr.contains("form of government"))
stopNodeList.add("government");
//alma mater of the chancellor
if(qStr.contains("alma mater of the chancellor"))
{
stopNodeList.add("chancellor");
}
//How large is the area of UK?
if(qStr.contains("the area of") || qStr.contains("how big"))
{
stopNodeList.add("area");
}
//how much is the total population of european union?
if(qStr.contains("how much"))
{
stopNodeList.add("population");
stopNodeList.add("elevation");
}
//when was the founding date of french fifth republic
if(qStr.contains("when was the"))
{
stopNodeList.add("founding");
stopNodeList.add("date");
stopNodeList.add("death");
stopNodeList.add("episode");
}
if(qStr.contains("what other book"))
{
stopNodeList.add("book");
}
//Is [Michelle Obama] the [wife] of Barack Obama?
if(qlog.s.words[0].baseForm.equals("be") && isNode(ds.getNodeByIndex(2)) && ds.getNodeByIndex(3).dep_father2child.equals("det")
&& isNode(ds.getNodeByIndex(4)) && qlog.s.words[4].baseForm.equals("of"))
stopNodeList.add(ds.getNodeByIndex(4).word.baseForm);
stopNodeList.add("信仰");
stopNodeList.add("人");
}

// Semantic Parsing for DBpedia.
// Semantic Parsing for Pkubase.
public ArrayList<SemanticUnit> process(QueryLogger qlog)
{
try
@@ -135,15 +70,15 @@ public class BuildQueryGraph
* 3)Coreference resolution.
* */
//0) Fix stop words
fixStopWord(qlog, ds);
// fixStopWord(qlog, ds);
//1) Detect Modifier/Modified
//rely on sentence (rather than dependency tree)
//with some ADJUSTMENT (eg, ent+noun(noType&&noEnt) -> noun.omitNode=TRUE)
for(Word word: qlog.s.words)
getTheModifiedWordBySentence(qlog.s, word); //Find continuous modifier
for(Word word: qlog.s.words)
getDiscreteModifiedWordBySentence(qlog.s, word); //Find discrete modifier
// for(Word word: qlog.s.words)
// getDiscreteModifiedWordBySentence(qlog.s, word); //Find discrete modifier
for(Word word: qlog.s.words)
if(word.modifiedWord == null) //Other words modify themselves. NOTICE: only can be called after detecting all modifier.
word.modifiedWord = word;
@@ -167,9 +102,9 @@ public class BuildQueryGraph
qlog.target = target.word;
// !target can NOT be entity. (except general question)| which [city] has most people?
if(qlog.s.sentenceType != SentenceType.GeneralQuestion && target.word.emList!=null)
// only when target.mayType = true or other entities exist.
if(qlog.s.sentenceType != SentenceType.GeneralQuestion && target.word.mayEnt && target.word.mayType)
{
//Counter example: Give me all Seven_Wonders_of_the_Ancient_World | (in fact, it is not an ENT but a CATEGORY, ?x subject Seve...)
target.word.mayEnt = false;
target.word.emList.clear();
}
@@ -241,6 +176,17 @@ public class BuildQueryGraph
curSU.neighborUnitList.add(expandSU);
}
}
if(semanticUnitList.size() == 1 && target.word.mayEnt)
{
Word[] words = qlog.s.words;
SemanticUnit curSU = semanticUnitList.get(0);
SemanticUnit expandSU = new SemanticUnit(words[words.length-1], false);
semanticUnitList.add(expandSU);
curSU.neighborUnitList.add(expandSU);
expandSU.neighborUnitList.add(curSU);
target = ds.getNodeByIndex(words.length);
qlog.target = target.word;
}
qlog.timeTable.put("BQG_structure", (int)(System.currentTimeMillis()-t));
//step2: Find relations (Notice, we regard that the coreference have been resolved now)
@@ -251,7 +197,7 @@ public class BuildQueryGraph
qlog.timeTable.put("BQG_relation", (int)(System.currentTimeMillis()-t));
//Prepare for item mapping
TypeRecognition.AddTypesOfWhwords(qlog.semanticRelations); // Type supplementary
// TypeRecognition.AddTypesOfWhwords(qlog.semanticRelations); // Type supplementary
TypeRecognition.constantVariableRecognition(qlog.semanticRelations, qlog); // Constant or Variable, embedded triples
//(just for display)
@@ -361,7 +307,7 @@ public class BuildQueryGraph
tmpRelations = new ArrayList<SimpleRelation>();
//Copy relations (for 'and', 'as soon as'...) |eg, In which films did Julia_Roberts and Richard_Gere play?
//TODO: judge by dependency tree | other way to supplement relations
if(curSU.centerWord.position + 2 == expandSU.centerWord.position && qlog.s.words[curSU.centerWord.position].baseForm.equals("and"))
if(curSU.centerWord.position + 2 == expandSU.centerWord.position && qlog.s.words[curSU.centerWord.position].baseForm.equals(""))
{
for(SimpleRelation sr: simpleRelations)
{
@@ -566,6 +512,7 @@ public class BuildQueryGraph
return false;
}
// detect the target (question focus), also to detect some co-reference via rules. (TODO: test existing utils for co-reference resolution)
public DependencyTreeNode detectTarget(DependencyTree ds, QueryLogger qlog)
{
visited.clear();
@@ -583,8 +530,10 @@ public class BuildQueryGraph
// No wh-word: use the first node; NOTICE: consider MODIFIER rules. E.g., was us president Obama ..., target=obama (rather than us)
if(target == null)
{
for(Word word: words)
//Chinese sentences: the question focus usually appears near the end of the sentence.
for(int i=words.length-1; i>=0; i--)
{
Word word = words[i];
Word modifiedWord = word.modifiedWord;
if(modifiedWord != null && isNodeCandidate(modifiedWord))
{
@@ -594,42 +543,25 @@ public class BuildQueryGraph
}
if(target == null)
target = ds.nodesList.get(0);
/* Are [E|tree_frogs] a type of [E|amphibian] , type
*/
for(DependencyTreeNode dtn: target.childrenList)
{
if(dtn.word.baseForm.equals("type"))
{
dtn.word.represent = target.word;
}
}
target = ds.nodesList.get(0);
}
//where, NOTICE: wh target from NN may not pass the function isNode()
if(target.word.baseForm.equals("where"))
//where
if(target.word.baseForm.equals("哪里"))
{
int curPos = target.word.position - 1;
//!Where is the residence of
if(words[curPos+1].baseForm.equals("be") && words[curPos+2].posTag.equals("DT"))
//大兴安岭的[终点]是(哪里) (Where is the [end point] of 大兴安岭?)
if(curPos-2>=0 && isNodeCandidate(words[curPos-2]) && words[curPos-1].baseForm.equals("是"))
{
for(int i=curPos+4;i<words.length;i++)
if(words[i-1].posTag.startsWith("N") && words[i].posTag.equals("IN"))
{
target.word.represent = words[i-1];
target = ds.getNodeByIndex(i);
break;
}
target.word.represent = words[curPos-2];
target = ds.getNodeByIndex(words[curPos-2].position);
}
}
//which
if(target.word.baseForm.equals("which"))
if(target.word.baseForm.equals("哪些") || target.word.baseForm.equals("哪个"))
{
// test case: In which US state is Mount_McKinley located
// test case: 韩国有哪些著名景点? (What famous scenic spots are there in 韩国?)
int curPos = target.word.position-1;
if(curPos+1 < words.length)
{
@@ -639,27 +571,10 @@ public class BuildQueryGraph
// which city ... target = city
target.word.represent = word1;
target = ds.getNodeByIndex(word1.position);
int word1Pos = word1.position - 1;
// word1 + be + (the) + word2, and be is root: word1 & word2 may coreference
if(ds.root.word.baseForm.equals("be") && word1Pos+3 < words.length && words[word1Pos+1].baseForm.equals("be"))
{
// which city is [the] headquarters ...
Word word2 = words[word1Pos+2].modifiedWord;
if(words[word1Pos+2].posTag.equals("DT"))
word2 = words[word1Pos+3].modifiedWord;
int word2Pos = word2.position - 1;
if(word2Pos+1 < words.length && isNodeCandidate(word2) && words[word2Pos+1].posTag.startsWith("IN"))
{
//In which city is [the] headquarters of ... | target = headquarters, city & headquarters: coreference
//In which city was the president of Montenegro born? | COUNTER example, city & president: independent
target.word.represent = word2;
target = ds.getNodeByIndex(word2.position);
}
}
}
}
// by dependency tree
if(target.word.baseForm.equals("which"))
if(target.word.baseForm.equals("哪些") || target.word.baseForm.equals("哪个"))
{
//Which of <films> had the highest budget
boolean ok = false;
@@ -683,14 +598,14 @@ public class BuildQueryGraph
}
//what
else if(target.word.baseForm.equals("what"))
else if(target.word.baseForm.equals("什么"))
{
//Detect:what is [the] sth1 prep. sth2?
//Detect: 龙卷风的[英文名]是(什么) (What is the [English name] of 龙卷风?) | 金轮国师的(什么)[武功]有十龙十象之力? (Which [martial art] of 金轮国师 has the strength of ten dragons and ten elephants?)
//Omit: what is sth?
if(target.father != null && ds.nodesList.size()>=5)
{
DependencyTreeNode tmp1 = target.father;
if(tmp1.word.baseForm.equals("be"))
if(tmp1.word.baseForm.equals(""))
{
for(DependencyTreeNode child: tmp1.childrenList)
{
@@ -698,15 +613,13 @@ public class BuildQueryGraph
continue;
if(isNode(child))
{
//sth1
boolean hasPrep = false;
boolean another_node = false;
for(DependencyTreeNode grandson: child.childrenList)
{ //prep
if(grandson.dep_father2child.equals("prep"))
hasPrep = true;
}
//Detect modifier: what is the sth1's [sth2]? | what is the largest [city]?
if(hasPrep || qlog.s.hasModifier(child.word))
if(isNode(grandson))
another_node = true;
//another node exists || detect modifier: what is the sth1's [sth2]? | what is the largest [city]?
if(another_node || qlog.s.hasModifier(child.word))
{
target.word.represent = child.word;
target = child;
@@ -715,82 +628,84 @@ public class BuildQueryGraph
}
}
}
//what sth || What airlines are (part) of the SkyTeam alliance?
//what sth: 什么山高于8000米 (What mountains are higher than 8000 meters?)
else if(isNode(tmp1))
{
target.word.represent = tmp1.word;
target = tmp1;
// Coreference resolution
int curPos = target.word.position - 1;
if(curPos+3<words.length && words[curPos+1].baseForm.equals("be")&&words[curPos+3].posTag.startsWith("IN") && words.length > 6)
{
words[curPos+2].represent = target.word;
}
target = tmp1;
}
}
// by sentence
if(target.word.baseForm.equals("what"))
if(target.word.baseForm.equals("什么"))
{
// 金轮国师的(什么)[武功]有十龙十象之力? (Which [martial art] of 金轮国师 has the strength of ten dragons and ten elephants?)
int curPos = target.word.position - 1;
// what be the [node] ... ? (Note: words.length INCLUDES the '?' symbol, unlike nodeList)
if(words.length > 5 && words[curPos+1].baseForm.equals("be") && words[curPos+2].baseForm.equals("the") && isNodeCandidate(words[curPos+3]))
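// 什么 immediately followed by a node candidate: that node becomes the new target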
if(curPos + 1 <= words.length - 1 && isNodeCandidate(words[curPos+1]))
{
target.word.represent = words[curPos+3];
target = ds.getNodeByIndex(words[curPos+3].position);
target.word.represent = words[curPos+1];
target = ds.getNodeByIndex(words[curPos+1].position);
}
}
}
//who
else if(target.word.baseForm.equals("who"))
else if(target.word.baseForm.equals(""))
{
//Detect:who is/does [the] sth1 prep. sth2? || Who was the pope that founded the Vatican_Television ? | Who does the voice of Bart Simpson?
//Detect: 武汉大学的现任[校长]是(谁)? (Who is the current [president] of 武汉大学?) | 和子女一起演过电影电视剧的[演员]有(谁)? (Which [actors] have appeared in films or TV series together with their children?)
//Others: who is sth? who do sth? | target = who
//test case: Who is the daughter of Robert_Kennedy married to?
if(ds.nodesList.size()>=5)
{ //who
for(DependencyTreeNode tmp1: ds.nodesList)
{
if(tmp1 != target.father && !target.childrenList.contains(tmp1))
continue;
if(tmp1.word.baseForm.equals("be") || tmp1.word.baseForm.equals("do"))
{ //is
for(DependencyTreeNode child: tmp1.childrenList)
{
if(child == target)
continue;
if(isNode(child))
{ //sth1
boolean hasPrep = false;
for(DependencyTreeNode grandson: child.childrenList)
{ //prep
if(grandson.dep_father2child.equals("prep"))
hasPrep = true;
}
//Detect modifier: who is the sht1's sth2?
// if(hasPrep || qlog.s.plainText.contains(child.word.originalForm + " 's")) // replaced by detect modifier directly
if(hasPrep || qlog.s.hasModifier(child.word))
{
target.word.represent = child.word;
target = child;
break;
}
}
}
}
}
}
//test case: 湖上草是[谁]的(诗)? ([Whose] (poem) is 湖上草?)
// if(ds.nodesList.size()>=5)
// { //who
// for(DependencyTreeNode tmp1: ds.nodesList)
// {
// if(tmp1 != target.father && !target.childrenList.contains(tmp1))
// continue;
// if(tmp1.word.baseForm.equals("be") || tmp1.word.baseForm.equals("do"))
// { //is
// for(DependencyTreeNode child: tmp1.childrenList)
// {
// if(child == target)
// continue;
// if(isNode(child))
// { //sth1
// boolean hasPrep = false;
// for(DependencyTreeNode grandson: child.childrenList)
// { //prep
// if(grandson.dep_father2child.equals("prep"))
// hasPrep = true;
// }
// //Detect modifier: who is the sth1's sth2?
// if(hasPrep || qlog.s.hasModifier(child.word))
// {
// target.word.represent = child.word;
// target = child;
// break;
// }
// }
// }
// }
// }
// }
// by sentence
if(target.word.baseForm.equals("who"))
if(target.word.baseForm.equals("谁"))
{
int curPos = target.word.position - 1;
// 'who' is usually a coreference when it is not the first word.
if(curPos - 1 >= 0 && isNodeCandidate(words[curPos-1]))
// [Node]是(谁) ('[Node] is who?')
if(curPos - 2 >= 0 && isNodeCandidate(words[curPos-2]))
{
target.word.represent = words[curPos-1];
target = ds.getNodeByIndex(words[curPos-1].position);
// 谁 at the end of the sentence: 武汉大学的现任[校长]是(谁) (Who is the current [president] of 武汉大学?)
if(curPos == words.length - 1 && (words[curPos-1].baseForm.equals("是") || words[curPos-1].baseForm.equals("有")) )
{
target.word.represent = words[curPos-2];
target = ds.getNodeByIndex(words[curPos-2].position);
}
// [湖上草]是谁的(诗) (Whose (poem) is [湖上草]?)
if(curPos + 2 == words.length-1 && words[curPos-1].baseForm.equals("是")
&& words[curPos+1].baseForm.equals("的") && isNodeCandidate(words[curPos+2]))
{
words[curPos+2].represent = words[curPos-2];
}
}
// Do nothing: [谁]的[女儿]嫁给了王思聪 ([Whose] [daughter] married 王思聪?)
}
}
//how
@@ -847,7 +762,7 @@ public class BuildQueryGraph
/*
* There are two cases of [ent]+[type]: 1) Chinese company 2) De_Beers company;
* For 1, Chinese -> company; for 2, De_Beers <- company
* Return: True : ent -> type | False : type <- ent
* Return: True : ent -> type | False : ent <- type
* */
public boolean checkModifyBetweenEntType(Word entWord, Word typeWord)
{
@@ -868,9 +783,9 @@ public class BuildQueryGraph
* Through the sentence rather than the dependency tree, as the latter is often incorrect
* Generally, consecutive nodes modify the last node; an exception is test case 3, so we apply a recursive search.
* test case:
* 1) the highest Chinese mountain
* 2) the Chinese popular director
* 3) the De_Beers company (company[type]-> De_Beers[ent])
* 1) 最高的中国山峰 (the highest Chinese mountain)
* 2) 中国流行歌手 (Chinese pop singer)
* 3) 谷歌公司 (Google company; 公司[type] -> 谷歌[ent])
* */
public Word getTheModifiedWordBySentence(Sentence s, Word curWord)
{
@@ -898,14 +813,14 @@ public class BuildQueryGraph
return curWord.modifiedWord = curWord;
}
//modify LEFT: ent + type(cur) : De_Beer company
//modify LEFT: ent + type(cur) : 谷歌 公司 (Google + company)
if(preWord != null && curWord.mayType && preWord.mayEnt) //ent + type(cur)
{
if(!checkModifyBetweenEntType(preWord, curWord)) //De_Beer <- company; note: even if more nodes follow the type, they are ignored here
return curWord.modifiedWord = preWord;
}
//modify itself: ent(cur) + type : De_Beer company
//modify itself: ent(cur) + type : 谷歌 公司 (Google + company)
if(nextModifiedWord != null && curWord.mayEnt && nextModifiedWord.mayType)
{
if(!checkModifyBetweenEntType(curWord, nextModifiedWord))


+ 11
- 30
src/qa/parsing/QuestionParsing.java View File

@@ -16,36 +16,20 @@ public class QuestionParsing {
}
public void getDependenciesAndNER (QueryLogger qlog) {
long t1 = System.currentTimeMillis();
try {
long t1 = System.currentTimeMillis();
qlog.s.dependencyTreeStanford = new DependencyTree(qlog.s, Globals.stanfordParser);
}catch(Exception e){
e.printStackTrace();
}
long t2 = System.currentTimeMillis();
try{
qlog.s.dependencyTreeMalt = new DependencyTree(qlog.s, Globals.maltParser);
}catch(Exception e){
//if errors occur, abandon malt tree
qlog.s.dependencyTreeMalt = qlog.s.dependencyTreeStanford;
System.err.println("MALT parser error! Use stanford parser instead.");
}
try {
long t3 = System.currentTimeMillis();
Globals.nerRecognizer.recognize(qlog.s);
long t4 = System.currentTimeMillis();
long t2 = System.currentTimeMillis();
// Globals.nerRecognizer.recognize(qlog.s); //TODO: check NER
System.out.println("====StanfordDependencies("+(t2-t1)+"ms)====");
System.out.println(qlog.s.dependencyTreeStanford);
System.out.println("====MaltDependencies("+(t3-t2)+"ms)====");
System.out.println(qlog.s.dependencyTreeMalt);
System.out.println("====NameEntityRecognition("+(t4-t3)+"ms)====");
qlog.s.printNERResult();
// qlog.s.printNERResult();
qlog.timeTable.put("StanfordParser", (int)(t2-t1));
qlog.timeTable.put("MaltParser", (int)(t3-t2));
qlog.timeTable.put("NER", (int)(t4-t3));
} catch (Exception e) {
e.printStackTrace();
}
@@ -53,8 +37,7 @@ public class QuestionParsing {
public void recognizeSentenceType(QueryLogger qlog)
{
boolean IsImperativeSentence = recognizeImperativeSentence(qlog.s.dependencyTreeStanford)||
recognizeImperativeSentence(qlog.s.dependencyTreeMalt);
boolean IsImperativeSentence = recognizeImperativeSentence(qlog.s.dependencyTreeStanford);
if (IsImperativeSentence)
{
qlog.s.sentenceType = SentenceType.ImperativeSentence;
@@ -66,16 +49,14 @@ public class QuestionParsing {
return;
}
boolean IsSpecialQuestion = recognizeSpecialQuestion(qlog.s.dependencyTreeStanford)||
recognizeSpecialQuestion(qlog.s.dependencyTreeMalt);
boolean IsSpecialQuestion = recognizeSpecialQuestion(qlog.s.dependencyTreeStanford);
if (IsSpecialQuestion)
{
qlog.s.sentenceType = SentenceType.SpecialQuestion;
return;
}
boolean IsGeneralQuestion = recognizeGeneralQuestion(qlog.s.dependencyTreeStanford)||
recognizeGeneralQuestion(qlog.s.dependencyTreeMalt);
boolean IsGeneralQuestion = recognizeGeneralQuestion(qlog.s.dependencyTreeStanford);
if (IsGeneralQuestion)
{
qlog.s.sentenceType = SentenceType.GeneralQuestion;


+ 0
- 41
src/rdf/MergedWord.java View File

@@ -1,41 +0,0 @@
package rdf;

import java.util.ArrayList;

import rdf.EntityMapping;
import rdf.TypeMapping;

public class MergedWord implements Comparable<MergedWord>
{
//original position
public int st,ed;
//position after merge (unselected is -1)
public int mergedPos = -1;
public String name;
public boolean mayCategory = false;
public boolean mayLiteral = false;
public boolean mayEnt = false;
public boolean mayType = false;
public ArrayList<EntityMapping> emList = null;
public ArrayList<TypeMapping> tmList = null;
public String category = null;
public MergedWord(int s,int e,String n)
{
st = s;
ed = e;
name = n;
}
@Override
//long to short
public int compareTo(MergedWord o)
{
int lenDiff = (this.ed-this.st) - (o.ed-o.st);
if (lenDiff > 0) return -1;
else if (lenDiff < 0) return 1;
return 0;
}
}

+ 1
- 1
src/rdf/SimpleRelation.java View File

@@ -65,7 +65,7 @@ public class SimpleRelation {
}
sumSelectivity = matchingScore*sumSelectivity*pidsup.support;
int pid = pidsup.predicateID;
if (Globals.pd.dbo_predicate_id.contains(pid)) sumSelectivity *= 1.5;
// if (Globals.pd.dbo_predicate_id.contains(pid)) sumSelectivity *= 1.5;
if (!pasList.containsKey(pid))
pasList.put(pid, sumSelectivity);

