You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

EntityFragmentFields.java 3.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. package lcn;
  import java.io.BufferedReader;
  import java.io.File;
  import java.io.FileInputStream;
  import java.io.IOException;
  import java.io.InputStreamReader;
  import java.nio.charset.StandardCharsets;
  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.HashSet;
  import java.util.List;
  import java.util.Set;
  import fgmt.EntityFragment;
  import qa.Globals;
  import utils.FileUtil;
  13. public class EntityFragmentFields {
  14. // entity dictionary
  15. public static HashMap<String, Integer> entityName2Id = null;
  16. public static HashMap<Integer, String> entityId2Name = null;
  17. public static HashMap<Integer, String> entityFragmentString = null;
  18. public static void load() throws IOException
  19. {
  20. String filename = Globals.localPath+"data/pkubase/fragments/id_mappings/pkubase_entity_id.txt";
  21. String fragmentFileName = Globals.localPath+"data/pkubase/fragments/pkubase_entity_fragment.txt";
  22. File file = new File(filename);
  23. BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf-8"));
  24. entityName2Id = new HashMap<String, Integer>();
  25. entityId2Name = new HashMap<Integer, String>();
  26. long t1, t2, t3;
  27. t1 = System.currentTimeMillis();
  28. // load entity id
  29. System.out.println("Loading entity id ...");
  30. String line;
  31. while((line = br.readLine()) != null)
  32. {
  33. String[] lines = line.split("\t");
  34. String entName = lines[0].trim().substring(1, lines[0].length()-1);
  35. entityName2Id.put(entName, Integer.parseInt(lines[1]));
  36. entityId2Name.put(Integer.parseInt(lines[1]), entName);
  37. }
  38. br.close();
  39. t2 = System.currentTimeMillis();
  40. System.out.println("Load "+entityId2Name.size()+" entity ids in "+ (t2-t1) + "ms.");
  41. // load entity fragment
  42. System.out.println("Loading entity fragments ...");
  43. br = new BufferedReader(new InputStreamReader(new FileInputStream(fragmentFileName),"utf-8"));
  44. entityFragmentString = new HashMap<Integer, String>();
  45. while((line = br.readLine()) != null)
  46. {
  47. String[] lines = line.split("\t");
  48. if(lines.length != 2)
  49. continue;
  50. int eId = Integer.parseInt(lines[0]);
  51. entityFragmentString.put(eId, lines[1]);
  52. }
  53. t3 = System.currentTimeMillis();
  54. System.out.println("Load "+entityFragmentString.size()+" entity fragments in "+ (t3-t2) + "ms.");
  55. br.close();
  56. }
  57. public static void genmini()
  58. {
  59. String filename = Globals.localPath+"data/pkubase/fragments/id_mappings/pkuentity_id.txt";
  60. String fragmentFileName = Globals.localPath+"data/pkubase/fragments/pkubase_entity_fragment_mini.txt";
  61. List<String> fragments = FileUtil.readFile(fragmentFileName);
  62. ArrayList<Integer> eids = new ArrayList<Integer>();
  63. for(String fragment: fragments)
  64. {
  65. int eid = Integer.parseInt(fragment.split("\t")[0]);
  66. String fgmt = fragment.split("\t")[1];
  67. EntityFragment ef = new EntityFragment(eid, fgmt);
  68. eids.add(eid);
  69. for(int ent: ef.inEntMap.keySet())
  70. {
  71. eids.add(ent);
  72. }
  73. for(int ent: ef.outEntMap.keySet())
  74. {
  75. eids.add(ent);
  76. }
  77. }
  78. System.out.println(eids.size());
  79. System.out.println("Loading entity id ...");
  80. List<String> data = FileUtil.readFile(filename);
  81. for(String line: data)
  82. {
  83. String[] lines = line.split("\t");
  84. int eid = Integer.parseInt(lines[1]);
  85. if(eids.contains(eid))
  86. System.out.println(line);
  87. }
  88. }
  89. public static void main(String[] args) {
  90. EntityFragmentFields.genmini();
  91. }
  92. }

The GAnswer system is a natural language QA system developed by the Institute of Computer Science & Technology Data Management Lab, Peking University, led by Prof. Zou Lei. GAnswer is able to translate natural language questions to query graphs containing semant