#encoding=utf-8
"""Build per-entity fragment records from an id-encoded triple file.

Input: a text file with one triple per line, written as three
tab-separated ids ``subject<TAB>predicate<TAB>object``.  An object id of
``-1`` is treated as "no entity" (presumably a literal value -- confirm
against the tool that produced the triple file).

Output: one line per entity id in ``range(entity_count)``::

    <eid>\t<in-neighbours>|<out-neighbours>|<in-preds>|<out-preds>|<types>

* ``in-neighbours`` / ``out-neighbours``: ``neighbour:pred;pred;,`` groups
* ``in-preds`` / ``out-preds`` / ``types``: ids, each followed by ``,``

All five fields are always present; a field is empty when the entity has
nothing to report for it.
"""
from collections import defaultdict
from collections import namedtuple

# Placeholder paths from the original script; edit them or pass real paths
# to main().
TRIPLE_FILE = 'triple file represented by ids here'
FRAGMENT_FILE = 'output fragment file'

# Entities are assumed to be numbered 0 .. ENTITY_COUNT - 1.  This must cover
# every entity id; ids outside the range are never written to the output.
ENTITY_COUNT = 12301050

# Placeholder kept verbatim from the original; presumably meant to be replaced
# with the id of the "type" predicate -- confirm before running.
TYPE_PREDICATE = 'id of '

# Id marking "no entity"; never emitted as an item in the output.
NO_ID = '-1'

# A progress counter is printed every this many lines / entities.
PROGRESS_INTERVAL = 10000

EdgeIndex = namedtuple(
    'EdgeIndex', 'in_en_edge out_en_edge in_edge out_edge types')


def _set_map():
    """Return an empty ``key -> set`` mapping (factory for nested indexes)."""
    return defaultdict(set)


def load_triples(path):
    """Read the triple file at *path* and index it by entity.

    Returns an ``EdgeIndex`` with five mappings keyed by entity id (str):

    * ``in_en_edge[o][s]``  -- predicates of triples ``s -> o``
    * ``out_en_edge[s][o]`` -- predicates of triples ``s -> o``
    * ``in_edge[o]``        -- predicates pointing at ``o``
    * ``out_edge[s]``       -- predicates leaving ``s`` (``-1`` objects too)
    * ``types[s]``          -- objects of ``TYPE_PREDICATE`` triples

    Blank lines are skipped.  Raises ``ValueError`` for a non-blank line
    with fewer than three tab-separated fields.
    """
    in_en_edge = defaultdict(_set_map)
    out_en_edge = defaultdict(_set_map)
    in_edge = defaultdict(set)
    out_edge = defaultdict(set)
    types = defaultdict(set)

    with open(path, 'r') as triples:
        for line_no, line in enumerate(triples, 1):
            if line_no % PROGRESS_INTERVAL == 0:
                print(line_no)

            # Strip only the line terminator: slicing off the last character
            # would eat real data when the final line has no newline.
            text = line.rstrip('\r\n')
            if not text:
                continue
            fields = text.split('\t')
            if len(fields) < 3:
                raise ValueError(
                    'line %d of %r: expected 3 tab-separated fields, got %d'
                    % (line_no, path, len(fields)))
            subj, pred, obj = fields[:3]

            if pred == TYPE_PREDICATE and obj != NO_ID:
                types[subj].add(obj)
                continue

            out_edge[subj].add(pred)
            if obj != NO_ID:
                out_en_edge[subj][obj].add(pred)
                # NOTE(review): the original indentation was lost; the in-edge
                # updates are assumed to sit under the same "-1" guard.  The
                # output file is identical either way because "-1" is never an
                # emitted entity id; only the printed in-edge counts could
                # differ by one.
                in_edge[obj].add(pred)
                in_en_edge[obj][subj].add(pred)

    return EdgeIndex(in_en_edge, out_en_edge, in_edge, out_edge, types)


def _join_items(items, terminator):
    """Concatenate *items*, each followed by *terminator*, dropping ``-1``.

    Items are sorted so the output does not depend on set iteration order.
    """
    return ''.join(
        item + terminator for item in sorted(items) if item != NO_ID)


def _format_neighbours(neighbours):
    """Render ``{neighbour: predicates}`` as ``neighbour:p1;p2;,`` groups."""
    return ''.join(
        '%s:%s,' % (neighbour, _join_items(neighbours[neighbour], ';'))
        for neighbour in sorted(neighbours))


def format_fragment(eid, index):
    """Return the five ``|``-separated fragment fields for entity *eid*."""
    # .get() keeps the defaultdicts from growing an entry per looked-up id.
    return '|'.join((
        _format_neighbours(index.in_en_edge.get(eid, {})),
        _format_neighbours(index.out_en_edge.get(eid, {})),
        _join_items(index.in_edge.get(eid, ()), ','),
        _join_items(index.out_edge.get(eid, ()), ','),
        _join_items(index.types.get(eid, ()), ','),
    ))


def main(triple_path=TRIPLE_FILE, output_path=FRAGMENT_FILE,
         entity_count=ENTITY_COUNT):
    """Index *triple_path* and write one fragment line per entity.

    Writes ``entity_count`` lines to *output_path*, for entity ids
    ``0 .. entity_count - 1``.  Progress counters and the sizes of the five
    indexes are printed to stdout.
    """
    index = load_triples(triple_path)
    for mapping in index:
        print(len(mapping))

    # "with" guarantees the output is flushed and closed, even on error.
    with open(output_path, 'w') as out:
        for i in range(entity_count):
            if i % PROGRESS_INTERVAL == 0:
                print(i)
            eid = '%d' % i
            out.write('%s\t%s\n' % (eid, format_fragment(eid, index)))


if __name__ == '__main__':
    main()