You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

graphfiles.py 4.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. """Utility functions to manage graph files.
  2. """
  3. def loadCT(filename):
  4. """load data from .ct file.
  5. Notes
  6. ------
  7. a typical example of data in .ct is like this:
  8. 3 2 <- number of nodes and edges
  9. 0.0000 0.0000 0.0000 C <- each line describes a node (x,y,z + label)
  10. 0.0000 0.0000 0.0000 C
  11. 0.0000 0.0000 0.0000 O
  12. 1 3 1 1 <- each line describes an edge : to, from,?, label
  13. 2 3 1 1
  14. """
  15. import networkx as nx
  16. from os.path import basename
  17. g = nx.Graph()
  18. with open(filename) as f:
  19. content = f.read().splitlines()
  20. g = nx.Graph(name=str(content[0]), filename=basename(filename)) # set name of the graph
  21. tmp = content[1].split(" ")
  22. if tmp[0] == '':
  23. nb_nodes = int(tmp[1]) # number of the nodes
  24. nb_edges = int(tmp[2]) # number of the edges
  25. else:
  26. nb_nodes = int(tmp[0])
  27. nb_edges = int(tmp[1])
  28. # patch for compatibility : label will be removed later
  29. for i in range(0, nb_nodes):
  30. tmp = content[i + 2].split(" ")
  31. tmp = [x for x in tmp if x != '']
  32. g.add_node(i, atom=tmp[3], label=tmp[3])
  33. for i in range(0, nb_edges):
  34. tmp = content[i + g.number_of_nodes() + 2]
  35. tmp = [tmp[i:i+3] for i in range(0, len(tmp), 3)]
  36. g.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1,
  37. bond_type=tmp[3].strip(), label=tmp[3].strip())
  38. return g
  39. def loadGXL(filename):
  40. from os.path import basename
  41. import networkx as nx
  42. import xml.etree.ElementTree as ET
  43. tree = ET.parse(filename)
  44. root = tree.getroot()
  45. index = 0
  46. g = nx.Graph(filename=basename(filename), name=root[0].attrib['id'])
  47. dic = {} #used to retrieve incident nodes of edges
  48. for node in root.iter('node'):
  49. dic[node.attrib['id']] = index
  50. labels = {}
  51. for attr in node.iter('attr'):
  52. labels[attr.attrib['name']] = attr[0].text
  53. if 'chem' in labels:
  54. labels['label'] = labels['chem']
  55. g.add_node(index, **labels)
  56. index += 1
  57. for edge in root.iter('edge'):
  58. labels = {}
  59. for attr in edge.iter('attr'):
  60. labels[attr.attrib['name']] = attr[0].text
  61. if 'valence' in labels:
  62. labels['label'] = labels['valence']
  63. g.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], **labels)
  64. return g
  65. def saveGXL(graph, filename):
  66. import xml.etree.ElementTree as ET
  67. root_node = ET.Element('gxl')
  68. attr = dict()
  69. attr['id'] = graph.graph['name']
  70. attr['edgeids'] = 'true'
  71. attr['edgemode'] = 'undirected'
  72. graph_node = ET.SubElement(root_node, 'graph', attrib=attr)
  73. for v in graph:
  74. current_node = ET.SubElement(graph_node, 'node', attrib={'id' : str(v)})
  75. for attr in graph.nodes[v].keys():
  76. cur_attr = ET.SubElement(current_node, 'attr', attrib={'name' : attr})
  77. cur_value = ET.SubElement(cur_attr,graph.nodes[v][attr].__class__.__name__)
  78. cur_value.text = graph.nodes[v][attr]
  79. for v1 in graph:
  80. for v2 in graph[v1]:
  81. if(v1 < v2): #Non oriented graphs
  82. cur_edge = ET.SubElement(graph_node, 'edge', attrib={'from' : str(v1),
  83. 'to' : str(v2)})
  84. for attr in graph[v1][v2].keys():
  85. cur_attr = ET.SubElement(cur_edge, 'attr', attrib={'name' : attr})
  86. cur_value = ET.SubElement(cur_attr, graph[v1][v2][attr].__class__.__name__)
  87. cur_value.text = str(graph[v1][v2][attr])
  88. tree = ET.ElementTree(root_node)
  89. tree.write(filename)
  90. def loadDataset(filename):
  91. """load file list of the dataset.
  92. """
  93. from os.path import dirname, splitext
  94. dirname_dataset = dirname(filename)
  95. extension = splitext(filename)[1][1:]
  96. data = []
  97. y = []
  98. if extension == "ds":
  99. content = open(filename).read().splitlines()
  100. for i in range(0, len(content)):
  101. tmp = content[i].split(' ')
  102. data.append(loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1))) # remove the '#'s in file names
  103. y.append(float(tmp[1]))
  104. elif(extension == "cxl"):
  105. import xml.etree.ElementTree as ET
  106. tree = ET.parse(filename)
  107. root = tree.getroot()
  108. data = []
  109. y = []
  110. for graph in root.iter('print'):
  111. mol_filename = graph.attrib['file']
  112. mol_class = graph.attrib['class']
  113. data.append(loadGXL(dirname_dataset + '/' + mol_filename))
  114. y.append(mol_class)
  115. return data, y

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.