|
- import networkx as nx
-
def loadCT(filename):
    """Load a graph from a .ct file.

    Parameters
    ----------
    filename : str
        Path of the .ct file to read.

    Returns
    -------
    networkx.Graph
        Undirected graph whose ``name`` is the first line of the file.
        Nodes are numbered from 0 in file order and carry a string
        ``label``; edges carry an integer ``label``.

    Notes
    -----
    The layout this function reads is::

        some name                 <- line 1: used as the graph name
        3 2                       <- line 2: number of nodes and edges
        0.0000 0.0000 0.0000 C    <- one line per node; only the 4th field
        0.0000 0.0000 0.0000 C       (the node label, e.g. a chemical
        0.0000 0.0000 0.0000 O       element) is read
        1 3 1 1                   <- one line per edge: fields 1 and 2 are
        2 3 1 1                      the 1-based end nodes, field 4 is the
                                     edge label

    The first three numbers of a node line are presumably coordinates and
    the third number of an edge line is not used here -- TODO confirm their
    meaning against the .ct format description.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(filename) as ct_file:
        content = ct_file.read().splitlines()

    G = nx.Graph(name=str(content[0]))  # set name of the graph

    # str.split() without an argument drops leading/trailing blanks and
    # collapses runs of whitespace, so padded count lines parse correctly.
    counts = content[1].split()
    nb_nodes = int(counts[0])  # number of the nodes
    nb_edges = int(counts[1])  # number of the edges

    # Node lines start right after the two header lines.
    for i in range(nb_nodes):
        fields = content[i + 2].split()
        G.add_node(i, label=fields[3])

    # Edge lines follow the node lines; node numbers in the file are
    # 1-based while the graph uses 0-based node keys.
    first_edge_line = nb_nodes + 2
    for i in range(nb_edges):
        fields = content[first_edge_line + i].split()
        G.add_edge(int(fields[0]) - 1, int(fields[1]) - 1,
                   label=int(fields[3]))
    return G
-
-
def loadGXL(filename):
    """Build an undirected graph from a GXL file.

    Every ``node`` element becomes a graph node keyed by its position in
    document order (starting at 0); the original GXL identifier is kept in
    the node's ``id`` attribute. Every ``edge`` element connects the nodes
    named by its ``from`` and ``to`` attributes. For both nodes and edges
    the ``label`` is the text of the first child of the element's first
    ``attr`` child.

    Parameters
    ----------
    filename : str
        Path of the GXL file to parse.

    Returns
    -------
    networkx.Graph
        The graph described by the file.
    """
    import networkx as nx
    import xml.etree.ElementTree as ET

    gxl_root = ET.parse(filename).getroot()
    graph = nx.Graph()
    position_of = {}  # GXL node id -> integer node key used in the graph

    for position, node_elem in enumerate(gxl_root.iter('node')):
        node_label = node_elem.find('attr')[0].text
        node_id = node_elem.attrib['id']
        position_of[node_id] = position
        graph.add_node(position, id=node_id, label=node_label)

    for edge_elem in gxl_root.iter('edge'):
        edge_label = edge_elem.find('attr')[0].text
        source = position_of[edge_elem.attrib['from']]
        target = position_of[edge_elem.attrib['to']]
        graph.add_edge(source, target, label=edge_label)
    return graph
-
def loadDataset(filename):
    """Load every graph listed in a dataset index file.

    Two index formats are recognised by file extension:

    * ``.ds``  -- plain text, one graph per line: a .ct file name (its
      first ``#`` is removed) followed by a numeric target value. Blank
      lines are ignored.
    * ``.cxl`` -- XML; each ``print`` element names a GXL file in its
      ``file`` attribute and a class in its ``class`` attribute.

    Graph files are looked up relative to the directory of ``filename``.

    Parameters
    ----------
    filename : str
        Path of the dataset index file.

    Returns
    -------
    data : list
        The loaded graphs, in index order.
    y : list
        One target per graph: floats for ``.ds``, the raw ``class``
        attribute strings for ``.cxl``. Both lists are empty when the
        extension is not recognised.
    """
    from os.path import dirname, join, splitext

    dirname_dataset = dirname(filename)
    extension = splitext(filename)[1][1:]
    data = []
    y = []
    if extension == "ds":
        # Close the index file deterministically.
        with open(filename) as index_file:
            content = index_file.read().splitlines()
        for line in content:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. trailing newlines).
                continue
            # Remove the first '#' in the file name. join() keeps the path
            # relative when the index file has no directory component,
            # whereas '' + '/' + name would point at the filesystem root.
            ct_name = fields[0].replace('#', '', 1)
            data.append(loadCT(join(dirname_dataset, ct_name)))
            y.append(float(fields[1]))
    elif extension == "cxl":
        import xml.etree.ElementTree as ET

        root = ET.parse(filename).getroot()
        for graph in root.iter('print'):
            mol_filename = graph.attrib['file']
            mol_class = graph.attrib['class']
            data.append(loadGXL(join(dirname_dataset, mol_filename)))
            y.append(mol_class)

    return data, y
|