You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

graphfiles.py 2.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. import networkx as nx
  2. def loadCT(filename):
  3. """load data from .ct file.
  4. Notes
  5. ------
  6. a typical example of data in .ct is like this:
  7. 3 2 <- number of nodes and edges
  8. 0.0000 0.0000 0.0000 C <- each line describes a node, the last parameter in which is the label of the node, representing a chemical element @Q what are the first 3 numbers?
  9. 0.0000 0.0000 0.0000 C
  10. 0.0000 0.0000 0.0000 O
  11. 1 3 1 1 <- each line describes an edge, the first two numbers represent two nodes of the edge, the last number represents the label. @Q what are the 3th numbers?
  12. 2 3 1 1
  13. """
  14. content = open(filename).read().splitlines()
  15. G = nx.Graph(name=str(content[0])) # set name of the graph
  16. tmp = content[1].split(" ")
  17. if tmp[0] == '':
  18. nb_nodes = int(tmp[1]) # number of the nodes
  19. nb_edges = int(tmp[2]) # number of the edges
  20. else:
  21. nb_nodes = int(tmp[0])
  22. nb_edges = int(tmp[1])
  23. for i in range(0, nb_nodes):
  24. tmp = content[i + 2].split(" ")
  25. tmp = [x for x in tmp if x != '']
  26. G.add_node(i, label=tmp[3])
  27. for i in range(0, nb_edges):
  28. tmp = content[i + G.number_of_nodes() + 2].split(" ")
  29. tmp = [x for x in tmp if x != '']
  30. G.add_edge(int(tmp[0]) - 1, int(tmp[1]) - 1, label=int(tmp[3]))
  31. return G
  32. def loadGXL(filename):
  33. import networkx as nx
  34. import xml.etree.ElementTree as ET
  35. tree = ET.parse(filename)
  36. root = tree.getroot()
  37. index = 0
  38. G = nx.Graph()
  39. dic={}
  40. for node in root.iter('node'):
  41. label = node.find('attr')[0].text
  42. dic[node.attrib['id']] = index
  43. G.add_node(index, id=node.attrib['id'], label=label)
  44. index += 1
  45. for edge in root.iter('edge'):
  46. label = edge.find('attr')[0].text
  47. G.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], label=label)
  48. return G
  49. def loadDataset(filename):
  50. """load file list of the dataset.
  51. """
  52. from os.path import dirname, splitext
  53. dirname_dataset = dirname(filename)
  54. extension = splitext(filename)[1][1:]
  55. data = []
  56. y = []
  57. if(extension == "ds"):
  58. content = open(filename).read().splitlines()
  59. for i in range(0, len(content)):
  60. tmp = content[i].split(' ')
  61. data.append(loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1))) # remove the '#'s in file names
  62. y.append(float(tmp[1]))
  63. elif(extension == "cxl"):
  64. import xml.etree.ElementTree as ET
  65. tree = ET.parse(filename)
  66. root = tree.getroot()
  67. data = []
  68. y = []
  69. for graph in root.iter('print'):
  70. mol_filename = graph.attrib['file']
  71. mol_class = graph.attrib['class']
  72. data.append(loadGXL(dirname_dataset + '/' + mol_filename))
  73. y.append(mol_class)
  74. return data, y

A Python package for graph kernels, graph edit distances and graph pre-image problem.