Browse Source

Modify the .cml loading function to handle graph data files in which the molecule is wrapped in an outer cml element (as in the ACE dataset).

v0.2.x
jajupmochi 4 years ago
parent
commit
0373be03ab
2 changed files with 7 additions and 3 deletions
  1. +4
    -2
      gklearn/dataset/file_managers.py
  2. +3
    -1
      gklearn/dataset/metadata.py

+ 4
- 2
gklearn/dataset/file_managers.py View File

@@ -537,16 +537,18 @@ class DataLoader():


def load_cml(self, filename): # @todo: directed graphs.
# @todo: what is "atomParity" and "bondStereo" in the data file?
from os.path import basename
import networkx as nx
import xml.etree.ElementTree as ET

xmlns = '{http://www.xml-cml.org/schema}' # @todo: why this has to be added?
tree = ET.parse(filename)
root = tree.getroot()
index = 0
g = nx.Graph(filename=basename(filename), name=root.attrib['id'])
g_id = root.find(xmlns + 'molecule').attrib['id']
g = nx.Graph(filename=basename(filename), name=g_id)
dic = {} # used to retrieve incident nodes of edges
xmlns = '{http://www.xml-cml.org/schema}' # @todo: why this has to be added?
for atom in root.iter(xmlns + 'atom'):
dic[atom.attrib['id']] = index
labels = {}


+ 3
- 1
gklearn/dataset/metadata.py View File

@@ -33,6 +33,7 @@ GREYC_META = {
'train_valid_test': [],
'stereoisomerism': True,
'load_files': ['data.ds'],
'extra_params': {'gformat': 'cml'}
},
'Acyclic': {
'database': 'greyc',
@@ -224,7 +225,8 @@ GREYC_META = {
'domain': 'small molecules',
'train_valid_test': [],
'stereoisomerism': True,
'load_files': [],
'load_files': ['data.txt'],
'extra_params': {'gformat': 'cml'}
},
}



Loading…
Cancel
Save