Browse Source

Modify the function that loads .cml graph data so that it handles data files in which the molecule is wrapped in an outer cml element (as in the ACE dataset).

v0.2.x
jajupmochi 4 years ago
parent
commit
0373be03ab
2 changed files with 7 additions and 3 deletions
  1. +4
    -2
      gklearn/dataset/file_managers.py
  2. +3
    -1
      gklearn/dataset/metadata.py

+ 4
- 2
gklearn/dataset/file_managers.py View File

@@ -537,16 +537,18 @@ class DataLoader():




def load_cml(self, filename): # @todo: directed graphs. def load_cml(self, filename): # @todo: directed graphs.
# @todo: what is "atomParity" and "bondStereo" in the data file?
from os.path import basename from os.path import basename
import networkx as nx import networkx as nx
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET


xmlns = '{http://www.xml-cml.org/schema}' # @todo: why this has to be added?
tree = ET.parse(filename) tree = ET.parse(filename)
root = tree.getroot() root = tree.getroot()
index = 0 index = 0
g = nx.Graph(filename=basename(filename), name=root.attrib['id'])
g_id = root.find(xmlns + 'molecule').attrib['id']
g = nx.Graph(filename=basename(filename), name=g_id)
dic = {} # used to retrieve incident nodes of edges dic = {} # used to retrieve incident nodes of edges
xmlns = '{http://www.xml-cml.org/schema}' # @todo: why this has to be added?
for atom in root.iter(xmlns + 'atom'): for atom in root.iter(xmlns + 'atom'):
dic[atom.attrib['id']] = index dic[atom.attrib['id']] = index
labels = {} labels = {}


+ 3
- 1
gklearn/dataset/metadata.py View File

@@ -33,6 +33,7 @@ GREYC_META = {
'train_valid_test': [], 'train_valid_test': [],
'stereoisomerism': True, 'stereoisomerism': True,
'load_files': ['data.ds'], 'load_files': ['data.ds'],
'extra_params': {'gformat': 'cml'}
}, },
'Acyclic': { 'Acyclic': {
'database': 'greyc', 'database': 'greyc',
@@ -224,7 +225,8 @@ GREYC_META = {
'domain': 'small molecules', 'domain': 'small molecules',
'train_valid_test': [], 'train_valid_test': [],
'stereoisomerism': True, 'stereoisomerism': True,
'load_files': [],
'load_files': ['data.txt'],
'extra_params': {'gformat': 'cml'}
}, },
} }




Loading…
Cancel
Save