Browse Source

Hide deprecation warning in graph_files.py.

v0.2.x
jajupmochi 4 years ago
parent
commit
920ebda3f9
1 changed files with 63 additions and 63 deletions
  1. +63
    -63
      gklearn/utils/graph_files.py

+ 63
- 63
gklearn/utils/graph_files.py View File

@@ -1,8 +1,8 @@
""" Utilities function to manage graph files
"""
import warnings
warnings.simplefilter('always', DeprecationWarning)
warnings.warn('The functions in the module "gklearn.utils.graph_files" will be deprecated and removed since version 0.4.0. Use the corresponding functions in the module "gklearn.dataset" instead.', DeprecationWarning)
# import warnings
# warnings.simplefilter('always', DeprecationWarning)
# warnings.warn('The functions in the module "gklearn.utils.graph_files" will be deprecated and removed since version 0.4.0. Use the corresponding functions in the module "gklearn.dataset" instead.', DeprecationWarning)

from os.path import dirname, splitext

@@ -26,17 +26,17 @@ def load_dataset(filename, filename_targets=None, gformat=None, **kwargs):
y : List

Targets corresponding to graphs.
Notes
-----
This function supports following graph dataset formats:

'ds': load data from .ds file. See comments of function loadFromDS for an example.

'cxl': load data from Graph eXchange Language file (.cxl file). See
'cxl': load data from Graph eXchange Language file (.cxl file). See
`here <http://www.gupro.de/GXL/Introduction/background.html>`__ for detail.

'sdf': load data from structured data file (.sdf file). See
'sdf': load data from structured data file (.sdf file). See
`here <http://www.nonlinear.com/progenesis/sdf-studio/v0.9/faq/sdf-file-format-guidance.aspx>`__
for details.

@@ -77,20 +77,20 @@ def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', **kwargs):
import warnings
warnings.simplefilter('always', DeprecationWarning)
warnings.warn('The function "gklearn.utils.save_dataset" will be deprecated and removed since version 0.4.0. Use the class "gklearn.dataset.DataSaver" instead.', DeprecationWarning)
import os
dirname_ds = os.path.dirname(filename)
if dirname_ds != '':
dirname_ds += '/'
os.makedirs(dirname_ds, exist_ok=True)
if 'graph_dir' in kwargs:
graph_dir = kwargs['graph_dir'] + '/'
os.makedirs(graph_dir, exist_ok=True)
del kwargs['graph_dir']
else:
graph_dir = dirname_ds
graph_dir = dirname_ds
if group == 'xml' and gformat == 'gxl':
with open(filename + '.xml', 'w') as fgroup:
fgroup.write("<?xml version=\"1.0\"?>")
@@ -122,7 +122,7 @@ def load_ct(filename): # @todo: this function is only tested on CTFile V2000; he
1 3 1 1 <- each line describes an edge : to, from, bond type, bond stereo

2 3 1 1
Check `CTFile Formats file <https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=10&ved=2ahUKEwivhaSdjsTlAhVhx4UKHczHA8gQFjAJegQIARAC&url=https%3A%2F%2Fwww.daylight.com%2Fmeetings%2Fmug05%2FKappler%2Fctfile.pdf&usg=AOvVaw1cDNrrmMClkFPqodlF2inS>`__
for detailed format description.
"""
@@ -144,7 +144,7 @@ def load_ct(filename): # @todo: this function is only tested on CTFile V2000; he
if count_line_tags[i] != '': # if not obsoleted
g.graph[count_line_tags[i]] = tmp[i].strip()
i += 1
# read the atom block.
atom_tags = ['x', 'y', 'z', 'atom_symbol', 'mass_difference', 'charge', 'atom_stereo_parity', 'hydrogen_count_plus_1', 'stereo_care_box', 'valence', 'h0_designator', '', '', 'atom_atom_mapping_number', 'inversion_retention_flag', 'exact_change_flag']
for i in range(0, nb_atoms):
@@ -156,7 +156,7 @@ def load_ct(filename): # @todo: this function is only tested on CTFile V2000; he
if atom_tags[j] != '':
g.nodes[i][atom_tags[j]] = tmp[j].strip()
j += 1
# read the bond block.
bond_tags = ['first_atom_number', 'second_atom_number', 'bond_type', 'bond_stereo', '', 'bond_topology', 'reacting_center_status']
for i in range(0, nb_bonds):
@@ -169,7 +169,7 @@ def load_ct(filename): # @todo: this function is only tested on CTFile V2000; he
if bond_tags[j] != '':
g.edges[(n1, n2)][bond_tags[j]] = tmp[j].strip()
j += 1
# get label names.
label_names = {'node_labels': [], 'edge_labels': [], 'node_attrs': [], 'edge_attrs': []}
atom_symbolic = [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, None, None, 1, 1, 1]
@@ -188,7 +188,7 @@ def load_ct(filename): # @todo: this function is only tested on CTFile V2000; he
else:
label_names['edge_attrs'].append(key)
break
return g, label_names


@@ -215,19 +215,19 @@ def load_gxl(filename): # @todo: directed graphs.
for attr in edge.iter('attr'):
labels[attr.attrib['name']] = attr[0].text
g.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], **labels)
# get label names.
label_names = {'node_labels': [], 'edge_labels': [], 'node_attrs': [], 'edge_attrs': []}
for node in root.iter('node'):
for attr in node.iter('attr'):
if attr[0].tag == 'int': # @todo: this maybe wrong, and slow.
if attr[0].tag == 'int': # @todo: this maybe wrong, and slow.
label_names['node_labels'].append(attr.attrib['name'])
else:
label_names['node_attrs'].append(attr.attrib['name'])
break
for edge in root.iter('edge'):
for attr in edge.iter('attr'):
if attr[0].tag == 'int': # @todo: this maybe wrong, and slow.
if attr[0].tag == 'int': # @todo: this maybe wrong, and slow.
label_names['edge_labels'].append(attr.attrib['name'])
else:
label_names['edge_attrs'].append(attr.attrib['name'])
@@ -249,20 +249,20 @@ def save_gxl(graph, filename, method='default', node_labels=[], edge_labels=[],
gxl_file.write("<graph id=\"" + name + "\" edgeids=\"false\" edgemode=\"undirected\">\n")
for v, attrs in graph.nodes(data=True):
gxl_file.write("<node id=\"_" + str(v) + "\">")
for l_name in node_labels:
gxl_file.write("<attr name=\"" + l_name + "\"><int>" +
for l_name in node_labels:
gxl_file.write("<attr name=\"" + l_name + "\"><int>" +
str(attrs[l_name]) + "</int></attr>")
for a_name in node_attrs:
gxl_file.write("<attr name=\"" + a_name + "\"><float>" +
for a_name in node_attrs:
gxl_file.write("<attr name=\"" + a_name + "\"><float>" +
str(attrs[a_name]) + "</float></attr>")
gxl_file.write("</node>\n")
for v1, v2, attrs in graph.edges(data=True):
gxl_file.write("<edge from=\"_" + str(v1) + "\" to=\"_" + str(v2) + "\">")
for l_name in edge_labels:
gxl_file.write("<attr name=\"" + l_name + "\"><int>" +
for l_name in edge_labels:
gxl_file.write("<attr name=\"" + l_name + "\"><int>" +
str(attrs[l_name]) + "</int></attr>")
for a_name in edge_attrs:
gxl_file.write("<attr name=\"" + a_name + "\"><float>" +
for a_name in edge_attrs:
gxl_file.write("<attr name=\"" + a_name + "\"><float>" +
str(attrs[a_name]) + "</float></attr>")
gxl_file.write("</edge>\n")
gxl_file.write("</graph>\n")
@@ -276,7 +276,7 @@ def save_gxl(graph, filename, method='default', node_labels=[], edge_labels=[],
attr['edgeids'] = 'true'
attr['edgemode'] = 'undirected'
graph_node = ET.SubElement(root_node, 'graph', attrib=attr)
for v in graph:
current_node = ET.SubElement(graph_node, 'node', attrib={'id': str(v)})
for attr in graph.nodes[v].keys():
@@ -285,7 +285,7 @@ def save_gxl(graph, filename, method='default', node_labels=[], edge_labels=[],
cur_value = ET.SubElement(cur_attr,
graph.nodes[v][attr].__class__.__name__)
cur_value.text = graph.nodes[v][attr]
for v1 in graph:
for v2 in graph[v1]:
if (v1 < v2): # Non oriented graphs
@@ -302,7 +302,7 @@ def save_gxl(graph, filename, method='default', node_labels=[], edge_labels=[],
cur_value = ET.SubElement(
cur_attr, graph[v1][v2][attr].__class__.__name__)
cur_value.text = str(graph[v1][v2][attr])
tree = ET.ElementTree(root_node)
tree.write(filename)
elif method == 'gedlib':
@@ -458,11 +458,11 @@ def load_mat(filename, order): # @todo: need to be updated (auto order) or depre
g.add_edge(col, row)
data.append(g)
# print(g.edges(data=True))
label_names = {'node_labels': ['label_1'], 'edge_labels': [], 'node_attrs': [], 'edge_attrs': []}
if order[1] == 0:
label_names['edge_labels'].append('label_1')
return data, y, label_names


@@ -477,12 +477,12 @@ def load_tud(filename):
import networkx as nx
from os import listdir
from os.path import dirname, basename
def get_infos_from_readme(frm): # @todo: add README (cuniform), maybe node/edge label maps.
"""Get information from DS_label_readme.txt file.
"""
def get_label_names_from_line(line):
"""Get names of labels/attributes from a line.
"""
@@ -490,8 +490,8 @@ def load_tud(filename):
names = str_names.split(',')
names = [attr.strip() for attr in names]
return names
def get_class_label_map(label_map_strings):
label_map = {}
for string in label_map_strings:
@@ -500,7 +500,7 @@ def load_tud(filename):
return label_map


label_names = {'node_labels': [], 'node_attrs': [],
label_names = {'node_labels': [], 'node_attrs': [],
'edge_labels': [], 'edge_attrs': []}
class_label_map = None
class_label_map_strings = []
@@ -528,16 +528,16 @@ def load_tud(filename):
line = content_rm[i].strip()
class_label_map = get_class_label_map(class_label_map_strings)
i += 1
return label_names, class_label_map

# get dataset name.
dirname_dataset = dirname(filename)
filename = basename(filename)
fn_split = filename.split('_A')
ds_name = fn_split[0].strip()
# load data file names
for name in listdir(dirname_dataset):
if ds_name + '_A' in name:
@@ -561,20 +561,20 @@ def load_tud(filename):
# this is supposed to be the node attrs, make sure to put this as the last 'elif'
elif ds_name + '_attributes' in name:
fna = dirname_dataset + '/' + name
# get labels and attributes names.
if 'frm' in locals():
label_names, class_label_map = get_infos_from_readme(frm)
else:
label_names = {'node_labels': [], 'node_attrs': [],
label_names = {'node_labels': [], 'node_attrs': [],
'edge_labels': [], 'edge_attrs': []}
class_label_map = None
with open(fgi) as gi:
content_gi = gi.read().splitlines() # graph indicator
with open(fam) as am:
content_am = am.read().splitlines() # adjacency matrix
# load targets.
if 'fgl' in locals():
with open(fgl) as gl:
@@ -609,7 +609,7 @@ def load_tud(filename):
else:
for i, line in enumerate(content_gi):
data[int(line) - 1].add_node(i)
# add edges
for line in content_am:
tmp = line.split(',')
@@ -670,7 +670,7 @@ def load_tud(filename):
data[g].edges[n[0], n[1]][a_name] = attrs[i]

return data, targets, label_names

def load_from_ds(filename, filename_targets):
"""Load data from .ds file.
@@ -681,9 +681,9 @@ def load_from_ds(filename, filename_targets):

'.gxl': see function load_gxl for detail.

Note these graph formats are checked automatically by the extensions of
Note these graph formats are checked automatically by the extensions of
graph files.
"""
"""
dirname_dataset = dirname(filename)
data = []
y = []
@@ -695,7 +695,7 @@ def load_from_ds(filename, filename_targets):
load_file_fun = load_ct
elif extension == 'gxl' or extension == 'sdf': # @todo: .sdf not tested yet.
load_file_fun = load_gxl
if filename_targets is None or filename_targets == '':
for i in range(0, len(content)):
tmp = content[i].split(' ')
@@ -711,7 +711,7 @@ def load_from_ds(filename, filename_targets):
g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1))
data.append(g)
_append_label_names(label_names, l_names)
with open(filename_targets) as fnt:
content_y = fnt.read().splitlines()
# assume entries in filename and filename_targets have the same order.
@@ -719,13 +719,13 @@ def load_from_ds(filename, filename_targets):
tmp = item.split(' ')
# assume the 3rd entry in a line is y (for Alkane dataset)
y.append(float(tmp[2]))
return data, y, label_names


# def load_from_cxl(filename):
# import xml.etree.ElementTree as ET
#
#
# dirname_dataset = dirname(filename)
# tree = ET.parse(filename)
# root = tree.getroot()
@@ -736,11 +736,11 @@ def load_from_ds(filename, filename_targets):
# mol_class = graph.attrib['class']
# data.append(load_gxl(dirname_dataset + '/' + mol_filename))
# y.append(mol_class)
def load_from_xml(filename, dir_dataset=None):
import xml.etree.ElementTree as ET
if dir_dataset is not None:
dir_dataset = dir_dataset
else:
@@ -757,16 +757,16 @@ def load_from_xml(filename, dir_dataset=None):
data.append(g)
_append_label_names(label_names, l_names)
y.append(mol_class)
return data, y, label_names


def _append_label_names(label_names, new_names):
for key, val in label_names.items():
label_names[key] += [name for name in new_names[key] if name not in val]
if __name__ == '__main__':
if __name__ == '__main__':
# ### Load dataset from .ds file.
# # .ct files.
# ds = {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds',
@@ -782,7 +782,7 @@ if __name__ == '__main__':
# print(Gn[1].nodes(data=True))
# print(Gn[1].edges(data=True))
# print(targets[1])
# # .gxl file.
# ds_file = '../../datasets/monoterpenoides/dataset_10+.ds' # node/edge symb
# Gn, y, label_names = load_dataset(ds_file)
@@ -803,7 +803,7 @@ if __name__ == '__main__':
# ### Convert graph from one format to another.
# # .gxl file.
# import networkx as nx
# ds = {'name': 'monoterpenoides',
# ds = {'name': 'monoterpenoides',
# 'dataset': '../../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
# Gn, y = loadDataset(ds['dataset'])
# y = [int(i) for i in y]
@@ -826,13 +826,13 @@ if __name__ == '__main__':
# filename = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl/monoterpenoides'
# xparams = {'method': 'gedlib'}
# saveDataset(Gn, y, gformat='gxl', group='xml', filename=filename, xparams=xparams)
# save dataset.
# ds = {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat',
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
# Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# saveDataset(Gn, y, group='xml', filename='temp/temp')
# test - new way to add labels and attributes.
# dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
# filename = '../../datasets/Fingerprint/Fingerprint_A.txt'


Loading…
Cancel
Save