Browse Source

clear repo: a test.

v0.1
jajupmochi 5 years ago
parent
commit
2143ff1515
4 changed files with 13 additions and 326 deletions
  1. +4
    -0
      .gitignore
  2. +0
    -1
      gklearn/kernels/structuralspKernel.py
  3. +0
    -320
      gklearn/utils/unused/suffix_tree.py
  4. +9
    -5
      notebooks/utils/plot_all_graphs.py

+ 4
- 0
.gitignore View File

@@ -43,3 +43,7 @@ build/

.coverage
htmlcov

virtualenv

.vscode/

+ 0
- 1
gklearn/kernels/structuralspKernel.py View File

@@ -25,7 +25,6 @@ from gklearn.utils.graphdataset import get_dataset_attributes
from gklearn.utils.parallel import parallel_gm
from gklearn.utils.trie import Trie


def structuralspkernel(*args,
node_label='atom',
edge_weight=None,


+ 0
- 320
gklearn/utils/unused/suffix_tree.py View File

@@ -1,320 +0,0 @@
"""
@author: linlin
@references:
[1] `ptrus/suffix-trees <https://github.com/ptrus/suffix-trees/blob/master/suffix_trees/STree.py>`__, 2018.6
"""

import sys


class STree():
    """Suffix tree of a string, or generalized suffix tree of a list of strings.

    Every input string is terminated with a unique symbol taken from the
    Unicode Private Use Areas before it is inserted, so that no suffix is a
    prefix of another one.

    Attributes set by this class:
        root: the root ``_SNode``; its parent and suffix link are itself.
        word: the (terminated, concatenated) text the tree was built on.
        word_starts: start offset of each input string inside ``word``;
            only assigned when the tree is built from a list of strings.
    """

    def __init__(self, input=''):
        """Create the root node and, unless ``input`` is ``''``, build the tree.

        :param input: String or List of Strings (see :meth:`build`).
        """
        self.root = _SNode()
        self.root.depth = 0
        self.root.idx = 0
        self.root.parent = self.root
        self.root._add_suffix_link(self.root)

        if input != '':
            self.build(input)

    def _check_input(self, input):
        """Classify the input.

        :return: ``'st'`` for a single string, ``'gst'`` for a list whose
            items are all strings.
        :raises ValueError: for any other input.
        """
        if isinstance(input, str):
            return 'st'
        elif isinstance(input, list):
            if all(isinstance(item, str) for item in input):
                return 'gst'

        raise ValueError("String argument should be of type String or"
                         " a list of strings")

    def build(self, x):
        """Build the suffix tree on the given input.

        If the input is a list of strings, a Generalized Suffix Tree is built.

        :param x: String or List of Strings
        :raises ValueError: if ``x`` is neither of those.
        """
        kind = self._check_input(x)

        if kind == 'st':
            # A single string only needs the first terminal symbol.
            x += next(self._terminalSymbolsGenerator())
            self._build(x)
        elif kind == 'gst':
            self._build_generalized(x)

    def _build(self, x):
        """Store ``x`` as the tree's text and build the tree on it."""
        self.word = x
        self._build_McCreight(x)

    def _build_McCreight(self, x):
        """Build the suffix tree of ``x`` with McCreight's algorithm.

        Algorithm based on:
        McCreight, Edward M. "A space-economical suffix tree construction
        algorithm." - ACM, 1976.
        Implementation based on:
        UH CS - 58093 String Processing Algorithms Lecture Notes
        """
        u = self.root
        d = 0
        for i in range(len(x)):
            # Follow existing transitions as long as suffix i keeps matching.
            while u.depth == d and u._has_transition(x[d + i]):
                u = u._get_transition_link(x[d + i])
                d = d + 1
                # Compare along the edge that leads into u.
                while d < u.depth and x[u.idx + d] == x[i + d]:
                    d = d + 1
            if d < u.depth:
                # The mismatch happened inside an edge: split it at depth d.
                u = self._create_node(x, u, d)
            self._create_leaf(x, i, u, d)
            if not u._get_suffix_link():
                self._compute_slink(x, u)
            # Continue the next suffix from the suffix link, one symbol shorter.
            u = u._get_suffix_link()
            d = d - 1
            if d < 0:
                d = 0

    def _create_node(self, x, u, d):
        """Split the edge entering ``u`` at depth ``d``; return the new node.

        The new node takes ``u``'s place below ``u``'s former parent and
        becomes the parent of ``u``.
        """
        i = u.idx
        p = u.parent
        v = _SNode(idx=i, depth=d)
        v._add_transition_link(u, x[i + d])
        u.parent = v
        p._add_transition_link(v, x[i + p.depth])
        v.parent = p
        return v

    def _create_leaf(self, x, i, u, d):
        """Attach a leaf for the suffix starting at ``i`` below ``u``."""
        w = _SNode()
        w.idx = i
        w.depth = len(x) - i
        u._add_transition_link(w, x[i + d])
        w.parent = u
        return w

    def _compute_slink(self, x, u):
        """Compute and store the suffix link of ``u``.

        Starts from the suffix link of ``u``'s parent, walks down until depth
        ``u.depth - 1`` is reached and splits an edge when that depth falls
        inside one.
        """
        d = u.depth
        v = u.parent._get_suffix_link()
        while v.depth < d - 1:
            v = v._get_transition_link(x[u.idx + v.depth + 1])
        if v.depth > d - 1:
            v = self._create_node(x, v, d - 1)
        u._add_suffix_link(v)

    def _build_Ukkonen(self, x):
        """Build a suffix tree using Ukkonen's online algorithm (not implemented).

        Algorithm based on:
        Ukkonen, Esko. "On-line construction of suffix trees." - Algorithmica, 1995.

        :raises NotImplementedError: always.
        """
        # TODO.
        raise NotImplementedError()

    def _build_generalized(self, xs):
        """Build a Generalized Suffix Tree (GST) from the list of strings ``xs``.

        Each string gets its own terminal symbol; the terminated strings are
        concatenated into a single text on which the tree is built, and every
        node is then labelled with the indexes of the strings below it.
        """
        terminal_gen = self._terminalSymbolsGenerator()

        _xs = ''.join([x + next(terminal_gen) for x in xs])
        self.word = _xs
        self._generalized_word_starts(xs)
        self._build(_xs)
        self.root._traverse(self._label_generalized)

    def _label_generalized(self, node):
        """Label ``node`` with the indexes of the strings found in its descendants.

        Intended to be applied children-first (as ``_SNode._traverse`` does),
        because an inner node's label is the union of its children's labels.
        """
        if node.is_leaf():
            x = {self._get_word_start_index(node.idx)}
        else:
            x = {
                n
                for ns in node.transition_links for n in ns[0].generalized_idxs
            }
        node.generalized_idxs = x

    def _get_word_start_index(self, idx):
        """Return the index of the input string that contains text offset ``idx``."""
        i = 0
        for _idx in self.word_starts[1:]:
            if idx < _idx:
                return i
            else:
                i += 1
        return i

    def lcs(self, stringIdxs=-1):
        """Return the Longest Common Substring of the strings in ``stringIdxs``.

        If ``stringIdxs`` is not provided (or is not a list), the LCS of all
        strings is returned. Relies on ``word_starts`` and on the node labels,
        which are only produced when the tree is built from a list of strings.

        :param stringIdxs: Optional: List of indexes of strings.
        """
        if stringIdxs == -1 or not isinstance(stringIdxs, list):
            stringIdxs = set(range(len(self.word_starts)))
        else:
            stringIdxs = set(stringIdxs)

        deepestNode = self._find_lcs(self.root, stringIdxs)
        start = deepestNode.idx
        end = deepestNode.idx + deepestNode.depth
        return self.word[start:end]

    def _find_lcs(self, node, stringIdxs):
        """Return the deepest node below ``node`` whose label covers ``stringIdxs``.

        Only children labelled with every requested string index are explored;
        ``node`` itself is returned when no child qualifies.
        """
        nodes = [
            self._find_lcs(n, stringIdxs) for (n, _) in node.transition_links
            if n.generalized_idxs.issuperset(stringIdxs)
        ]

        if nodes == []:
            return node

        deepestNode = max(nodes, key=lambda n: n.depth)
        return deepestNode

    def _generalized_word_starts(self, xs):
        """Record in ``self.word_starts`` where each string of ``xs`` starts.

        Offsets refer to the concatenated text, in which every string is
        followed by exactly one terminal symbol (hence the ``+ 1``).
        """
        self.word_starts = []
        offset = 0
        for word in xs:
            self.word_starts.append(offset)
            offset += len(word) + 1

    def _descend(self, y):
        """Walk down from the root along ``y``.

        Shared by :meth:`find` and :meth:`find_all`.

        :return: the node at (or below the edge on) which ``y`` ends, or
            ``None`` if ``y`` does not occur in the text.
        """
        node = self.root
        while True:
            edge = self._edgeLabel(node, node.parent)
            if edge.startswith(y):
                # The rest of y ends on the edge entering this node.
                return node

            # Consume the part of y that matches this edge.
            i = 0
            while i < len(edge) and edge[i] == y[0]:
                y = y[1:]
                i += 1

            # Once symbols were consumed, the whole edge must have matched
            # and some of y must remain; otherwise y diverges from the tree.
            if i != 0 and not (i == len(edge) and y != ''):
                return None

            node = node._get_transition_link(y[0])
            if not node:
                return None

    def find(self, y):
        """Return a starting position of the substring ``y`` in the text used
        for building the Suffix tree.

        :param y: String
        :return: Index of a starting position of ``y`` in the text used for
            building the Suffix tree, -1 if ``y`` is not a substring.
        """
        node = self._descend(y)
        if node is None:
            return -1
        return node.idx

    def find_all(self, y):
        """Return the starting positions of all occurrences of ``y``.

        :param y: String
        :return: List with the ``idx`` of every leaf below the node where
            ``y`` ends; an empty list if ``y`` is not a substring.
        """
        node = self._descend(y)
        if node is None:
            return []
        return [leaf.idx for leaf in node._get_leaves()]

    def _edgeLabel(self, node, parent):
        """Helper method, returns the edge label between a node and its parent."""
        return self.word[node.idx + parent.depth:node.idx + node.depth]

    def _terminalSymbolsGenerator(self):
        """Generate unique terminal symbols for building the suffix tree.

        The Unicode Private Use Areas (U+E000..U+F8FF, U+F0000..U+FFFFD and
        U+100000..U+10FFFD) are used so that terminal symbols are unlikely to
        be part of the input strings.

        :raises ValueError: once every symbol has been handed out.
        """
        py2 = sys.version_info[0] < 3
        # `unichr` only exists on Python 2; it is never evaluated on Python 3.
        to_char = unichr if py2 else chr
        # Iterate the ranges lazily: callers usually need only a few symbols,
        # so there is no point in materialising all code points up front.
        private_use_areas = (
            range(0xE000, 0xF8FF + 1),
            range(0xF0000, 0xFFFFD + 1),
            range(0x100000, 0x10FFFD + 1),
        )
        for area in private_use_areas:
            for code_point in area:
                yield to_char(code_point)
        raise ValueError("To many input strings.")


class _SNode():
"""Class representing a Node in the Suffix tree."""

def __init__(self, idx=-1, parentNode=None, depth=-1):
# Links
self._suffix_link = None
self.transition_links = []
# Properties
self.idx = idx
self.depth = depth
self.parent = parentNode
self.generalized_idxs = {}

def __str__(self):
return ("SNode: idx:" + str(self.idx) + " depth:" + str(self.depth) +
" transitons:" + str(self.transition_links))

def _add_suffix_link(self, snode):
self._suffix_link = snode

def _get_suffix_link(self):
if self._suffix_link != None:
return self._suffix_link
else:
return False

def _get_transition_link(self, suffix):
for node, _suffix in self.transition_links:
if _suffix == '__@__' or suffix == _suffix:
return node
return False

def _add_transition_link(self, snode, suffix=''):
tl = self._get_transition_link(suffix)
if tl: # TODO: imporve this.
self.transition_links.remove((tl, suffix))
self.transition_links.append((snode, suffix))

def _has_transition(self, suffix):
for node, _suffix in self.transition_links:
if _suffix == '__@__' or suffix == _suffix:
return True
return False

def is_leaf(self):
return self.transition_links == []

def _traverse(self, f):
for (node, _) in self.transition_links:
node._traverse(f)
f(self)

def _get_leaves(self):
if self.is_leaf():
return [self]
else:
return [
x for (n, _) in self.transition_links for x in n._get_leaves()
]

+ 9
- 5
notebooks/utils/plot_all_graphs.py View File

@@ -17,15 +17,19 @@ from gklearn.utils.graphfiles import loadDataset, loadGXL
def main():
# MUTAG dataset.
dataset, y = loadDataset("../../datasets/MUTAG/MUTAG_A.txt")
for idx in [65]:#[6]:
for idx in [6]: #[65]:#
G = dataset[idx]
ncolors= []
for node in G.nodes:
if G.nodes[node]['atom'] == '0':
G.nodes[node]['atom'] = 'C'
ncolors.append('#bd3182')
elif G.nodes[node]['atom'] == '1':
G.nodes[node]['atom'] = 'N'
ncolors.append('#3182bd')
elif G.nodes[node]['atom'] == '2':
G.nodes[node]['atom'] = 'O'
ncolors.append('#82bd31')
elif G.nodes[node]['atom'] == '3':
G.nodes[node]['atom'] = 'F'
elif G.nodes[node]['atom'] == '4':
@@ -37,11 +41,11 @@ def main():
ecolors = []
for edge in G.edges:
if G.edges[edge]['bond_type'] == '0':
ecolors.append('orange')
ecolors.append('#bd3182')
elif G.edges[edge]['bond_type'] == '1':
ecolors.append('r')
ecolors.append('#3182bd')
elif G.edges[edge]['bond_type'] == '2':
ecolors.append('purple')
ecolors.append('#82bd31')
elif G.edges[edge]['bond_type'] == '3':
ecolors.append('orange')

@@ -54,7 +58,7 @@ def main():
pos,
node_size=500,
labels=nx.get_node_attributes(G, 'atom'),
node_color='blue',
node_color=ncolors,
font_color='w',
edge_color=ecolors,
width=3,


Loading…
Cancel
Save