diff --git a/.gitignore b/.gitignore index 247159e..5559d48 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,7 @@ build/ .coverage htmlcov + +virtualenv + +.vscode/ diff --git a/gklearn/kernels/structuralspKernel.py b/gklearn/kernels/structuralspKernel.py index ed2d0ad..b200b36 100644 --- a/gklearn/kernels/structuralspKernel.py +++ b/gklearn/kernels/structuralspKernel.py @@ -25,7 +25,6 @@ from gklearn.utils.graphdataset import get_dataset_attributes from gklearn.utils.parallel import parallel_gm from gklearn.utils.trie import Trie - def structuralspkernel(*args, node_label='atom', edge_weight=None, diff --git a/gklearn/utils/unused/suffix_tree.py b/gklearn/utils/unused/suffix_tree.py deleted file mode 100644 index 4b15aa8..0000000 --- a/gklearn/utils/unused/suffix_tree.py +++ /dev/null @@ -1,320 +0,0 @@ -""" -@author: linlin -@references: - [1] `ptrus/suffix-trees `__, 2018.6 -""" - -import sys - - -class STree(): - """Class representing the suffix tree. The generalized suffix tree is supported.""" - - def __init__(self, input=''): - self.root = _SNode() - self.root.depth = 0 - self.root.idx = 0 - self.root.parent = self.root - self.root._add_suffix_link(self.root) - - if not input == '': - self.build(input) - - def _check_input(self, input): - """Checks the validity of the input. - In case of an invalid input throws ValueError. - """ - if isinstance(input, str): - return 'st' - elif isinstance(input, list): - if all(isinstance(item, str) for item in input): - return 'gst' - - raise ValueError("String argument should be of type String or" - " a list of strings") - - def build(self, x): - """Builds the Suffix tree on the given input. - If the input is of type List of Strings: - Generalized Suffix Tree is built. - :param x: String or List of Strings - """ - type = self._check_input(x) - - if type == 'st': - x += next(self._terminalSymbolsGenerator()) - self._build(x) - if type == 'gst': - self._build_generalized(x) - - def _build(self, x): - """Builds a Suffix tree.""" - self.word = x - self._build_McCreight(x) - - def _build_McCreight(self, x): - """Builds a Suffix tree using McCreight O(n) algorithm. - Algorithm based on: - McCreight, Edward M. "A space-economical suffix tree construction algorithm." - ACM, 1976. - Implementation based on: - UH CS - 58093 String Processing Algorithms Lecture Notes - """ - u = self.root - d = 0 - for i in range(len(x)): - while u.depth == d and u._has_transition(x[d + i]): - u = u._get_transition_link(x[d + i]) - d = d + 1 - while d < u.depth and x[u.idx + d] == x[i + d]: - d = d + 1 - if d < u.depth: - u = self._create_node(x, u, d) - self._create_leaf(x, i, u, d) - if not u._get_suffix_link(): - self._compute_slink(x, u) - u = u._get_suffix_link() - d = d - 1 - if d < 0: - d = 0 - - def _create_node(self, x, u, d): - i = u.idx - p = u.parent - v = _SNode(idx=i, depth=d) - v._add_transition_link(u, x[i + d]) - u.parent = v - p._add_transition_link(v, x[i + p.depth]) - v.parent = p - return v - - def _create_leaf(self, x, i, u, d): - w = _SNode() - w.idx = i - w.depth = len(x) - i - u._add_transition_link(w, x[i + d]) - w.parent = u - return w - - def _compute_slink(self, x, u): - d = u.depth - v = u.parent._get_suffix_link() - while v.depth < d - 1: - v = v._get_transition_link(x[u.idx + v.depth + 1]) - if v.depth > d - 1: - v = self._create_node(x, v, d - 1) - u._add_suffix_link(v) - - def _build_Ukkonen(self, x): - """Builds a Suffix tree using Ukkonen's online O(n) algorithm. - Algorithm based on: - Ukkonen, Esko. "On-line construction of suffix trees." - Algorithmica, 1995. - """ - # TODO. - raise NotImplementedError() - - def _build_generalized(self, xs): - """Builds a Generalized Suffix Tree (GST) from the array of strings provided. - """ - terminal_gen = self._terminalSymbolsGenerator() - - _xs = ''.join([x + next(terminal_gen) for x in xs]) - self.word = _xs - self._generalized_word_starts(xs) - self._build(_xs) - self.root._traverse(self._label_generalized) - - def _label_generalized(self, node): - """Helper method that labels the nodes of GST with indexes of strings - found in their descendants. - """ - if node.is_leaf(): - x = {self._get_word_start_index(node.idx)} - else: - x = { - n - for ns in node.transition_links for n in ns[0].generalized_idxs - } - node.generalized_idxs = x - - def _get_word_start_index(self, idx): - """Helper method that returns the index of the string based on node's - starting index""" - i = 0 - for _idx in self.word_starts[1:]: - if idx < _idx: - return i - else: - i += 1 - return i - - def lcs(self, stringIdxs=-1): - """Returns the Largest Common Substring of Strings provided in stringIdxs. - If stringIdxs is not provided, the LCS of all strings is returned. - ::param stringIdxs: Optional: List of indexes of strings. - """ - if stringIdxs == -1 or not isinstance(stringIdxs, list): - stringIdxs = set(range(len(self.word_starts))) - else: - stringIdxs = set(stringIdxs) - - deepestNode = self._find_lcs(self.root, stringIdxs) - start = deepestNode.idx - end = deepestNode.idx + deepestNode.depth - return self.word[start:end] - - def _find_lcs(self, node, stringIdxs): - """Helper method that finds LCS by traversing the labeled GSD.""" - nodes = [ - self._find_lcs(n, stringIdxs) for (n, _) in node.transition_links - if n.generalized_idxs.issuperset(stringIdxs) - ] - - if nodes == []: - return node - - deepestNode = max(nodes, key=lambda n: n.depth) - return deepestNode - - def _generalized_word_starts(self, xs): - """Helper method returns the starting indexes of strings in GST""" - self.word_starts = [] - i = 0 - for n in range(len(xs)): - self.word_starts.append(i) - i += len(xs[n]) + 1 - - def find(self, y): - """Returns starting position of the substring y in the string used for - building the Suffix tree. - :param y: String - :return: Index of the starting position of string y in the string used for building the Suffix tree - -1 if y is not a substring. - """ - node = self.root - while True: - edge = self._edgeLabel(node, node.parent) - if edge.startswith(y): - return node.idx - - i = 0 - while (i < len(edge) and edge[i] == y[0]): - y = y[1:] - i += 1 - - if i != 0: - if i == len(edge) and y != '': - pass - else: - return -1 - - node = node._get_transition_link(y[0]) - if not node: - return -1 - - def find_all(self, y): - y_input = y - node = self.root - while True: - edge = self._edgeLabel(node, node.parent) - if edge.startswith(y): - break - - i = 0 - while (i < len(edge) and edge[i] == y[0]): - y = y[1:] - i += 1 - - if i != 0: - if i == len(edge) and y != '': - pass - else: - return [] - - node = node._get_transition_link(y[0]) - if not node: - return [] - - leaves = node._get_leaves() - return [n.idx for n in leaves] - - def _edgeLabel(self, node, parent): - """Helper method, returns the edge label between a node and it's parent""" - return self.word[node.idx + parent.depth:node.idx + node.depth] - - def _terminalSymbolsGenerator(self): - """Generator of unique terminal symbols used for building the Generalized Suffix Tree. - Unicode Private Use Area U+E000..U+F8FF is used to ensure that terminal symbols - are not part of the input string. - """ - py2 = sys.version[0] < '3' - UPPAs = list( - list(range(0xE000, 0xF8FF + 1)) + - list(range(0xF0000, 0xFFFFD + 1)) + - list(range(0x100000, 0x10FFFD + 1))) - for i in UPPAs: - if py2: - yield (unichr(i)) - else: - yield (chr(i)) - raise ValueError("To many input strings.") - - -class _SNode(): - """Class representing a Node in the Suffix tree.""" - - def __init__(self, idx=-1, parentNode=None, depth=-1): - # Links - self._suffix_link = None - self.transition_links = [] - # Properties - self.idx = idx - self.depth = depth - self.parent = parentNode - self.generalized_idxs = {} - - def __str__(self): - return ("SNode: idx:" + str(self.idx) + " depth:" + str(self.depth) + - " transitons:" + str(self.transition_links)) - - def _add_suffix_link(self, snode): - self._suffix_link = snode - - def _get_suffix_link(self): - if self._suffix_link != None: - return self._suffix_link - else: - return False - - def _get_transition_link(self, suffix): - for node, _suffix in self.transition_links: - if _suffix == '__@__' or suffix == _suffix: - return node - return False - - def _add_transition_link(self, snode, suffix=''): - tl = self._get_transition_link(suffix) - if tl: # TODO: imporve this. - self.transition_links.remove((tl, suffix)) - self.transition_links.append((snode, suffix)) - - def _has_transition(self, suffix): - for node, _suffix in self.transition_links: - if _suffix == '__@__' or suffix == _suffix: - return True - return False - - def is_leaf(self): - return self.transition_links == [] - - def _traverse(self, f): - for (node, _) in self.transition_links: - node._traverse(f) - f(self) - - def _get_leaves(self): - if self.is_leaf(): - return [self] - else: - return [ - x for (n, _) in self.transition_links for x in n._get_leaves() - ] diff --git a/notebooks/utils/plot_all_graphs.py b/notebooks/utils/plot_all_graphs.py index e4f483b..df86148 100644 --- a/notebooks/utils/plot_all_graphs.py +++ b/notebooks/utils/plot_all_graphs.py @@ -17,15 +17,19 @@ from gklearn.utils.graphfiles import loadDataset, loadGXL def main(): # MUTAG dataset. dataset, y = loadDataset("../../datasets/MUTAG/MUTAG_A.txt") - for idx in [65]:#[6]: + for idx in [6]: #[65]:# G = dataset[idx] + ncolors= [] for node in G.nodes: if G.nodes[node]['atom'] == '0': G.nodes[node]['atom'] = 'C' + ncolors.append('#bd3182') elif G.nodes[node]['atom'] == '1': G.nodes[node]['atom'] = 'N' + ncolors.append('#3182bd') elif G.nodes[node]['atom'] == '2': G.nodes[node]['atom'] = 'O' + ncolors.append('#82bd31') elif G.nodes[node]['atom'] == '3': G.nodes[node]['atom'] = 'F' elif G.nodes[node]['atom'] == '4': @@ -37,11 +41,11 @@ def main(): ecolors = [] for edge in G.edges: if G.edges[edge]['bond_type'] == '0': - ecolors.append('orange') + ecolors.append('#bd3182') elif G.edges[edge]['bond_type'] == '1': - ecolors.append('r') + ecolors.append('#3182bd') elif G.edges[edge]['bond_type'] == '2': - ecolors.append('purple') + ecolors.append('#82bd31') elif G.edges[edge]['bond_type'] == '3': ecolors.append('orange') @@ -54,7 +58,7 @@ def main(): pos, node_size=500, labels=nx.get_node_attributes(G, 'atom'), - node_color='blue', + node_color=ncolors, font_color='w', edge_color=ecolors, width=3,