Sunday, 11 January 2015

Suffix trees (PAT tree)


What is suffix?
A suffix of a string is any substring of the string which includes its last letter, including itself.

For Example, take a string “apple”, suffixes are “e”, “le”, “ple”, “pple”, “apple”.

Suffix Tree
Suffix tree is a compressed trie containing all the suffixes of the given text as their keys and positions in the text as their values. Many string operations are done efficiently using suffix trees.

The suffix tree for the word “apple” is a compressed trie containing the words “e”, “le”, “ple”, “pple”, “apple”.

import com.googlecode.concurrenttrees.common.Iterables;
import com.googlecode.concurrenttrees.common.PrettyPrinter;
import com.googlecode.concurrenttrees.radix.node.concrete.DefaultCharArrayNodeFactory;
import com.googlecode.concurrenttrees.radix.node.util.PrettyPrintable;
import com.googlecode.concurrenttrees.suffix.ConcurrentSuffixTree;
import com.googlecode.concurrenttrees.suffix.SuffixTree;

public class SuffixTreeEx {

    public static void main(String args[]) {
        SuffixTree<Integer> tree = new ConcurrentSuffixTree<Integer>(new DefaultCharArrayNodeFactory());

        tree.put("stock", new Integer(1));
        tree.put("bear", new Integer(2));
        tree.put("bid", new Integer(3));
        tree.put("bell", new Integer(4));
        tree.put("buy", new Integer(5));
        tree.put("bull", new Integer(6));
        tree.put("sell", new Integer(7));
        tree.put("stop", new Integer(8));
        tree.put("beap", new Integer(9));

        System.out.println();
        System.out.println("Tree structure:");
        // PrettyPrintable is a non-public API for testing, prints semi-graphical representations of trees...
        PrettyPrinter.prettyPrint((PrettyPrintable) tree, System.out);

        System.out.println();
        System.out.println("Value for 'bull' (exact match): " + tree.getValueForExactKey("bull"));
        System.out.println("Value for 'stop' (exact match): " + tree.getValueForExactKey("stop"));
        System.out.println();
        System.out.println("Keys ending with 'll': " + Iterables.toString(tree.getKeysEndingWith("ll")));
        System.out.println("Keys ending with 'p': " + Iterables.toString(tree.getKeysEndingWith("p")));
        System.out.println("Values for keys ending with 'll': " + Iterables.toString(tree.getValuesForKeysEndingWith("ll")));
        System.out.println("Key-Value pairs for keys ending with 'll': " + Iterables.toString(tree.getKeyValuePairsForKeysEndingWith("ll")));
        System.out.println();
        System.out.println("Keys containing 'ea': " + Iterables.toString(tree.getKeysContaining("ea")));
        System.out.println("Keys containing 'u': " + Iterables.toString(tree.getKeysContaining("u")));
        System.out.println("Values for keys containing 'u': " + Iterables.toString(tree.getValuesForKeysContaining("u")));
        System.out.println("Key-Value pairs for keys containing 'u': " + Iterables.toString(tree.getKeyValuePairsForKeysContaining("u")));
    }
}


Output
Tree structure:
○
├── ○ a
│   ├── ○ p ([beap])
│   └── ○ r ([bear])
├── ○ b
│   ├── ○ e
│   │   ├── ○ a
│   │   │   ├── ○ p ([beap])
│   │   │   └── ○ r ([bear])
│   │   └── ○ ll ([bell])
│   ├── ○ id ([bid])
│   └── ○ u
│       ├── ○ ll ([bull])
│       └── ○ y ([buy])
├── ○ ck ([stock])
├── ○ d ([bid])
├── ○ e
│   ├── ○ a
│   │   ├── ○ p ([beap])
│   │   └── ○ r ([bear])
│   └── ○ ll ([bell, sell])
├── ○ id ([bid])
├── ○ k ([stock])
├── ○ l ([bell, bull, sell])
│   └── ○ l ([bell, bull, sell])
├── ○ o
│   ├── ○ ck ([stock])
│   └── ○ p ([stop])
├── ○ p ([beap, stop])
├── ○ r ([bear])
├── ○ s
│   ├── ○ ell ([sell])
│   └── ○ to
│       ├── ○ ck ([stock])
│       └── ○ p ([stop])
├── ○ to
│   ├── ○ ck ([stock])
│   └── ○ p ([stop])
├── ○ u
│   ├── ○ ll ([bull])
│   └── ○ y ([buy])
└── ○ y ([buy])

Value for 'bull' (exact match): 6
Value for 'stop' (exact match): 8

Keys ending with 'll': [bell, bull, sell]
Keys ending with 'p': [beap, stop]
Values for keys ending with 'll': [4, 6, 7]
Key-Value pairs for keys ending with 'll': [(bell, 4), (bull, 6), (sell, 7)]

Keys containing 'ea': [beap, bear]
Keys containing 'u': [bull, buy]
Values for keys containing 'u': [6, 5]
Key-Value pairs for keys containing 'u': [(bull, 6), (buy, 5)]

If you want to add keys without any value, you can use “VoidValue.SINGLETON”.

import com.googlecode.concurrenttrees.common.Iterables;
import com.googlecode.concurrenttrees.common.PrettyPrinter;
import com.googlecode.concurrenttrees.radix.node.concrete.DefaultCharArrayNodeFactory;
import com.googlecode.concurrenttrees.radix.node.concrete.voidvalue.VoidValue;
import com.googlecode.concurrenttrees.radix.node.util.PrettyPrintable;
import com.googlecode.concurrenttrees.suffix.ConcurrentSuffixTree;
import com.googlecode.concurrenttrees.suffix.SuffixTree;

public class SuffixTreeEx {

    public static void main(String args[]) {
        SuffixTree<VoidValue> tree = new ConcurrentSuffixTree<VoidValue>(new DefaultCharArrayNodeFactory());

        tree.put("stock", VoidValue.SINGLETON);
        tree.put("bear", VoidValue.SINGLETON);
        tree.put("bid", VoidValue.SINGLETON);
        tree.put("bell", VoidValue.SINGLETON);
        tree.put("buy", VoidValue.SINGLETON);
        tree.put("bull", VoidValue.SINGLETON);
        tree.put("sell", VoidValue.SINGLETON);
        tree.put("stop", VoidValue.SINGLETON);
        tree.put("beap", VoidValue.SINGLETON);

        System.out.println();
        System.out.println("Tree structure:");
        // PrettyPrintable is a non-public API for testing, prints semi-graphical representations of trees...
        PrettyPrinter.prettyPrint((PrettyPrintable) tree, System.out);

        System.out.println();
        System.out.println("Value for 'bull' (exact match): " + tree.getValueForExactKey("bull"));
        System.out.println("Value for 'stop' (exact match): " + tree.getValueForExactKey("stop"));
        System.out.println();
        System.out.println("Keys ending with 'll': " + Iterables.toString(tree.getKeysEndingWith("ll")));
        System.out.println("Keys ending with 'p': " + Iterables.toString(tree.getKeysEndingWith("p")));
        System.out.println("Values for keys ending with 'll': " + Iterables.toString(tree.getValuesForKeysEndingWith("ll")));
        System.out.println("Key-Value pairs for keys ending with 'll': " + Iterables.toString(tree.getKeyValuePairsForKeysEndingWith("ll")));
        System.out.println();
        System.out.println("Keys containing 'ea': " + Iterables.toString(tree.getKeysContaining("ea")));
        System.out.println("Keys containing 'u': " + Iterables.toString(tree.getKeysContaining("u")));
        System.out.println("Values for keys containing 'u': " + Iterables.toString(tree.getValuesForKeysContaining("u")));
        System.out.println("Key-Value pairs for keys containing 'u': " + Iterables.toString(tree.getKeyValuePairsForKeysContaining("u")));
    }
}


Output
Tree structure:
○
├── ○ a
│   ├── ○ p ([beap])
│   └── ○ r ([bear])
├── ○ b
│   ├── ○ e
│   │   ├── ○ a
│   │   │   ├── ○ p ([beap])
│   │   │   └── ○ r ([bear])
│   │   └── ○ ll ([bell])
│   ├── ○ id ([bid])
│   └── ○ u
│       ├── ○ ll ([bull])
│       └── ○ y ([buy])
├── ○ ck ([stock])
├── ○ d ([bid])
├── ○ e
│   ├── ○ a
│   │   ├── ○ p ([beap])
│   │   └── ○ r ([bear])
│   └── ○ ll ([bell, sell])
├── ○ id ([bid])
├── ○ k ([stock])
├── ○ l ([bell, bull, sell])
│   └── ○ l ([bell, bull, sell])
├── ○ o
│   ├── ○ ck ([stock])
│   └── ○ p ([stop])
├── ○ p ([beap, stop])
├── ○ r ([bear])
├── ○ s
│   ├── ○ ell ([sell])
│   └── ○ to
│       ├── ○ ck ([stock])
│       └── ○ p ([stop])
├── ○ to
│   ├── ○ ck ([stock])
│   └── ○ p ([stop])
├── ○ u
│   ├── ○ ll ([bull])
│   └── ○ y ([buy])
└── ○ y ([buy])

Value for 'bull' (exact match): -
Value for 'stop' (exact match): -

Keys ending with 'll': [bell, bull, sell]
Keys ending with 'p': [beap, stop]
Values for keys ending with 'll': [-, -, -]
Key-Value pairs for keys ending with 'll': [(bell, -), (bull, -), (sell, -)]

Keys containing 'ea': [beap, bear]
Keys containing 'u': [bull, buy]
Values for keys containing 'u': [-, -]
Key-Value pairs for keys containing 'u': [(bull, -), (buy, -)]

Prevoius                                                 Next                                                 Home

No comments:

Post a Comment