Copyright | © 2017 Herbert Valerio Riedel |
---|---|
License | GPLv3 |
Safe Haskell | Trustworthy |
Language | Haskell2010 |
This module provides the TextSet
container for storing sets of text strings.
This module is intended to be imported qualified
, e.g.
import Data.TextSet.Unboxed (TextSet) import qualified Data.TextSet.Unboxed as TextSet
- data TextSet
- type Key = ShortText
- size :: TextSet -> Int
- null :: TextSet -> Bool
- member :: Key -> TextSet -> Bool
- lookupMin :: TextSet -> Maybe Key
- lookupMax :: TextSet -> Maybe Key
- lookupLE :: Key -> TextSet -> Maybe (Int, Key)
- lookupGE :: Key -> TextSet -> Maybe (Int, Key)
- (!?) :: TextSet -> Int -> Maybe Key
- lookupIndex :: Key -> TextSet -> Maybe Int
- empty :: TextSet
- singleton :: Key -> TextSet
- fromList :: [Key] -> TextSet
- fromDistinctAscList :: [Key] -> TextSet
- fromSet :: Set Key -> TextSet
- toList :: TextSet -> [Key]
- toArray :: TextSet -> TextArray
- toSet :: TextSet -> Set Key
Documentation
A set of unboxed ShortText
strings
The memory footprint of this data-structure is a single heap object (an unlifted ByteArray#
) with the size expressed in words
\[ 3 + n + \left\lceil \frac{1}{w} \sum_{i=0}^{n-1} len(s_i) \right\rceil \]
where the word-size \(w\) is either \(w = 4\) or \(w = 8\) bytes; and where \(len(s_i)\) denotes the UTF-8 size in bytes of the \(i\)-th text string.
NOTE: Depending on whether you UNPACK
the TextSet
wrapper, you need at least one additional word for the pointer to the internal ByteArray#
heap object.
Querying & lookup
size :: TextSet -> Int Source #
\(\mathcal{O}(1)\). Report number of elements in TextSet
.
>>>
size empty
0
>>>
size (singleton "")
1
>>>
size (fromList ["Hey","Hey","Jude"])
2
member :: Key -> TextSet -> Bool Source #
\(\mathcal{O}(\log n)\). Test whether set contains a string.
>>>
member "empty" empty
False
>>>
member "a" (fromList ["a","b","c"])
True
>>>
member "d" (fromList ["a","b","c"])
False
lookupMin :: TextSet -> Maybe Key Source #
\(\mathcal{O}(1)\). Extract minimal element from set.
>>>
lookupMin empty
Nothing
>>>
lookupMin (fromList ["a","b","c"])
Just "a"
lookupMax :: TextSet -> Maybe Key Source #
\(\mathcal{O}(1)\). Extract maximal element from set.
>>>
lookupMax empty
Nothing
>>>
lookupMax (fromList ["a","b","c"])
Just "c"
lookupLE :: Key -> TextSet -> Maybe (Int, Key) Source #
\(\mathcal{O}(\log n)\). Look up "greatest" string (together with its index) in set less or equal to given string.
>>>
lookupLE "a" (fromList ["bb","cc"])
Nothing
>>>
lookupLE "c" (fromList ["bb","cc"])
Just (0,"bb")
>>>
lookupLE "cc" (fromList ["bb","cc"])
Just (1,"cc")
>>>
lookupLE "z" (fromList ["bb","cc"])
Just (1,"cc")
lookupGE :: Key -> TextSet -> Maybe (Int, Key) Source #
\(\mathcal{O}(\log n)\). Look up "least" string (together with its index) in set greater or equal to given string.
>>>
lookupGE "a" (fromList ["bb","cc"])
Just (0,"bb")
>>>
lookupGE "c" (fromList ["bb","cc"])
Just (1,"cc")
>>>
lookupGE "cc" (fromList ["bb","cc"])
Just (1,"cc")
>>>
lookupGE "z" (fromList ["bb","cc"])
Nothing
(!?) :: TextSet -> Int -> Maybe Key Source #
\(\mathcal{O}(1)\). Retrieve \(i\)-th element in the sorted sequence of elements.
>>>
fromList ["Hey","","Jude"] !? 0
Just ""
>>>
fromList ["Hey","","Jude"] !? 1
Just "Hey"
>>>
fromList ["Hey","","Jude"] !? 3
Nothing
See also lookupIndex
.
lookupIndex :: Key -> TextSet -> Maybe Int Source #
\(\mathcal{O}(\log n)\). Look up element in set and report its zero-based index in the sorted sequence elements.
>>>
lookupIndex "" (fromList ["Hey","","Jude"])
Just 0
>>>
lookupIndex "Hey" (fromList ["Hey","","Jude"])
Just 1
>>>
lookupIndex "Law" (fromList ["Hey","","Jude"])
Nothing
See also !?
.
Construction
singleton :: Key -> TextSet Source #
\(\mathcal{O}(1)\). Construct set containing one element.
>>>
toList (singleton "alone")
["alone"]
fromList :: [Key] -> TextSet Source #
\(\mathcal{O}(n \log n)\). Construct set from list of elements.
>>>
toList (fromList ["Hey","Jude","Hey","Law","Hey",""])
["","Hey","Jude","Law"]
fromDistinctAscList :: [Key] -> TextSet Source #
\(\mathcal{O}(n)\). Construct set from list of distinct elements in ascending order.
NOTE: If the input list is not strictly ascending, an error
is thrown.
Deconstruction
toList :: TextSet -> [Key] Source #
\(\mathcal{O}(n)\). Convert TextSet
to list of ShortText
in ascending order.