Copyright | (c) Duncan Coutts 2012-2013 Julian Ospald 2022 |
---|---|
License | BSD-style |
Maintainer | hasufell@posteo.de |
Stability | stable |
Portability | ghc only |
Safe Haskell | Trustworthy |
Language | Haskell2010 |
A compact representation suitable for storing short byte strings in memory.
In typical use cases it can be imported alongside Data.ByteString, e.g.
import qualified Data.ByteString as B import qualified Data.ByteString.Short as B (ShortByteString, toShort, fromShort)
Other ShortByteString
operations clash with Data.ByteString or Prelude
functions however, so they should be imported qualified
with a different
alias e.g.
import qualified Data.ByteString.Short as B.Short
Synopsis
- data ShortByteString = SBS ByteArray#
- empty :: ShortByteString
- singleton :: Word8 -> ShortByteString
- pack :: [Word8] -> ShortByteString
- unpack :: ShortByteString -> [Word8]
- fromShort :: ShortByteString -> ByteString
- toShort :: ByteString -> ShortByteString
- snoc :: ShortByteString -> Word8 -> ShortByteString
- cons :: Word8 -> ShortByteString -> ShortByteString
- append :: ShortByteString -> ShortByteString -> ShortByteString
- last :: HasCallStack => ShortByteString -> Word8
- tail :: HasCallStack => ShortByteString -> ShortByteString
- uncons :: ShortByteString -> Maybe (Word8, ShortByteString)
- head :: HasCallStack => ShortByteString -> Word8
- init :: HasCallStack => ShortByteString -> ShortByteString
- unsnoc :: ShortByteString -> Maybe (ShortByteString, Word8)
- null :: ShortByteString -> Bool
- length :: ShortByteString -> Int
- isValidUtf8 :: ShortByteString -> Bool
- map :: (Word8 -> Word8) -> ShortByteString -> ShortByteString
- reverse :: ShortByteString -> ShortByteString
- intercalate :: ShortByteString -> [ShortByteString] -> ShortByteString
- foldl :: (a -> Word8 -> a) -> a -> ShortByteString -> a
- foldl' :: (a -> Word8 -> a) -> a -> ShortByteString -> a
- foldl1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8
- foldl1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8
- foldr :: (Word8 -> a -> a) -> a -> ShortByteString -> a
- foldr' :: (Word8 -> a -> a) -> a -> ShortByteString -> a
- foldr1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8
- foldr1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8
- all :: (Word8 -> Bool) -> ShortByteString -> Bool
- any :: (Word8 -> Bool) -> ShortByteString -> Bool
- concat :: [ShortByteString] -> ShortByteString
- replicate :: Int -> Word8 -> ShortByteString
- unfoldr :: (a -> Maybe (Word8, a)) -> a -> ShortByteString
- unfoldrN :: forall a. Int -> (a -> Maybe (Word8, a)) -> a -> (ShortByteString, Maybe a)
- take :: Int -> ShortByteString -> ShortByteString
- takeEnd :: Int -> ShortByteString -> ShortByteString
- takeWhileEnd :: (Word8 -> Bool) -> ShortByteString -> ShortByteString
- takeWhile :: (Word8 -> Bool) -> ShortByteString -> ShortByteString
- drop :: Int -> ShortByteString -> ShortByteString
- dropEnd :: Int -> ShortByteString -> ShortByteString
- dropWhile :: (Word8 -> Bool) -> ShortByteString -> ShortByteString
- dropWhileEnd :: (Word8 -> Bool) -> ShortByteString -> ShortByteString
- breakEnd :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
- break :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
- span :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
- spanEnd :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
- splitAt :: Int -> ShortByteString -> (ShortByteString, ShortByteString)
- split :: Word8 -> ShortByteString -> [ShortByteString]
- splitWith :: (Word8 -> Bool) -> ShortByteString -> [ShortByteString]
- stripSuffix :: ShortByteString -> ShortByteString -> Maybe ShortByteString
- stripPrefix :: ShortByteString -> ShortByteString -> Maybe ShortByteString
- isInfixOf :: ShortByteString -> ShortByteString -> Bool
- isPrefixOf :: ShortByteString -> ShortByteString -> Bool
- isSuffixOf :: ShortByteString -> ShortByteString -> Bool
- breakSubstring :: ShortByteString -> ShortByteString -> (ShortByteString, ShortByteString)
- elem :: Word8 -> ShortByteString -> Bool
- find :: (Word8 -> Bool) -> ShortByteString -> Maybe Word8
- filter :: (Word8 -> Bool) -> ShortByteString -> ShortByteString
- partition :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
- index :: HasCallStack => ShortByteString -> Int -> Word8
- indexMaybe :: ShortByteString -> Int -> Maybe Word8
- (!?) :: ShortByteString -> Int -> Maybe Word8
- elemIndex :: Word8 -> ShortByteString -> Maybe Int
- elemIndices :: Word8 -> ShortByteString -> [Int]
- count :: Word8 -> ShortByteString -> Int
- findIndex :: (Word8 -> Bool) -> ShortByteString -> Maybe Int
- findIndices :: (Word8 -> Bool) -> ShortByteString -> [Int]
- packCString :: CString -> IO ShortByteString
- packCStringLen :: CStringLen -> IO ShortByteString
- useAsCString :: ShortByteString -> (CString -> IO a) -> IO a
- useAsCStringLen :: ShortByteString -> (CStringLen -> IO a) -> IO a
The ShortByteString
type
data ShortByteString Source #
A compact representation of a Word8
vector.
It has a lower memory overhead than a ByteString
and does not
contribute to heap fragmentation. It can be converted to or from a
ByteString
(at the cost of copying the string data). It supports very few
other operations.
It is suitable for use as an internal representation for code that needs
to keep many short strings in memory, but it should not be used as an
interchange type. That is, it should not generally be used in public APIs.
The ByteString
type is usually more suitable for use in interfaces; it is
more flexible and it supports a wide range of operations.
Instances
Memory overhead
With GHC, the memory overheads are as follows, expressed in words and in bytes (words are 4 and 8 bytes on 32 or 64bit machines respectively).
ByteString
unshared: 8 words; 32 or 64 bytes.ByteString
shared substring: 4 words; 16 or 32 bytes.ShortByteString
: 4 words; 16 or 32 bytes.
For the string data itself, both ShortByteString
and ByteString
use
one byte per element, rounded up to the nearest word. For example,
including the overheads, a length 10 ShortByteString
would take
16 + 12 = 28
bytes on a 32bit platform and 32 + 16 = 48
bytes on a
64bit platform.
These overheads can all be reduced by 1 word (4 or 8 bytes) when the
ShortByteString
or ByteString
is unpacked into another constructor.
For example:
data ThingId = ThingId {-# UNPACK #-} !Int {-# UNPACK #-} !ShortByteString
This will take 1 + 1 + 3
words (the ThingId
constructor +
unpacked Int
+ unpacked ShortByteString
), plus the words for the
string data.
Heap fragmentation
With GHC, the ByteString
representation uses pinned memory,
meaning it cannot be moved by the GC. This is usually the right thing to
do for larger strings, but for small strings using pinned memory can
lead to heap fragmentation which wastes space. The ShortByteString
type (and the Text
type from the text
package) use unpinned memory
so they do not contribute to heap fragmentation. In addition, with GHC,
small unpinned strings are allocated in the same way as normal heap
allocations, rather than in a separate pinned area.
Introducing and eliminating ShortByteString
s
empty :: ShortByteString Source #
O(1). The empty ShortByteString
.
singleton :: Word8 -> ShortByteString Source #
O(1) Convert a Word8
into a ShortByteString
Since: 0.11.3.0
pack :: [Word8] -> ShortByteString Source #
O(n). Convert a list into a ShortByteString
unpack :: ShortByteString -> [Word8] Source #
O(n). Convert a ShortByteString
into a list.
fromShort :: ShortByteString -> ByteString Source #
O(n). Convert a ShortByteString
into a ByteString
.
toShort :: ByteString -> ShortByteString Source #
O(n). Convert a ByteString
into a ShortByteString
.
This makes a copy, so does not retain the input string.
Basic interface
snoc :: ShortByteString -> Word8 -> ShortByteString infixl 5 Source #
cons :: Word8 -> ShortByteString -> ShortByteString infixr 5 Source #
last :: HasCallStack => ShortByteString -> Word8 Source #
O(1) Extract the last element of a ShortByteString, which must be finite and non-empty. An exception will be thrown in the case of an empty ShortByteString.
This is a partial function, consider using unsnoc
instead.
Since: 0.11.3.0
tail :: HasCallStack => ShortByteString -> ShortByteString Source #
O(n) Extract the elements after the head of a ShortByteString, which must be non-empty. An exception will be thrown in the case of an empty ShortByteString.
This is a partial function, consider using uncons
instead.
Note: copies the entire byte array
Since: 0.11.3.0
uncons :: ShortByteString -> Maybe (Word8, ShortByteString) Source #
O(n) Extract the head and tail of a ByteString, returning Nothing if it is empty.
Since: 0.11.3.0
head :: HasCallStack => ShortByteString -> Word8 Source #
O(1) Extract the first element of a ShortByteString, which must be non-empty. An exception will be thrown in the case of an empty ShortByteString.
This is a partial function, consider using uncons
instead.
Since: 0.11.3.0
init :: HasCallStack => ShortByteString -> ShortByteString Source #
O(n) Return all the elements of a ShortByteString
except the last one.
An exception will be thrown in the case of an empty ShortByteString.
This is a partial function, consider using unsnoc
instead.
Note: copies the entire byte array
Since: 0.11.3.0
unsnoc :: ShortByteString -> Maybe (ShortByteString, Word8) Source #
null :: ShortByteString -> Bool Source #
O(1) Test whether a ShortByteString
is empty.
length :: ShortByteString -> Int Source #
O(1) The length of a ShortByteString
.
Encoding validation
isValidUtf8 :: ShortByteString -> Bool Source #
O(n) Check whether a ShortByteString
represents valid UTF-8.
Since: 0.11.3.0
Transforming ShortByteStrings
map :: (Word8 -> Word8) -> ShortByteString -> ShortByteString Source #
O(n) map
f xs
is the ShortByteString obtained by applying f
to each
element of xs
.
Since: 0.11.3.0
reverse :: ShortByteString -> ShortByteString Source #
O(n) reverse
xs
efficiently returns the elements of xs
in reverse order.
Since: 0.11.3.0
intercalate :: ShortByteString -> [ShortByteString] -> ShortByteString Source #
O(n) The intercalate
function takes a ShortByteString
and a list of
ShortByteString
s and concatenates the list after interspersing the first
argument between each element of the list.
Since: 0.11.3.0
Reducing ShortByteString
s (folds)
foldl :: (a -> Word8 -> a) -> a -> ShortByteString -> a Source #
foldl
, applied to a binary operator, a starting value (typically
the left-identity of the operator), and a ShortByteString, reduces the
ShortByteString using the binary operator, from left to right.
Since: 0.11.3.0
foldl' :: (a -> Word8 -> a) -> a -> ShortByteString -> a Source #
foldl1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8 Source #
foldl1
is a variant of foldl
that has no starting value
argument, and thus must be applied to non-empty ShortByteString
s.
An exception will be thrown in the case of an empty ShortByteString.
Since: 0.11.3.0
foldl1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8 Source #
foldr :: (Word8 -> a -> a) -> a -> ShortByteString -> a Source #
foldr
, applied to a binary operator, a starting value
(typically the right-identity of the operator), and a ShortByteString,
reduces the ShortByteString using the binary operator, from right to left.
Since: 0.11.3.0
foldr' :: (Word8 -> a -> a) -> a -> ShortByteString -> a Source #
foldr1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8 Source #
foldr1
is a variant of foldr
that has no starting value argument,
and thus must be applied to non-empty ShortByteString
s
An exception will be thrown in the case of an empty ShortByteString.
Since: 0.11.3.0
foldr1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8 Source #
Special folds
all :: (Word8 -> Bool) -> ShortByteString -> Bool Source #
O(n) Applied to a predicate and a ShortByteString
, all
determines
if all elements of the ShortByteString
satisfy the predicate.
Since: 0.11.3.0
any :: (Word8 -> Bool) -> ShortByteString -> Bool Source #
O(n) Applied to a predicate and a ByteString, any
determines if
any element of the ByteString
satisfies the predicate.
Since: 0.11.3.0
concat :: [ShortByteString] -> ShortByteString Source #
Generating and unfolding ByteStrings
replicate :: Int -> Word8 -> ShortByteString Source #
O(n) replicate
n x
is a ByteString of length n
with x
the value of every element. The following holds:
replicate w c = unfoldr w (\u -> Just (u,u)) c
Since: 0.11.3.0
unfoldr :: (a -> Maybe (Word8, a)) -> a -> ShortByteString Source #
O(n), where n is the length of the result. The unfoldr
function is analogous to the List 'unfoldr'. unfoldr
builds a
ShortByteString from a seed value. The function takes the element and
returns Nothing
if it is done producing the ShortByteString or returns
Just
(a,b)
, in which case, a
is the next byte in the string,
and b
is the seed value for further production.
This function is not efficient/safe. It will build a list of [Word8]
and run the generator until it returns Nothing
, otherwise recurse infinitely,
then finally create a ShortByteString
.
If you know the maximum length, consider using unfoldrN
.
Examples:
unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 == pack [0, 1, 2, 3, 4, 5]
Since: 0.11.3.0
unfoldrN :: forall a. Int -> (a -> Maybe (Word8, a)) -> a -> (ShortByteString, Maybe a) Source #
O(n) Like unfoldr
, unfoldrN
builds a ShortByteString from a seed
value. However, the length of the result is limited by the first
argument to unfoldrN
. This function is more efficient than unfoldr
when the maximum length of the result is known.
The following equation relates unfoldrN
and unfoldr
:
fst (unfoldrN n f s) == take n (unfoldr f s)
Since: 0.11.3.0
Substrings
Breaking strings
take :: Int -> ShortByteString -> ShortByteString Source #
takeEnd :: Int -> ShortByteString -> ShortByteString Source #
takeWhileEnd :: (Word8 -> Bool) -> ShortByteString -> ShortByteString Source #
Returns the longest (possibly empty) suffix of elements satisfying the predicate.
is equivalent to takeWhileEnd
p
.reverse
. takeWhile
p . reverse
Since: 0.11.3.0
takeWhile :: (Word8 -> Bool) -> ShortByteString -> ShortByteString Source #
Similar to takeWhile
,
returns the longest (possibly empty) prefix of elements
satisfying the predicate.
Since: 0.11.3.0
drop :: Int -> ShortByteString -> ShortByteString Source #
dropEnd :: Int -> ShortByteString -> ShortByteString Source #
dropWhile :: (Word8 -> Bool) -> ShortByteString -> ShortByteString Source #
Similar to dropWhile
,
drops the longest (possibly empty) prefix of elements
satisfying the predicate and returns the remainder.
Note: copies the entire byte array
Since: 0.11.3.0
dropWhileEnd :: (Word8 -> Bool) -> ShortByteString -> ShortByteString Source #
Similar to dropWhileEnd
,
drops the longest (possibly empty) suffix of elements
satisfying the predicate and returns the remainder.
is equivalent to dropWhileEnd
p
.reverse
. dropWhile
p . reverse
Since: 0.11.3.0
breakEnd :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) Source #
Returns the longest (possibly empty) suffix of elements which do not satisfy the predicate and the remainder of the string.
breakEnd
p
is equivalent to
and to spanEnd
(not . p)(
.takeWhileEnd
(not . p) &&& dropWhileEnd
(not . p))
Since: 0.11.3.0
break :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) Source #
span :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) Source #
spanEnd :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) Source #
Returns the longest (possibly empty) suffix of elements satisfying the predicate and the remainder of the string.
spanEnd
p
is equivalent to
and to breakEnd
(not . p)(
.takeWhileEnd
p &&& dropWhileEnd
p)
We have
spanEnd (not . isSpace) "x y z" == ("x y ", "z")
and
spanEnd (not . isSpace) sbs == let (x, y) = span (not . isSpace) (reverse sbs) in (reverse y, reverse x)
Since: 0.11.3.0
splitAt :: Int -> ShortByteString -> (ShortByteString, ShortByteString) Source #
split :: Word8 -> ShortByteString -> [ShortByteString] Source #
O(n) Break a ShortByteString
into pieces separated by the byte
argument, consuming the delimiter. I.e.
split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 split 120 "x" == ["",""] -- fromEnum 'x' == 120 split undefined "" == [] -- and not [""]
and
intercalate [c] . split c == id split == splitWith . (==)
Note: copies the substrings
Since: 0.11.3.0
splitWith :: (Word8 -> Bool) -> ShortByteString -> [ShortByteString] Source #
O(n) Splits a ShortByteString
into components delimited by
separators, where the predicate returns True for a separator element.
The resulting components do not contain the separators. Two adjacent
separators result in an empty component in the output. eg.
splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 splitWith undefined "" == [] -- and not [""]
Since: 0.11.3.0
stripSuffix :: ShortByteString -> ShortByteString -> Maybe ShortByteString Source #
O(n) The stripSuffix
function takes two ShortByteStrings and returns Just
the remainder of the second iff the first is its suffix, and otherwise
Nothing
.
Since: 0.11.3.0
stripPrefix :: ShortByteString -> ShortByteString -> Maybe ShortByteString Source #
O(n) The stripPrefix
function takes two ShortByteStrings and returns Just
the remainder of the second iff the first is its prefix, and otherwise
Nothing
.
Since: 0.11.3.0
Predicates
isInfixOf :: ShortByteString -> ShortByteString -> Bool Source #
Check whether one string is a substring of another.
Since: 0.11.3.0
isPrefixOf :: ShortByteString -> ShortByteString -> Bool Source #
O(n) The isPrefixOf
function takes two ShortByteStrings and returns True
Since: 0.11.3.0
isSuffixOf :: ShortByteString -> ShortByteString -> Bool Source #
O(n) The isSuffixOf
function takes two ShortByteStrings and returns True
iff the first is a suffix of the second.
The following holds:
isSuffixOf x y == reverse x `isPrefixOf` reverse y
Since: 0.11.3.0
Search for arbitrary substrings
:: ShortByteString | String to search for |
-> ShortByteString | String to search in |
-> (ShortByteString, ShortByteString) | Head and tail of string broken at substring |
Break a string on a substring, returning a pair of the part of the string prior to the match, and the rest of the string.
The following relationships hold:
break (== c) l == breakSubstring (singleton c) l
For example, to tokenise a string, dropping delimiters:
tokenise x y = h : if null t then [] else tokenise x (drop (length x) t) where (h,t) = breakSubstring x y
To skip to the first occurence of a string:
snd (breakSubstring x y)
To take the parts of a string before a delimiter:
fst (breakSubstring x y)
Note that calling `breakSubstring x` does some preprocessing work, so you should avoid unnecessarily duplicating breakSubstring calls with the same pattern.
Since: 0.11.3.0
Searching ShortByteStrings
Searching by equality
elem :: Word8 -> ShortByteString -> Bool Source #
O(n) elem
is the ShortByteString
membership predicate.
Since: 0.11.3.0
Searching with a predicate
filter :: (Word8 -> Bool) -> ShortByteString -> ShortByteString Source #
O(n) filter
, applied to a predicate and a ByteString,
returns a ByteString containing those characters that satisfy the
predicate.
Since: 0.11.3.0
partition :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) Source #
O(n) The partition
function takes a predicate a ByteString and returns
the pair of ByteStrings with elements which do and do not satisfy the
predicate, respectively; i.e.,
partition p bs == (filter p sbs, filter (not . p) sbs)
Since: 0.11.3.0
Indexing ShortByteStrings
index :: HasCallStack => ShortByteString -> Int -> Word8 Source #
O(1) ShortByteString
index (subscript) operator, starting from 0.
indexMaybe :: ShortByteString -> Int -> Maybe Word8 Source #
elemIndex :: Word8 -> ShortByteString -> Maybe Int Source #
O(n) The elemIndex
function returns the index of the first
element in the given ShortByteString
which is equal to the query
element, or Nothing
if there is no such element.
Since: 0.11.3.0
elemIndices :: Word8 -> ShortByteString -> [Int] Source #
O(n) The elemIndices
function extends elemIndex
, by returning
the indices of all elements equal to the query element, in ascending order.
Since: 0.11.3.0
count :: Word8 -> ShortByteString -> Int Source #
count returns the number of times its argument appears in the ShortByteString
Since: 0.11.3.0
findIndex :: (Word8 -> Bool) -> ShortByteString -> Maybe Int Source #
O(n) The findIndex
function takes a predicate and a ShortByteString
and
returns the index of the first element in the ByteString
satisfying the predicate.
Since: 0.11.3.0
findIndices :: (Word8 -> Bool) -> ShortByteString -> [Int] Source #
O(n) The findIndices
function extends findIndex
, by returning the
indices of all elements satisfying the predicate, in ascending order.
Since: 0.11.3.0
Low level conversions
Packing CString
s and pointers
packCString :: CString -> IO ShortByteString Source #
O(n). Construct a new ShortByteString
from a CString
. The
resulting ShortByteString
is an immutable copy of the original
CString
, and is managed on the Haskell heap. The original
CString
must be null terminated.
Since: 0.10.10.0
packCStringLen :: CStringLen -> IO ShortByteString Source #
O(n). Construct a new ShortByteString
from a CStringLen
. The
resulting ShortByteString
is an immutable copy of the original CStringLen
.
The ShortByteString
is a normal Haskell value and will be managed on the
Haskell heap.
Since: 0.10.10.0
Using ShortByteStrings as CString
s
useAsCString :: ShortByteString -> (CString -> IO a) -> IO a Source #
O(n) construction. Use a ShortByteString
with a function requiring a
null-terminated CString
. The CString
is a copy and will be freed
automatically; it must not be stored or used after the
subcomputation finishes.
Since: 0.10.10.0
useAsCStringLen :: ShortByteString -> (CStringLen -> IO a) -> IO a Source #
O(n) construction. Use a ShortByteString
with a function requiring a CStringLen
.
As for useAsCString
this function makes a copy of the original ShortByteString
.
It must not be stored or used after the subcomputation finishes.
Since: 0.10.10.0