Copyright | (c) The University of Glasgow 2001 (c) David Roundy 2003-2005 (c) Simon Marlow 2005 (c) Bjorn Bringert 2006 (c) Don Stewart 2005-2008 (c) Duncan Coutts 2006-2013 |
---|---|
License | BSD-style |
Maintainer | dons00@gmail.com, duncan@community.haskell.org |
Stability | stable |
Portability | portable |
Safe Haskell | Trustworthy |
Language | Haskell2010 |
A time- and space-efficient implementation of byte vectors using
packed Word8 arrays, suitable for high performance use, both in terms
of large data quantities and high speed requirements. Byte vectors
are encoded as strict Word8 arrays of bytes, held in a ForeignPtr,
and can be passed between C and Haskell with little effort.
The recommended way to assemble ByteStrings from smaller parts is to use the builder monoid from Data.ByteString.Builder.
This module is intended to be imported qualified, to avoid name
clashes with Prelude functions, e.g.
import qualified Data.ByteString as B
Original GHC implementation by Bryan O'Sullivan.
Rewritten to use UArray
by Simon Marlow.
Rewritten to support slices and use ForeignPtr
by David Roundy.
Rewritten again and extended by Don Stewart and Duncan Coutts.
Synopsis
- data ByteString
- type StrictByteString = ByteString
- empty :: ByteString
- singleton :: Word8 -> ByteString
- pack :: [Word8] -> ByteString
- unpack :: ByteString -> [Word8]
- fromStrict :: StrictByteString -> LazyByteString
- toStrict :: LazyByteString -> StrictByteString
- fromFilePath :: FilePath -> IO ByteString
- toFilePath :: ByteString -> IO FilePath
- cons :: Word8 -> ByteString -> ByteString
- snoc :: ByteString -> Word8 -> ByteString
- append :: ByteString -> ByteString -> ByteString
- head :: HasCallStack => ByteString -> Word8
- uncons :: ByteString -> Maybe (Word8, ByteString)
- unsnoc :: ByteString -> Maybe (ByteString, Word8)
- last :: HasCallStack => ByteString -> Word8
- tail :: HasCallStack => ByteString -> ByteString
- init :: HasCallStack => ByteString -> ByteString
- null :: ByteString -> Bool
- length :: ByteString -> Int
- map :: (Word8 -> Word8) -> ByteString -> ByteString
- reverse :: ByteString -> ByteString
- intersperse :: Word8 -> ByteString -> ByteString
- intercalate :: ByteString -> [ByteString] -> ByteString
- transpose :: [ByteString] -> [ByteString]
- foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
- foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
- foldl1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ByteString -> Word8
- foldl1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ByteString -> Word8
- foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
- foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
- foldr1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ByteString -> Word8
- foldr1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ByteString -> Word8
- concat :: [ByteString] -> ByteString
- concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
- any :: (Word8 -> Bool) -> ByteString -> Bool
- all :: (Word8 -> Bool) -> ByteString -> Bool
- maximum :: HasCallStack => ByteString -> Word8
- minimum :: HasCallStack => ByteString -> Word8
- scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
- scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
- scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
- scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
- mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
- mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
- replicate :: Int -> Word8 -> ByteString
- unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
- unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
- take :: Int -> ByteString -> ByteString
- takeEnd :: Int -> ByteString -> ByteString
- drop :: Int -> ByteString -> ByteString
- dropEnd :: Int -> ByteString -> ByteString
- splitAt :: Int -> ByteString -> (ByteString, ByteString)
- takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
- takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
- dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
- dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
- span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
- spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
- break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
- breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
- group :: ByteString -> [ByteString]
- groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
- inits :: ByteString -> [ByteString]
- tails :: ByteString -> [ByteString]
- initsNE :: ByteString -> NonEmpty ByteString
- tailsNE :: ByteString -> NonEmpty ByteString
- stripPrefix :: ByteString -> ByteString -> Maybe ByteString
- stripSuffix :: ByteString -> ByteString -> Maybe ByteString
- split :: Word8 -> ByteString -> [ByteString]
- splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
- isPrefixOf :: ByteString -> ByteString -> Bool
- isSuffixOf :: ByteString -> ByteString -> Bool
- isInfixOf :: ByteString -> ByteString -> Bool
- isValidUtf8 :: ByteString -> Bool
- breakSubstring :: ByteString -> ByteString -> (ByteString, ByteString)
- elem :: Word8 -> ByteString -> Bool
- notElem :: Word8 -> ByteString -> Bool
- find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
- filter :: (Word8 -> Bool) -> ByteString -> ByteString
- partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
- index :: HasCallStack => ByteString -> Int -> Word8
- indexMaybe :: ByteString -> Int -> Maybe Word8
- (!?) :: ByteString -> Int -> Maybe Word8
- elemIndex :: Word8 -> ByteString -> Maybe Int
- elemIndices :: Word8 -> ByteString -> [Int]
- elemIndexEnd :: Word8 -> ByteString -> Maybe Int
- findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
- findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
- findIndexEnd :: (Word8 -> Bool) -> ByteString -> Maybe Int
- count :: Word8 -> ByteString -> Int
- zip :: ByteString -> ByteString -> [(Word8, Word8)]
- zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
- packZipWith :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
- unzip :: [(Word8, Word8)] -> (ByteString, ByteString)
- sort :: ByteString -> ByteString
- copy :: ByteString -> ByteString
- packCString :: CString -> IO ByteString
- packCStringLen :: CStringLen -> IO ByteString
- useAsCString :: ByteString -> (CString -> IO a) -> IO a
- useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
- getLine :: IO ByteString
- getContents :: IO ByteString
- putStr :: ByteString -> IO ()
- interact :: (ByteString -> ByteString) -> IO ()
- readFile :: FilePath -> IO ByteString
- writeFile :: FilePath -> ByteString -> IO ()
- appendFile :: FilePath -> ByteString -> IO ()
- hGetLine :: Handle -> IO ByteString
- hGetContents :: Handle -> IO ByteString
- hGet :: Handle -> Int -> IO ByteString
- hGetSome :: Handle -> Int -> IO ByteString
- hGetNonBlocking :: Handle -> Int -> IO ByteString
- hPut :: Handle -> ByteString -> IO ()
- hPutNonBlocking :: Handle -> ByteString -> IO ByteString
- hPutStr :: Handle -> ByteString -> IO ()
Strict ByteString
data ByteString Source #
A space-efficient representation of a Word8
vector, supporting many
efficient operations.
A ByteString
contains 8-bit bytes, or by using the operations from
Data.ByteString.Char8 it can be interpreted as containing 8-bit
characters.
Instances
type StrictByteString = ByteString Source #
Type synonym for the strict flavour of ByteString
.
Since: 0.11.2.0
Heap fragmentation
With GHC, the ByteString
representation uses pinned memory,
meaning it cannot be moved by GC. While this is ideal for use with
the foreign function interface and is usually efficient, this
representation may lead to issues with heap fragmentation and wasted
space if the program selectively retains a fraction of many small
ByteString
s, keeping them live in memory over long durations.
While ByteString
is indispensable when working with large blobs of
data and especially when interfacing with native C libraries, be sure
to also check the ShortByteString
type.
As a type backed by unpinned memory, ShortByteString
behaves
similarly to Text
(from the text
package) on the heap, completely
avoids fragmentation issues, and in many use-cases may better suit
your bytestring-storage needs.
Introducing and eliminating ByteString
s
empty :: ByteString Source #
O(1) The empty ByteString
singleton :: Word8 -> ByteString Source #
O(1) Convert a Word8
into a ByteString
pack :: [Word8] -> ByteString Source #
O(n) Convert a [Word8] into a ByteString.
For applications with large numbers of string literals, pack
can be a
bottleneck. In such cases, consider using unsafePackAddress
(GHC only).
unpack :: ByteString -> [Word8] Source #
O(n) Converts a ByteString to a [Word8].
fromStrict :: StrictByteString -> LazyByteString Source #
O(1) Convert a StrictByteString
into a LazyByteString
.
toStrict :: LazyByteString -> StrictByteString Source #
O(n) Convert a LazyByteString
into a StrictByteString
.
Note that this is an expensive operation that forces the whole
LazyByteString
into memory and then copies all the data. If possible, try to
avoid converting back and forth between strict and lazy bytestrings.
fromFilePath :: FilePath -> IO ByteString Source #
Convert a FilePath
to a ByteString
.
The FilePath
type is expected to use the file system encoding
as reported by getFileSystemEncoding
. This
encoding allows for round-tripping of arbitrary data on platforms
that allow arbitrary bytes in their paths. This conversion
function does the same thing that openFile
would
do when decoding the FilePath
.
This function is in IO
because the file system encoding can be
changed. If the encoding can be assumed to be constant in your
use case, you may invoke this function via unsafePerformIO
.
Since: 0.11.2.0
toFilePath :: ByteString -> IO FilePath Source #
Convert a ByteString
to a FilePath
.
This function uses the file system encoding, and resulting FilePath
s
can be safely used with standard IO functions and will reference the
correct path in the presence of arbitrary non-UTF-8 encoded paths.
This function is in IO
because the file system encoding can be
changed. If the encoding can be assumed to be constant in your
use case, you may invoke this function via unsafePerformIO
.
Since: 0.11.2.0
Basic interface
cons :: Word8 -> ByteString -> ByteString infixr 5 Source #
O(n) cons
is analogous to (:) for lists, but of different
complexity, as it requires making a copy.
snoc :: ByteString -> Word8 -> ByteString infixl 5 Source #
O(n) Append a byte to the end of a ByteString
append :: ByteString -> ByteString -> ByteString Source #
O(n) Append two ByteStrings
head :: HasCallStack => ByteString -> Word8 Source #
O(1) Extract the first element of a ByteString, which must be non-empty. An exception will be thrown in the case of an empty ByteString.
This is a partial function, consider using uncons
instead.
uncons :: ByteString -> Maybe (Word8, ByteString) Source #
unsnoc :: ByteString -> Maybe (ByteString, Word8) Source #
last :: HasCallStack => ByteString -> Word8 Source #
O(1) Extract the last element of a ByteString, which must be finite and non-empty. An exception will be thrown in the case of an empty ByteString.
This is a partial function, consider using unsnoc
instead.
tail :: HasCallStack => ByteString -> ByteString Source #
O(1) Extract the elements after the head of a ByteString, which must be non-empty. An exception will be thrown in the case of an empty ByteString.
This is a partial function, consider using uncons
instead.
init :: HasCallStack => ByteString -> ByteString Source #
O(1) Returns all the elements of a ByteString
except the last one.
An exception will be thrown in the case of an empty ByteString.
This is a partial function, consider using unsnoc
instead.
null :: ByteString -> Bool Source #
O(1) Test whether a ByteString is empty.
Transforming ByteStrings
map :: (Word8 -> Word8) -> ByteString -> ByteString Source #
O(n) map
f xs
is the ByteString obtained by applying f
to each
element of xs
.
reverse :: ByteString -> ByteString Source #
O(n) reverse
xs
efficiently returns the elements of xs
in reverse order.
intersperse :: Word8 -> ByteString -> ByteString Source #
O(n) The intersperse
function takes a Word8
and a
ByteString
and `intersperses' that byte between the elements of
the ByteString
. It is analogous to the intersperse function on
Lists.
intercalate :: ByteString -> [ByteString] -> ByteString Source #
O(n) The intercalate
function takes a ByteString
and a list of
ByteString
s and concatenates the list after interspersing the first
argument between each element of the list.
transpose :: [ByteString] -> [ByteString] Source #
The transpose
function transposes the rows and columns of its
ByteString
argument.
Reducing ByteString
s (folds)
foldl :: (a -> Word8 -> a) -> a -> ByteString -> a Source #
foldl
, applied to a binary operator, a starting value (typically
the left-identity of the operator), and a ByteString, reduces the
ByteString using the binary operator, from left to right.
foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a Source #
foldl1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ByteString -> Word8 Source #
foldl1
is a variant of foldl
that has no starting value
argument, and thus must be applied to non-empty ByteString
s.
An exception will be thrown in the case of an empty ByteString.
foldl1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ByteString -> Word8 Source #
foldr :: (Word8 -> a -> a) -> a -> ByteString -> a Source #
foldr
, applied to a binary operator, a starting value
(typically the right-identity of the operator), and a ByteString,
reduces the ByteString using the binary operator, from right to left.
foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a Source #
foldr1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ByteString -> Word8 Source #
foldr1
is a variant of foldr
that has no starting value argument,
and thus must be applied to non-empty ByteStrings.
An exception will be thrown in the case of an empty ByteString.
foldr1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ByteString -> Word8 Source #
Special folds
concat :: [ByteString] -> ByteString Source #
O(n) Concatenate a list of ByteStrings.
concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString Source #
Map a function over a ByteString
and concatenate the results
any :: (Word8 -> Bool) -> ByteString -> Bool Source #
O(n) Applied to a predicate and a ByteString, any
determines if
any element of the ByteString
satisfies the predicate.
all :: (Word8 -> Bool) -> ByteString -> Bool Source #
O(n) Applied to a predicate and a ByteString
, all
determines
if all elements of the ByteString
satisfy the predicate.
maximum :: HasCallStack => ByteString -> Word8 Source #
O(n) maximum
returns the maximum value from a ByteString
An exception will be thrown in the case of an empty ByteString.
minimum :: HasCallStack => ByteString -> Word8 Source #
O(n) minimum
returns the minimum value from a ByteString
An exception will be thrown in the case of an empty ByteString.
Building ByteStrings
Scans
scanl
:: (Word8 -> Word8 -> Word8) | accumulator -> element -> new accumulator |
-> Word8 | starting value of accumulator |
-> ByteString | input of length n |
-> ByteString | output of length n+1 |
scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString Source #
scanr
:: (Word8 -> Word8 -> Word8) | element -> accumulator -> new accumulator |
-> Word8 | starting value of accumulator |
-> ByteString | input of length n |
-> ByteString | output of length n+1 |
scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString Source #
Accumulating maps
mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) Source #
mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) Source #
Generating and unfolding ByteStrings
replicate :: Int -> Word8 -> ByteString Source #
O(n) replicate
n x
is a ByteString of length n
with x
the value of every element. The following holds:
replicate w c = fst (unfoldrN w (\u -> Just (u,u)) c)
unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString Source #
O(n), where n is the length of the result. The unfoldr
function is analogous to the List 'unfoldr'. unfoldr
builds a
ByteString from a seed value. The function takes the element and
returns Nothing
if it is done producing the ByteString or returns
Just
(a,b)
, in which case, a
is the next byte in the string,
and b
is the seed value for further production.
Examples:
unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 == pack [0, 1, 2, 3, 4, 5]
unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) Source #
O(n) Like unfoldr
, unfoldrN
builds a ByteString from a seed
value. However, the length of the result is limited by the first
argument to unfoldrN
. This function is more efficient than unfoldr
when the maximum length of the result is known.
The following equation relates unfoldrN
and unfoldr
:
fst (unfoldrN n f s) == take n (unfoldr f s)
Substrings
Breaking strings
take :: Int -> ByteString -> ByteString Source #
takeEnd :: Int -> ByteString -> ByteString Source #
drop :: Int -> ByteString -> ByteString Source #
dropEnd :: Int -> ByteString -> ByteString Source #
splitAt :: Int -> ByteString -> (ByteString, ByteString) Source #
takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString Source #
Similar to takeWhile
,
returns the longest (possibly empty) prefix of elements
satisfying the predicate.
takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString Source #
Returns the longest (possibly empty) suffix of elements satisfying the predicate.
takeWhileEnd p is equivalent to reverse . takeWhile p . reverse.
Since: 0.10.12.0
dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString Source #
Similar to dropWhile
,
drops the longest (possibly empty) prefix of elements
satisfying the predicate and returns the remainder.
dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString Source #
Similar to dropWhileEnd
,
drops the longest (possibly empty) suffix of elements
satisfying the predicate and returns the remainder.
dropWhileEnd p is equivalent to reverse . dropWhile p . reverse.
Since: 0.10.12.0
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) Source #
spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) Source #
Returns the longest (possibly empty) suffix of elements satisfying the predicate and the remainder of the string.
spanEnd p is equivalent to breakEnd (not . p) and to (dropWhileEnd p &&& takeWhileEnd p).
We have
spanEnd (not . isSpace) "x y z" == ("x y ", "z")
and
spanEnd (not . isSpace) ps == let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) Source #
Similar to break
,
returns the longest (possibly empty) prefix of elements which do not
satisfy the predicate and the remainder of the string.
break p is equivalent to span (not . p) and to (takeWhile (not . p) &&& dropWhile (not . p)).
Under GHC, a rewrite rule will transform break (==) into a call to the specialised breakByte:
break ((==) x) = breakByte x
break (==x) = breakByte x
breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) Source #
Returns the longest (possibly empty) suffix of elements which do not satisfy the predicate and the remainder of the string.
breakEnd p is equivalent to spanEnd (not . p) and to (dropWhileEnd (not . p) &&& takeWhileEnd (not . p)).
group :: ByteString -> [ByteString] Source #
The group
function takes a ByteString and returns a list of
ByteStrings such that the concatenation of the result is equal to the
argument. Moreover, each string in the result contains only equal
elements. For example,
group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
It is a special case of groupBy
, which allows the programmer to
supply their own equality test. It is about 40% faster than
groupBy (==)
groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] Source #
inits :: ByteString -> [ByteString] Source #
O(n) Returns all initial segments of the given ByteString
, shortest first.
tails :: ByteString -> [ByteString] Source #
O(n) Returns all final segments of the given ByteString
, longest first.
initsNE :: ByteString -> NonEmpty ByteString Source #
O(n) Returns all initial segments of the given ByteString
, shortest first.
Since: 0.11.4.0
tailsNE :: ByteString -> NonEmpty ByteString Source #
O(n) Returns all final segments of the given ByteString
, longest first.
Since: 0.11.4.0
stripPrefix :: ByteString -> ByteString -> Maybe ByteString Source #
O(n) The stripPrefix
function takes two ByteStrings and returns Just
the remainder of the second iff the first is its prefix, and otherwise
Nothing
.
Since: 0.10.8.0
stripSuffix :: ByteString -> ByteString -> Maybe ByteString Source #
O(n) The stripSuffix
function takes two ByteStrings and returns Just
the remainder of the second iff the first is its suffix, and otherwise
Nothing
.
Breaking into many substrings
split :: Word8 -> ByteString -> [ByteString] Source #
O(n) Break a ByteString
into pieces separated by the byte
argument, consuming the delimiter. I.e.
split 10  "a\nb\nd\ne" == ["a","b","d","e"]   -- fromEnum '\n' == 10
split 97  "aXaXaXa"    == ["","X","X","X",""] -- fromEnum 'a' == 97
split 120 "x"          == ["",""]             -- fromEnum 'x' == 120
split undefined ""     == []                  -- and not [""]
and
intercalate [c] . split c == id
split == splitWith . (==)
As for all splitting functions in this library, this function does
not copy the substrings, it just constructs new ByteString
s that
are slices of the original.
splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString] Source #
O(n) Splits a ByteString
into components delimited by
separators, where the predicate returns True for a separator element.
The resulting components do not contain the separators. Two adjacent
separators result in an empty component in the output. eg.
splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97
splitWith undefined ""     == []                  -- and not [""]
Predicates
isPrefixOf :: ByteString -> ByteString -> Bool Source #
O(n) The isPrefixOf
function takes two ByteStrings and returns True
if the first is a prefix of the second.
isSuffixOf :: ByteString -> ByteString -> Bool Source #
O(n) The isSuffixOf
function takes two ByteStrings and returns True
iff the first is a suffix of the second.
The following holds:
isSuffixOf x y == reverse x `isPrefixOf` reverse y
However, the real implementation uses memcmp to compare the end of the string only, with no reverse required.
isInfixOf :: ByteString -> ByteString -> Bool Source #
Check whether one string is a substring of another.
Encoding validation
isValidUtf8 :: ByteString -> Bool Source #
O(n) Check whether a ByteString
represents valid UTF-8.
Since: 0.11.2.0
Search for arbitrary substrings
breakSubstring
:: ByteString | String to search for |
-> ByteString | String to search in |
-> (ByteString, ByteString) | Head and tail of string broken at substring |
Break a string on a substring, returning a pair of the part of the string prior to the match, and the rest of the string.
The following relationships hold:
break (== c) l == breakSubstring (singleton c) l
For example, to tokenise a string, dropping delimiters:
tokenise x y = h : if null t then [] else tokenise x (drop (length x) t)
    where (h,t) = breakSubstring x y
To skip to the first occurrence of a string:
snd (breakSubstring x y)
To take the parts of a string before a delimiter:
fst (breakSubstring x y)
Note that calling `breakSubstring x` does some preprocessing work, so you should avoid unnecessarily duplicating breakSubstring calls with the same pattern.
Searching ByteStrings
Searching by equality
elem :: Word8 -> ByteString -> Bool Source #
O(n) elem
is the ByteString
membership predicate.
Searching with a predicate
filter :: (Word8 -> Bool) -> ByteString -> ByteString Source #
O(n) filter
, applied to a predicate and a ByteString,
returns a ByteString containing those characters that satisfy the
predicate.
partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) Source #
O(n) The partition
function takes a predicate and a ByteString and returns
the pair of ByteStrings with elements which do and do not satisfy the
predicate, respectively; i.e.,
partition p bs == (filter p bs, filter (not . p) bs)
Indexing ByteStrings
index :: HasCallStack => ByteString -> Int -> Word8 Source #
O(1) ByteString
index (subscript) operator, starting from 0.
This is a partial function, consider using indexMaybe
instead.
indexMaybe :: ByteString -> Int -> Maybe Word8 Source #
elemIndex :: Word8 -> ByteString -> Maybe Int Source #
O(n) The elemIndex
function returns the index of the first
element in the given ByteString
which is equal to the query
element, or Nothing
if there is no such element.
This implementation uses memchr(3).
elemIndices :: Word8 -> ByteString -> [Int] Source #
O(n) The elemIndices
function extends elemIndex
, by returning
the indices of all elements equal to the query element, in ascending order.
This implementation uses memchr(3).
elemIndexEnd :: Word8 -> ByteString -> Maybe Int Source #
O(n) The elemIndexEnd
function returns the last index of the
element in the given ByteString
which is equal to the query
element, or Nothing
if there is no such element. The following
holds:
elemIndexEnd c xs = case elemIndex c (reverse xs) of
  Nothing -> Nothing
  Just i  -> Just (length xs - 1 - i)
findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int Source #
O(n) The findIndex
function takes a predicate and a ByteString
and
returns the index of the first element in the ByteString
satisfying the predicate.
findIndices :: (Word8 -> Bool) -> ByteString -> [Int] Source #
O(n) The findIndices
function extends findIndex
, by returning the
indices of all elements satisfying the predicate, in ascending order.
findIndexEnd :: (Word8 -> Bool) -> ByteString -> Maybe Int Source #
O(n) The findIndexEnd
function takes a predicate and a ByteString
and
returns the index of the last element in the ByteString
satisfying the predicate.
Since: 0.10.12.0
count :: Word8 -> ByteString -> Int Source #
count returns the number of times its argument appears in the ByteString
count = length . elemIndices
But more efficiently than using length on the intermediate list.
Zipping and unzipping ByteStrings
zip :: ByteString -> ByteString -> [(Word8, Word8)] Source #
zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a] Source #
packZipWith :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString Source #
A specialised version of zipWith
for the common case of a
simultaneous map over two ByteStrings, to build a 3rd.
Since: 0.11.1.0
unzip :: [(Word8, Word8)] -> (ByteString, ByteString) Source #
Ordered ByteStrings
sort :: ByteString -> ByteString Source #
O(n) Sort a ByteString efficiently, using counting sort.
Low level conversions
Copying ByteStrings
copy :: ByteString -> ByteString Source #
O(n) Make a copy of the ByteString
with its own storage.
This is mainly useful to allow the rest of the data pointed
to by the ByteString
to be garbage collected, for example
if a large string has been read in, and only a small part of it
is needed in the rest of the program.
Packing CString
s and pointers
packCString :: CString -> IO ByteString Source #
O(n). Construct a new ByteString
from a CString
. The
resulting ByteString
is an immutable copy of the original
CString
, and is managed on the Haskell heap. The original
CString
must be null terminated.
packCStringLen :: CStringLen -> IO ByteString Source #
O(n). Construct a new ByteString
from a CStringLen
. The
resulting ByteString
is an immutable copy of the original CStringLen
.
The ByteString
is a normal Haskell value and will be managed on the
Haskell heap.
Using ByteStrings as CString
s
useAsCString :: ByteString -> (CString -> IO a) -> IO a Source #
O(n) construction Use a ByteString
with a function requiring a
null-terminated CString
. The CString
is a copy and will be freed
automatically; it must not be stored or used after the
subcomputation finishes.
useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a Source #
O(n) construction Use a ByteString
with a function requiring a CStringLen
.
As for useAsCString
this function makes a copy of the original ByteString
.
It must not be stored or used after the subcomputation finishes.
Beware that this function is not required to add a terminating NUL
byte at the end of the CStringLen
it provides.
If you need to construct a pointer to a null-terminated sequence, use useAsCString
(and measure length independently if desired).
I/O with ByteString
s
Standard input and output
getLine :: IO ByteString Source #
Deprecated: Deprecated since bytestring-0.12. Use Data.ByteString.Char8.getLine instead. (Functions that rely on ASCII encodings belong in Data.ByteString.Char8)
Read a line from stdin.
getContents :: IO ByteString Source #
getContents. Read stdin strictly. Equivalent to hGetContents stdin.
The Handle
is closed after the contents have been read.
interact :: (ByteString -> ByteString) -> IO () Source #
The interact function takes a function of type ByteString -> ByteString
as its argument. The entire input from the standard input device is passed
to this function as its argument, and the resulting string is output on the
standard output device.
Files
readFile :: FilePath -> IO ByteString Source #
Read an entire file strictly into a ByteString
.
writeFile :: FilePath -> ByteString -> IO () Source #
Write a ByteString
to a file.
appendFile :: FilePath -> ByteString -> IO () Source #
Append a ByteString
to a file.
I/O with Handles
hGetLine :: Handle -> IO ByteString Source #
Deprecated: Deprecated since bytestring-0.12. Use Data.ByteString.Char8.hGetLine instead. (Functions that rely on ASCII encodings belong in Data.ByteString.Char8)
Read a line from a handle
hGetContents :: Handle -> IO ByteString Source #
Read a handle's entire contents strictly into a ByteString
.
This function reads chunks at a time, increasing the chunk size on each
read. The final string is then reallocated to the appropriate size. For
files > half of available memory, this may lead to memory exhaustion.
Consider using readFile
in this case.
The Handle is closed once the contents have been read, or if an exception is thrown.
hGet :: Handle -> Int -> IO ByteString Source #
Read a ByteString
directly from the specified Handle
. This
is far more efficient than reading the characters into a String
and then using pack
. First argument is the Handle to read from,
and the second is the number of bytes to read. It returns the bytes
read, up to n, or empty
if EOF has been reached.
hGet
is implemented in terms of hGetBuf
.
If the handle is a pipe or socket, and the writing end
is closed, hGet
will behave as if EOF was reached.
hGetSome :: Handle -> Int -> IO ByteString Source #
Like hGet
, except that a shorter ByteString
may be returned
if there are not enough bytes immediately available to satisfy the
whole request. hGetSome
only blocks if there is no data
available, and EOF has not yet been reached.
hGetNonBlocking :: Handle -> Int -> IO ByteString Source #
hGetNonBlocking is similar to hGet
, except that it will never block
waiting for data to become available, instead it returns only whatever data
is available. If there is no data available to be read, hGetNonBlocking
returns empty
.
Note: on Windows and with Haskell implementations other than GHC, this
function does not work correctly; it behaves identically to hGet
.
hPut :: Handle -> ByteString -> IO () Source #
Outputs a ByteString
to the specified Handle
.
hPutNonBlocking :: Handle -> ByteString -> IO ByteString Source #
Similar to hPut
except that it will never block. Instead it returns
any tail that did not get written. This tail may be empty
in the case that
the whole string was written, or the whole original string if nothing was
written. Partial writes are also possible.
Note: on Windows and with Haskell implementations other than GHC, this
function does not work correctly; it behaves identically to hPut
.