Copyright | (c) Iavor S. Diatchki 2009 |
---|---|
License | BSD3-style (see LICENSE) |
Maintainer | emertens@galois.com |
Stability | experimental |
Portability | portable |
Safe Haskell | Trustworthy |
Language | Haskell98 |
- data UTF8 string
- class (Num s, Ord s) => UTF8Bytes b s | b -> s
- fromString :: UTF8Bytes string index => String -> UTF8 string
- toString :: UTF8Bytes string index => UTF8 string -> String
- fromRep :: string -> UTF8 string
- toRep :: UTF8 string -> string
- replacement_char :: Char
- uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string)
- splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)
- take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
- drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
- span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)
- break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)
- foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a
- foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a
- length :: UTF8Bytes string index => UTF8 string -> index
- lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string]
- lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string]
- null :: UTF8Bytes string index => UTF8 string -> Bool
- decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index)
- byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)
- byteTake :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
- byteDrop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
Representation
The type of strings that are represented using the UTF8 encoding. The parameter is the type of the container for the representation.
fromString :: UTF8Bytes string index => String -> UTF8 string Source
Converts a Haskell string into a UTF8 encoded string. Complexity: linear.
toString :: UTF8Bytes string index => UTF8 string -> String Source
Convert a UTF8 encoded string into a Haskell string.
Invalid characters are replaced by replacement_char.
Complexity: linear.
replacement_char :: Char Source
This character is used to mark errors in a UTF8 encoded string.
Character based operations
uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string) Source
Get the first character of a byte string, if any.
Invalid characters are replaced by replacement_char.
splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string) Source
Split after a given number of characters.
Negative values are treated as if they are 0.
See also byteSplitAt.
take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string Source
take n s returns the first n characters of s.
If s has fewer than n characters, then we return the whole of s.
drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string Source
drop n s returns s without its first n characters.
If s has fewer than n characters, then we return an empty string.
span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string) Source
Split a string into two parts: the first is the longest prefix that contains only characters that satisfy the predicate; the second part is the rest of the string. Invalid characters are passed as '\xFFFD' to the predicate.
break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string) Source
Split a string into two parts: the first is the longest prefix
that contains only characters that do not satisfy the predicate; the second
part is the rest of the string.
Invalid characters are passed as replacement_char to the predicate.
foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a Source
Traverse a bytestring (left biased). This function is strict in the accumulator.
foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a Source
Traverse a bytestring (right biased).
length :: UTF8Bytes string index => UTF8 string -> index Source
Counts the number of characters encoded in the bytestring. Note that this includes replacement characters. The function is linear in the number of bytes in the representation.
lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string] Source
Split a string into a list of lines. Lines are terminated by '\n' or the end of the string. Empty lines may not be terminated by the end of the string. See also lines'.
lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string] Source
Split a string into a list of lines.
Lines are terminated by '\n' or the end of the string.
Empty lines may not be terminated by the end of the string.
This function preserves the terminators.
See also lines.
Representation based operations
null :: UTF8Bytes string index => UTF8 string -> Bool Source
Checks if there are no more bytes in the underlying representation.
decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index) Source
Extract the first character from the underlying representation,
if one is available. It also returns the number of bytes used
in the representation of the character.
See also uncons, byteDrop.
byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string) Source
Split after a given number of bytes in the underlying representation.
See also splitAt.