Safe Haskell | None |
---|---|
Language | Haskell2010 |
This is an internal module. You probably don't need to import this. Import Regex.Text instead.
WARNING
Definitions in this module allow violating invariants that would otherwise be guaranteed by non-internal modules. Use at your own risk!
Synopsis
- data TextToken = TextToken {}
- type REText = RE TextToken
- textTokenFoldr :: (TextToken -> b -> b) -> b -> Text -> b
- token :: (Char -> Maybe a) -> REText a
- satisfy :: (Char -> Bool) -> REText Char
- char :: Char -> REText Char
- charIgnoreCase :: Char -> REText Char
- anyChar :: REText Char
- oneOf :: CharSet -> REText Char
- text :: Text -> REText Text
- textIgnoreCase :: Text -> REText Text
- manyText :: REText Text
- someText :: REText Text
- manyTextMin :: REText Text
- someTextMin :: REText Text
- manyTextOf :: CharSet -> REText Text
- someTextOf :: CharSet -> REText Text
- manyTextOfMin :: CharSet -> REText Text
- someTextOfMin :: CharSet -> REText Text
- naturalDec :: REText Natural
- integerDec :: REText a -> REText Integer
- naturalHex :: REText Natural
- integerHex :: REText a -> REText Integer
- wordRangeDec :: (Word, Word) -> REText Word
- intRangeDec :: REText a -> (Int, Int) -> REText Int
- wordRangeHex :: (Word, Word) -> REText Word
- intRangeHex :: REText a -> (Int, Int) -> REText Int
- wordDecN :: Int -> REText Word
- wordHexN :: Int -> REText Word
- toMatch :: REText a -> REText Text
- withMatch :: REText a -> REText (Text, a)
- reParse :: REText a -> Text -> Maybe a
- type ParserText = Parser TextToken
- parse :: ParserText a -> Text -> Maybe a
- parseSure :: ParserText a -> Text -> a
- find :: REText a -> Text -> Maybe a
- findAll :: REText a -> Text -> [a]
- splitOn :: REText a -> Text -> [Text]
- replace :: REText Text -> Text -> Maybe Text
- replaceAll :: REText Text -> Text -> Text
Documentation
The token type used for parsing Text
.
type REText = RE TextToken Source #
A type alias for convenience.
A function which accepts a RE c a
will accept a REText a
.
textTokenFoldr :: (TextToken -> b -> b) -> b -> Text -> b Source #
token :: (Char -> Maybe a) -> REText a Source #
Parse a Char
into an a
if the given function returns Just
.
charIgnoreCase :: Char -> REText Char Source #
Parse the given Char
, ignoring case.
Comparisons are performed after applying simple case folding as described by the Unicode standard.
textIgnoreCase :: Text -> REText Text Source #
Parse the given Text
, ignoring case.
Comparisons are performed after applying simple case folding as described by the Unicode standard.
manyTextMin :: REText Text Source #
Parse any Text
. Minimal, i.e. biased towards matching less.
someTextMin :: REText Text Source #
Parse any non-empty Text
. Minimal, i.e. biased towards matching less.
manyTextOf :: CharSet -> REText Text Source #
Parse any Text
containing members of the CharSet
.
Biased towards matching more.
someTextOf :: CharSet -> REText Text Source #
Parse any non-empty Text
containing members of the CharSet
.
Biased towards matching more.
manyTextOfMin :: CharSet -> REText Text Source #
Parse any Text
containing members of the CharSet
.
Minimal, i.e. biased towards matching less.
someTextOfMin :: CharSet -> REText Text Source #
Parse any non-empty Text
containing members of the CharSet
.
Minimal, i.e. biased towards matching less.
naturalDec :: REText Natural Source #
Parse a decimal Natural
.
Leading zeros are not accepted. Biased towards matching more.
integerDec :: REText a -> REText Integer Source #
Parse a decimal Integer
. Parse an optional sign, '-'
or '+'
,
followed by the given RE
, followed by the absolute value of the integer.
Leading zeros are not accepted. Biased towards matching more.
naturalHex :: REText Natural Source #
Parse a hexadecimal Natural
. Both uppercase 'A'..'F'
and lowercase
'a'..'f'
are accepted.
Leading zeros are not accepted. Biased towards matching more.
integerHex :: REText a -> REText Integer Source #
Parse a hexadecimal Integer
. Parse an optional sign, '-'
or '+'
,
followed by the given RE
, followed by the absolute value of the integer.
Both uppercase 'A'..'F'
and lowercase 'a'..'f'
are accepted.
Leading zeros are not accepted. Biased towards matching more.
wordRangeDec :: (Word, Word) -> REText Word Source #
Parse a decimal Word
in the range [low..high]
.
Leading zeros are not accepted. Biased towards matching more.
intRangeDec :: REText a -> (Int, Int) -> REText Int Source #
Parse a decimal Int
in the range [low..high]
. Parse an optional sign,
'-'
or '+'
, followed by the given RE
, followed by the absolute
value of the integer.
Leading zeros are not accepted. Biased towards matching more.
wordRangeHex :: (Word, Word) -> REText Word Source #
Parse a hexadecimal Word
in the range [low..high]
. Both uppercase
'A'..'F'
and lowercase 'a'..'f'
are accepted.
Leading zeros are not accepted. Biased towards matching more.
intRangeHex :: REText a -> (Int, Int) -> REText Int Source #
Parse a hexadecimal Int
in the range [low..high]
. Parse an optional
sign, '-'
or '+'
, followed by the given RE
, followed by the
absolute value of the integer.
Both uppercase 'A'..'F'
and lowercase 'a'..'f'
are accepted.
Leading zeros are not accepted. Biased towards matching more.
wordDecN :: Int -> REText Word Source #
Parse a Word
of exactly n decimal digits, including any leading zeros.
Will not parse values that do not fit in a Word
.
Biased towards matching more.
wordHexN :: Int -> REText Word Source #
Parse a Word
of exactly n hexadecimal digits, including any leading
zeros. Both uppercase 'A'..'F'
and lowercase 'a'..'f'
are
accepted. Will not parse values that do not fit in a Word
.
Biased towards matching more.
toMatch :: REText a -> REText Text Source #
Rebuild the RE
such that the result is the matched Text
instead.
withMatch :: REText a -> REText (Text, a) Source #
Rebuild the RE
to include the matched Text
alongside the result.
reParse :: REText a -> Text -> Maybe a Source #
\(O(mn \log m)\). Parse a Text
with a REText
.
Parses the entire Text
, not just a prefix or a substring.
Uses compile
, see the note there.
If parsing multiple Text
s using the same RE
, it is wasteful to compile
the RE
every time. So, prefer to
- Compile once with compile or compileBounded and use the compiled ParserText with parse as many times as required.
- Alternatively, partially apply this function to a RE and use the function as many times as required.
type ParserText = Parser TextToken Source #
A type alias for convenience.
A function which accepts a Parser c a
will accept a ParserText a
.
parse :: ParserText a -> Text -> Maybe a Source #
\(O(mn \log m)\). Parse a Text
with a ParserText
.
Parses the entire Text
, not just a prefix or a substring.
parseSure :: ParserText a -> Text -> a Source #
\(O(mn \log m)\). Parse a Text
with a ParserText
. Calls error
on
parse failure.
For use with parsers that are known to never fail.
Parses the entire Text
, not just a prefix or a substring.
find :: REText a -> Text -> Maybe a Source #
\(O(mn \log m)\). Find the first occurrence of the given RE
in a Text
.
Examples
>>>
find (text "meow") "homeowner"
Just "meow"
To test whether a Text is present in another Text, like above, prefer Data.Text.isInfixOf.
>>>
find (textIgnoreCase "haskell") "Look I'm Haskelling!"
Just "Haskell"
>>>
find (text "backtracking") "parser-regex"
Nothing
findAll :: REText a -> Text -> [a] Source #
\(O(mn \log m)\). Find all non-overlapping occurrences of the given RE
in
the Text
.
Examples
>>>
findAll (text "ana") "banananana"
["ana","ana"]
data Roll = Roll
  Natural -- ^ Rolls
  Natural -- ^ Faces on the die
  deriving Show

roll :: REText Roll
roll = Roll <$> (naturalDec <|> pure 1) <* char 'd' <*> naturalDec
>>>
findAll roll "3d6, d10, 2d10"
[Roll 3 6,Roll 1 10,Roll 2 10]
splitOn :: REText a -> Text -> [Text] Source #
\(O(mn \log m)\). Split a Text
at occurrences of the given RE
.
Examples
>>>
splitOn (char ' ') "Glasses are really versatile"
["Glasses","are","really","versatile"]
For simple splitting, like above, prefer Data.Text.words, Data.Text.lines, Data.Text.split or Data.Text.splitOn, whichever is applicable.
>>>
splitOn (char ' ' *> oneOf "+-=" *> char ' ') "3 - 1 + 1/2 - 2 = 0"
["3","1","1/2","2","0"]
If the Text
starts or ends with a delimiter, the result will contain
empty Text
s at those positions.
>>>
splitOn (char 'a') "ayaya"
["","y","y",""]
replace :: REText Text -> Text -> Maybe Text Source #
\(O(mn \log m)\). Replace the first match of the given RE
with its
result. If there is no match, the result is Nothing
.
Examples
>>>
replace ("world" <$ text "Haskell") "Hello, Haskell!"
Just "Hello, world!"
>>>
replace ("," <$ some (char '.')) "one...two...ten"
Just "one,two...ten"
replaceAll :: REText Text -> Text -> Text Source #
\(O(mn \log m)\). Replace all non-overlapping matches of the given RE
with their results.
Examples
>>>
replaceAll (" and " <$ text ", ") "red, blue, green"
"red and blue and green"
For simple replacements like above, prefer Data.Text.replace.
>>>
replaceAll ("Fruit" <$ text "Time" <|> "a banana" <$ text "an arrow") "Time flies like an arrow"
"Fruit flies like a banana"
sep = oneOf "-./"
digits n = toMatch (replicateM_ n (oneOf digit))
toYmd d m y = mconcat [y, "-", m, "-", d]
date = toYmd <$> digits 2 <* sep <*> digits 2 <* sep <*> digits 4
>>>
replaceAll date "01/01/1970, 01-04-1990, 03.07.2011"
"1970-01-01, 1990-04-01, 2011-07-03"