Safe Haskell | None |
---|---|
Language | Haskell2010 |
Regex.Internal.Text
Description
This is an internal module. You probably don't need to import this. Import Regex.Text instead.
WARNING
Definitions in this module allow violating invariants that would otherwise be guaranteed by non-internal modules. Use at your own risk!
Synopsis
- data TextToken = TextToken {}
- type REText = RE TextToken
- textTokenFoldr :: (TextToken -> b -> b) -> b -> Text -> b
- token :: (Char -> Maybe a) -> REText a
- satisfy :: (Char -> Bool) -> REText Char
- char :: Char -> REText Char
- charIgnoreCase :: Char -> REText Char
- anyChar :: REText Char
- oneOf :: CharSet -> REText Char
- text :: Text -> REText Text
- textIgnoreCase :: Text -> REText Text
- manyText :: REText Text
- someText :: REText Text
- manyTextMin :: REText Text
- someTextMin :: REText Text
- manyTextOf :: CharSet -> REText Text
- someTextOf :: CharSet -> REText Text
- manyTextOfMin :: CharSet -> REText Text
- someTextOfMin :: CharSet -> REText Text
- naturalDec :: REText Natural
- integerDec :: REText a -> REText Integer
- naturalHex :: REText Natural
- integerHex :: REText a -> REText Integer
- wordRangeDec :: (Word, Word) -> REText Word
- intRangeDec :: REText a -> (Int, Int) -> REText Int
- wordRangeHex :: (Word, Word) -> REText Word
- intRangeHex :: REText a -> (Int, Int) -> REText Int
- wordDecN :: Int -> REText Word
- wordHexN :: Int -> REText Word
- toMatch :: REText a -> REText Text
- withMatch :: REText a -> REText (Text, a)
- reParse :: REText a -> Text -> Maybe a
- type ParserText = Parser TextToken
- parse :: ParserText a -> Text -> Maybe a
- parseSure :: ParserText a -> Text -> a
- find :: REText a -> Text -> Maybe a
- findAll :: REText a -> Text -> [a]
- splitOn :: REText a -> Text -> [Text]
- replace :: REText Text -> Text -> Maybe Text
- replaceAll :: REText Text -> Text -> Text
Documentation
The token type used for parsing Text
.
type REText = RE TextToken Source #
A type alias for convenience.
A function which accepts a RE c a
will accept a REText a
.
textTokenFoldr :: (TextToken -> b -> b) -> b -> Text -> b Source #
token :: (Char -> Maybe a) -> REText a Source #
Parse a Char
into an a
if the given function returns Just
.
charIgnoreCase :: Char -> REText Char Source #
Parse the given Char
, ignoring case.
Comparisons are performed after applying simple case folding as described by the Unicode standard.
textIgnoreCase :: Text -> REText Text Source #
Parse the given Text
, ignoring case.
Comparisons are performed after applying simple case folding as described by the Unicode standard.
manyTextMin :: REText Text Source #
Parse any Text
. Minimal, i.e. biased towards matching less.
someTextMin :: REText Text Source #
Parse any non-empty Text
. Minimal, i.e. biased towards matching less.
manyTextOf :: CharSet -> REText Text Source #
Parse any Text
containing members of the CharSet
.
Biased towards matching more.
someTextOf :: CharSet -> REText Text Source #
Parse any non-empty Text
containing members of the CharSet
.
Biased towards matching more.
manyTextOfMin :: CharSet -> REText Text Source #
Parse any Text
containing members of the CharSet
.
Minimal, i.e. biased towards matching less.
someTextOfMin :: CharSet -> REText Text Source #
Parse any non-empty Text
containing members of the CharSet
.
Minimal, i.e. biased towards matching less.
naturalDec :: REText Natural Source #
Parse a decimal Natural
.
Leading zeros are not accepted. Biased towards matching more.
integerDec :: REText a -> REText Integer Source #
Parse a decimal Integer
. Parse an optional sign, '-'
or '+'
,
followed by the given RE
, followed by the absolute value of the integer.
Leading zeros are not accepted. Biased towards matching more.
naturalHex :: REText Natural Source #
Parse a hexadecimal Natural
. Both uppercase 'A'..'F'
and lowercase
'a'..'f'
are accepted.
Leading zeros are not accepted. Biased towards matching more.
integerHex :: REText a -> REText Integer Source #
Parse a hexadecimal Integer
. Parse an optional sign, '-'
or '+'
,
followed by the given RE
, followed by the absolute value of the integer.
Both uppercase 'A'..'F'
and lowercase 'a'..'f'
are accepted.
Leading zeros are not accepted. Biased towards matching more.
wordRangeDec :: (Word, Word) -> REText Word Source #
Parse a decimal Word
in the range [low..high]
.
Leading zeros are not accepted. Biased towards matching more.
intRangeDec :: REText a -> (Int, Int) -> REText Int Source #
Parse a decimal Int
in the range [low..high]
. Parse an optional sign,
'-'
or '+'
, followed by the given RE
, followed by the absolute
value of the integer.
Leading zeros are not accepted. Biased towards matching more.
wordRangeHex :: (Word, Word) -> REText Word Source #
Parse a hexadecimal Word
in the range [low..high]
. Both uppercase
'A'..'F'
and lowercase 'a'..'f'
are accepted.
Leading zeros are not accepted. Biased towards matching more.
intRangeHex :: REText a -> (Int, Int) -> REText Int Source #
Parse a hexadecimal Int
in the range [low..high]
. Parse an optional
sign, '-'
or '+'
, followed by the given RE
, followed by the
absolute value of the integer.
Both uppercase 'A'..'F'
and lowercase 'a'..'f'
are accepted.
Leading zeros are not accepted. Biased towards matching more.
wordDecN :: Int -> REText Word Source #
Parse a Word
of exactly n decimal digits, including any leading zeros.
Will not parse values that do not fit in a Word
.
Biased towards matching more.
wordHexN :: Int -> REText Word Source #
Parse a Word
of exactly n hexadecimal digits, including any leading
zeros. Both uppercase 'A'..'F'
and lowercase 'a'..'f'
are
accepted. Will not parse values that do not fit in a Word
.
Biased towards matching more.
toMatch :: REText a -> REText Text Source #
Rebuild the RE
such that the result is the matched Text
instead.
withMatch :: REText a -> REText (Text, a) Source #
Rebuild the RE
to include the matched Text
alongside the result.
reParse :: REText a -> Text -> Maybe a Source #
\(O(mn \log m)\). Parse a Text
with a REText
.
Parses the entire Text
, not just a prefix or a substring.
Uses compile
, see the note there.
If parsing multiple Text
s using the same RE
, it is wasteful to compile
the RE
every time. So, prefer to
- Compile once with compile or compileBounded and use the compiled ParserText with parse as many times as required.
- Alternately, partially apply this function to a RE and use the function as many times as required.
type ParserText = Parser TextToken Source #
A type alias for convenience.
A function which accepts a Parser c a
will accept a ParserText a
.
parse :: ParserText a -> Text -> Maybe a Source #
\(O(mn \log m)\). Parse a Text
with a ParserText
.
Parses the entire Text
, not just a prefix or a substring.
parseSure :: ParserText a -> Text -> a Source #
\(O(mn \log m)\). Parse a Text
with a ParserText
. Calls error
on
parse failure.
For use with parsers that are known to never fail.
Parses the entire Text
, not just a prefix or a substring.
find :: REText a -> Text -> Maybe a Source #
\(O(mn \log m)\). Find the first occurrence of the given RE
in a Text
.
Examples
>>>
find (text "meow") "homeowner"
Just "meow"
To test whether a Text is present in another Text, like above, prefer Data.Text.isInfixOf.
>>>
find (textIgnoreCase "haskell") "Look I'm Haskelling!"
Just "Haskell"
>>>
find (text "backtracking") "parser-regex"
Nothing
findAll :: REText a -> Text -> [a] Source #
\(O(mn \log m)\). Find all non-overlapping occurrences of the given RE
in
the Text
.
Examples
>>>
findAll (text "ana") "banananana"
["ana","ana"]
data Roll = Roll
  Natural -- ^ Rolls
  Natural -- ^ Faces on the die
  deriving Show

roll :: REText Roll
roll = Roll <$> (naturalDec <|> pure 1) <* char 'd' <*> naturalDec
>>>
findAll roll "3d6, d10, 2d10"
[Roll 3 6,Roll 1 10,Roll 2 10]
splitOn :: REText a -> Text -> [Text] Source #
\(O(mn \log m)\). Split a Text
at occurrences of the given RE
.
Examples
>>>
splitOn (char ' ') "Glasses are really versatile"
["Glasses","are","really","versatile"]
For simple splitting, like above, prefer Data.Text.words, Data.Text.lines, Data.Text.split or Data.Text.splitOn, whichever is applicable.
>>>
splitOn (char ' ' *> oneOf "+-=" *> char ' ') "3 - 1 + 1/2 - 2 = 0"
["3","1","1/2","2","0"]
If the Text
starts or ends with a delimiter, the result will contain
empty Text
s at those positions.
>>>
splitOn (char 'a') "ayaya"
["","y","y",""]
replace :: REText Text -> Text -> Maybe Text Source #
\(O(mn \log m)\). Replace the first match of the given RE
with its
result. If there is no match, the result is Nothing
.
Examples
>>>
replace ("world" <$ text "Haskell") "Hello, Haskell!"
Just "Hello, world!"
>>>
replace ("," <$ some (char '.')) "one...two...ten"
Just "one,two...ten"
replaceAll :: REText Text -> Text -> Text Source #
\(O(mn \log m)\). Replace all non-overlapping matches of the given RE
with their results.
Examples
>>>
replaceAll (" and " <$ text ", ") "red, blue, green"
"red and blue and green"
For simple replacements like above, prefer Data.Text.replace.
>>>
replaceAll ("Fruit" <$ text "Time" <|> "a banana" <$ text "an arrow") "Time flies like an arrow"
"Fruit flies like a banana"
sep = oneOf "-./"
digits n = toMatch (replicateM_ n (oneOf digit))
toYmd d m y = mconcat [y, "-", m, "-", d]
date = toYmd <$> digits 2 <* sep
             <*> digits 2 <* sep
             <*> digits 4
>>>
replaceAll date "01/01/1970, 01-04-1990, 03.07.2011"
"1970-01-01, 1990-04-01, 2011-07-03"