Safe Haskell	None
Language	Haskell2010

Regex.Internal.Text

Description

This is an internal module. You probably don't need to import this. Import Regex.Text instead.

WARNING

Definitions in this module allow violating invariants that would otherwise be guaranteed by non-internal modules. Use at your own risk!

Synopsis

data TextToken = TextToken {
- tArr :: !Array
- tOffset :: !Int
- tChar :: !Char
}
type REText = RE TextToken
textTokenFoldr :: (TextToken -> b -> b) -> b -> Text -> b
token :: (Char -> Maybe a) -> REText a
satisfy :: (Char -> Bool) -> REText Char
char :: Char -> REText Char
charIgnoreCase :: Char -> REText Char
anyChar :: REText Char
oneOf :: CharSet -> REText Char
text :: Text -> REText Text
textIgnoreCase :: Text -> REText Text
manyText :: REText Text
someText :: REText Text
manyTextMin :: REText Text
someTextMin :: REText Text
manyTextOf :: CharSet -> REText Text
someTextOf :: CharSet -> REText Text
manyTextOfMin :: CharSet -> REText Text
someTextOfMin :: CharSet -> REText Text
naturalDec :: REText Natural
integerDec :: REText a -> REText Integer
naturalHex :: REText Natural
integerHex :: REText a -> REText Integer
wordRangeDec :: (Word, Word) -> REText Word
intRangeDec :: REText a -> (Int, Int) -> REText Int
wordRangeHex :: (Word, Word) -> REText Word
intRangeHex :: REText a -> (Int, Int) -> REText Int
wordDecN :: Int -> REText Word
wordHexN :: Int -> REText Word
toMatch :: REText a -> REText Text
withMatch :: REText a -> REText (Text, a)
reParse :: REText a -> Text -> Maybe a
type ParserText = Parser TextToken
parse :: ParserText a -> Text -> Maybe a
parseSure :: ParserText a -> Text -> a
find :: REText a -> Text -> Maybe a
findAll :: REText a -> Text -> [a]
splitOn :: REText a -> Text -> [Text]
replace :: REText Text -> Text -> Maybe Text
replaceAll :: REText Text -> Text -> Text

Documentation

data TextToken Source #

The token type used for parsing Text.

Constructors

TextToken
Fields: tArr :: !Array, tOffset :: !Int, tChar :: !Char

type REText = RE TextToken Source #

A type alias for convenience.

A function which accepts a RE c a will accept a REText a.

textTokenFoldr :: (TextToken -> b -> b) -> b -> Text -> b Source #

token :: (Char -> Maybe a) -> REText a Source #

Parse a Char into an a if the given function returns Just.

satisfy :: (Char -> Bool) -> REText Char Source #

Parse a Char if it satisfies the given predicate.

char :: Char -> REText Char Source #

Parse the given Char.

charIgnoreCase :: Char -> REText Char Source #

Parse the given Char, ignoring case.

Comparisons are performed after applying simple case folding as described by the Unicode standard.

anyChar :: REText Char Source #

Parse any Char.

oneOf :: CharSet -> REText Char Source #

Parse a Char if it is a member of the CharSet.

text :: Text -> REText Text Source #

Parse the given Text.

textIgnoreCase :: Text -> REText Text Source #

Parse the given Text, ignoring case.

Comparisons are performed after applying simple case folding as described by the Unicode standard.

manyText :: REText Text Source #

Parse any Text. Biased towards matching more.

someText :: REText Text Source #

Parse any non-empty Text. Biased towards matching more.

manyTextMin :: REText Text Source #

Parse any Text. Minimal, i.e. biased towards matching less.

someTextMin :: REText Text Source #

Parse any non-empty Text. Minimal, i.e. biased towards matching less.

manyTextOf :: CharSet -> REText Text Source #

Parse any Text containing members of the CharSet. Biased towards matching more.

someTextOf :: CharSet -> REText Text Source #

Parse any non-empty Text containing members of the CharSet. Biased towards matching more.

manyTextOfMin :: CharSet -> REText Text Source #

Parse any Text containing members of the CharSet. Minimal, i.e. biased towards matching less.

someTextOfMin :: CharSet -> REText Text Source #

Parse any non-empty Text containing members of the CharSet. Minimal, i.e. biased towards matching less.

naturalDec :: REText Natural Source #

Parse a decimal Natural. Leading zeros are not accepted. Biased towards matching more.

integerDec :: REText a -> REText Integer Source #

Parse a decimal Integer. Parse an optional sign, '-' or '+', followed by the given RE, followed by the absolute value of the integer. Leading zeros are not accepted. Biased towards matching more.

naturalHex :: REText Natural Source #

Parse a hexadecimal Natural. Both uppercase 'A'..'F' and lowercase 'a'..'f' are accepted. Leading zeros are not accepted. Biased towards matching more.

integerHex :: REText a -> REText Integer Source #

Parse a hexadecimal Integer. Parse an optional sign, '-' or '+', followed by the given RE, followed by the absolute value of the integer. Both uppercase 'A'..'F' and lowercase 'a'..'f' are accepted. Leading zeros are not accepted. Biased towards matching more.

wordRangeDec :: (Word, Word) -> REText Word Source #

Parse a decimal Word in the range [low..high]. Leading zeros are not accepted. Biased towards matching more.

intRangeDec :: REText a -> (Int, Int) -> REText Int Source #

Parse a decimal Int in the range [low..high]. Parse an optional sign, '-' or '+', followed by the given RE, followed by the absolute value of the integer. Leading zeros are not accepted. Biased towards matching more.

wordRangeHex :: (Word, Word) -> REText Word Source #

Parse a hexadecimal Word in the range [low..high]. Both uppercase 'A'..'F' and lowercase 'a'..'f' are accepted. Leading zeros are not accepted. Biased towards matching more.

intRangeHex :: REText a -> (Int, Int) -> REText Int Source #

Parse a hexadecimal Int in the range [low..high]. Parse an optional sign, '-' or '+', followed by the given RE, followed by the absolute value of the integer. Both uppercase 'A'..'F' and lowercase 'a'..'f' are accepted. Leading zeros are not accepted. Biased towards matching more.

wordDecN :: Int -> REText Word Source #

Parse a Word of exactly n decimal digits, including any leading zeros. Will not parse values that do not fit in a Word. Biased towards matching more.

wordHexN :: Int -> REText Word Source #

Parse a Word of exactly n hexadecimal digits, including any leading zeros. Both uppercase 'A'..'F' and lowercase 'a'..'f' are accepted. Will not parse values that do not fit in a Word. Biased towards matching more.

toMatch :: REText a -> REText Text Source #

Rebuild the RE such that the result is the matched Text instead.

withMatch :: REText a -> REText (Text, a) Source #

Rebuild the RE to include the matched Text alongside the result.

reParse :: REText a -> Text -> Maybe a Source #

\(O(mn \log m)\). Parse a Text with a REText.

Parses the entire Text, not just a prefix or a substring.

Uses compile, see the note there.

If parsing multiple Texts using the same RE, it is wasteful to compile the RE every time. So, prefer one of the following:

- Compile once with compile or compileBounded and use the compiled ParserText with parse as many times as required.
- Alternatively, partially apply this function to a RE and use the resulting function as many times as required.

type ParserText = Parser TextToken Source #

A type alias for convenience.

A function which accepts a Parser c a will accept a ParserText a.

parse :: ParserText a -> Text -> Maybe a Source #

\(O(mn \log m)\). Parse a Text with a ParserText.

Parses the entire Text, not just a prefix or a substring.

parseSure :: ParserText a -> Text -> a Source #

\(O(mn \log m)\). Parse a Text with a ParserText. Calls error on parse failure.

For use with parsers that are known to never fail.

Parses the entire Text, not just a prefix or a substring.

find :: REText a -> Text -> Maybe a Source #

\(O(mn \log m)\). Find the first occurrence of the given RE in a Text.

Examples

Expand

>>> find (text "meow") "homeowner"
Just "meow"

To test whether a Text is present in another Text, like above, prefer Data.Text.isInfixOf.

>>> find (textIgnoreCase "haskell") "Look I'm Haskelling!"
Just "Haskell"
>>> find (text "backtracking") "parser-regex"
Nothing

findAll :: REText a -> Text -> [a] Source #

\(O(mn \log m)\). Find all non-overlapping occurrences of the given RE in the Text.

Examples

Expand

>>> findAll (text "ana") "banananana"
["ana","ana"]

data Roll = Roll
  Natural -- ^ Rolls
  Natural -- ^ Faces on the die
  deriving Show

roll :: REText Roll
roll = Roll <$> (naturalDec <|> pure 1) <* char 'd' <*> naturalDec

>>> findAll roll "3d6, d10, 2d10"
[Roll 3 6,Roll 1 10,Roll 2 10]

splitOn :: REText a -> Text -> [Text] Source #

\(O(mn \log m)\). Split a Text at occurrences of the given RE.

Examples

Expand

>>> splitOn (char ' ') "Glasses are really versatile"
["Glasses","are","really","versatile"]

For simple splitting, like above, prefer Data.Text.words, Data.Text.lines, Data.Text.split or Data.Text.splitOn, whichever is applicable.

>>> splitOn (char ' ' *> oneOf "+-=" *> char ' ') "3 - 1 + 1/2 - 2 = 0"
["3","1","1/2","2","0"]

If the Text starts or ends with a delimiter, the result will contain empty Texts at those positions.

>>> splitOn (char 'a') "ayaya"
["","y","y",""]

replace :: REText Text -> Text -> Maybe Text Source #

\(O(mn \log m)\). Replace the first match of the given RE with its result. If there is no match, the result is Nothing.

Examples

Expand

>>> replace ("world" <$ text "Haskell") "Hello, Haskell!"
Just "Hello, world!"

>>> replace ("," <$ some (char '.')) "one...two...ten"
Just "one,two...ten"

replaceAll :: REText Text -> Text -> Text Source #

\(O(mn \log m)\). Replace all non-overlapping matches of the given RE with their results.

Examples

Expand

>>> replaceAll (" and " <$ text ", ") "red, blue, green"
"red and blue and green"

For simple replacements like above, prefer Data.Text.replace.

>>> replaceAll ("Fruit" <$ text "Time" <|> "a banana" <$ text "an arrow") "Time flies like an arrow"
"Fruit flies like a banana"

sep = oneOf "-./"
digits n = toMatch (replicateM_ n (oneOf digit))
toYmd d m y = mconcat [y, "-", m, "-", d]
date = toYmd <$> digits 2 <* sep
             <*> digits 2 <* sep
             <*> digits 4

>>> replaceAll date "01/01/1970, 01-04-1990, 03.07.2011"
"1970-01-01, 1990-04-01, 2011-07-03"