{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Text.Megaparsec.Char
(
newline,
crlf,
eol,
tab,
space,
hspace,
space1,
hspace1,
controlChar,
spaceChar,
upperChar,
lowerChar,
letterChar,
alphaNumChar,
printChar,
digitChar,
binDigitChar,
octDigitChar,
hexDigitChar,
markChar,
numberChar,
punctuationChar,
symbolChar,
separatorChar,
asciiChar,
latin1Char,
charCategory,
categoryName,
char,
char',
string,
string',
)
where
import Control.Applicative
import Data.Char
import Data.Functor (void)
import Data.Proxy
import Text.Megaparsec
import Text.Megaparsec.Common
-- | Accept exactly one line feed (LF) character and yield it.
newline :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
newline = single '\n'
{-# INLINE newline #-}
-- | Accept a carriage return immediately followed by a line feed. The
-- result is the two-character sequence as a chunk of the input stream.
crlf :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
crlf = string crlfChunk
  where
    -- The literal sequence converted to the stream's native chunk type.
    crlfChunk = tokensToChunk (Proxy :: Proxy s) "\r\n"
{-# INLINE crlf #-}
-- | Accept an end-of-line sequence: a lone line feed is tried first (via
-- 'newline'), then a carriage return followed by a line feed (via 'crlf').
-- The matched text is returned as a chunk of the input stream, and the
-- parser as a whole is labelled @end of line@.
eol :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
eol = label "end of line" $ (tokenToChunk streamProxy <$> newline) <|> crlf
  where
    -- Pins the stream type so the single token produced by 'newline' can
    -- be lifted to the same chunk type that 'crlf' returns.
    streamProxy = Proxy :: Proxy s
{-# INLINE eol #-}
-- | Accept exactly one horizontal tab character and yield it.
tab :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
tab = single '\t'
{-# INLINE tab #-}
-- | Consume zero or more characters for which 'isSpace' holds and discard
-- them. The run is labelled @white space@.
--
-- See also: 'space1', 'hspace'.
space :: (MonadParsec e s m, Token s ~ Char) => m ()
space = () <$ takeWhileP (Just "white space") isSpace
{-# INLINE space #-}
-- | Like 'space', but the predicate is 'isHSpace', so line feed and
-- carriage return characters are left unconsumed.
--
-- See also: 'hspace1'.
hspace :: (MonadParsec e s m, Token s ~ Char) => m ()
hspace = () <$ takeWhileP (Just "white space") isHSpace
{-# INLINE hspace #-}
-- | Consume one or more characters for which 'isSpace' holds and discard
-- them. The run is labelled @white space@.
--
-- See also: 'space', 'hspace1'.
space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
space1 = () <$ takeWhile1P (Just "white space") isSpace
{-# INLINE space1 #-}
-- | Like 'space1', but the predicate is 'isHSpace', so line feed and
-- carriage return characters are not accepted.
--
-- See also: 'hspace'.
hspace1 :: (MonadParsec e s m, Token s ~ Char) => m ()
hspace1 = () <$ takeWhile1P (Just "white space") isHSpace
{-# INLINE hspace1 #-}
-- | Accept one character for which 'isControl' holds and yield it.
controlChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
controlChar = label "control character" (satisfy isControl)
{-# INLINE controlChar #-}
-- | Accept one character for which 'isSpace' holds and yield it. Unlike
-- 'space' and 'space1', exactly one character is consumed and returned.
spaceChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
spaceChar = label "white space" (satisfy isSpace)
{-# INLINE spaceChar #-}
-- | Accept one character for which 'isUpper' holds and yield it.
upperChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
upperChar = label "uppercase letter" (satisfy isUpper)
{-# INLINE upperChar #-}
-- | Accept one character for which 'isLower' holds and yield it.
lowerChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
lowerChar = label "lowercase letter" (satisfy isLower)
{-# INLINE lowerChar #-}
-- | Accept one character for which 'isLetter' holds and yield it.
letterChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
letterChar = label "letter" (satisfy isLetter)
{-# INLINE letterChar #-}
-- | Accept one character for which 'isAlphaNum' holds and yield it.
alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
alphaNumChar = label "alphanumeric character" (satisfy isAlphaNum)
{-# INLINE alphaNumChar #-}
-- | Accept one character for which 'isPrint' holds and yield it.
printChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
printChar = label "printable character" (satisfy isPrint)
{-# INLINE printChar #-}
-- | Accept one character for which 'isDigit' holds and yield it.
digitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
digitChar = label "digit" (satisfy isDigit)
{-# INLINE digitChar #-}
-- | Accept one binary digit, that is, the character @0@ or the character
-- @1@, and yield it.
binDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
binDigitChar = label "binary digit" (satisfy isBinDigit)
  where
    -- Only these two characters count as binary digits.
    isBinDigit c = c `elem` ['0', '1']
{-# INLINE binDigitChar #-}
-- | Accept one character for which 'isOctDigit' holds and yield it.
octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
octDigitChar = label "octal digit" (satisfy isOctDigit)
{-# INLINE octDigitChar #-}
-- | Accept one character for which 'isHexDigit' holds and yield it.
hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
hexDigitChar = label "hexadecimal digit" (satisfy isHexDigit)
{-# INLINE hexDigitChar #-}
-- | Accept one character for which 'isMark' holds and yield it.
markChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
markChar = label "mark character" (satisfy isMark)
{-# INLINE markChar #-}
-- | Accept one character for which 'isNumber' holds and yield it.
numberChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
numberChar = label "numeric character" (satisfy isNumber)
{-# INLINE numberChar #-}
-- | Accept one character for which 'isPunctuation' holds and yield it.
punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
punctuationChar = label "punctuation" (satisfy isPunctuation)
{-# INLINE punctuationChar #-}
-- | Accept one character for which 'isSymbol' holds and yield it.
symbolChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
symbolChar = label "symbol" (satisfy isSymbol)
{-# INLINE symbolChar #-}
-- | Accept one character for which 'isSeparator' holds and yield it.
separatorChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
separatorChar = label "separator" (satisfy isSeparator)
{-# INLINE separatorChar #-}
-- | Accept one character for which 'isAscii' holds and yield it.
asciiChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
asciiChar = label "ASCII character" (satisfy isAscii)
{-# INLINE asciiChar #-}
-- | Accept one character for which 'isLatin1' holds and yield it.
latin1Char :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
latin1Char = label "Latin-1 character" (satisfy isLatin1)
{-# INLINE latin1Char #-}
-- | @'charCategory' cat@ accepts one character whose 'generalCategory' is
-- exactly @cat@ and yields it. The parser is labelled with the
-- human-readable name produced by 'categoryName' for that category.
charCategory ::
  (MonadParsec e s m, Token s ~ Char) =>
  -- | Category the accepted character must belong to
  GeneralCategory ->
  m (Token s)
charCategory cat = label (categoryName cat) (satisfy inCategory)
  where
    inCategory c = generalCategory c == cat
{-# INLINE charCategory #-}
categoryName :: GeneralCategory -> String
categoryName = \case
UppercaseLetter -> "uppercase letter"
LowercaseLetter -> "lowercase letter"
TitlecaseLetter -> "titlecase letter"
ModifierLetter -> "modifier letter"
OtherLetter -> "other letter"
NonSpacingMark -> "non-spacing mark"
SpacingCombiningMark -> "spacing combining mark"
EnclosingMark -> "enclosing mark"
DecimalNumber -> "decimal number character"
LetterNumber -> "letter number character"
OtherNumber -> "other number character"
ConnectorPunctuation -> "connector punctuation"
DashPunctuation -> "dash punctuation"
OpenPunctuation -> "open punctuation"
ClosePunctuation -> "close punctuation"
InitialQuote -> "initial quote"
FinalQuote -> "final quote"
OtherPunctuation -> "other punctuation"
MathSymbol -> "math symbol"
CurrencySymbol -> "currency symbol"
ModifierSymbol -> "modifier symbol"
OtherSymbol -> "other symbol"
Space -> "white space"
LineSeparator -> "line separator"
ParagraphSeparator -> "paragraph separator"
Control -> "control character"
Format -> "format character"
Surrogate -> "surrogate character"
PrivateUse -> "private-use Unicode character"
NotAssigned -> "non-assigned Unicode character"
-- | Character-specialised alias of 'single': the token type is fixed to
-- 'Char' by the @Token s ~ Char@ constraint. The parser yields the token
-- it matched.
--
-- Defined point-free on purpose so the @INLINE@ unfolding is available
-- even when 'char' is used without an argument.
char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
char = single
{-# INLINE char #-}
-- | Case-insensitive variant of 'char'. The lower-case, upper-case and
-- title-case forms of the given character are tried in that order, and the
-- character actually matched in the input is returned.
char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
char' c = choice [char (recase c) | recase <- [toLower, toUpper, toTitle]]
{-# INLINE char' #-}
-- | Predicate for "horizontal" white space: every character for which
-- 'isSpace' holds except line feed and carriage return.
isHSpace :: Char -> Bool
isHSpace x = x `notElem` ['\n', '\r'] && isSpace x