module Data.Attoparsec.Text.Internal
(
Parser
, Result
, parse
, parseOnly
, module Data.Attoparsec.Combinator
, satisfy
, satisfyWith
, anyChar
, skip
, char
, notChar
, peekChar
, peekChar'
, inClass
, notInClass
, skipWhile
, string
, stringCI
, asciiCI
, take
, scan
, runScanner
, takeWhile
, takeWhile1
, takeTill
, takeText
, takeLazyText
, endOfLine
, endOfInput
, match
, atEnd
) where
import Control.Applicative ((<|>), (<$>), pure, (*>))
import Control.Monad (when)
import Data.Attoparsec.Combinator ((<?>))
import Data.Attoparsec.Internal
import Data.Attoparsec.Internal.Types hiding (Parser, Failure, Success)
import qualified Data.Attoparsec.Text.Buffer as Buf
import Data.Attoparsec.Text.Buffer (Buffer, buffer)
import Data.Char (isAsciiUpper, isAsciiLower, toUpper, toLower)
import Data.List (intercalate)
import Data.String (IsString(..))
import Data.Text.Internal (Text(..))
import Prelude hiding (getChar, succ, take, takeWhile)
import qualified Data.Attoparsec.Internal.Types as T
import qualified Data.Attoparsec.Text.FastSet as Set
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Unsafe as T
type Parser = T.Parser Text
type Result = IResult Text
type Failure r = T.Failure Text Buffer r
type Success a r = T.Success Text Buffer a r
instance (a ~ Text) => IsString (Parser a) where
fromString = string . T.pack
satisfy :: (Char -> Bool) -> Parser Char
satisfy p = do
(k,c) <- ensure 1
let !h = T.unsafeHead c
if p h
then advance k >> return h
else fail "satisfy"
skip :: (Char -> Bool) -> Parser ()
skip p = do
(k,s) <- ensure 1
if p (T.unsafeHead s)
then advance k
else fail "skip"
satisfyWith :: (Char -> a) -> (a -> Bool) -> Parser a
satisfyWith f p = do
(k,s) <- ensure 1
let c = f $! T.unsafeHead s
if p c
then advance k >> return c
else fail "satisfyWith"
takeWith :: Int -> (Text -> Bool) -> Parser Text
takeWith n p = do
(k,s) <- ensure n
if p s
then advance k >> return s
else fail "takeWith"
take :: Int -> Parser Text
take n = takeWith (max n 0) (const True)
string :: Text -> Parser Text
string s = string_ (stringSuspended id) id s
string_ :: (forall r. Text -> Text -> Buffer -> Pos -> More
-> Failure r -> Success Text r -> Result r)
-> (Text -> Text)
-> Text -> Parser Text
string_ suspended f s0 = T.Parser $ \t pos more lose succ ->
let s = f s0
ft = f (Buf.unbufferAt (fromPos pos) t)
in case T.commonPrefixes s ft of
Nothing
| T.null s -> succ t pos more T.empty
| T.null ft -> suspended s s t pos more lose succ
| otherwise -> lose t pos more [] "string"
Just (pfx,ssfx,tsfx)
| T.null ssfx -> let l = Pos (T.lengthWord16 pfx)
in succ t (pos + l) more (substring pos l t)
| not (T.null tsfx) -> lose t pos more [] "string"
| otherwise -> suspended s ssfx t pos more lose succ
stringSuspended :: (Text -> Text)
-> Text -> Text -> Buffer -> Pos -> More
-> Failure r
-> Success Text r
-> Result r
stringSuspended f s000 s0 t0 pos0 more0 lose0 succ0 =
runParser (demandInput_ >>= go) t0 pos0 more0 lose0 succ0
where
go s' = T.Parser $ \t pos more lose succ ->
let s = f s'
in case T.commonPrefixes s0 s of
Nothing -> lose t pos more [] "string"
Just (_pfx,ssfx,tsfx)
| T.null ssfx -> let l = Pos (T.lengthWord16 s000)
in succ t (pos + l) more (substring pos l t)
| T.null tsfx -> stringSuspended f s000 ssfx t pos more lose succ
| otherwise -> lose t pos more [] "string"
stringCI :: Text -> Parser Text
stringCI s = go 0
where
go !n
| n > T.length fs = fail "stringCI"
| otherwise = do
(k,t) <- ensure n
if T.toCaseFold t == fs
then advance k >> return t
else go (n+1)
fs = T.toCaseFold s
asciiCI :: Text -> Parser Text
asciiCI s = fmap fst $ match $ T.foldr ((*>) . asciiCharCI) (pure ()) s
asciiCharCI :: Char -> Parser Char
asciiCharCI c
| isAsciiUpper c = char c <|> char (toLower c)
| isAsciiLower c = char c <|> char (toUpper c)
| otherwise = char c
skipWhile :: (Char -> Bool) -> Parser ()
skipWhile p = go
where
go = do
t <- T.takeWhile p <$> get
continue <- inputSpansChunks (size t)
when continue go
takeTill :: (Char -> Bool) -> Parser Text
takeTill p = takeWhile (not . p)
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile p = do
h <- T.takeWhile p <$> get
continue <- inputSpansChunks (size h)
if continue
then takeWhileAcc p [h]
else return h
takeWhileAcc :: (Char -> Bool) -> [Text] -> Parser Text
takeWhileAcc p = go
where
go acc = do
h <- T.takeWhile p <$> get
continue <- inputSpansChunks (size h)
if continue
then go (h:acc)
else return $ concatReverse (h:acc)
takeRest :: Parser [Text]
takeRest = go []
where
go acc = do
input <- wantInput
if input
then do
s <- get
advance (size s)
go (s:acc)
else return (reverse acc)
takeText :: Parser Text
takeText = T.concat `fmap` takeRest
takeLazyText :: Parser L.Text
takeLazyText = L.fromChunks `fmap` takeRest
data Scan s = Continue s
| Finished s !Int Text
scan_ :: (s -> [Text] -> Parser r) -> s -> (s -> Char -> Maybe s) -> Parser r
scan_ f s0 p = go [] s0
where
scanner s !n t =
case T.uncons t of
Just (c,t') -> case p s c of
Just s' -> scanner s' (n+1) t'
Nothing -> Finished s n t
Nothing -> Continue s
go acc s = do
input <- get
case scanner s 0 input of
Continue s' -> do continue <- inputSpansChunks (size input)
if continue
then go (input : acc) s'
else f s' (input : acc)
Finished s' n t -> do advance (size input size t)
f s' (T.take n input : acc)
scan :: s -> (s -> Char -> Maybe s) -> Parser Text
scan = scan_ $ \_ chunks -> return $! concatReverse chunks
runScanner :: s -> (s -> Char -> Maybe s) -> Parser (Text, s)
runScanner = scan_ $ \s xs -> let !sx = concatReverse xs in return (sx, s)
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 p = do
(`when` demandInput) =<< endOfChunk
h <- T.takeWhile p <$> get
let size' = size h
when (size' == 0) $ fail "takeWhile1"
advance size'
eoc <- endOfChunk
if eoc
then takeWhileAcc p [h]
else return h
inClass :: String -> Char -> Bool
inClass s = (`Set.member` mySet)
where mySet = Set.charClass s
notInClass :: String -> Char -> Bool
notInClass s = not . inClass s
anyChar :: Parser Char
anyChar = satisfy $ const True
char :: Char -> Parser Char
char c = satisfy (== c) <?> show c
notChar :: Char -> Parser Char
notChar c = satisfy (/= c) <?> "not " ++ show c
peekChar :: Parser (Maybe Char)
peekChar = T.Parser $ \t pos more _lose succ ->
case () of
_| pos < lengthOf t ->
let T.Iter !c _ = Buf.iter t (fromPos pos)
in succ t pos more (Just c)
| more == Complete ->
succ t pos more Nothing
| otherwise ->
let succ' t' pos' more' =
let T.Iter !c _ = Buf.iter t' (fromPos pos')
in succ t' pos' more' (Just c)
lose' t' pos' more' = succ t' pos' more' Nothing
in prompt t pos more lose' succ'
peekChar' :: Parser Char
peekChar' = do
(_,s) <- ensure 1
return $! T.unsafeHead s
endOfLine :: Parser ()
endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ())
failK :: Failure a
failK t (Pos pos) _more stack msg = Fail (Buf.dropWord16 pos t) stack msg
successK :: Success a a
successK t (Pos pos) _more a = Done (Buf.dropWord16 pos t) a
parse :: Parser a -> Text -> Result a
parse m s = runParser m (buffer s) 0 Incomplete failK successK
parseOnly :: Parser a -> Text -> Either String a
parseOnly m s = case runParser m (buffer s) 0 Complete failK successK of
Fail _ [] err -> Left err
Fail _ ctxs err -> Left (intercalate " > " ctxs ++ ": " ++ err)
Done _ a -> Right a
_ -> error "parseOnly: impossible error!"
get :: Parser Text
get = T.Parser $ \t pos more _lose succ ->
succ t pos more (Buf.dropWord16 (fromPos pos) t)
endOfChunk :: Parser Bool
endOfChunk = T.Parser $ \t pos more _lose succ ->
succ t pos more (pos == lengthOf t)
inputSpansChunks :: Pos -> Parser Bool
inputSpansChunks i = T.Parser $ \t pos_ more _lose succ ->
let pos = pos_ + i
in if pos < lengthOf t || more == Complete
then succ t pos more False
else let lose' t' pos' more' = succ t' pos' more' False
succ' t' pos' more' = succ t' pos' more' True
in prompt t pos more lose' succ'
advance :: Pos -> Parser ()
advance n = T.Parser $ \t pos more _lose succ -> succ t (pos+n) more ()
ensureSuspended :: Int -> Buffer -> Pos -> More
-> Failure r -> Success (Pos, Text) r
-> Result r
ensureSuspended n t pos more lose succ =
runParser (demandInput >> go) t pos more lose succ
where go = T.Parser $ \t' pos' more' lose' succ' ->
case lengthAtLeast pos' n t' of
Just n' -> succ' t' pos' more' (n', substring pos n' t')
Nothing -> runParser (demandInput >> go) t' pos' more' lose' succ'
ensure :: Int -> Parser (Pos, Text)
ensure n = T.Parser $ \t pos more lose succ ->
case lengthAtLeast pos n t of
Just n' -> succ t pos more (n', substring pos n' t)
Nothing -> ensureSuspended n t pos more lose succ
match :: Parser a -> Parser (Text, a)
match p = T.Parser $ \t pos more lose succ ->
let succ' t' pos' more' a = succ t' pos' more'
(substring pos (pos'pos) t', a)
in runParser p t pos more lose succ'
lengthAtLeast :: Pos -> Int -> Buffer -> Maybe Pos
lengthAtLeast pos n t = go 0 (fromPos pos)
where go i !p
| i == n = Just (Pos p pos)
| p == len = Nothing
| otherwise = go (i+1) (p + Buf.iter_ t p)
Pos len = lengthOf t
substring :: Pos -> Pos -> Buffer -> Text
substring (Pos pos) (Pos n) = Buf.substring pos n
lengthOf :: Buffer -> Pos
lengthOf = Pos . Buf.length
size :: Text -> Pos
size (Text _ _ l) = Pos l