{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
module Data.Conduit.Attoparsec
(
sinkParser
, sinkParserEither
, conduitParser
, conduitParserEither
, ParseError (..)
, Position (..)
, PositionRange (..)
, AttoparsecInput
) where
import Control.Exception (Exception)
import Control.Monad (unless)
import qualified Data.ByteString as B
import qualified Data.Text as T
import qualified Data.Text.Internal as TI
import Data.Typeable (Typeable)
import Prelude hiding (lines)
import qualified Data.Attoparsec.ByteString
import qualified Data.Attoparsec.Text
import qualified Data.Attoparsec.Types as A
import Data.Conduit
import Control.Monad.Trans.Resource (MonadThrow, throwM)
data ParseError = ParseError
{ errorContexts :: [String]
, errorMessage :: String
, errorPosition :: Position
} | DivergentParser
deriving (Show, Typeable)
instance Exception ParseError
data Position = Position
{ posLine :: {-# UNPACK #-} !Int
, posCol :: {-# UNPACK #-} !Int
, posOffset :: {-# UNPACK #-} !Int
}
deriving (Eq, Ord)
instance Show Position where
show (Position l c off) = show l ++ ':' : show c ++ " (" ++ show off ++ ")"
data PositionRange = PositionRange
{ posRangeStart :: {-# UNPACK #-} !Position
, posRangeEnd :: {-# UNPACK #-} !Position
}
deriving (Eq, Ord)
instance Show PositionRange where
show (PositionRange s e) = show s ++ '-' : show e
class AttoparsecInput a where
parseA :: A.Parser a b -> a -> A.IResult a b
feedA :: A.IResult a b -> a -> A.IResult a b
empty :: a
isNull :: a -> Bool
getLinesCols :: a -> Position
stripFromEnd :: a -> a -> a
instance AttoparsecInput B.ByteString where
parseA = Data.Attoparsec.ByteString.parse
feedA = Data.Attoparsec.ByteString.feed
empty = B.empty
isNull = B.null
getLinesCols = B.foldl' f (Position 0 0 0)
where
f (Position l c o) ch
| ch == 10 = Position (l + 1) 0 (o + 1)
| otherwise = Position l (c + 1) (o + 1)
stripFromEnd b1 b2 = B.take (B.length b1 - B.length b2) b1
instance AttoparsecInput T.Text where
parseA = Data.Attoparsec.Text.parse
feedA = Data.Attoparsec.Text.feed
empty = T.empty
isNull = T.null
getLinesCols = T.foldl' f (Position 0 0 0)
where
f (Position l c o) ch
| ch == '\n' = Position (l + 1) 0 (o + 1)
| otherwise = Position l (c + 1) (o + 1)
stripFromEnd (TI.Text arr1 off1 len1) (TI.Text _ _ len2) =
TI.text arr1 off1 (len1 - len2)
sinkParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> ConduitT a o m b
sinkParser = fmap snd . sinkParserPosErr (Position 1 1 0)
sinkParserEither :: (AttoparsecInput a, Monad m) => A.Parser a b -> ConduitT a o m (Either ParseError b)
sinkParserEither = (fmap.fmap) snd . sinkParserPos (Position 1 1 0)
conduitParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> ConduitT a (PositionRange, b) m ()
conduitParser parser =
conduit $ Position 1 1 0
where
conduit !pos = await >>= maybe (return ()) go
where
go x = do
leftover x
(!pos', !res) <- sinkParserPosErr pos parser
yield (PositionRange pos pos', res)
conduit pos'
{-# SPECIALIZE conduitParser
:: MonadThrow m
=> A.Parser T.Text b
-> ConduitT T.Text (PositionRange, b) m () #-}
{-# SPECIALIZE conduitParser
:: MonadThrow m
=> A.Parser B.ByteString b
-> ConduitT B.ByteString (PositionRange, b) m () #-}
conduitParserEither
:: (Monad m, AttoparsecInput a)
=> A.Parser a b
-> ConduitT a (Either ParseError (PositionRange, b)) m ()
conduitParserEither parser =
conduit $ Position 1 1 0
where
conduit !pos = await >>= maybe (return ()) go
where
go x = do
leftover x
eres <- sinkParserPos pos parser
case eres of
Left e -> yield $ Left e
Right (!pos', !res) -> do
yield $! Right (PositionRange pos pos', res)
conduit pos'
{-# SPECIALIZE conduitParserEither
:: Monad m
=> A.Parser T.Text b
-> ConduitT T.Text (Either ParseError (PositionRange, b)) m () #-}
{-# SPECIALIZE conduitParserEither
:: Monad m
=> A.Parser B.ByteString b
-> ConduitT B.ByteString (Either ParseError (PositionRange, b)) m () #-}
sinkParserPosErr
:: (AttoparsecInput a, MonadThrow m)
=> Position
-> A.Parser a b
-> ConduitT a o m (Position, b)
sinkParserPosErr pos0 p = sinkParserPos pos0 p >>= f
where
f (Left e) = throwM e
f (Right a) = return a
{-# INLINE sinkParserPosErr #-}
sinkParserPos
:: (AttoparsecInput a, Monad m)
=> Position
-> A.Parser a b
-> ConduitT a o m (Either ParseError (Position, b))
sinkParserPos pos0 p = sink empty pos0 (parseA p)
where
sink prev pos parser = await >>= maybe close push
where
push c
| isNull c = sink prev pos parser
| otherwise = go False c $ parser c
close = go True prev (feedA (parser empty) empty)
go end c (A.Done lo x) = do
let pos'
| end = pos
| otherwise = addLinesCols prev pos
y = stripFromEnd c lo
pos'' = addLinesCols y pos'
unless (isNull lo) $ leftover lo
pos'' `seq` return $! Right (pos'', x)
go end c (A.Fail rest contexts msg) =
let x = stripFromEnd c rest
pos'
| end = pos
| otherwise = addLinesCols prev pos
pos'' = addLinesCols x pos'
in pos'' `seq` return $! Left (ParseError contexts msg pos'')
go end c (A.Partial parser')
| end = return $! Left DivergentParser
| otherwise =
pos' `seq` sink c pos' parser'
where
pos' = addLinesCols prev pos
addLinesCols :: AttoparsecInput a => a -> Position -> Position
addLinesCols x (Position lines cols off) =
lines' `seq` cols' `seq` off' `seq` Position lines' cols' off'
where
Position dlines dcols doff = getLinesCols x
lines' = lines + dlines
cols' = (if dlines > 0 then 1 else cols) + dcols
off' = off + doff
{-# INLINE sinkParserPos #-}