-- | @pipes@ utilities for incrementally running @attoparsec@-based parsers.
--
-- This module assumes familiarity with @pipes-parse@, you can learn about it in
-- "Pipes.Parse.Tutorial".

{-# LANGUAGE BangPatterns       #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleInstances  #-}
{-# LANGUAGE RankNTypes         #-}

module Pipes.Attoparsec (
    -- * Parsing
      parse
    , parsed

    -- ** Including input length
    --
    -- $lengths
    , parseL
    , parsedL

    -- * Utils
    , isEndOfParserInput

    -- * Types
    , ParserInput
    , ParsingError(..)
    ) where

import           Control.Exception                (Exception)
import           Control.Monad.Trans.Error        (Error)
import qualified Control.Monad.Trans.State.Strict as S
import qualified Data.Attoparsec.ByteString
import qualified Data.Attoparsec.Text
import           Data.Attoparsec.Types            (IResult (..))
import qualified Data.Attoparsec.Types            as Attoparsec
import           Data.ByteString                  (ByteString)
import qualified Data.ByteString
import           Data.Data                        (Data, Typeable)
import           Data.Monoid                      (Monoid (mempty))
import           Data.Text                        (Text)
import qualified Data.Text
import           Pipes
import qualified Pipes.Parse                      as Pipes (Parser)

--------------------------------------------------------------------------------

-- | Convert an @attoparsec@ 'Attoparsec.Parser' to a @pipes-parse@
-- 'Pipes.Parser'.
--
-- This 'Pipes.Parser' is compatible with the tools from "Pipes.Parse".
--
-- It returns 'Nothing' if the underlying 'Producer' is exhausted, otherwise
-- it attempts to run the given attoparsec 'Attoparsec.Parser' on the underlying
-- 'Producer', possibly failing with 'ParsingError'.
parse
  :: (Monad m, ParserInput a)
  => Attoparsec.Parser a b                            -- ^ Attoparsec parser
  -> Pipes.Parser a m (Maybe (Either ParsingError b)) -- ^ Pipes parser
parse parser = S.StateT $ \p0 -> do
    x <- nextSkipEmpty p0
    case x of
      Left r       -> return (Nothing, return r)
      Right (a,p1) -> step (yield a >>) (_parse parser a) p1
  where
    step diffP res p0 = case res of
      Fail _ c m -> return (Just (Left (ParsingError c m)), diffP p0)
      Done a b   -> return (Just (Right b), yield a >> p0)
      Partial k  -> do
        x <- nextSkipEmpty p0
        case x of
          Left e -> step diffP (k mempty) (return e)
          Right (a,p1) -> step (diffP . (yield a >>)) (k a) p1
{-# INLINABLE parse #-}


-- | Convert a producer of 'ParserInput' to a producer of parsed values.
--
-- This producer returns 'Right' when end-of-input is reached sucessfully,
-- otherwise it returns a 'ParsingError' and the leftovers including
-- the malformed input that couldn't be parsed. You can use 'Pipes.Lift.errorP'
-- to promote the 'Either' return value to an 'Control.Monad.Trans.Error.ErrorT'
-- monad transformer.
parsed
  :: (Monad m, ParserInput a)
  => Attoparsec.Parser a b  -- ^ Attoparsec parser
  -> Producer a m r         -- ^ Raw input
  -> Producer b m (Either (ParsingError, Producer a m r) r)
parsed parser = go
  where
    go p0 = do
      x <- lift (nextSkipEmpty p0)
      case x of
        Left r       -> return (Right r)
        Right (a,p1) -> step (yield a >>) (_parse parser a) p1
    step diffP res p0 = case res of
      Fail _ c m -> return (Left (ParsingError c m, diffP p0))
      Done a b   -> yield b >> go (yield a >> p0)
      Partial k  -> do
        x <- lift (nextSkipEmpty p0)
        case x of
          Left e -> step diffP (k mempty) (return e)
          Right (a,p1) -> step (diffP . (yield a >>)) (k a) p1
{-# INLINABLE parsed #-}

--------------------------------------------------------------------------------
-- $lengths
-- Like the functions above, but these also provide information about
-- the length of input consumed in order to fully parse each value.
--------------------------------------------------------------------------------

-- | Like 'parse', but also returns the length of input consumed to parse the
-- value.
parseL
    :: (Monad m, ParserInput a)
    => Attoparsec.Parser a b                           -- ^ Attoparsec parser
    -> Pipes.Parser a m (Maybe (Either ParsingError (Int, b))) -- ^ Pipes parser
parseL parser = S.StateT $ \p0 -> do
    x <- nextSkipEmpty p0
    case x of
      Left r       -> return (Nothing, return r)
      Right (a,p1) -> step (yield a >>) (_parse parser a) p1 (_length a)
  where
    step diffP res p0 !len = case res of
      Fail _ c m -> return (Just (Left (ParsingError c m)), diffP p0)
      Done a b   -> return (Just (Right (len - _length a, b)), yield a >> p0)
      Partial k  -> do
        x <- nextSkipEmpty p0
        case x of
          Left e -> step diffP (k mempty) (return e) len
          Right (a,p1) -> step (diffP . (yield a >>)) (k a) p1 (len + _length a)
{-# INLINABLE parseL #-}


-- | Like 'parsed', except this tags each parsed value with the length of input
-- consumed to parse the value.
parsedL
    :: (Monad m, ParserInput a)
    => Attoparsec.Parser a b    -- ^ Attoparsec parser
    -> Producer a m r           -- ^ Raw input
    -> Producer (Int, b) m (Either (ParsingError, Producer a m r) r)
parsedL parser = go
  where
    go p0 = do
      x <- lift (nextSkipEmpty p0)
      case x of
        Left r       -> return (Right r)
        Right (a,p1) -> step (yield a >>) (_parse parser a) p1 (_length a)
    step diffP res p0 !len = case res of
      Fail _ c m -> return (Left (ParsingError c m, diffP p0))
      Done a b   -> yield (len - _length a, b) >> go (yield a >> p0)
      Partial k  -> do
        x <- lift (nextSkipEmpty p0)
        case x of
          Left e -> step diffP (k mempty) (return e) len
          Right (a,p1) -> step (diffP . (yield a >>)) (k a) p1 (len + _length a)
{-# INLINABLE parsedL #-}

--------------------------------------------------------------------------------

-- | Like 'Pipes.Parse.isEndOfInput', except that it also consumes and discards
-- leading empty chunks.
isEndOfParserInput :: (Monad m, ParserInput a) => Pipes.Parser a m Bool
isEndOfParserInput = S.StateT $ \p0 -> do
    x <- nextSkipEmpty p0
    case x of
       Left r        -> return (True,  return r)
       Right (a, p1) -> return (False, yield a >> p1)
{-# INLINABLE isEndOfParserInput #-}

--------------------------------------------------------------------------------

-- | A class for valid @attoparsec@ input types
class (Eq a, Monoid a) => ParserInput a where
    _parse  :: Attoparsec.Parser a b -> a -> IResult a b
    _length :: a -> Int

-- | Strict 'ByteString'.
instance ParserInput ByteString where
    _parse  = Data.Attoparsec.ByteString.parse
    {-# INLINE _parse #-}
    _length = Data.ByteString.length
    {-# INLINE _length #-}

-- | Strict 'Text'.
instance ParserInput Text where
    _parse  = Data.Attoparsec.Text.parse
    {-# INLINE _parse #-}
    _length = Data.Text.length
    {-# INLINE _length #-}

--------------------------------------------------------------------------------

-- | A parsing error report, as provided by Attoparsec's 'Fail'.
data ParsingError = ParsingError
    { peContexts :: [String]  -- ^ Contexts where the parsing error occurred.
    , peMessage  ::  String   -- ^ Parsing error description message.
    } deriving (Show, Read, Eq, Data, Typeable)

instance Exception ParsingError
instance Error     ParsingError

-- | This instance allows using 'Pipes.Lift.errorP' with 'parsed' and 'parsedL'
instance Error (ParsingError, Producer a m r)

--------------------------------------------------------------------------------
-- Internal stuff

-- | Like 'Pipes.next', except it skips leading 'mempty' chunks.
nextSkipEmpty
  :: (Monad m, Eq a, Monoid a)
  => Producer a m r
  -> m (Either r (a, Producer a m r))
nextSkipEmpty = go where
    go p0 = do
      x <- next p0
      case x of
         Left  _        -> return x
         Right (a,p1)
          | a == mempty -> go p1
          | otherwise   -> return x
{-# INLINABLE nextSkipEmpty #-}