{-# LANGUAGE Haskell2010 #-} -- | -- Module : Data.Picoparsec -- Copyright : Bryan O'Sullivan 2007-2011, Mario Blažević 2014 -- License : BSD3 -- -- Maintainer : Mario Blažević -- Stability : experimental -- Portability : unknown -- -- Simple, efficient combinator parsing for -- 'Data.Monoid.Cancellative.LeftGCDMonoid' and -- 'Data.Monoid.Factorial.FactorialMonoid' inputs, loosely based on -- Parsec and derived from Attoparsec. module Data.Picoparsec ( -- * Differences from Parsec -- $parsec -- * Differences from Attoparsec -- $attoparsec -- * Incremental input -- $incremental -- * Performance considerations -- $performance -- * Parser types I.Parser , Result , T.IResult(..) , I.compareResults -- * Running parsers , parse , feed , I.parseOnly , parseWith , parseTest -- ** Result conversion , maybeResult , eitherResult -- * Parsing individual tokens , I.anyToken , I.notToken , I.peekToken , I.satisfy , I.satisfyWith , I.skip -- ** Parsing individual characters , I.anyChar , I.char , I.notChar , I.peekChar , I.peekChar' , I.satisfyChar -- * Efficient string handling , I.scan , I.string , I.skipWhile , I.take , I.takeWhile , I.takeWhile1 , I.takeTill -- ** Efficient character string handling , I.scanChars , I.skipCharsWhile , I.takeCharsWhile , I.takeCharsWhile1 , I.takeCharsTill , I.takeTillChar , I.takeTillChar1 -- ** Consume all remaining input , I.takeRest -- * Text parsing , I.endOfLine -- * Combinators , try , (<?>) , choice , count , option , many' , many1 , many1' , manyTill , manyTill' , sepBy , sepBy' , sepBy1 , sepBy1' , skipMany , skipMany1 , eitherP -- * State observation and manipulation functions , I.endOfInput , I.atEnd ) where import Data.Monoid (Monoid) import Data.Picoparsec.Combinator import qualified Data.Picoparsec.Monoid.Internal as I import qualified Data.Picoparsec.Internal as I import Data.Picoparsec.Monoid.Internal (Result, parse) import qualified Data.Picoparsec.Internal.Types as T -- $parsec -- -- Compared to Parsec 3, Picoparsec makes 
several tradeoffs. It is not intended for, or ideal for, all possible uses. -- -- * While Picoparsec can consume input incrementally, Parsec cannot. Incremental input is a huge deal for efficient -- and secure network and system programming, since it gives much more control to users of the library over matters such -- as resource usage and the I/O model to use. -- -- * Much of the performance advantage of Picoparsec is gained via high-performance parsers such as 'I.takeWhile' and -- 'I.string'. If you use complicated combinators that return lists of bytes or characters, there is less performance -- difference between the two libraries. -- -- * Unlike Parsec 3, Picoparsec does not support being used as a monad transformer. -- -- * Parsec parsers can produce more helpful error messages than Picoparsec parsers. This is a matter of focus: -- Picoparsec avoids the extra book-keeping in favour of higher performance. -- -- * Parsec comes with built-in support for user state. Picoparsec does not maintain any state by default, in order to -- maximize performance. If your parsing logic depends on it, you can track the state by wrapping your input in a -- 'Stateful' monoid. -- $attoparsec -- -- Compared to Attoparsec, Picoparsec trades away some performance for generality. Attoparsec works only with -- 'ByteString' and 'Text' inputs. If your input type is one of these two, Attoparsec is the better choice. Use -- Picoparsec if you want your parser to be applicable to a different input type, especially if you wish to leave the -- choice of that input type to the end user. -- -- Some Attoparsec primitives like 'word8' are missing because they are specific to ByteString inputs. Picoparsec is -- otherwise largely compatible with Attoparsec, having copied from it both the core logic and the full set of parsing -- combinators. 
-- $incremental -- -- Picoparsec supports incremental input, meaning that you can feed it a chunk of input that represents only part of the -- expected total amount of data to parse. If your parser reaches the end of a fragment of input and could consume more -- input, it will suspend parsing and return a 'T.Partial' continuation. -- -- Supplying the 'T.Partial' continuation with another string will resume parsing at the point where it was -- suspended. You must be prepared for the result of the resumed parse to be another 'T.Partial' continuation. -- -- To indicate that you have no more input, supply the 'T.Partial' continuation with an empty string. -- -- Remember that some parsing combinators will not return a result until they reach the end of input. They may thus -- cause 'T.Partial' results to be returned. -- -- If you do not need support for incremental input, consider using the 'I.parseOnly' function to run your parser. It -- will never prompt for more input. -- $performance -- -- A Picoparsec-based parser applied to a strict ByteString or Text input will generally be somewhat slower than -- Attoparsec, but if properly optimized and specialized the difference should be less than 50%. -- -- To actually achieve high performance, there are a few guidelines that it is useful to follow. -- -- * Use the input-returning parsers whenever possible, e.g. 'I.takeWhile1' instead of 'many1' 'I.anyToken'. There is a -- large difference in performance between the two kinds of parsers. -- -- * If you are parsing textual inputs, use the specialized character parsers; e.g. 'I.takeCharsWhile1' instead of -- 'I.takeWhile1'. -- -- * If the 'mappend' operation is slow for the input monoid type, it may drastically slow down the parsing of large -- inputs. Try wrapping the input with the 'Concat' newtype to make the 'mappend' time constant. -- -- * Use the INLINE, INLINABLE, and SPECIALIZE pragmas to optimize the more important parts of your parser for the likely -- input types. 
--
-- * Make active use of benchmarking and profiling tools to measure, find the problems with, and improve the performance
-- of your parser.

-- | Apply a parser to the given input and print the resulting 'Result'
-- to standard output.
parseTest :: (Monoid t, Show t, Show a) => I.Parser t a -> t -> IO ()
parseTest parser input = print $ parse parser input

-- | Run a parser on an initial chunk of input, calling a monadic action
-- to obtain further input every time the parser suspends with a
-- 'T.Partial' continuation.
parseWith :: (Monoid t, Monad m) =>
             m t
          -- ^ Action executed whenever the parser asks for more input.
          -- It must return an 'mempty' string once no more input is
          -- available.
          -> I.Parser t a
          -> t
          -- ^ First chunk of input handed to the parser.
          -> m (Result t a)
parseWith refill parser input = go (parse parser input)
  where
    -- Keep feeding the continuation until the parser stops asking for input.
    go result = case result of
      T.Partial continue -> refill >>= go . continue
      _                  -> return result
{-# INLINE parseWith #-}

-- | Turn a 'Result' into a 'Maybe'. Only a 'T.Done' result yields a
-- value; every other result, including 'T.Partial', becomes 'Nothing'.
maybeResult :: Result t r -> Maybe r
maybeResult result = case result of
  T.Done _ value -> Just value
  _              -> Nothing

-- | Turn a 'Result' into an 'Either'. A 'T.Done' result yields its
-- value, a 'T.Fail' result yields its error message, and any other
-- result (that is, 'T.Partial') is reported as incomplete input.
eitherResult :: Result t r -> Either String r
eitherResult result = case result of
  T.Done _ value     -> Right value
  T.Fail _ _ message -> Left message
  _                  -> Left "Result: incomplete input"