{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Text.Megaparsec.Stream
( Stream (..),
VisualStream (..),
TraversableStream (..),
)
where
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
import Data.Char (chr)
import Data.Foldable (foldl', toList)
import Data.Kind (Type)
import Data.List.NonEmpty (NonEmpty (..))
import qualified Data.List.NonEmpty as NE
import Data.Maybe (fromMaybe)
import Data.Proxy
import qualified Data.Sequence as S
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Data.Word (Word8)
import Text.Megaparsec.Pos
import Text.Megaparsec.State
class (Ord (Token s), Ord (Tokens s)) => Stream s where
type Token s :: Type
type Tokens s :: Type
tokenToChunk :: Proxy s -> Token s -> Tokens s
tokenToChunk pxy = tokensToChunk pxy . pure
tokensToChunk :: Proxy s -> [Token s] -> Tokens s
chunkToTokens :: Proxy s -> Tokens s -> [Token s]
chunkLength :: Proxy s -> Tokens s -> Int
chunkEmpty :: Proxy s -> Tokens s -> Bool
chunkEmpty pxy ts = chunkLength pxy ts <= 0
take1_ :: s -> Maybe (Token s, s)
takeN_ :: Int -> s -> Maybe (Tokens s, s)
takeWhile_ :: (Token s -> Bool) -> s -> (Tokens s, s)
instance Ord a => Stream [a] where
type Token [a] = a
type Tokens [a] = [a]
tokenToChunk Proxy = pure
tokensToChunk Proxy = id
chunkToTokens Proxy = id
chunkLength Proxy = length
chunkEmpty Proxy = null
take1_ [] = Nothing
take1_ (t : ts) = Just (t, ts)
takeN_ n s
| n <= 0 = Just ([], s)
| null s = Nothing
| otherwise = Just (splitAt n s)
takeWhile_ = span
instance Ord a => Stream (S.Seq a) where
type Token (S.Seq a) = a
type Tokens (S.Seq a) = S.Seq a
tokenToChunk Proxy = pure
tokensToChunk Proxy = S.fromList
chunkToTokens Proxy = toList
chunkLength Proxy = length
chunkEmpty Proxy = null
take1_ S.Empty = Nothing
take1_ (t S.:<| ts) = Just (t, ts)
takeN_ n s
| n <= 0 = Just (S.empty, s)
| null s = Nothing
| otherwise = Just (S.splitAt n s)
takeWhile_ = S.spanl
instance Stream B.ByteString where
type Token B.ByteString = Word8
type Tokens B.ByteString = B.ByteString
tokenToChunk Proxy = B.singleton
tokensToChunk Proxy = B.pack
chunkToTokens Proxy = B.unpack
chunkLength Proxy = B.length
chunkEmpty Proxy = B.null
take1_ = B.uncons
takeN_ n s
| n <= 0 = Just (B.empty, s)
| B.null s = Nothing
| otherwise = Just (B.splitAt n s)
takeWhile_ = B.span
instance Stream BL.ByteString where
type Token BL.ByteString = Word8
type Tokens BL.ByteString = BL.ByteString
tokenToChunk Proxy = BL.singleton
tokensToChunk Proxy = BL.pack
chunkToTokens Proxy = BL.unpack
chunkLength Proxy = fromIntegral . BL.length
chunkEmpty Proxy = BL.null
take1_ = BL.uncons
takeN_ n s
| n <= 0 = Just (BL.empty, s)
| BL.null s = Nothing
| otherwise = Just (BL.splitAt (fromIntegral n) s)
takeWhile_ = BL.span
instance Stream T.Text where
type Token T.Text = Char
type Tokens T.Text = T.Text
tokenToChunk Proxy = T.singleton
tokensToChunk Proxy = T.pack
chunkToTokens Proxy = T.unpack
chunkLength Proxy = T.length
chunkEmpty Proxy = T.null
take1_ = T.uncons
takeN_ n s
| n <= 0 = Just (T.empty, s)
| T.null s = Nothing
| otherwise = Just (T.splitAt n s)
takeWhile_ = T.span
instance Stream TL.Text where
type Token TL.Text = Char
type Tokens TL.Text = TL.Text
tokenToChunk Proxy = TL.singleton
tokensToChunk Proxy = TL.pack
chunkToTokens Proxy = TL.unpack
chunkLength Proxy = fromIntegral . TL.length
chunkEmpty Proxy = TL.null
take1_ = TL.uncons
takeN_ n s
| n <= 0 = Just (TL.empty, s)
| TL.null s = Nothing
| otherwise = Just (TL.splitAt (fromIntegral n) s)
takeWhile_ = TL.span
class Stream s => VisualStream s where
showTokens :: Proxy s -> NonEmpty (Token s) -> String
tokensLength :: Proxy s -> NonEmpty (Token s) -> Int
tokensLength Proxy = NE.length
instance VisualStream String where
showTokens Proxy = stringPretty
instance VisualStream B.ByteString where
showTokens Proxy = stringPretty . fmap (chr . fromIntegral)
instance VisualStream BL.ByteString where
showTokens Proxy = stringPretty . fmap (chr . fromIntegral)
instance VisualStream T.Text where
showTokens Proxy = stringPretty
instance VisualStream TL.Text where
showTokens Proxy = stringPretty
class Stream s => TraversableStream s where
{-# MINIMAL reachOffset | reachOffsetNoLine #-}
reachOffset ::
Int ->
PosState s ->
(Maybe String, PosState s)
reachOffset o pst =
(Nothing, reachOffsetNoLine o pst)
reachOffsetNoLine ::
Int ->
PosState s ->
PosState s
reachOffsetNoLine o pst =
snd (reachOffset o pst)
instance TraversableStream String where
reachOffset o pst =
reachOffset' splitAt foldl' id id ('\n', '\t') o pst
reachOffsetNoLine o pst =
reachOffsetNoLine' splitAt foldl' ('\n', '\t') o pst
instance TraversableStream B.ByteString where
reachOffset o pst =
reachOffset' B.splitAt B.foldl' B8.unpack (chr . fromIntegral) (10, 9) o pst
reachOffsetNoLine o pst =
reachOffsetNoLine' B.splitAt B.foldl' (10, 9) o pst
instance TraversableStream BL.ByteString where
reachOffset o pst =
reachOffset' splitAtBL BL.foldl' BL8.unpack (chr . fromIntegral) (10, 9) o pst
reachOffsetNoLine o pst =
reachOffsetNoLine' splitAtBL BL.foldl' (10, 9) o pst
instance TraversableStream T.Text where
reachOffset o pst =
reachOffset' T.splitAt T.foldl' T.unpack id ('\n', '\t') o pst
reachOffsetNoLine o pst =
reachOffsetNoLine' T.splitAt T.foldl' ('\n', '\t') o pst
instance TraversableStream TL.Text where
reachOffset o pst =
reachOffset' splitAtTL TL.foldl' TL.unpack id ('\n', '\t') o pst
reachOffsetNoLine o pst =
reachOffsetNoLine' splitAtTL TL.foldl' ('\n', '\t') o pst
data St = St SourcePos ShowS
reachOffset' ::
forall s.
Stream s =>
(Int -> s -> (Tokens s, s)) ->
(forall b. (b -> Token s -> b) -> b -> Tokens s -> b) ->
(Tokens s -> String) ->
(Token s -> Char) ->
(Token s, Token s) ->
Int ->
PosState s ->
(Maybe String, PosState s)
reachOffset'
splitAt'
foldl''
fromToks
fromTok
(newlineTok, tabTok)
o
PosState {..} =
( Just $ case expandTab pstateTabWidth
. addPrefix
. f
. fromToks
. fst
$ takeWhile_ (/= newlineTok) post of
"" -> "<empty line>"
xs -> xs,
PosState
{ pstateInput = post,
pstateOffset = max pstateOffset o,
pstateSourcePos = spos,
pstateTabWidth = pstateTabWidth,
pstateLinePrefix =
if sameLine
then
pstateLinePrefix ++ f ""
else f ""
}
)
where
addPrefix xs =
if sameLine
then pstateLinePrefix ++ xs
else xs
sameLine = sourceLine spos == sourceLine pstateSourcePos
(pre, post) = splitAt' (o - pstateOffset) pstateInput
St spos f = foldl'' go (St pstateSourcePos id) pre
go (St apos g) ch =
let SourcePos n l c = apos
c' = unPos c
w = unPos pstateTabWidth
in if
| ch == newlineTok ->
St
(SourcePos n (l <> pos1) pos1)
id
| ch == tabTok ->
St
(SourcePos n l (mkPos $ c' + w - ((c' - 1) `rem` w)))
(g . (fromTok ch :))
| otherwise ->
St
(SourcePos n l (c <> pos1))
(g . (fromTok ch :))
{-# INLINE reachOffset' #-}
reachOffsetNoLine' ::
forall s.
Stream s =>
(Int -> s -> (Tokens s, s)) ->
(forall b. (b -> Token s -> b) -> b -> Tokens s -> b) ->
(Token s, Token s) ->
Int ->
PosState s ->
PosState s
reachOffsetNoLine'
splitAt'
foldl''
(newlineTok, tabTok)
o
PosState {..} =
( PosState
{ pstateInput = post,
pstateOffset = max pstateOffset o,
pstateSourcePos = spos,
pstateTabWidth = pstateTabWidth,
pstateLinePrefix = pstateLinePrefix
}
)
where
spos = foldl'' go pstateSourcePos pre
(pre, post) = splitAt' (o - pstateOffset) pstateInput
go (SourcePos n l c) ch =
let c' = unPos c
w = unPos pstateTabWidth
in if
| ch == newlineTok ->
SourcePos n (l <> pos1) pos1
| ch == tabTok ->
SourcePos n l (mkPos $ c' + w - ((c' - 1) `rem` w))
| otherwise ->
SourcePos n l (c <> pos1)
{-# INLINE reachOffsetNoLine' #-}
splitAtBL :: Int -> BL.ByteString -> (BL.ByteString, BL.ByteString)
splitAtBL n = BL.splitAt (fromIntegral n)
{-# INLINE splitAtBL #-}
splitAtTL :: Int -> TL.Text -> (TL.Text, TL.Text)
splitAtTL n = TL.splitAt (fromIntegral n)
{-# INLINE splitAtTL #-}
stringPretty :: NonEmpty Char -> String
stringPretty (x :| []) = charPretty x
stringPretty ('\r' :| "\n") = "crlf newline"
stringPretty xs = "\"" <> concatMap f (NE.toList xs) <> "\""
where
f ch =
case charPretty' ch of
Nothing -> [ch]
Just pretty -> "<" <> pretty <> ">"
charPretty :: Char -> String
charPretty ' ' = "space"
charPretty ch = fromMaybe ("'" <> [ch] <> "'") (charPretty' ch)
charPretty' :: Char -> Maybe String
charPretty' = \case
'\NUL' -> Just "null"
'\SOH' -> Just "start of heading"
'\STX' -> Just "start of text"
'\ETX' -> Just "end of text"
'\EOT' -> Just "end of transmission"
'\ENQ' -> Just "enquiry"
'\ACK' -> Just "acknowledge"
'\BEL' -> Just "bell"
'\BS' -> Just "backspace"
'\t' -> Just "tab"
'\n' -> Just "newline"
'\v' -> Just "vertical tab"
'\f' -> Just "form feed"
'\r' -> Just "carriage return"
'\SO' -> Just "shift out"
'\SI' -> Just "shift in"
'\DLE' -> Just "data link escape"
'\DC1' -> Just "device control one"
'\DC2' -> Just "device control two"
'\DC3' -> Just "device control three"
'\DC4' -> Just "device control four"
'\NAK' -> Just "negative acknowledge"
'\SYN' -> Just "synchronous idle"
'\ETB' -> Just "end of transmission block"
'\CAN' -> Just "cancel"
'\EM' -> Just "end of medium"
'\SUB' -> Just "substitute"
'\ESC' -> Just "escape"
'\FS' -> Just "file separator"
'\GS' -> Just "group separator"
'\RS' -> Just "record separator"
'\US' -> Just "unit separator"
'\DEL' -> Just "delete"
'\160' -> Just "non-breaking space"
_ -> Nothing
expandTab ::
Pos ->
String ->
String
expandTab w' = go 0
where
go 0 [] = []
go 0 ('\t' : xs) = go w xs
go 0 (x : xs) = x : go 0 xs
go n xs = ' ' : go (n - 1) xs
w = unPos w'