{-# LANGUAGE OverloadedStrings #-}
module Pencil.Internal.Parser where
import Text.ParserCombinators.Parsec
import qualified Data.List as DL
import qualified Data.Text as T
import qualified Text.Parsec as P
-- Doctest setup.
--
-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Either (isLeft)
-- | Pencil's @Page@ AST.
data PNode =
PText T.Text
| PVar T.Text
| PFor T.Text [PNode]
| PIf T.Text [PNode]
| PPartial T.Text
| PPreamble T.Text
-- Signals a If/For expression in the stack waiting for expressions. So that we
-- can find the next unused open if/for-statement in nested if/for-statements.
| PMetaIf T.Text
| PMetaFor T.Text
-- A terminating node that represents the end of the program, to help with AST
-- converstion
| PMetaEnd
deriving (Show, Eq)
-- | Pencil's tokens for content.
data Token =
TokText T.Text
| TokVar T.Text
| TokFor T.Text
| TokIf T.Text
| TokPartial T.Text
| TokPreamble T.Text
| TokEnd
deriving (Show, Eq)
-- | Convert Tokens to PNode AST.
--
-- >>> transform [TokText "hello", TokText "world"]
-- [PText "hello",PText "world"]
--
-- >>> transform [TokIf "title", TokEnd]
-- [PIf "title" []]
--
-- >>> transform [TokIf "title", TokText "hello", TokText "world", TokEnd]
-- [PIf "title" [PText "hello",PText "world"]]
--
-- > ${if(title)}
-- > ${for(posts)}
-- > world
-- > ${end}
-- > ${end}
--
-- >>> transform [TokIf "title", TokFor "posts", TokText "world", TokEnd, TokEnd]
-- [PIf "title" [PFor "posts" [PText "world"]]]
--
-- > begin
-- > now
-- > ${if(title)}
-- > hello
-- > world
-- > ${if(body)}
-- > ${body}
-- > ${someothervar}
-- > wahh
-- > ${end}
-- > final
-- > thing
-- > ${end}
-- > the
-- > lastline
--
-- >>> transform [TokText "begin", TokText "now", TokIf "title", TokText "hello", TokText "world", TokIf "body", TokVar "body", TokVar "someothervar", TokText "wahh", TokEnd, TokText "final", TokText "thing", TokEnd, TokText "the", TokText "lastline"]
-- [PText "begin",PText "now",PIf "title" [PText "hello",PText "world",PIf "body" [PVar "body",PVar "someothervar",PText "wahh"],PText "final",PText "thing"],PText "the",PText "lastline"]
--
-- >
-- > Hello world ${foo}
--
-- >>> transform [TokPreamble "foo: bar\ndo:\n - re\n -me", TokText "Hello world ", TokVar "foo"]
-- [PPreamble "foo: bar\ndo:\n - re\n -me",PText "Hello world ",PVar "foo"]
--
transform :: [Token] -> [PNode]
transform toks =
let stack = ast [] toks
in reverse stack
-- | Converts Tokens, which is just the raw list of parsed tokens, into PNodes
-- which are the tree-structure expressions (i.e. if/for nesting)
--
-- This function works by using a stack to keep track of where we are for nested
-- expressions such as if and for statements. When a token that starts a nesting
-- is found (like a TokIf), a "meta" expression (PMetaIf) is pushed into the
-- stack. When we finally see an end token (TokEnd), we pop all the expressions
-- off the stack until the first meta tag (e.g PMetaIf) is reached. All the
-- expressions popped off are now known to be nested inside that if statement.
--
ast :: [PNode] -- stack
-> [Token] -- remaining
-> [PNode] -- (AST, remaining)
ast stack [] = stack
ast stack (TokText t : toks) = ast (PText t : stack) toks
ast stack (TokVar t : toks) = ast (PVar t : stack) toks
ast stack (TokPartial fp : toks) = ast (PPartial fp : stack) toks
ast stack (TokPreamble t : toks) = ast (PPreamble t : stack) toks
ast stack (TokIf t : toks) = ast (PMetaIf t : stack) toks
ast stack (TokFor t : toks) = ast (PMetaFor t : stack) toks
ast stack (TokEnd : toks) =
let (node, popped, remaining) = popNodes stack
-- ^ Find the last unused if/for statement, and grab all the expressions
-- in-between this TokEnd and the opening if/for keyword.
n = case node of
PMetaIf t -> PIf t popped
PMetaFor t -> PFor t popped
_ -> PMetaEnd
-- Push the statement into the stack
in ast (n : remaining) toks
-- | Pop nodes until we hit a If/For statement.
-- Return pair (constructor found, nodes popped, remaining stack)
popNodes :: [PNode] -> (PNode, [PNode], [PNode])
popNodes = popNodes_ []
-- | Helper for 'popNodes'.
popNodes_ :: [PNode] -> [PNode] -> (PNode, [PNode], [PNode])
popNodes_ popped [] = (PMetaEnd, popped, [])
popNodes_ popped (PMetaIf t : rest) = (PMetaIf t, popped, rest)
popNodes_ popped (PMetaFor t : rest) = (PMetaFor t, popped, rest)
popNodes_ popped (t : rest) = popNodes_ (t : popped) rest
-- | Render nodes as string.
renderNodes :: [PNode] -> T.Text
renderNodes = DL.foldl' (\str n -> (T.append str (renderNode n))) ""
-- | Render node as string.
renderNode :: PNode -> T.Text
renderNode (PText t) = t
renderNode (PVar t) = T.append (T.append "${" t) "}"
renderNode (PFor t nodes) =
let for = T.append (T.append "${for(" t) ")}"
body = renderNodes nodes
end = "${end}"
in T.append (T.append for body) end
renderNode (PIf t nodes) =
let for = T.append (T.append "${if(" t) ")}"
body = renderNodes nodes
end = "${end}"
in T.append (T.append for body) end
renderNode (PPartial file) = T.append (T.append "${partial(" file) ")}"
renderNode (PMetaIf v) = renderNode (PIf v [])
renderNode (PMetaFor v) = renderNode (PFor v [])
renderNode PMetaEnd = ""
renderNode (PPreamble _) = "" -- Don't render the PREAMBLE
-- | Render tokens.
renderTokens :: [Token] -> T.Text
renderTokens = DL.foldl' (\str n -> (T.append str (renderToken n))) ""
-- | Render token.
renderToken :: Token -> T.Text
renderToken (TokText t) = t
renderToken (TokVar t) = T.append (T.append "${" t) "}"
renderToken (TokPartial fp) = T.append (T.append "${partial(\"" fp) "\"}"
renderToken (TokFor t) = T.append (T.append "${for(" t) ")}"
renderToken (TokEnd) = "${end}"
renderToken (TokIf t) = T.append (T.append "${if(" t) ")}"
renderToken (TokPreamble _) = "" -- Hide preamble content
-- | Parse text.
parseText :: T.Text -> Either ParseError [PNode]
parseText text = do
toks <- parse parseEverything (T.unpack "") (T.unpack text)
return $ transform toks
-- | Parse everything.
--
-- >>> parse parseEverything "" "Hello ${man} and ${woman}."
-- Right [TokText "Hello ",TokVar "man",TokText " and ",TokVar "woman",TokText "."]
--
-- >>> parse parseEverything "" "Hello ${man} and ${if(woman)} text here ${end}."
-- Right [TokText "Hello ",TokVar "man",TokText " and ",TokIf "woman",TokText " text here ",TokEnd,TokText "."]
--
-- >>> parse parseEverything "" "Hi ${for(people)} ${name}, ${end} everyone!"
-- Right [TokText "Hi ",TokFor "people",TokText " ",TokVar "name",TokText ", ",TokEnd,TokText " everyone!"]
--
-- >>> parse parseEverything "" "${realvar} $.get(javascript) $$ $$$ $} $( $45.50 $$escape $${escape2} wonderful life! ${truth}"
-- Right [TokVar "realvar",TokText " $.get(javascript) $$ $$$ $} $( $45.50 $$escape ",TokText "${",TokText "escape2} wonderful life! ",TokVar "truth"]
--
-- >>> parse parseEverything "" "waffle house ${lyfe}"
-- Right [TokPreamble " \n foo: bar\ndo:\n - re\n -me\n ",TokText "waffle house ",TokVar "lyfe"]
--
-- >>> parse parseEverything "" "YO ${foo} waffle house ${lyfe}"
-- Right [TokText "YO ",TokVar "foo",TokText " ",TokPreamble " \n ${foo}: bar\ndo:\n - re\n -me\n ",TokText "waffle house ",TokVar "lyfe"]
--
-- This is a degenerate case that we will just allow (for now) to go sideways:
-- >>> parse parseEverything "" "this ${var never closes ${realvar}"
-- Right [TokText "this ",TokVar "var never closes ${realvar"]
--
parseEverything :: Parser [Token]
parseEverything =
-- Note that order matters here. We want "most general" to be last (variable
-- names).
many1 (try parsePreamble
<|> try parseEscape
<|> try parseContent
<|> try parseEnd
<|> try parseFor
<|> try parseIf
<|> try parseEnd
<|> try parsePartial
<|> parseVar)
-- >>> parse parseVar "" "${ffwe} yep"
-- Right (TokVar "ffwe")
--
-- >>> parse parseVar "" "${spaces technically allowed}"
-- Right (TokVar "spaces technically allowed")
--
-- >>> isLeft $ parse parseVar "" "Hello ${name}"
-- True
--
-- >>> isLeft $ parse parseVar "" "${}"
-- True
--
-- | Parse variables.
parseVar :: Parser Token
parseVar = try $ do
_ <- char '$'
_ <- char '{'
varName <- many1 (noneOf "}")
_ <- char '}'
return $ TokVar (T.pack varName)
-- | Parse preamble.
parsePreamble :: Parser Token
parsePreamble = do
_ <- parsePreambleStart
-- "Note the overlapping parsers anyChar and string "-->", and therefore the
-- use of the try combinator."
-- (https://hackage.haskell.org/package/parsec-3.1.11/docs/Text-Parsec.html)
content <- manyTill anyChar (try (string "-->"))
return $ TokPreamble (T.pack content)
-- | Parse the start of a PREAMBLE.
parsePreambleStart :: Parser String
parsePreambleStart = string "