{-# LANGUAGE PackageImports #-}
module Data.String.Interpolate.Parse
( InterpSegment(..), parseInterpSegments, dosToUnix )
where
import Data.Char
import qualified "base" Numeric as N
data InterpSegment
= Expression String
| Verbatim String
| Newline
| Spaces Int
| Tabs Int
deriving (Eq, Show)
parseInterpSegments :: String -> Either String [InterpSegment]
parseInterpSegments = switch
where switch :: String -> Either String [InterpSegment]
switch "" = pure []
switch ('#':'{':rest) = expr rest
switch ('#':_) = Left "unescaped # symbol without interpolation brackets"
switch ('\n':rest) = newline rest
switch (' ':rest) = spaces 1 rest
switch ('\t':rest) = tabs 1 rest
switch other = verbatim "" other
verbatim :: String -> String -> Either String [InterpSegment]
verbatim acc parsee = case parsee of
"" ->
((Verbatim . reverse) acc :) <$> switch parsee
(c:_) | c `elem` ['#', ' ', '\t', '\n'] ->
((Verbatim . reverse) acc :) <$> switch parsee
('\\':'#':rest) ->
verbatim ('#':acc) rest
('\\':_) -> case unescapeChar parsee of
(FoundChar c, rest) -> verbatim (c:acc) rest
(EscapeEmpty, rest) -> verbatim acc rest
(EscapeUnterminated, _) -> Left "unterminated backslash escape at end of string"
(UnknownEscape esc, _) -> Left ("unknown escape character: " ++ [esc])
c:cs ->
verbatim (c:acc) cs
expr :: String -> Either String [InterpSegment]
expr parsee = case span (/= '}') parsee of
(_, "") -> Left "unterminated #{...} interpolation"
(expr, _:rest) -> (Expression expr :) <$> switch rest
newline :: String -> Either String [InterpSegment]
newline parsee = (Newline :) <$> switch parsee
spaces :: Int -> String -> Either String [InterpSegment]
spaces n (' ':rest) = spaces (n+1) rest
spaces n other = (Spaces n :) <$> switch other
tabs :: Int -> String -> Either String [InterpSegment]
tabs n ('\t':rest) = tabs (n+1) rest
tabs n other = (Tabs n :) <$> switch other
dosToUnix :: String -> String
dosToUnix = go
where go xs = case xs of
'\r' : '\n' : ys -> '\n' : go ys
y : ys -> y : go ys
[] -> []
data EscapeResult
= FoundChar Char
| EscapeEmpty
| EscapeUnterminated
| UnknownEscape Char
unescapeChar :: String -> (EscapeResult, String)
unescapeChar input = case input of
"" -> (EscapeEmpty, input)
'\\' : 'x' : x : xs | isHexDigit x -> case span isHexDigit xs of
(ys, zs) -> ((FoundChar . chr . readHex $ x:ys), zs)
'\\' : 'o' : x : xs | isOctDigit x -> case span isOctDigit xs of
(ys, zs) -> ((FoundChar . chr . readOct $ x:ys), zs)
'\\' : x : xs | isDigit x -> case span isDigit xs of
(ys, zs) -> ((FoundChar . chr . read $ x:ys), zs)
'\\' : input_ -> case input_ of
'\\' : xs -> (FoundChar ('\\'), xs)
'a' : xs -> (FoundChar ('\a'), xs)
'b' : xs -> (FoundChar ('\b'), xs)
'f' : xs -> (FoundChar ('\f'), xs)
'n' : xs -> (FoundChar ('\n'), xs)
'r' : xs -> (FoundChar ('\r'), xs)
't' : xs -> (FoundChar ('\t'), xs)
'v' : xs -> (FoundChar ('\v'), xs)
'&' : xs -> (EscapeEmpty, xs)
'N':'U':'L' : xs -> (FoundChar ('\NUL'), xs)
'S':'O':'H' : xs -> (FoundChar ('\SOH'), xs)
'S':'T':'X' : xs -> (FoundChar ('\STX'), xs)
'E':'T':'X' : xs -> (FoundChar ('\ETX'), xs)
'E':'O':'T' : xs -> (FoundChar ('\EOT'), xs)
'E':'N':'Q' : xs -> (FoundChar ('\ENQ'), xs)
'A':'C':'K' : xs -> (FoundChar ('\ACK'), xs)
'B':'E':'L' : xs -> (FoundChar ('\BEL'), xs)
'B':'S' : xs -> (FoundChar ('\BS'), xs)
'H':'T' : xs -> (FoundChar ('\HT'), xs)
'L':'F' : xs -> (FoundChar ('\LF'), xs)
'V':'T' : xs -> (FoundChar ('\VT'), xs)
'F':'F' : xs -> (FoundChar ('\FF'), xs)
'C':'R' : xs -> (FoundChar ('\CR'), xs)
'S':'O' : xs -> (FoundChar ('\SO'), xs)
'S':'I' : xs -> (FoundChar ('\SI'), xs)
'D':'L':'E' : xs -> (FoundChar ('\DLE'), xs)
'D':'C':'1' : xs -> (FoundChar ('\DC1'), xs)
'D':'C':'2' : xs -> (FoundChar ('\DC2'), xs)
'D':'C':'3' : xs -> (FoundChar ('\DC3'), xs)
'D':'C':'4' : xs -> (FoundChar ('\DC4'), xs)
'N':'A':'K' : xs -> (FoundChar ('\NAK'), xs)
'S':'Y':'N' : xs -> (FoundChar ('\SYN'), xs)
'E':'T':'B' : xs -> (FoundChar ('\ETB'), xs)
'C':'A':'N' : xs -> (FoundChar ('\CAN'), xs)
'E':'M' : xs -> (FoundChar ('\EM'), xs)
'S':'U':'B' : xs -> (FoundChar ('\SUB'), xs)
'E':'S':'C' : xs -> (FoundChar ('\ESC'), xs)
'F':'S' : xs -> (FoundChar ('\FS'), xs)
'G':'S' : xs -> (FoundChar ('\GS'), xs)
'R':'S' : xs -> (FoundChar ('\RS'), xs)
'U':'S' : xs -> (FoundChar ('\US'), xs)
'S':'P' : xs -> (FoundChar ('\SP'), xs)
'D':'E':'L' : xs -> (FoundChar ('\DEL'), xs)
'^':'@' : xs -> (FoundChar ('\^@'), xs)
'^':'A' : xs -> (FoundChar ('\^A'), xs)
'^':'B' : xs -> (FoundChar ('\^B'), xs)
'^':'C' : xs -> (FoundChar ('\^C'), xs)
'^':'D' : xs -> (FoundChar ('\^D'), xs)
'^':'E' : xs -> (FoundChar ('\^E'), xs)
'^':'F' : xs -> (FoundChar ('\^F'), xs)
'^':'G' : xs -> (FoundChar ('\^G'), xs)
'^':'H' : xs -> (FoundChar ('\^H'), xs)
'^':'I' : xs -> (FoundChar ('\^I'), xs)
'^':'J' : xs -> (FoundChar ('\^J'), xs)
'^':'K' : xs -> (FoundChar ('\^K'), xs)
'^':'L' : xs -> (FoundChar ('\^L'), xs)
'^':'M' : xs -> (FoundChar ('\^M'), xs)
'^':'N' : xs -> (FoundChar ('\^N'), xs)
'^':'O' : xs -> (FoundChar ('\^O'), xs)
'^':'P' : xs -> (FoundChar ('\^P'), xs)
'^':'Q' : xs -> (FoundChar ('\^Q'), xs)
'^':'R' : xs -> (FoundChar ('\^R'), xs)
'^':'S' : xs -> (FoundChar ('\^S'), xs)
'^':'T' : xs -> (FoundChar ('\^T'), xs)
'^':'U' : xs -> (FoundChar ('\^U'), xs)
'^':'V' : xs -> (FoundChar ('\^V'), xs)
'^':'W' : xs -> (FoundChar ('\^W'), xs)
'^':'X' : xs -> (FoundChar ('\^X'), xs)
'^':'Y' : xs -> (FoundChar ('\^Y'), xs)
'^':'Z' : xs -> (FoundChar ('\^Z'), xs)
'^':'[' : xs -> (FoundChar ('\^['), xs)
'^':'\\' : xs -> (FoundChar ('\^\'), xs)
'^':']' : xs -> (FoundChar ('\^]'), xs)
'^':'^' : xs -> (FoundChar ('\^^'), xs)
'^':'_' : xs -> (FoundChar ('\^_'), xs)
x:xs -> (UnknownEscape x, xs)
"" -> (EscapeUnterminated, "")
x:xs -> (FoundChar x, xs)
where readHex :: String -> Int
readHex xs = case N.readHex xs of
[(n, "")] -> n
_ -> error "Data.String.Interpolate.Util.readHex: no parse"
readOct :: String -> Int
readOct xs = case N.readOct xs of
[(n, "")] -> n
_ -> error "Data.String.Interpolate.Util.readHex: no parse"