{-# LANGUAGE PackageImports #-}
module Data.String.Interpolate.Parse
( InterpSegment(..), parseInterpSegments, dosToUnix )
where
import Data.Char
import qualified "base" Numeric as N
data InterpSegment
= Expression String
| Verbatim String
| Newline
| Spaces Int
| Tabs Int
deriving (Eq, Show)
parseInterpSegments :: String -> Either String [InterpSegment]
parseInterpSegments = switch
where switch :: String -> Either String [InterpSegment]
switch "" = pure []
switch ('#':'{':rest) = expr rest
switch ('#':rest) = verbatim "#" rest
switch ('\n':rest) = newline rest
switch (' ':rest) = spaces 1 rest
switch ('\t':rest) = tabs 1 rest
switch other = verbatim "" other
verbatim :: String -> String -> Either String [InterpSegment]
verbatim acc parsee = case parsee of
"" ->
((Verbatim . reverse) acc :) <$> switch parsee
(c:_) | c `elem` ['#', ' ', '\t', '\n'] ->
((Verbatim . reverse) acc :) <$> switch parsee
('\\':'#':rest) ->
verbatim ('#':acc) rest
('\\':_) -> case unescapeChar parsee of
(Nothing, rest) -> verbatim acc rest
(Just c, rest) -> verbatim (c:acc) rest
c:cs ->
verbatim (c:acc) cs
expr :: String -> Either String [InterpSegment]
expr parsee = case span (/= '}') parsee of
(_, "") -> Left "unterminated #{...} interpolation"
(expr, _:rest) -> (Expression expr :) <$> switch rest
newline :: String -> Either String [InterpSegment]
newline parsee = (Newline :) <$> switch parsee
spaces :: Int -> String -> Either String [InterpSegment]
spaces n (' ':rest) = spaces (n+1) rest
spaces n other = (Spaces n :) <$> switch other
tabs :: Int -> String -> Either String [InterpSegment]
tabs n ('\t':rest) = tabs (n+1) rest
tabs n other = (Tabs n :) <$> switch other
dosToUnix :: String -> String
dosToUnix = go
where go xs = case xs of
'\r' : '\n' : ys -> '\n' : go ys
y : ys -> y : go ys
[] -> []
unescapeChar :: String -> (Maybe Char, String)
unescapeChar input = case input of
"" -> (Nothing, input)
'\\' : 'x' : x : xs | isHexDigit x -> case span isHexDigit xs of
(ys, zs) -> ((Just . chr . readHex $ x:ys), zs)
'\\' : 'o' : x : xs | isOctDigit x -> case span isOctDigit xs of
(ys, zs) -> ((Just . chr . readOct $ x:ys), zs)
'\\' : x : xs | isDigit x -> case span isDigit xs of
(ys, zs) -> ((Just . chr . read $ x:ys), zs)
'\\' : input_ -> case input_ of
'\\' : xs -> (Just ('\\'), xs)
'a' : xs -> (Just ('\a'), xs)
'b' : xs -> (Just ('\b'), xs)
'f' : xs -> (Just ('\f'), xs)
'n' : xs -> (Just ('\n'), xs)
'r' : xs -> (Just ('\r'), xs)
't' : xs -> (Just ('\t'), xs)
'v' : xs -> (Just ('\v'), xs)
'&' : xs -> (Nothing, xs)
'N':'U':'L' : xs -> (Just ('\NUL'), xs)
'S':'O':'H' : xs -> (Just ('\SOH'), xs)
'S':'T':'X' : xs -> (Just ('\STX'), xs)
'E':'T':'X' : xs -> (Just ('\ETX'), xs)
'E':'O':'T' : xs -> (Just ('\EOT'), xs)
'E':'N':'Q' : xs -> (Just ('\ENQ'), xs)
'A':'C':'K' : xs -> (Just ('\ACK'), xs)
'B':'E':'L' : xs -> (Just ('\BEL'), xs)
'B':'S' : xs -> (Just ('\BS'), xs)
'H':'T' : xs -> (Just ('\HT'), xs)
'L':'F' : xs -> (Just ('\LF'), xs)
'V':'T' : xs -> (Just ('\VT'), xs)
'F':'F' : xs -> (Just ('\FF'), xs)
'C':'R' : xs -> (Just ('\CR'), xs)
'S':'O' : xs -> (Just ('\SO'), xs)
'S':'I' : xs -> (Just ('\SI'), xs)
'D':'L':'E' : xs -> (Just ('\DLE'), xs)
'D':'C':'1' : xs -> (Just ('\DC1'), xs)
'D':'C':'2' : xs -> (Just ('\DC2'), xs)
'D':'C':'3' : xs -> (Just ('\DC3'), xs)
'D':'C':'4' : xs -> (Just ('\DC4'), xs)
'N':'A':'K' : xs -> (Just ('\NAK'), xs)
'S':'Y':'N' : xs -> (Just ('\SYN'), xs)
'E':'T':'B' : xs -> (Just ('\ETB'), xs)
'C':'A':'N' : xs -> (Just ('\CAN'), xs)
'E':'M' : xs -> (Just ('\EM'), xs)
'S':'U':'B' : xs -> (Just ('\SUB'), xs)
'E':'S':'C' : xs -> (Just ('\ESC'), xs)
'F':'S' : xs -> (Just ('\FS'), xs)
'G':'S' : xs -> (Just ('\GS'), xs)
'R':'S' : xs -> (Just ('\RS'), xs)
'U':'S' : xs -> (Just ('\US'), xs)
'S':'P' : xs -> (Just ('\SP'), xs)
'D':'E':'L' : xs -> (Just ('\DEL'), xs)
'^':'@' : xs -> (Just ('\^@'), xs)
'^':'A' : xs -> (Just ('\^A'), xs)
'^':'B' : xs -> (Just ('\^B'), xs)
'^':'C' : xs -> (Just ('\^C'), xs)
'^':'D' : xs -> (Just ('\^D'), xs)
'^':'E' : xs -> (Just ('\^E'), xs)
'^':'F' : xs -> (Just ('\^F'), xs)
'^':'G' : xs -> (Just ('\^G'), xs)
'^':'H' : xs -> (Just ('\^H'), xs)
'^':'I' : xs -> (Just ('\^I'), xs)
'^':'J' : xs -> (Just ('\^J'), xs)
'^':'K' : xs -> (Just ('\^K'), xs)
'^':'L' : xs -> (Just ('\^L'), xs)
'^':'M' : xs -> (Just ('\^M'), xs)
'^':'N' : xs -> (Just ('\^N'), xs)
'^':'O' : xs -> (Just ('\^O'), xs)
'^':'P' : xs -> (Just ('\^P'), xs)
'^':'Q' : xs -> (Just ('\^Q'), xs)
'^':'R' : xs -> (Just ('\^R'), xs)
'^':'S' : xs -> (Just ('\^S'), xs)
'^':'T' : xs -> (Just ('\^T'), xs)
'^':'U' : xs -> (Just ('\^U'), xs)
'^':'V' : xs -> (Just ('\^V'), xs)
'^':'W' : xs -> (Just ('\^W'), xs)
'^':'X' : xs -> (Just ('\^X'), xs)
'^':'Y' : xs -> (Just ('\^Y'), xs)
'^':'Z' : xs -> (Just ('\^Z'), xs)
'^':'[' : xs -> (Just ('\^['), xs)
'^':'\\' : xs -> (Just ('\^\'), xs)
'^':']' : xs -> (Just ('\^]'), xs)
'^':'^' : xs -> (Just ('\^^'), xs)
'^':'_' : xs -> (Just ('\^_'), xs)
xs -> (Nothing, xs)
x:xs -> (Just x, xs)
where readHex :: String -> Int
readHex xs = case N.readHex xs of
[(n, "")] -> n
_ -> error "Data.String.Interpolate.Util.readHex: no parse"
readOct :: String -> Int
readOct xs = case N.readOct xs of
[(n, "")] -> n
_ -> error "Data.String.Interpolate.Util.readHex: no parse"