module Language.Preprocessor.Cpphs.Tokenise
( linesCpp
, reslash
, tokenise
, WordStyle(..)
, deWordStyle
, parseMacroCall
) where
import Data.Char
import Language.Preprocessor.Cpphs.HashDefine
import Language.Preprocessor.Cpphs.Position
data Mode = Haskell | Cpp
linesCpp :: String -> [String]
linesCpp [] = []
linesCpp (x:xs) | x=='#' = tok Cpp ['#'] xs
| otherwise = tok Haskell [] (x:xs)
where
tok Cpp acc ('\\':'\n':ys) = tok Cpp ('\n':acc) ys
tok _ acc ('\n':'#':ys) = reverse acc: tok Cpp ['#'] ys
tok _ acc ('\n':ys) = reverse acc: tok Haskell [] ys
tok _ acc [] = reverse acc: []
tok mode acc (y:ys) = tok mode (y:acc) ys
reslash :: String -> String
reslash ('\n':xs) = '\\':'\n':reslash xs
reslash (x:xs) = x: reslash xs
reslash [] = []
data SubMode = Any | Pred (Char->Bool) (Posn->String->WordStyle)
| String Char | LineComment | NestComment Int
| CComment | CLineComment
data WordStyle = Ident Posn String | Other String | Cmd (Maybe HashDefine)
deriving (Eq,Show)
other :: Posn -> String -> WordStyle
other _ s = Other s
deWordStyle :: WordStyle -> String
deWordStyle (Ident _ i) = i
deWordStyle (Other i) = i
deWordStyle (Cmd _) = "\n"
tokenise :: Bool -> Bool -> Bool -> Bool -> [(Posn,String)] -> [WordStyle]
tokenise _ _ _ _ [] = []
tokenise stripEol stripComments ansi lang ((pos,str):pos_strs) =
(if lang then haskell else plaintext) Any [] pos pos_strs str
where
haskell :: SubMode -> String -> Posn -> [(Posn,String)]
-> String -> [WordStyle]
haskell Any acc p ls ('\n':'#':xs) = emit acc $
cpp Any haskell [] [] p ls xs
haskell Any acc p ls ('-':'-':xs) = emit acc $
haskell LineComment "--" p ls xs
haskell Any acc p ls ('{':'-':xs) = emit acc $
haskell (NestComment 0) "-{" p ls xs
haskell Any acc p ls ('/':'*':xs)
| stripComments = emit acc $
haskell CComment " " p ls xs
haskell Any acc p ls ('/':'/':xs)
| stripEol = emit acc $
haskell CLineComment " " p ls xs
haskell Any acc p ls ('"':xs) = emit acc $
haskell (String '"') ['"'] p ls xs
haskell Any acc p ls ('\'':'\'':xs) = emit acc $
haskell Any "''" p ls xs
haskell Any acc p ls ('\'':xs@('\\':_)) = emit acc $
haskell (String '\'') "'" p ls xs
haskell Any acc p ls ('\'':x:'\'':xs) = emit acc $
emit ['\'', x, '\''] $
haskell Any [] p ls xs
haskell Any acc p ls ('\'':xs) = emit acc $
haskell Any "'" p ls xs
haskell Any acc p ls (x:xs) | single x = emit acc $ emit [x] $
haskell Any [] p ls xs
haskell Any acc p ls (x:xs) | space x = emit acc $
haskell (Pred space other) [x]
p ls xs
haskell Any acc p ls (x:xs) | symbol x = emit acc $
haskell (Pred symbol other) [x]
p ls xs
haskell Any acc p ls (x:xs) | ident0 x = emit acc $
haskell (Pred ident1 Ident) [x]
p ls xs
haskell Any acc p ls (x:xs) = haskell Any (x:acc) p ls xs
haskell pre@(Pred pred ws) acc p ls (x:xs)
| pred x = haskell pre (x:acc) p ls xs
haskell (Pred _ ws) acc p ls xs = ws p (reverse acc):
haskell Any [] p ls xs
haskell (String c) acc p ls ('\\':x:xs)
| x=='\\' = haskell (String c) ('\\':'\\':acc) p ls xs
| x==c = haskell (String c) (c:'\\':acc) p ls xs
haskell (String c) acc p ls (x:xs)
| x==c = emit (c:acc) $ haskell Any [] p ls xs
| otherwise = haskell (String c) (x:acc) p ls xs
haskell LineComment acc p ls xs@('\n':_) = emit acc $ haskell Any [] p ls xs
haskell LineComment acc p ls (x:xs) = haskell LineComment (x:acc) p ls xs
haskell (NestComment n) acc p ls ('{':'-':xs)
= haskell (NestComment (n+1))
("-{"++acc) p ls xs
haskell (NestComment 0) acc p ls ('-':'}':xs)
= emit ("}-"++acc) $ haskell Any [] p ls xs
haskell (NestComment n) acc p ls ('-':'}':xs)
= haskell (NestComment (n1))
("}-"++acc) p ls xs
haskell (NestComment n) acc p ls (x:xs) = haskell (NestComment n) (x:acc)
p ls xs
haskell CComment acc p ls ('*':'/':xs) = emit (" "++acc) $
haskell Any [] p ls xs
haskell CComment acc p ls (x:xs) = haskell CComment (white x:acc) p ls xs
haskell CLineComment acc p ls xs@('\n':_)= emit acc $ haskell Any [] p ls xs
haskell CLineComment acc p ls (_:xs) = haskell CLineComment (' ':acc)
p ls xs
haskell mode acc _ ((p,l):ls) [] = haskell mode acc p ls ('\n':l)
haskell _ acc _ [] [] = emit acc $ []
cpp :: SubMode -> (SubMode -> String -> Posn -> [(Posn,String)]
-> String -> [WordStyle])
-> String -> [String] -> Posn -> [(Posn,String)]
-> String -> [WordStyle]
cpp mode next word line pos remaining input =
lexcpp mode word line remaining input
where
lexcpp Any w l ls ('/':'*':xs) = lexcpp (NestComment 0) "" (w*/*l) ls xs
lexcpp Any w l ls ('/':'/':xs) = lexcpp LineComment " " (w*/*l) ls xs
lexcpp Any w l ((p,l'):ls) ('\\':[]) = cpp Any next [] ("\n":w*/*l) p ls l'
lexcpp Any w l ls ('\\':'\n':xs) = lexcpp Any [] ("\n":w*/*l) ls xs
lexcpp Any w l ls xs@('\n':_) = Cmd (parseHashDefine ansi
(reverse (w*/*l))):
next Any [] pos ls xs
lexcpp Any w l ls ('"':xs) = lexcpp Any [] ("\"":(w*/*l)) ls xs
lexcpp Any w l ls ('\'':xs) = lexcpp Any [] ("'": (w*/*l)) ls xs
lexcpp Any [] l ls (x:xs)
| ident0 x = lexcpp (Pred ident1 Ident) [x] l ls xs
lexcpp Any w l ls (x:xs)
| single x = lexcpp Any [] ([x]:w*/*l) ls xs
| space x = lexcpp (Pred space other) [x] (w*/*l) ls xs
| symbol x = lexcpp (Pred symbol other) [x] (w*/*l) ls xs
| otherwise = lexcpp Any (x:w) l ls xs
lexcpp pre@(Pred pred _) w l ls (x:xs)
| pred x = lexcpp pre (x:w) l ls xs
lexcpp (Pred _ _) w l ls xs = lexcpp Any [] (w*/*l) ls xs
lexcpp (String c) w l ls ('\\':x:xs)
| x=='\\' = lexcpp (String c) ('\\':'\\':w) l ls xs
| x==c = lexcpp (String c) (c:'\\':w) l ls xs
lexcpp (String c) w l ls (x:xs)
| x==c = lexcpp Any [] ((c:w)*/*l) ls xs
| otherwise = lexcpp (String c) (x:w) l ls xs
lexcpp LineComment w l ((p,l'):ls) ('\\':[])
= cpp LineComment next [] (('\n':w)*/*l) pos ls l'
lexcpp LineComment w l ls ('\\':'\n':xs)
= lexcpp LineComment [] (('\n':w)*/*l) ls xs
lexcpp LineComment w l ls xs@('\n':_) = lexcpp Any w l ls xs
lexcpp LineComment w l ls (_:xs) = lexcpp LineComment (' ':w) l ls xs
lexcpp (NestComment _) w l ls ('*':'/':xs)
= lexcpp Any [] (w*/*l) ls xs
lexcpp (NestComment n) w l ls (x:xs) = lexcpp (NestComment n) (white x:w) l
ls xs
lexcpp mode w l ((p,l'):ls) [] = cpp mode next w l p ls ('\n':l')
lexcpp _ _ _ [] [] = []
plaintext :: SubMode -> String -> Posn -> [(Posn,String)]
-> String -> [WordStyle]
plaintext Any acc p ls ('\n':'#':xs) = emit acc $
cpp Any plaintext [] [] p ls xs
plaintext Any acc p ls ('/':'*':xs)
| stripComments = emit acc $
plaintext CComment " " p ls xs
plaintext Any acc p ls ('/':'/':xs)
| stripEol = emit acc $
plaintext CLineComment " " p ls xs
plaintext Any acc p ls (x:xs) | single x = emit acc $ emit [x] $
plaintext Any [] p ls xs
plaintext Any acc p ls (x:xs) | space x = emit acc $
plaintext (Pred space other) [x]
p ls xs
plaintext Any acc p ls (x:xs) | ident0 x = emit acc $
plaintext (Pred ident1 Ident) [x]
p ls xs
plaintext Any acc p ls (x:xs) = plaintext Any (x:acc) p ls xs
plaintext pre@(Pred pred ws) acc p ls (x:xs)
| pred x = plaintext pre (x:acc) p ls xs
plaintext (Pred _ ws) acc p ls xs = ws p (reverse acc):
plaintext Any [] p ls xs
plaintext CComment acc p ls ('*':'/':xs) = emit (" "++acc) $
plaintext Any [] p ls xs
plaintext CComment acc p ls (x:xs) = plaintext CComment (white x:acc) p ls xs
plaintext CLineComment acc p ls xs@('\n':_)
= emit acc $ plaintext Any [] p ls xs
plaintext CLineComment acc p ls (_:xs)= plaintext CLineComment (' ':acc)
p ls xs
plaintext mode acc _ ((p,l):ls) [] = plaintext mode acc p ls ('\n':l)
plaintext _ acc _ [] [] = emit acc $ []
ident0 x = isAlpha x || x `elem` "_`"
ident1 x = isAlphaNum x || x `elem` "'_`"
symbol x = x `elem` ":!#$%&*+./<=>?@\\^|-~"
single x = x `elem` "(),[];{}"
space x = x `elem` " \t"
white '\n' = '\n'
white '\r' = '\r'
white _ = ' '
emit "" = id
emit xs = (Other (reverse xs):)
"" */* l = l
w */* l = reverse w : l
parseMacroCall :: Posn -> [WordStyle] -> Maybe ([[WordStyle]],[WordStyle])
parseMacroCall p = call . skip
where
skip (Other x:xs) | all isSpace x = skip xs
skip xss = xss
call (Other "(":xs) = (args (0::Int) [] [] . skip) xs
call _ = Nothing
args 0 w acc ( Other ")" :xs) = Just (reverse (addone w acc), xs)
args 0 w acc ( Other "," :xs) = args 0 [] (addone w acc) (skip xs)
args n w acc (x@(Other "("):xs) = args (n+1) (x:w) acc xs
args n w acc (x@(Other ")"):xs) = args (n1) (x:w) acc xs
args n w acc ( Ident _ v :xs) = args n (Ident p v:w) acc xs
args n w acc (x@(Other _) :xs) = args n (x:w) acc xs
args _ _ _ _ = Nothing
addone w acc = reverse (skip w): acc