{-# OPTIONS #-}
module Language.Python.Common.LexerUtils where
import Control.Monad (liftM)
import Data.List (foldl')
import Data.Word (Word8)
import Language.Python.Common.Token as Token
import Language.Python.Common.ParserMonad hiding (location)
import Language.Python.Common.SrcLocation
import Codec.Binary.UTF8.String as UTF8 (encode)
type Byte = Word8
data BO = BOF | BOL
type StartCode = Int
type Action = SrcSpan -> Int -> String -> P Token
lineJoin :: Action
lineJoin span _len _str =
return $ LineJoinToken $ spanStartPoint span
endOfLine :: P Token -> Action
endOfLine lexToken span _len _str = do
setLastEOL $ spanStartPoint span
lexToken
bolEndOfLine :: P Token -> Int -> Action
bolEndOfLine lexToken bol span len inp = do
pushStartCode bol
endOfLine lexToken span len inp
dedentation :: P Token -> Action
dedentation lexToken span _len _str = do
topIndent <- getIndent
case compare (startCol span) topIndent of
EQ -> do popStartCode
lexToken
LT -> do popIndent
return dedentToken
GT -> spanError span "indentation error"
indentation :: P Token -> Int -> BO -> Action
indentation lexToken _dedentCode bo _loc _len [] = do
popStartCode
case bo of
BOF -> lexToken
BOL -> newlineToken
indentation lexToken dedentCode bo span _len _str = do
popStartCode
parenDepth <- getParenStackDepth
if parenDepth > 0
then lexToken
else do
topIndent <- getIndent
case compare (startCol span) topIndent of
EQ -> case bo of
BOF -> lexToken
BOL -> newlineToken
LT -> do pushStartCode dedentCode
newlineToken
GT -> do pushIndent (startCol span)
return indentToken
where
indentToken = IndentToken span
symbolToken :: (SrcSpan -> Token) -> Action
symbolToken mkToken location _ _ = return (mkToken location)
token :: (SrcSpan -> String -> a -> Token) -> (String -> a) -> Action
token mkToken read location len str
= return $ mkToken location literal (read literal)
where
literal = take len str
endOfFileToken :: Token
endOfFileToken = EOFToken SpanEmpty
dedentToken = DedentToken SpanEmpty
newlineToken :: P Token
newlineToken = do
loc <- getLastEOL
return $ NewlineToken loc
atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF _user _inputBeforeToken _tokenLength (_loc, _bs, inputAfterToken)
= null inputAfterToken || nextChar == '\n' || nextChar == '\r'
where
nextChar = head inputAfterToken
notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
notEOF _user _inputBeforeToken _tokenLength (_loc, _bs, inputAfterToken)
= not (null inputAfterToken)
delUnderscores :: String -> String
delUnderscores [] = []
delUnderscores ('_':xs) = delUnderscores xs
delUnderscores (x :xs) = x : delUnderscores xs
readBinary :: String -> Integer
readBinary
= toBinary . drop 2
where
toBinary = foldl' acc 0
acc b '0' = 2 * b
acc b '1' = 2 * b + 1
acc _ _ = error "Lexer ensures all digits passed to readBinary are 0 or 1."
readFloat :: String -> Double
readFloat str@('.':cs) = read ('0':readFloatRest str)
readFloat str = read (readFloatRest str)
readFloatRest :: String -> String
readFloatRest [] = []
readFloatRest ['.'] = ".0"
readFloatRest (c:cs) = c : readFloatRest cs
mkString :: (SrcSpan -> String -> Token) -> Action
mkString toToken loc len str = do
return $ toToken loc (take len str)
stringToken :: SrcSpan -> String -> Token
stringToken = StringToken
rawStringToken :: SrcSpan -> String -> Token
rawStringToken = StringToken
byteStringToken :: SrcSpan -> String -> Token
byteStringToken = ByteStringToken
formatStringToken :: SrcSpan -> String -> Token
formatStringToken = StringToken
formatRawStringToken :: SrcSpan -> String -> Token
formatRawStringToken = StringToken
unicodeStringToken :: SrcSpan -> String -> Token
unicodeStringToken = UnicodeStringToken
rawByteStringToken :: SrcSpan -> String -> Token
rawByteStringToken = ByteStringToken
openParen :: (SrcSpan -> Token) -> Action
openParen mkToken loc _len _str = do
let token = mkToken loc
pushParen token
return token
closeParen :: (SrcSpan -> Token) -> Action
closeParen mkToken loc _len _str = do
let token = mkToken loc
topParen <- getParen
case topParen of
Nothing -> spanError loc err1
Just open -> if matchParen open token
then popParen >> return token
else spanError loc err2
where
err1 = "Lexical error ! unmatched closing paren"
err2 = "Lexical error ! unmatched closing paren"
matchParen :: Token -> Token -> Bool
matchParen (LeftRoundBracketToken {}) (RightRoundBracketToken {}) = True
matchParen (LeftBraceToken {}) (RightBraceToken {}) = True
matchParen (LeftSquareBracketToken {}) (RightSquareBracketToken {}) = True
matchParen _ _ = False
type AlexInput = (SrcLocation,
[Byte],
String)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar _ = error "alexInputPrevChar not used"
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (loc, [], input)
| null input = Nothing
| otherwise = seq nextLoc (Just (nextChar, (nextLoc, [], rest)))
where
nextChar = head input
rest = tail input
nextLoc = moveChar nextChar loc
alexGetChar (loc, _:_, _) = error "alexGetChar called with non-empty byte buffer"
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte (loc, b:bs, input) = Just (b, (loc, bs, input))
alexGetByte (loc, [], []) = Nothing
alexGetByte (loc, [], nextChar:rest) =
seq nextLoc (Just (byte, (nextLoc, restBytes, rest)))
where
nextLoc = moveChar nextChar loc
byte:restBytes = UTF8.encode [nextChar]
moveChar :: Char -> SrcLocation -> SrcLocation
moveChar '\n' = incLine 1
moveChar '\t' = incTab
moveChar '\r' = id
moveChar _ = incColumn 1
lexicalError :: P a
lexicalError = do
location <- getLocation
c <- liftM head getInput
throwError $ UnexpectedChar c location
readOctNoO :: String -> Integer
readOctNoO (zero:rest) = read (zero:'O':rest)
readOctNoO [] = error "Lexer ensures readOctNoO is never called on an empty string"