{-# LANGUAGE Trustworthy #-} {-# LANGUAGE NoImplicitPrelude #-} ----------------------------------------------------------------------------- -- | -- Module : GHC.Internal.Text.Read.Lex -- Copyright : (c) The University of Glasgow 2002 -- License : BSD-style (see the file libraries/base/LICENSE) -- -- Maintainer : libraries@haskell.org -- Stability : provisional -- Portability : non-portable (uses Text.ParserCombinators.ReadP) -- -- The cut-down Haskell lexer, used by GHC.Internal.Text.Read -- ----------------------------------------------------------------------------- module Text.Read.Lex -- lexing types ( Lexeme(..), Number , numberToInteger, numberToFixed, numberToRational, numberToRangedRational -- lexer , lex, expect , hsLex , lexChar , readBinP , readIntP , readOctP , readDecP , readHexP , isSymbolChar ) where import Prelude() import Control.Error import Control.Monad import Data.Char import Data.Bool import Data.Bounded import Data.Eq import Data.Function import Data.Int import Data.Integer import Data.Integral import Data.List import Data.Maybe import Data.Num import Data.Ord import Data.Ratio import Data.Tuple import Text.Show import Text.ParserCombinators.ReadP {- import Control.Monad import Data.Enum import Data.List import Data.Maybe import Data.Real -} -- ----------------------------------------------------------------------------- -- Lexing types -- ^ Haskell lexemes. data Lexeme = Char Char -- ^ Character literal | String String -- ^ String literal, with escapes interpreted | Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@ | Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@ | Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@ | Number Number -- ^ @since base-4.6.0.0 | EOF deriving ( Eq -- ^ @since base-2.01 , Show -- ^ @since base-2.01 ) -- | @since base-4.6.0.0 data Number = MkNumber Int -- Base Digits -- Integral part | MkDecimal Digits -- Integral part (Maybe Digits) -- Fractional part (Maybe Integer) -- Exponent deriving ( Eq -- ^ @since base-4.6.0.0 , Show -- ^ @since base-4.6.0.0 ) -- | @since base-4.5.1.0 numberToInteger :: Number -> Maybe Integer numberToInteger (MkNumber base iPart) = Just (val (fromIntegral base) iPart) numberToInteger (MkDecimal iPart Nothing Nothing) = Just (val 10 iPart) numberToInteger _ = Nothing -- | @since base-4.7.0.0 numberToFixed :: Integer -> Number -> Maybe (Integer, Integer) numberToFixed _ (MkNumber base iPart) = Just (val (fromIntegral base) iPart, 0) numberToFixed _ (MkDecimal iPart Nothing Nothing) = Just (val 10 iPart, 0) numberToFixed p (MkDecimal iPart (Just fPart) Nothing) = let i = val 10 iPart f = val 10 (integerTake p (fPart ++ repeat 0)) -- Sigh, we really want genericTake, but that's above us in -- the hierarchy, so we define our own version here (actually -- specialised to Integer) integerTake :: Integer -> [a] -> [a] integerTake n _ | n <= 0 = [] integerTake _ [] = [] integerTake n (x:xs) = x : integerTake (n-1) xs in Just (i, f) numberToFixed _ _ = Nothing -- This takes a floatRange, and if the Rational would be outside of -- the floatRange then it may return Nothing. Not that it will not -- /necessarily/ return Nothing, but it is good enough to fix the -- space problems in #5688 -- Ways this is conservative: -- * the floatRange is in base 2, but we pretend it is in base 10 -- * we pad the floatRange a bit, just in case it is very small -- and we would otherwise hit an edge case -- * We only worry about numbers that have an exponent. If they don't -- have an exponent then the Rational won't be much larger than the -- Number, so there is no problem -- | @since base-4.5.1.0 numberToRangedRational :: (Int, Int) -> Number -> Maybe Rational -- Nothing = Inf numberToRangedRational (neg, pos) n@(MkDecimal iPart mFPart (Just exp)) -- Calculate amount to increase/decrease the exponent, based on (non -- leading zero) places in the iPart, or leading zeros in the fPart. -- If iPart and fPart are all zeros, return Nothing. = let mFirstDigit = case dropWhile (0 ==) iPart of iPart'@(_ : _) -> Just (length iPart') [] -> case mFPart of Nothing -> Nothing Just fPart -> case span (0 ==) fPart of (_, []) -> Nothing (zeroes, _) -> Just (negate (length zeroes)) in case mFirstDigit of Nothing -> Just 0 Just firstDigit -> -- compare exp to bounds as Integer to avoid over/underflow let firstDigit' = toInteger firstDigit + exp in if firstDigit' > toInteger (pos + 3) then Nothing else if firstDigit' < toInteger (neg - 3) then Just 0 else Just (numberToRational n) numberToRangedRational _ n = Just (numberToRational n) -- | @since base-4.6.0.0 numberToRational :: Number -> Rational numberToRational (MkNumber base iPart) = val (fromIntegral base) iPart % 1 numberToRational (MkDecimal iPart mFPart mExp) = let i = val 10 iPart in case (mFPart, mExp) of (Nothing, Nothing) -> i % 1 (Nothing, Just exp) | exp >= 0 -> (i * (10 ^ exp)) % 1 | otherwise -> i % (10 ^ (- exp)) (Just fPart, Nothing) -> fracExp 0 i fPart (Just fPart, Just exp) -> fracExp exp i fPart -- fracExp is a bit more efficient in calculating the Rational. -- Instead of calculating the fractional part alone, then -- adding the integral part and finally multiplying with -- 10 ^ exp if an exponent was given, do it all at once. -- ----------------------------------------------------------------------------- -- Lexing lex :: ReadP Lexeme lex = skipSpaces >> lexToken -- | @since base-4.7.0.0 expect :: Lexeme -> ReadP () expect lexeme = do { skipSpaces ; thing <- lexToken ; if thing == lexeme then return () else pfail } hsLex :: ReadP String -- ^ Haskell lexer: returns the lexed string, rather than the lexeme hsLex = do skipSpaces (s,_) <- gather lexToken return s lexToken :: ReadP Lexeme lexToken = lexEOF +++ lexLitChar +++ lexString +++ lexPunc +++ lexSymbol +++ lexId +++ lexNumber -- ---------------------------------------------------------------------- -- End of file lexEOF :: ReadP Lexeme lexEOF = do s <- look guard (null s) return EOF -- --------------------------------------------------------------------------- -- Single character lexemes lexPunc :: ReadP Lexeme lexPunc = do c <- satisfy isPuncChar return (Punc [c]) -- | The @special@ character class as defined in the Haskell Report. isPuncChar :: Char -> Bool isPuncChar c = c `elem` ",;()[]{}`" -- ---------------------------------------------------------------------- -- Symbols lexSymbol :: ReadP Lexeme lexSymbol = do s <- munch1 isSymbolChar if s `elem` reserved_ops then return (Punc s) -- Reserved-ops count as punctuation else return (Symbol s) where reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"] :: [String] {- isSymbolChar :: Char -> Bool isSymbolChar c = not (isPuncChar c) && case generalCategory c of MathSymbol -> True CurrencySymbol -> True ModifierSymbol -> True OtherSymbol -> True DashPunctuation -> True OtherPunctuation -> not (c `elem` "'\"") ConnectorPunctuation -> c /= '_' _ -> False -} isSymbolChar :: Char -> Bool isSymbolChar c = c `elem` ("!@#$%&?+./<=>?\\^|:-~"::String) -- ---------------------------------------------------------------------- -- identifiers lexId :: ReadP Lexeme lexId = do c <- satisfy isIdsChar s <- munch isIdfChar return (Ident (c:s)) where -- Identifiers can start with a '_' isIdsChar c = isAlpha c || c == '_' isIdfChar c = isAlphaNum c || c `elem` "_'" -- --------------------------------------------------------------------------- -- Lexing character literals lexLitChar :: ReadP Lexeme lexLitChar = do _ <- char '\'' (c,esc) <- lexCharE guard (esc || c /= '\'') -- Eliminate '' possibility _ <- char '\'' return (Char c) lexChar :: ReadP Char lexChar = do { (c,_) <- lexCharE; consumeEmpties; return c } where -- Consumes the string "\&" repeatedly and greedily (will only produce one match) consumeEmpties :: ReadP () consumeEmpties = do rest <- look case rest of ('\\':'&':_) -> string "\\&" >> consumeEmpties _ -> return () lexCharE :: ReadP (Char, Bool) -- "escaped or not"? lexCharE = do c1 <- get if c1 == '\\' then do c2 <- lexEsc; return (c2, True) else return (c1, False) where lexEsc = lexEscChar +++ lexNumeric +++ lexCntrlChar +++ lexAscii lexEscChar = do c <- get case c of 'a' -> return '\a' 'b' -> return '\b' 'f' -> return '\f' 'n' -> return '\n' 'r' -> return '\r' 't' -> return '\t' 'v' -> return '\v' '\\' -> return '\\' '\"' -> return '\"' '\'' -> return '\'' _ -> pfail lexNumeric = do base <- lexBaseChar <++ return 10 n <- lexInteger base guard (n <= toInteger (ord maxBound)) return (chr (fromInteger n)) lexCntrlChar = do _ <- char '^' c <- get case c of '@' -> return '\^@' 'A' -> return '\^A' 'B' -> return '\^B' 'C' -> return '\^C' 'D' -> return '\^D' 'E' -> return '\^E' 'F' -> return '\^F' 'G' -> return '\^G' 'H' -> return '\^H' 'I' -> return '\^I' 'J' -> return '\^J' 'K' -> return '\^K' 'L' -> return '\^L' 'M' -> return '\^M' 'N' -> return '\^N' 'O' -> return '\^O' 'P' -> return '\^P' 'Q' -> return '\^Q' 'R' -> return '\^R' 'S' -> return '\^S' 'T' -> return '\^T' 'U' -> return '\^U' 'V' -> return '\^V' 'W' -> return '\^W' 'X' -> return '\^X' 'Y' -> return '\^Y' 'Z' -> return '\^Z' '[' -> return '\^[' '\\' -> return '\^\' ']' -> return '\^]' '^' -> return '\^^' '_' -> return '\^_' _ -> pfail lexAscii = choice [ (string "SOH" >> return '\SOH') <++ (string "SO" >> return '\SO') -- \SO and \SOH need maximal-munch treatment -- See the Haskell report Sect 2.6 , string "NUL" >> return '\NUL' , string "STX" >> return '\STX' , string "ETX" >> return '\ETX' , string "EOT" >> return '\EOT' , string "ENQ" >> return '\ENQ' , string "ACK" >> return '\ACK' , string "BEL" >> return '\BEL' , string "BS" >> return '\BS' , string "HT" >> return '\HT' , string "LF" >> return '\LF' , string "VT" >> return '\VT' , string "FF" >> return '\FF' , string "CR" >> return '\CR' , string "SI" >> return '\SI' , string "DLE" >> return '\DLE' , string "DC1" >> return '\DC1' , string "DC2" >> return '\DC2' , string "DC3" >> return '\DC3' , string "DC4" >> return '\DC4' , string "NAK" >> return '\NAK' , string "SYN" >> return '\SYN' , string "ETB" >> return '\ETB' , string "CAN" >> return '\CAN' , string "EM" >> return '\EM' , string "SUB" >> return '\SUB' , string "ESC" >> return '\ESC' , string "FS" >> return '\FS' , string "GS" >> return '\GS' , string "RS" >> return '\RS' , string "US" >> return '\US' , string "SP" >> return '\SP' , string "DEL" >> return '\DEL' ] -- --------------------------------------------------------------------------- -- string literal lexString :: ReadP Lexeme lexString = do _ <- char '"' body id where body f = do (c,esc) <- lexStrItem if c /= '"' || esc then body (f.(c:)) else let s = f "" in return (String s) lexStrItem = (lexEmpty >> lexStrItem) +++ lexCharE lexEmpty = do _ <- char '\\' c <- get case c of '&' -> return () _ | isSpace c -> do skipSpaces; _ <- char '\\'; return () _ -> pfail -- --------------------------------------------------------------------------- -- Lexing numbers type Base = Int type Digits = [Int] lexNumber :: ReadP Lexeme lexNumber = lexHexOct <++ -- First try for hex or octal 0x, 0o etc -- If that fails, try for a decimal number lexDecNumber -- Start with ordinary digits lexHexOct :: ReadP Lexeme lexHexOct = do _ <- char '0' base <- lexBaseChar digits <- lexDigits base return (Number (MkNumber base digits)) lexBaseChar :: ReadP Int -- Lex a single character indicating the base; fail if not there lexBaseChar = do c <- get case c of 'b' -> return 2 'B' -> return 2 'o' -> return 8 'O' -> return 8 'x' -> return 16 'X' -> return 16 _ -> pfail lexDecNumber :: ReadP Lexeme lexDecNumber = do xs <- lexDigits 10 mFrac <- lexFrac <++ return Nothing mExp <- lexExp <++ return Nothing return (Number (MkDecimal xs mFrac mExp)) lexFrac :: ReadP (Maybe Digits) -- Read the fractional part; fail if it doesn't -- start ".d" where d is a digit lexFrac = do _ <- char '.' fraction <- lexDigits 10 return (Just fraction) lexExp :: ReadP (Maybe Integer) lexExp = do _ <- char 'e' +++ char 'E' exp <- signedExp +++ lexInteger 10 return (Just exp) where signedExp = do c <- char '-' +++ char '+' n <- lexInteger 10 return (if c == '-' then -n else n) lexDigits :: Int -> ReadP Digits -- Lex a non-empty sequence of digits in specified base lexDigits base = do s <- look xs <- scan s id guard (not (null xs)) return xs where scan (c:cs) f = case valDig base c of Just n -> do _ <- get; scan cs (f.(n:)) Nothing -> return (f []) scan [] f = return (f []) lexInteger :: Base -> ReadP Integer lexInteger base = do xs <- lexDigits base return (val (fromIntegral base) xs) val :: Num a => a -> Digits -> a val = valSimple {-# RULES "val/Integer" val = valInteger #-} {-# INLINE [1] val #-} -- The following algorithm is only linear for types whose Num operations -- are in constant time. valSimple :: (Num a, Integral d) => a -> [d] -> a valSimple base = go 0 where go r [] = r go r (d : ds) = r' `seq` go r' ds where r' = r * base + fromIntegral d {-# INLINE valSimple #-} -- A sub-quadratic algorithm for Integer. Pairs of adjacent radix b -- digits are combined into a single radix b^2 digit. This process is -- repeated until we are left with a single digit. This algorithm -- performs well only on large inputs, so we use the simple algorithm -- for smaller inputs. valInteger :: Integer -> Digits -> Integer valInteger b0 ds0 = go b0 (length ds0) $ map fromIntegral ds0 where go _ _ [] = 0 go _ _ [d] = d go b l ds | l > 40 = b' `seq` go b' l' (combine b ds') | otherwise = valSimple b ds where -- ensure that we have an even number of digits -- before we call combine: ds' = if even l then ds else 0 : ds b' = b * b l' = (l + 1) `quot` 2 combine b (d1 : d2 : ds) = d `seq` (d : combine b ds) where d = d1 * b + d2 combine _ [] = [] combine _ [_] = errorWithoutStackTrace "this should not happen" -- Calculate a Rational from the exponent [of 10 to multiply with], -- the integral part of the mantissa and the digits of the fractional -- part. Leaving the calculation of the power of 10 until the end, -- when we know the effective exponent, saves multiplications. -- More importantly, this way we need at most one gcd instead of three. -- -- frac was never used with anything but Integer and base 10, so -- those are hardcoded now (trivial to change if necessary). fracExp :: Integer -> Integer -> Digits -> Rational fracExp exp mant [] | exp < 0 = mant % (10 ^ (-exp)) | otherwise = fromInteger (mant * 10 ^ exp) fracExp exp mant (d:ds) = exp' `seq` mant' `seq` fracExp exp' mant' ds where exp' = exp - 1 mant' = mant * 10 + fromIntegral d valDig :: (Eq a, Num a) => a -> Char -> Maybe Int valDig 2 c | '0' <= c && c <= '1' = Just (ord c - ord '0') | otherwise = Nothing valDig 8 c | '0' <= c && c <= '7' = Just (ord c - ord '0') | otherwise = Nothing valDig 10 c = valDecDig c valDig 16 c | '0' <= c && c <= '9' = Just (ord c - ord '0') | 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10) | 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10) | otherwise = Nothing valDig _ _ = errorWithoutStackTrace "valDig: Bad base" valDecDig :: Char -> Maybe Int valDecDig c | '0' <= c && c <= '9' = Just (ord c - ord '0') | otherwise = Nothing -- ---------------------------------------------------------------------- -- other numeric lexing functions readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a readIntP base isDigit valDigit = do s <- munch1 isDigit return (val base (map valDigit s)) {-# SPECIALISE readIntP :: Integer -> (Char -> Bool) -> (Char -> Int) -> ReadP Integer #-} readIntP' :: (Eq a, Num a) => a -> ReadP a readIntP' base = readIntP base isDigit valDigit where isDigit c = maybe False (const True) (valDig base c) valDigit c = maybe 0 id (valDig base c) {-# SPECIALISE readIntP' :: Integer -> ReadP Integer #-} readBinP, readOctP, readDecP, readHexP :: (Eq a, Num a) => ReadP a readBinP = readIntP' 2 readOctP = readIntP' 8 readDecP = readIntP' 10 readHexP = readIntP' 16 {-# SPECIALISE readBinP :: ReadP Integer #-} {-# SPECIALISE readOctP :: ReadP Integer #-} {-# SPECIALISE readDecP :: ReadP Integer #-} {-# SPECIALISE readHexP :: ReadP Integer #-}