{-# LANGUAGE PatternGuards #-}
module BNFC.Lexing
( mkLexer, LexType(..), mkRegMultilineComment ) where
import Prelude hiding ((<>))
import Data.List ( inits, tails )
import BNFC.Abs ( Reg(..) )
import BNFC.Print ( printTree )
import BNFC.CF
import BNFC.Regex ( simpReg )
import BNFC.Utils ( unless )
debugPrint :: Reg -> IO ()
debugPrint :: Reg -> IO ()
debugPrint = String -> IO ()
putStrLn (String -> IO ()) -> (Reg -> String) -> Reg -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([String] -> String) -> (Reg -> [String]) -> Reg -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words (String -> [String]) -> (Reg -> String) -> Reg -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Reg -> String
forall a. Print a => a -> String
printTree
data LexType = | LexToken String | LexSymbols
mkLexer :: CF -> [(Reg, LexType)]
mkLexer :: CF -> [(Reg, LexType)]
mkLexer CF
cf = [[(Reg, LexType)]] -> [(Reg, LexType)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
[ [ (String -> Reg
mkRegSingleLineComment String
s, LexType
LexComment) | String
s <- ([(String, String)], [String]) -> [String]
forall a b. (a, b) -> b
snd (CF -> ([(String, String)], [String])
comments CF
cf) ]
, [ (String -> String -> Reg
mkRegMultilineComment String
b String
e, LexType
LexComment) | (String
b,String
e) <- ([(String, String)], [String]) -> [(String, String)]
forall a b. (a, b) -> a
fst (CF -> ([(String, String)], [String])
comments CF
cf) ]
, [ (Reg
reg, String -> LexType
LexToken String
name) | (String
name, Reg
reg) <- CF -> [(String, Reg)]
forall f. CFG f -> [(String, Reg)]
tokenPragmas CF
cf]
, [ ( Reg
regIdent, String -> LexType
LexToken String
"Ident" ) ]
, Bool -> [(Reg, LexType)] -> [(Reg, LexType)]
forall m. Monoid m => Bool -> m -> m
unless ([String] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null ([String] -> Bool) -> [String] -> Bool
forall a b. (a -> b) -> a -> b
$ CF -> [String]
forall function. CFG function -> [String]
cfgSymbols CF
cf) [ ((Reg -> Reg -> Reg) -> [Reg] -> Reg
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldl1 Reg -> Reg -> Reg
RAlt ((String -> Reg) -> [String] -> [Reg]
forall a b. (a -> b) -> [a] -> [b]
map String -> Reg
RSeqs (CF -> [String]
forall function. CFG function -> [String]
cfgSymbols CF
cf)), LexType
LexSymbols ) ]
, [ ( Reg
regInteger, String -> LexType
LexToken String
"Integer")
, ( Reg
regDouble , String -> LexType
LexToken String
"Double" )
, ( Reg
regString , String -> LexType
LexToken String
"String" )
, ( Reg
regChar , String -> LexType
LexToken String
"Char" )
]
]
<> :: Reg -> Reg -> Reg
(<>) = Reg -> Reg -> Reg
RSeq
<|> :: Reg -> Reg -> Reg
(<|>) = Reg -> Reg -> Reg
RAlt
regIdent :: Reg
regIdent :: Reg
regIdent = Reg
RLetter Reg -> Reg -> Reg
<> Reg -> Reg
RStar (Reg
RLetter Reg -> Reg -> Reg
<|> Reg
RDigit Reg -> Reg -> Reg
<|> Char -> Reg
RChar Char
'_' Reg -> Reg -> Reg
<|> Char -> Reg
RChar Char
'\'')
regInteger :: Reg
regInteger :: Reg
regInteger = Reg -> Reg
RPlus Reg
RDigit
regString :: Reg
regString :: Reg
regString = Char -> Reg
RChar Char
'"'
Reg -> Reg -> Reg
<> Reg -> Reg
RStar ((Reg
RAny Reg -> Reg -> Reg
`RMinus` String -> Reg
RAlts String
"\"\\")
Reg -> Reg -> Reg
<|> (Char -> Reg
RChar Char
'\\' Reg -> Reg -> Reg
<> String -> Reg
RAlts String
"\"\\nt"))
Reg -> Reg -> Reg
<> Char -> Reg
RChar Char
'"'
regChar :: Reg
regChar :: Reg
regChar = Char -> Reg
RChar Char
'\''
Reg -> Reg -> Reg
<> (Reg -> Reg -> Reg
RMinus Reg
RAny (String -> Reg
RAlts String
"'\\") Reg -> Reg -> Reg
<|> (Char -> Reg
RChar Char
'\\' Reg -> Reg -> Reg
<> String -> Reg
RAlts String
"'\\nt"))
Reg -> Reg -> Reg
<> Char -> Reg
RChar Char
'\''
regDouble :: Reg
regDouble :: Reg
regDouble = Reg -> Reg
RPlus Reg
RDigit Reg -> Reg -> Reg
<> Char -> Reg
RChar Char
'.' Reg -> Reg -> Reg
<> Reg -> Reg
RPlus Reg
RDigit
Reg -> Reg -> Reg
<> Reg -> Reg
ROpt (Char -> Reg
RChar Char
'e' Reg -> Reg -> Reg
<> Reg -> Reg
ROpt (Char -> Reg
RChar Char
'-') Reg -> Reg -> Reg
<> Reg -> Reg
RPlus Reg
RDigit)
mkRegSingleLineComment :: String -> Reg
String
s = String -> Reg
RSeqs String
s Reg -> Reg -> Reg
<> Reg -> Reg
RStar Reg
RAny Reg -> Reg -> Reg
<> Char -> Reg
RChar Char
'\n'
mkRegMultilineComment :: String -> String -> Reg
String
begin String
end = Reg -> Reg
simpReg (Reg -> Reg) -> Reg -> Reg
forall a b. (a -> b) -> a -> b
$ Reg -> [Reg] -> Reg
joinSteps (String -> Reg
RSeqs String
begin) [Reg]
allSteps
where
joinSteps :: Reg -> [Reg] -> Reg
joinSteps :: Reg -> [Reg] -> Reg
joinSteps = (Reg -> Reg -> Reg) -> Reg -> [Reg] -> Reg
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((Reg -> Reg -> Reg) -> Reg -> Reg -> Reg
forall a b c. (a -> b -> c) -> b -> a -> c
flip Reg -> Reg -> Reg
RSeq)
allSteps :: [Reg]
allSteps :: [Reg]
allSteps = ([Reg], String) -> [Reg]
forall a b. (a, b) -> a
fst (([Reg], String) -> [Reg]) -> ([Reg], String) -> [Reg]
forall a b. (a -> b) -> a -> b
$ (([Reg], String) -> Char -> ([Reg], String))
-> ([Reg], String) -> String -> ([Reg], String)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl ([Reg], String) -> Char -> ([Reg], String)
next ([],[]) String
end
next :: ([Reg],[Char]) -> Char -> ([Reg],[Char])
next :: ([Reg], String) -> Char -> ([Reg], String)
next
( [Reg]
steps
, String
ys
) Char
x
= (Reg
step Reg -> [Reg] -> [Reg]
forall a. a -> [a] -> [a]
: [Reg]
steps, Char
xChar -> String -> String
forall a. a -> [a] -> [a]
:String
ys)
where
step :: Reg
step :: Reg
step = Reg -> Reg
RStar Reg
idle Reg -> Reg -> Reg
`RSeq` Char -> Reg
RChar Char
x
idle :: Reg
idle :: Reg
idle = (Reg -> Reg -> Reg) -> Reg -> [Reg] -> Reg
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Reg -> Reg -> Reg
RAlt Reg
toStart ([Reg] -> Reg) -> [Reg] -> Reg
forall a b. (a -> b) -> a -> b
$ ((Char, Reg) -> Reg) -> [(Char, Reg)] -> [Reg]
forall a b. (a -> b) -> [a] -> [b]
map (Char, Reg) -> Reg
forall a b. (a, b) -> b
snd [(Char, Reg)]
possibilities
where
possibilities :: [(Char,Reg)]
possibilities :: [(Char, Reg)]
possibilities = ([(Char, Reg)] -> (Char, Bool, [Reg]) -> [(Char, Reg)])
-> [(Char, Reg)] -> [(Char, Bool, [Reg])] -> [(Char, Reg)]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl [(Char, Reg)] -> (Char, Bool, [Reg]) -> [(Char, Reg)]
addPoss [] (String -> [Bool] -> [[Reg]] -> [(Char, Bool, [Reg])]
forall a b c. [a] -> [b] -> [c] -> [(a, b, c)]
zip3 String
ys [Bool]
conds ([[Reg]] -> [(Char, Bool, [Reg])])
-> [[Reg]] -> [(Char, Bool, [Reg])]
forall a b. (a -> b) -> a -> b
$ [Reg] -> [[Reg]]
forall a. [a] -> [[a]]
inits [Reg]
steps)
toStart :: Reg
toStart :: Reg
toStart = Reg -> [Reg] -> Reg
joinSteps (Reg
RAny Reg -> Reg -> Reg
`RMinus` String -> Reg
RAlts (Char
x Char -> String -> String
forall a. a -> [a] -> [a]
: ((Char, Reg) -> Char) -> [(Char, Reg)] -> String
forall a b. (a -> b) -> [a] -> [b]
map (Char, Reg) -> Char
forall a b. (a, b) -> a
fst [(Char, Reg)]
possibilities)) [Reg]
steps
addPoss :: [(Char,Reg)] -> (Char,Bool,[Reg]) -> [(Char,Reg)]
addPoss :: [(Char, Reg)] -> (Char, Bool, [Reg]) -> [(Char, Reg)]
addPoss
[(Char, Reg)]
poss
(Char
z, Bool
cond, [Reg]
steps)
| Bool
cond, Char
z Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
exclude = (Char
z, Reg -> [Reg] -> Reg
joinSteps (Char -> Reg
RChar Char
z) [Reg]
steps) (Char, Reg) -> [(Char, Reg)] -> [(Char, Reg)]
forall a. a -> [a] -> [a]
: [(Char, Reg)]
poss
| Bool
otherwise = [(Char, Reg)]
poss
where
exclude :: [Char]
exclude :: String
exclude = Char
x Char -> String -> String
forall a. a -> [a] -> [a]
: ((Char, Reg) -> Char) -> [(Char, Reg)] -> String
forall a b. (a -> b) -> [a] -> [b]
map (Char, Reg) -> Char
forall a b. (a, b) -> a
fst [(Char, Reg)]
poss
conds :: [Bool]
conds :: [Bool]
conds = (String -> String -> Bool) -> [String] -> [String] -> [Bool]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith String -> String -> Bool
forall a. Eq a => a -> a -> Bool
(==) ([String] -> [String]
forall a. [a] -> [a]
tail ([String] -> [String]) -> [String] -> [String]
forall a b. (a -> b) -> a -> b
$ [String] -> [String]
forall a. [a] -> [a]
reverse ([String] -> [String]) -> [String] -> [String]
forall a b. (a -> b) -> a -> b
$ String -> [String]
forall a. [a] -> [[a]]
inits String
ys) ([String] -> [String]
forall a. [a] -> [a]
tail ([String] -> [String]) -> [String] -> [String]
forall a b. (a -> b) -> a -> b
$ String -> [String]
forall a. [a] -> [[a]]
tails String
ys)