{-# LANGUAGE PatternGuards #-}
module BNFC.Lexing
( mkLexer, LexType(..), mkRegMultilineComment
, debugPrint
) where
import Data.List ( inits, tails )
import BNFC.Abs ( Reg(..) )
import BNFC.Print ( printTree )
import BNFC.CF
import BNFC.Regex ( simpReg )
import BNFC.Utils ( unless )
debugPrint :: Reg -> IO ()
debugPrint :: Reg -> IO ()
debugPrint = [Char] -> IO ()
putStrLn ([Char] -> IO ()) -> (Reg -> [Char]) -> Reg -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[Char]] -> [Char]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Char]] -> [Char]) -> (Reg -> [[Char]]) -> Reg -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [[Char]]
words ([Char] -> [[Char]]) -> (Reg -> [Char]) -> Reg -> [[Char]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Reg -> [Char]
forall a. Print a => a -> [Char]
printTree
data LexType = | LexToken String | LexSymbols
mkLexer :: CF -> [(Reg, LexType)]
mkLexer :: CF -> [(Reg, LexType)]
mkLexer CF
cf = [[(Reg, LexType)]] -> [(Reg, LexType)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
[ [ ([Char] -> Reg
mkRegSingleLineComment [Char]
s, LexType
LexComment) | [Char]
s <- ([([Char], [Char])], [[Char]]) -> [[Char]]
forall a b. (a, b) -> b
snd (CF -> ([([Char], [Char])], [[Char]])
comments CF
cf) ]
, [ ([Char] -> [Char] -> Reg
mkRegMultilineComment [Char]
b [Char]
e, LexType
LexComment) | ([Char]
b,[Char]
e) <- ([([Char], [Char])], [[Char]]) -> [([Char], [Char])]
forall a b. (a, b) -> a
fst (CF -> ([([Char], [Char])], [[Char]])
comments CF
cf) ]
, [ (Reg
reg, [Char] -> LexType
LexToken [Char]
name) | ([Char]
name, Reg
reg) <- CF -> [([Char], Reg)]
forall f. CFG f -> [([Char], Reg)]
tokenPragmas CF
cf]
, [ ( Reg
regIdent, [Char] -> LexType
LexToken [Char]
"Ident" ) ]
, Bool -> [(Reg, LexType)] -> [(Reg, LexType)]
forall m. Monoid m => Bool -> m -> m
unless ([[Char]] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null ([[Char]] -> Bool) -> [[Char]] -> Bool
forall a b. (a -> b) -> a -> b
$ CF -> [[Char]]
forall function. CFG function -> [[Char]]
cfgSymbols CF
cf) [ ((Reg -> Reg -> Reg) -> [Reg] -> Reg
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldl1 Reg -> Reg -> Reg
RAlt (([Char] -> Reg) -> [[Char]] -> [Reg]
forall a b. (a -> b) -> [a] -> [b]
map [Char] -> Reg
RSeqs (CF -> [[Char]]
forall function. CFG function -> [[Char]]
cfgSymbols CF
cf)), LexType
LexSymbols ) ]
, [ ( Reg
regInteger, [Char] -> LexType
LexToken [Char]
"Integer")
, ( Reg
regDouble , [Char] -> LexType
LexToken [Char]
"Double" )
, ( Reg
regString , [Char] -> LexType
LexToken [Char]
"String" )
, ( Reg
regChar , [Char] -> LexType
LexToken [Char]
"Char" )
]
]
(<&>) :: Reg -> Reg -> Reg
<&> :: Reg -> Reg -> Reg
(<&>) = Reg -> Reg -> Reg
RSeq
(<|>) :: Reg -> Reg -> Reg
<|> :: Reg -> Reg -> Reg
(<|>) = Reg -> Reg -> Reg
RAlt
regIdent :: Reg
regIdent :: Reg
regIdent = Reg
RLetter Reg -> Reg -> Reg
<&> Reg -> Reg
RStar (Reg
RLetter Reg -> Reg -> Reg
<|> Reg
RDigit Reg -> Reg -> Reg
<|> Char -> Reg
RChar Char
'_' Reg -> Reg -> Reg
<|> Char -> Reg
RChar Char
'\'')
regInteger :: Reg
regInteger :: Reg
regInteger = Reg -> Reg
RPlus Reg
RDigit
regString :: Reg
regString :: Reg
regString = Char -> Reg
RChar Char
'"'
Reg -> Reg -> Reg
<&> Reg -> Reg
RStar ((Reg
RAny Reg -> Reg -> Reg
`RMinus` [Char] -> Reg
RAlts [Char]
"\"\\")
Reg -> Reg -> Reg
<|> (Char -> Reg
RChar Char
'\\' Reg -> Reg -> Reg
<&> [Char] -> Reg
RAlts [Char]
"\"\\nt"))
Reg -> Reg -> Reg
<&> Char -> Reg
RChar Char
'"'
regChar :: Reg
regChar :: Reg
regChar = Char -> Reg
RChar Char
'\''
Reg -> Reg -> Reg
<&> (Reg -> Reg -> Reg
RMinus Reg
RAny ([Char] -> Reg
RAlts [Char]
"'\\") Reg -> Reg -> Reg
<|> (Char -> Reg
RChar Char
'\\' Reg -> Reg -> Reg
<&> [Char] -> Reg
RAlts [Char]
"'\\nt"))
Reg -> Reg -> Reg
<&> Char -> Reg
RChar Char
'\''
regDouble :: Reg
regDouble :: Reg
regDouble = Reg -> Reg
RPlus Reg
RDigit Reg -> Reg -> Reg
<&> Char -> Reg
RChar Char
'.' Reg -> Reg -> Reg
<&> Reg -> Reg
RPlus Reg
RDigit
Reg -> Reg -> Reg
<&> Reg -> Reg
ROpt (Char -> Reg
RChar Char
'e' Reg -> Reg -> Reg
<&> Reg -> Reg
ROpt (Char -> Reg
RChar Char
'-') Reg -> Reg -> Reg
<&> Reg -> Reg
RPlus Reg
RDigit)
mkRegSingleLineComment :: String -> Reg
[Char]
s = [Char] -> Reg
RSeqs [Char]
s Reg -> Reg -> Reg
<&> Reg -> Reg
RStar Reg
RAny Reg -> Reg -> Reg
<&> Char -> Reg
RChar Char
'\n'
mkRegMultilineComment :: String -> String -> Reg
[Char]
begin [Char]
end = Reg -> Reg
simpReg (Reg -> Reg) -> Reg -> Reg
forall a b. (a -> b) -> a -> b
$ Reg -> [Reg] -> Reg
joinSteps ([Char] -> Reg
RSeqs [Char]
begin) [Reg]
allSteps
where
joinSteps :: Reg -> [Reg] -> Reg
joinSteps :: Reg -> [Reg] -> Reg
joinSteps = (Reg -> Reg -> Reg) -> Reg -> [Reg] -> Reg
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((Reg -> Reg -> Reg) -> Reg -> Reg -> Reg
forall a b c. (a -> b -> c) -> b -> a -> c
flip Reg -> Reg -> Reg
RSeq)
allSteps :: [Reg]
allSteps :: [Reg]
allSteps = ([Reg], [Char]) -> [Reg]
forall a b. (a, b) -> a
fst (([Reg], [Char]) -> [Reg]) -> ([Reg], [Char]) -> [Reg]
forall a b. (a -> b) -> a -> b
$ (([Reg], [Char]) -> Char -> ([Reg], [Char]))
-> ([Reg], [Char]) -> [Char] -> ([Reg], [Char])
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl ([Reg], [Char]) -> Char -> ([Reg], [Char])
next ([],[]) [Char]
end
next :: ([Reg],[Char]) -> Char -> ([Reg],[Char])
next :: ([Reg], [Char]) -> Char -> ([Reg], [Char])
next
( [Reg]
steps
, [Char]
ys
) Char
x
= (Reg
step Reg -> [Reg] -> [Reg]
forall a. a -> [a] -> [a]
: [Reg]
steps, Char
xChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
ys)
where
step :: Reg
step :: Reg
step = Reg -> Reg
RStar Reg
idle Reg -> Reg -> Reg
`RSeq` Char -> Reg
RChar Char
x
idle :: Reg
idle :: Reg
idle = (Reg -> Reg -> Reg) -> Reg -> [Reg] -> Reg
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Reg -> Reg -> Reg
RAlt Reg
toStart ([Reg] -> Reg) -> [Reg] -> Reg
forall a b. (a -> b) -> a -> b
$ ((Char, Reg) -> Reg) -> [(Char, Reg)] -> [Reg]
forall a b. (a -> b) -> [a] -> [b]
map (Char, Reg) -> Reg
forall a b. (a, b) -> b
snd [(Char, Reg)]
possibilities
where
possibilities :: [(Char,Reg)]
possibilities :: [(Char, Reg)]
possibilities = ([(Char, Reg)] -> (Char, Bool, [Reg]) -> [(Char, Reg)])
-> [(Char, Reg)] -> [(Char, Bool, [Reg])] -> [(Char, Reg)]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl [(Char, Reg)] -> (Char, Bool, [Reg]) -> [(Char, Reg)]
addPoss [] ([Char] -> [Bool] -> [[Reg]] -> [(Char, Bool, [Reg])]
forall a b c. [a] -> [b] -> [c] -> [(a, b, c)]
zip3 [Char]
ys [Bool]
conds ([[Reg]] -> [(Char, Bool, [Reg])])
-> [[Reg]] -> [(Char, Bool, [Reg])]
forall a b. (a -> b) -> a -> b
$ [Reg] -> [[Reg]]
forall a. [a] -> [[a]]
inits [Reg]
steps)
toStart :: Reg
toStart :: Reg
toStart = Reg -> [Reg] -> Reg
joinSteps (Reg
RAny Reg -> Reg -> Reg
`RMinus` [Char] -> Reg
RAlts (Char
x Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: ((Char, Reg) -> Char) -> [(Char, Reg)] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map (Char, Reg) -> Char
forall a b. (a, b) -> a
fst [(Char, Reg)]
possibilities)) [Reg]
steps
addPoss :: [(Char,Reg)] -> (Char,Bool,[Reg]) -> [(Char,Reg)]
addPoss :: [(Char, Reg)] -> (Char, Bool, [Reg]) -> [(Char, Reg)]
addPoss
[(Char, Reg)]
poss
(Char
z, Bool
cond, [Reg]
steps)
| Bool
cond, Char
z Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Char]
exclude = (Char
z, Reg -> [Reg] -> Reg
joinSteps (Char -> Reg
RChar Char
z) [Reg]
steps) (Char, Reg) -> [(Char, Reg)] -> [(Char, Reg)]
forall a. a -> [a] -> [a]
: [(Char, Reg)]
poss
| Bool
otherwise = [(Char, Reg)]
poss
where
exclude :: [Char]
exclude :: [Char]
exclude = Char
x Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: ((Char, Reg) -> Char) -> [(Char, Reg)] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map (Char, Reg) -> Char
forall a b. (a, b) -> a
fst [(Char, Reg)]
poss
conds :: [Bool]
conds :: [Bool]
conds = ([Char] -> [Char] -> Bool) -> [[Char]] -> [[Char]] -> [Bool]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
(==) ([[Char]] -> [[Char]]
forall a. [a] -> [a]
tail ([[Char]] -> [[Char]]) -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [[Char]] -> [[Char]]
forall a. [a] -> [a]
reverse ([[Char]] -> [[Char]]) -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [Char] -> [[Char]]
forall a. [a] -> [[a]]
inits [Char]
ys) ([[Char]] -> [[Char]]
forall a. [a] -> [a]
tail ([[Char]] -> [[Char]]) -> [[Char]] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [Char] -> [[Char]]
forall a. [a] -> [[a]]
tails [Char]
ys)