{-# LANGUAGE ScopedTypeVariables, MonadComprehensions, DeriveGeneric
, DeriveAnyClass, FlexibleContexts, OverloadedStrings #-}
module Text.ANTLR.LL1
( recognize
, first, follow
, foldWhileEpsilon
, isLL1, parseTable
, predictiveParse
, removeEpsilons, removeEpsilons'
, leftFactor
, Prime(..), ParseTable, PTKey, PTValue
) where
import Text.ANTLR.Grammar
import Text.ANTLR.Pretty
import Text.ANTLR.Parser
import Text.ANTLR.Allstar.ATN
import Text.ANTLR.Set
( Set(..), singleton, fromList, union, empty, member, size, toList
, insert, delete, intersection, Hashable(..), Generic(..), maybeMin
)
import Data.List (maximumBy, isPrefixOf)
import Data.Ord (comparing)
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import qualified Debug.Trace as D
import System.IO.Unsafe (unsafePerformIO)
uPIO = unsafePerformIO
foldWhile :: (a -> b -> Bool) -> (a -> b -> b) -> b -> [a] -> b
foldWhile pred fncn = let
fW' b0 [] = b0
fW' b0 [a] = b0
fW' b0 (a:as)
| pred a b0 = fW' (fncn a b0) as
| otherwise = b0
in fW'
epsIn set _ = IconEps `member` set
foldWhileEpsilon fncn b0 [] = empty
foldWhileEpsilon fncn b0 [a] = fncn a b0
foldWhileEpsilon fncn b0 (a:as)
| epsIn a b0 = foldWhile epsIn fncn (fncn a b0) as
| otherwise = fncn a b0
first ::
forall sts nts dt. (Eq nts, Eq sts, Ord nts, Ord sts, Hashable nts, Hashable sts)
=> Grammar () nts sts dt -> [ProdElem nts sts] -> Set (Icon sts)
first g = let
firstOne :: Set (ProdElem nts sts) -> ProdElem nts sts -> Set (Icon sts)
firstOne _ t@(T x) = singleton $ Icon x
firstOne _ Eps = singleton IconEps
firstOne busy nts@(NT x)
| nts `member` busy = empty
| otherwise = foldr union empty
[ foldWhileEpsilon union empty
[ firstOne (insert nts busy) y
| y <- (\(Prod _ ss) -> ss) rhs
]
| Production _ rhs dt <- prodsFor g x ]
firstMany :: [Set (Icon sts)] -> Set (Icon sts)
firstMany [] = singleton IconEps
firstMany (ts:tss)
| IconEps `member` ts = ts `union` firstMany tss
| otherwise = ts
in firstMany . map (firstOne empty)
follow ::
forall nts sts dt. (Eq nts, Eq sts, Ord nts, Ord sts, Hashable nts, Hashable sts)
=> Grammar () nts sts dt -> nts -> Set (Icon sts)
follow g = let
follow' busy _B
| _B `member` busy = empty
| otherwise = let
busy' = insert _B busy
followProd :: nts -> ProdElems nts sts -> Set (Icon sts)
followProd _ [] = empty
followProd _A [s]
| s == NT _B = follow' busy' _A
| otherwise = empty
followProd _A (s:β)
| s /= NT _B = followProd _A β
| otherwise =
followProd _A β
`union`
(delete IconEps $ first g β)
`union`
(if IconEps `member` first g β
then follow' busy' _A
else empty
)
in (if _B == s0 g then singleton IconEOF else empty)
`union`
foldr union empty
[ followProd lhs_nts ss
| Production lhs_nts (Prod _ ss) dt <- ps g
]
in follow' empty
isLL1
:: (Eq nts, Eq sts, Ord nts, Ord sts, Hashable nts, Hashable sts)
=> Grammar () nts sts dt -> Bool
isLL1 g =
validGrammar g && and
[ (first g α `intersection` first g β == empty)
&& (not (IconEps `member` first g α)
|| ((first g α `intersection` follow g nts) == empty))
| nts <- toList $ ns g
, (Prod _ α) <- map getRHS $ prodsFor g nts
, (Prod _ β) <- map getRHS $ prodsFor g nts
, α /= β
]
type PTKey nts sts = (nts, Icon sts)
type PTValue nts sts = Set (ProdElems nts sts)
ambigVal
:: (Ord nts, Ord sts, Hashable nts, Hashable sts)
=> PTValue nts sts -> Bool
ambigVal = (1 >) . size
type ParseTable nts sts = M.Map (PTKey nts sts) (PTValue nts sts)
parseTable' ::
forall nts sts dt. (Eq nts, Eq sts, Ord nts, Ord sts, Eq nts, Hashable sts, Hashable nts)
=> (PTValue nts sts -> PTValue nts sts -> PTValue nts sts) -> Grammar () nts sts dt -> ParseTable nts sts
parseTable' fncn g = let
insertMe ::
(nts, Icon sts, ProdElems nts sts) -> (ParseTable nts sts -> ParseTable nts sts)
insertMe (_A, a, α) = M.insertWith fncn (_A, a) $ singleton α
in
foldr insertMe M.empty
[ (_A, Icon a, α)
| Production _A (Prod _ α) dt <- ps g
, Icon a <- toList $ first g α
]
`M.union`
foldr insertMe M.empty
[ (_A, Icon b, α)
| Production _A (Prod _ α) dt <- ps g
, IconEps `member` first g α
, Icon b <- toList $ follow g _A
]
`M.union`
foldr insertMe M.empty
[ (_A, IconEOF, α)
| Production _A (Prod _ α) dt <- ps g
, IconEps `member` first g α
, IconEOF `member` follow g _A
]
parseTable ::
forall nts sts dt. (Eq nts, Eq sts, Ord nts, Ord sts, Hashable sts, Hashable nts)
=> Grammar () nts sts dt -> ParseTable nts sts
parseTable = parseTable' union
data TreeNode ast nts sts =
Comp ast
| InComp nts (ProdElems nts sts) [ast] Int
deriving (Eq, Ord, Show)
instance (Prettify ast, Prettify nts, Prettify sts) => Prettify (TreeNode ast nts sts) where
prettify (Comp ast) = do
pStr "(Complete "
prettify ast
pStr ")"
prettify (InComp nts es asts i) = pParens $ do
pStr "InComp"
incrIndent 2
pLine ""
pStr "nts="
prettify nts
pLine ""
pStr "es="
prettify es
pLine ""
pStr "asts="
prettify asts
pLine ""
pStr "i="
prettify i
incrIndent (-2)
type StackTree ast nts ts = [TreeNode ast nts (StripEOF ts)]
isComp (Comp _) = True
isComp _ = False
isInComp = not . isComp
recognize ::
( Eq nts, Ref t, Eq (Sym t), HasEOF (Sym t)
, Ord nts, Ord t, Ord (Sym t), Ord (StripEOF (Sym t))
, Prettify nts, Prettify t, Prettify (Sym t), Prettify (StripEOF (Sym t))
, Hashable (Sym t), Hashable nts, Hashable (StripEOF (Sym t)))
=> Grammar () nts (StripEOF (Sym t)) dt -> [t] -> Bool
recognize g = (Nothing /=) . predictiveParse g (const ())
predictiveParse
:: forall nts t ast dt.
(Prettify nts, Prettify t, Prettify (Sym t), Prettify (StripEOF (Sym t)), Prettify ast
, Eq nts, Eq (Sym t)
, HasEOF (Sym t)
, Ord (Sym t), Ord nts, Ord t, Ord (StripEOF (Sym t))
, Hashable (Sym t), Hashable nts, Hashable (StripEOF (Sym t))
, Ref t)
=> Grammar () nts (StripEOF (Sym t)) dt -> Action ast nts t -> [t] -> Maybe ast
predictiveParse g act w0 = let
reduce :: StackTree ast nts (Sym t) -> StackTree ast nts (Sym t)
reduce stree@(InComp nts ss asts 0 : rst) = reduce $ Comp (act $ NonTE (nts, ss, reverse asts)) : rst
reduce stree@(InComp{}:_) = stree
reduce stree = let
cmps = map (\(Comp ast) -> ast) $ takeWhile isComp stree
(InComp nts ss asts i : rst) = dropWhile isComp stree
in case dropWhile isComp stree of
[] -> stree
(InComp nts ss asts i : rst) -> reduce (InComp nts ss (cmps ++ asts) (i - length cmps) : rst)
pushStack :: ProdElem nts t -> ProdElems nts (StripEOF (Sym t)) -> StackTree ast nts (Sym t) -> StackTree ast nts (Sym t)
pushStack (NT nts) ss stree = reduce $ InComp nts ss [] (length ss) : stree
pushStack (T t) _ (InComp nts ss asts i:stree) = reduce $ InComp nts ss (act (TermE t) : asts) (i - 1) : stree
pushStack Eps _ (InComp nts ss asts i:stree) = reduce $ InComp nts ss (act EpsE : asts) (i - 1) : stree
_M :: ParseTable nts (StripEOF (Sym t))
_M = parseTable g
parse' :: [t] -> ProdElems nts (StripEOF (Sym t)) -> StackTree ast nts (Sym t) -> Maybe (StackTree ast nts (Sym t))
parse' [] [] asts = Just asts
parse' [t] [] asts | isEOF $ getSymbol t = Just asts
parse' _ [] asts = Nothing
parse' (a:ws) (T x:xs) asts
| stripEOF (getSymbol a) == Just x = parse' ws xs $ pushStack (T a) [] asts
| otherwise = Nothing
parse' ws@(a:_) (NT _X:xs) asts = do
let sym = getSymbol a
sym' <- if isEOF sym then Just IconEOF else Icon <$> stripEOF (getSymbol a)
ss <- (_X, sym') `M.lookup` _M
ss' <- maybeMin ss
parse' ws (ss' ++ xs) (pushStack (NT _X) ss' asts)
parse' ws (Eps:xs) asts = parse' ws xs (pushStack Eps [] asts)
parse' ws xs asts = D.trace (T.unpack $ "Bug in parser: " `T.append` pshow (ws, xs, asts)) Nothing
in do asts <- parse' w0 [NT $ s0 g] []
case asts of
[Comp ast] -> Just ast
_ -> Nothing
removeEpsilons' ::
forall s nts t dt. (Eq t, Eq nts, Eq dt, Prettify t, Prettify nts, Prettify s, Ord t, Ord nts, Hashable t, Hashable nts)
=> [Production s nts t dt] -> [Production s nts t dt]
removeEpsilons' ps_init = let
epsNT :: Production s nts t dt -> [nts] -> [nts]
epsNT (Production nts (Prod _ []) dt) = (:) nts
epsNT (Production nts (Prod _ [Eps]) dt) = (:) nts
epsNT prod = id
epsNTs :: [nts]
epsNTs = foldr epsNT [] ps_init
replicateProd :: nts -> Production s nts t dt -> [Production s nts t dt]
replicateProd nts0 (Production nt1 (Prod sf es) dt) = let
rP :: ProdElems nts t -> ProdElems nts t -> [Production s nts t dt]
rP ys [] = [Production nt1 (Prod sf $ reverse ys) dt]
rP ys (x:xs)
| NT nts0 == x
= Production nt1 (Prod sf (reverse ys ++ xs)) dt
: Production nt1 (Prod sf (reverse ys ++ x:xs)) dt
: ( rP ys xs
++ rP (x:ys) xs)
| otherwise = rP (x:ys) xs
in rP [] es
orderNub ps p1
| p1 `elem` ps = ps
| otherwise = p1 : ps
ps' :: [Production s nts t dt]
ps' = case epsNTs of
[] -> ps_init
(nts:ntss) -> removeEpsilons' $
foldl orderNub []
[ p'
| p <- ps_init
, p' <- replicateProd nts p
, p' /= Production nts (Prod Pass []) (getDataType p')
, p' /= Production nts (Prod Pass [Eps]) (getDataType p')]
in ps'
removeEpsilons ::
forall s nts t dt. (Eq t, Eq nts, Eq dt, Prettify t, Prettify nts, Prettify s, Ord t, Ord nts, Ord dt, Hashable t, Hashable nts)
=> Grammar s nts t dt -> Grammar s nts t dt
removeEpsilons g = g { ps = removeEpsilons' $ ps g }
newtype Prime nts = Prime (nts, Int)
deriving (Eq, Ord, Generic, Hashable, Show)
instance (Prettify nts) => Prettify (Prime nts) where
prettify (Prime (nts,i)) = do
prettify nts
pStr $ T.replicate i (T.singleton '\'')
leftFactor ::
forall s nts t dt. (Eq t, Eq nts, Prettify t, Prettify nts, Ord t, Ord nts, Hashable nts)
=> Grammar s nts t dt -> Grammar s (Prime nts) t dt
leftFactor = let
primeify :: Grammar s nts t dt -> Grammar s (Prime nts) t dt
primeify g = G
{ ns = fromList $ [ Prime (nts, 0) | nts <- toList $ ns g ]
, ts = ts g
, ps = [ Production (Prime (nts, 0)) (Prod sf $ map prmPE ss) dt
| Production nts (Prod sf ss) dt <- ps g ]
, s0 = Prime (s0 g, 0)
, _πs = _πs g
, _μs = _μs g
}
prmPE :: ProdElem nts t -> ProdElem (Prime nts) t
prmPE (NT nts) = NT $ Prime (nts, 0)
prmPE (T x) = T x
prmPE Eps = Eps
lF :: Grammar s (Prime nts) t dt -> Grammar s (Prime nts) t dt
lF g = let
lcp :: ProdElems (Prime nts) t -> ProdElems (Prime nts) t -> ProdElems (Prime nts) t
lcp [] ys = []
lcp xs [] = []
lcp (x:xs) (y:ys)
| x == y = x : lcp xs ys
| otherwise = []
lcps :: [(Prime nts, ProdElems (Prime nts) t)]
lcps = [ (nts0, maximumBy (comparing length)
[ lcp xs ys
| Production _ (Prod _ xs) _ <- filter ((== nts0) . getLHS) (ps g)
, Production _ (Prod _ ys) _ <- filter ((== nts0) . getLHS) (ps g)
, xs /= ys
])
| nts0 <- toList $ ns g ]
incr :: Prime nts -> Prime nts
incr (Prime (nts, i)) = Prime (nts, i + 1)
ps' :: [(Prime nts, ProdElems (Prime nts) t)] -> [Production s (Prime nts) t dt]
ps' [] = ps g
ps' ((nts, xs):_) =
[ Production nts0 (Prod v rhs) dt
| Production nts0 (Prod v rhs) dt <- ps g
, nts0 /= nts
]
++
[ Production nts0 (Prod v rhs) dt
| Production nts0 (Prod v rhs) dt <- ps g
, nts == nts0 && not (xs `isPrefixOf` rhs)
]
++
[ Production (incr nts0) (Prod v (drop (length xs) rhs)) dt
| Production nts0 (Prod v rhs) dt <- ps g
, nts == nts0 && xs `isPrefixOf` rhs
]
++ [Production nts (Prod Pass $ xs ++ [NT $ incr nts]) Nothing]
in g { ps = ps' lcps }
in lF . primeify